Identifying the Authors’ National Variety of English in Social Media Texts

Research output: Chapter in Book/Report/Conference proceeding › Paper in conference proceeding

Bibtex

@comment{Conference paper in the RANLP 2017 proceedings. The citation key is
kept unchanged so that existing cite commands continue to resolve. Names are
stored in "Last, First" form so they are parsed unambiguously.}

@inproceedings{d6aee74bcf44485eaaaf9c7f9b1947d9,
  author    = {Simaki, Vasiliki and Simakis, Panagiotis and Paradis, Carita and Kerren, Andreas},
  title     = {Identifying the Authors{\textquoteright} National Variety of {English} in Social Media Texts},
  editor    = {Angelova, Galia and Bontcheva, Kalina and Mitkov, Ruslan and Nikolova, Ivelina and Temnikova, Irina},
  booktitle = {Recent Advances in Natural Language Processing},
  pages     = {671--678},
  publisher = {Association for Computational Linguistics},
  address   = {United States},
  year      = {2017},
  doi       = {10.26615/978-954-452-049-6_086},
  isbn      = {978-954-452-048-9},
  url       = {http://lml.bas.bg/ranlp2017/start.php},
  language  = {English},
  note      = {The 11th Biennial Conference on Recent Advances in Natural Language Processing (RANLP '17), 2--8 September 2017, Varna, Bulgaria, RANLP '17; Conference date: 02-09-2017 through 08-09-2017},
  abstract  = {In this paper, we present a study for the identification of authors{\textquoteright} national variety of English in texts from social media. In data from Facebook and Twitter, information about the author{\textquoteright}s social profile is annotated, and the national English variety (US, UK, AUS, CAN, NNS) that each author uses is attributed. We tested four feature types: formal linguistic features, POS features, lexicon-based features related to the different varieties, and data-based features from each English variety. We used various machine learning algorithms for the classification experiments, and we implemented a feature selection process. The classification accuracy achieved, when the 31 highest ranked features were used, was up to 77.32\%. The experimental results are evaluated, and the efficacy of the ranked features discussed.},
}