Další formáty:
BibTeX
LaTeX
RIS
@comment{Conference paper record 1210701. The numeric record id is kept as
  the citation key so existing citations keep resolving. The page range uses
  the double-hyphen form, and the ISSN is the one printed in the formatted
  citation of this same record.}
@inproceedings{1210701,
  author       = {Rygl, Jan and Medveď, Marek},
  title        = {Style Markers Based on Stop-word List},
  booktitle    = {Eighth Workshop on Recent Advances in Slavonic Natural Language Processing},
  publisher    = {Tribun EU},
  address      = {Brno},
  location     = {Brno},
  year         = {2014},
  pages        = {85--89},
  issn         = {2336-4289},
  keywords     = {style marker; stop-word list; corpus},
  howpublished = {tištěná verze "print"},
  language     = {eng},
  url          = {https://nlp.fi.muni.cz/raslan/2014/8.pdf},
}
TY  - JOUR
ID  - 1210701
AU  - Rygl, Jan
AU  - Medveď, Marek
PY  - 2014
TI  - Style Markers Based on Stop-word List
PB  - Tribun EU
CY  - Brno
KW  - style marker
KW  - stop-word list
KW  - corpus
UR  - https://nlp.fi.muni.cz/raslan/2014/8.pdf
N2  - The analysis of author’s characteristic writing style and vocabulary has been used to uncover the identity of authors of documents by both manual linguistic approaches and automatic algorithmic methods. The revealing of the gender, name, or age can help to expose pedophiles in social networks, false product reviews on the Internet servers, or machine translations submitted as manually translated texts. These problems are predominantly solved by a combination of stylometry and machine learning techniques. Since the stylometry focuses on the author’s style, word n-grams cannot be used as a style marker. Stop words are not influenced by a topic of documents, therefore they can be used to create style markers. In this paper, we present a guidance on how to implement stop-word extraction and to include stop-words based style markers into a multilingual classification system based on the stylometry.
ER  - 
RYGL, Jan a Marek MEDVEĎ. Style Markers Based on Stop-word List. In \textit{Eighth Workshop on Recent Advances in Slavonic Natural Language Processing}. Brno: Tribun EU, 2014, s.~85--89. ISSN~2336-4289.
|