Další formáty:
BibTeX
LaTeX
RIS
% Conference paper, keyed by its numeric record ID. Editor names are joined
% with " and " (BibTeX's name separator) and the page range uses "--".
@inproceedings{1320593,
  author       = {Nevěřilová, Zuzana},
  title        = {Annotation of Multi-Word Expressions in {Czech} Texts},
  booktitle    = {Ninth Workshop on Recent Advances in {Slavonic} Natural Language Processing},
  editor       = {Horák, Aleš and Rychlý, Pavel and Rambousek, Adam},
  publisher    = {Tribun EU},
  address      = {Brno},
  location     = {Brno},
  year         = {2015},
  pages        = {103--112},
  isbn         = {978-80-263-0974-1},
  url          = {https://nlp.fi.muni.cz/raslan/2015/paper02-Neverilova.pdf},
  keywords     = {multi-word expressions; corpus; orthographical variants},
  howpublished = {tištěná verze "print"},
  language     = {eng},
}
TY - JOUR ID - 1320593 AU - Nevěřilová, Zuzana PY - 2015 TI - Annotation of Multi-Word Expressions in Czech Texts PB - Tribun EU CY - Brno SN - 9788026309741 KW - multi-word expressions KW - corpus KW - orthographical variants UR - https://nlp.fi.muni.cz/raslan/2015/paper02-Neverilova.pdf N2 - Multi-word expressions (MWEs) are difficult to define and also difficult to annotate. Some of them cause serious errors in the traditional annotation pipeline tokenization - morphological analysis - morphological disambiguation. Many cases of incorrect annotation in Czech corpora are known. To narrow the research topic, we focus only on fixed MWEs – those with fixed word order and no elidable components. In this paper, we propose a corpus-based method that reveals fixed MWE candidates. From the web-based corpus of Czech, we extracted 25,091 expressions, 2,140 of them were identified as MWEs, 332 as probable MWEs, and 174 of them can be either MWEs or one single word. Our method is based on corpus data observation that indicates that people are unsure when writing a MWE whether it is one word, a word with dashes, or several words. The result is a list of MWE candidates and also an application that classifies the input as MWE, probable MWE, or non-MWE. ER -
NEVĚŘILOVÁ, Zuzana. Annotation of Multi-Word Expressions in Czech Texts. In Horák, Aleš; Rychlý, Pavel; Rambousek, Adam. \textit{Ninth Workshop on Recent Advances in Slavonic Natural Language Processing}. Brno: Tribun EU, 2015, s.~103--112. ISBN~978-80-263-0974-1.
|