Other formats:
BibTeX
LaTeX
RIS
@comment{Conference paper by Baisa and Suchomel (2012). Editors are joined with "and" so each one is parsed as a separate name; the page range uses "--".}
@inproceedings{982494,
  author       = {Baisa, Vít and Suchomel, Vít},
  title        = {Large Corpora for {Turkic} Languages and Unsupervised Morphological Analysis},
  booktitle    = {Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC'12})},
  editor       = {Demir, Seniz and Durgar El-Kahlout, Ilknur and Dogan, Mehmet Ugur},
  publisher    = {European Language Resources Association (ELRA)},
  address      = {Istanbul, Turkey},
  location     = {Istanbul, Turkey},
  year         = {2012},
  pages        = {28--32},
  isbn         = {978-2-9517408-7-7},
  url          = {http://www.lrec-conf.org/proceedings/lrec2012/workshops/02.Turkic%20Languages%20Proceedings.pdf},
  howpublished = {elektronická verze "online"},
  keywords     = {corpus; turkic languages; unsupervised morphological analysis},
  language     = {eng},
}
TY  - CONF
ID  - 982494
AU  - Baisa, Vít
AU  - Suchomel, Vít
PY  - 2012
TI  - Large Corpora for Turkic Languages and Unsupervised Morphological Analysis
PB  - European Language Resources Association (ELRA)
CY  - Istanbul, Turkey
SN  - 9782951740877
KW  - corpus
KW  - turkic languages
KW  - unsupervised morphological analysis
UR  - http://www.lrec-conf.org/proceedings/lrec2012/workshops/02.Turkic%20Languages%20Proceedings.pdf
N2  - In this article we describe six new web corpora for Turkish, Azerbaijani, Kazakh, Turkmen, Kyrgyz and Uzbek languages. The data for these corpora was automatically crawled from the web by SpiderLing. Only minimal knowledge of these languages was required to obtain the data in raw form. Corpora are tokenized only since morphological analyzers and disambiguators for these languages are not available (except for Turkish). Subsequent experiment with unsupervised morphological segmentation was carried out on the Turkish corpus. In this experiment we achieved encouraging results. We used data provided for MorphoChallenge competition for the purpose of evaluation.
ER  - 
BAISA, Vít and Vít SUCHOMEL. Large Corpora for Turkic Languages and Unsupervised Morphological Analysis. Online. In Seniz Demir, Ilknur Durgar El-Kahlout, Mehmet Ugur Dogan. \textit{Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12)}. Istanbul, Turkey: European Language Resources Association (ELRA), 2012, p.~28--32. ISBN~978-2-9517408-7-7.
|