Další formáty:
BibTeX
LaTeX
RIS
@comment{Conference paper in the Znalosti 2005 proceedings. Both address and location are kept so the entry works with classic BibTeX and with biblatex.}
@inproceedings{568468,
  author    = {Hudík, Tomáš and Žižka, Jan},
  title     = {Effects of Selected Basic Algorithm Parameters and Data Features on Text Categorization by Support Vector Machines},
  booktitle = {Znalosti 2005, sborník příspěvků},
  edition   = {1. vyd},
  publisher = {VŠB--Technická univerzita Ostrava},
  address   = {Ostrava},
  location  = {Ostrava},
  year      = {2005},
  pages     = {210--217},
  isbn      = {80-248-0755-6},
  keywords  = {text categorization; support vector machines},
  language  = {eng},
}
TY - JOUR ID - 568468 AU - Hudík, Tomáš AU - Žižka, Jan PY - 2005 TI - Effects of Selected Basic Algorithm Parameters and Data Features on Text Categorization by Support Vector Machines PB - VŠB--Technická univerzita Ostrava CY - Ostrava SN - 8024807556 KW - text categorization KW - support vector machines N2 - This paper describes results acquired from testing influences of selected important parameters of Support Vector Machines (SVM) applied to text categorization. The main objective was to verify whether results obtained with standard, publicly accessible datasets (the traditional Reuters text documents and the 20Newsgroups) could be applied to real medical text documents from various Internet resources utilized by physicians. The research also focused on features such as document similarity, balance of categories, presence of common words (stop-words), and data volume. The results of experiments demonstrated that there could be typical problems with setting up parameters for some real data. Especially the medical documents provided worse outcomes because the real-data categories were not well balanced and the documents in different categories were mutually rather similar, i.e., overlapping classes. As a result, SVM could not always find sufficiently good separating hyperplanes as it mostly did for 'trouble-free' datasets like Reuters or 20Newsgroups. ER -
HUDÍK, Tomáš a Jan ŽIŽKA. Effects of Selected Basic Algorithm Parameters and Data Features on Text Categorization by Support Vector Machines. In \textit{Znalosti 2005, sborník příspěvků}. 1. vyd. Ostrava: VŠB--Technická univerzita Ostrava, 2005, s.~210--217. ISBN~80-248-0755-6.
|