Další formáty:
BibTeX
LaTeX
RIS
% Workshop paper. Multiple editors are separated with " and " (BibTeX does not
% accept comma-separated name lists); names use the "Last, First" form.
@inproceedings{914342,
  author    = {Řehůřek, Radim},
  title     = {Fast and Faster: A Comparison of Two Streamed Matrix Decomposition Algorithms},
  booktitle = {{NIPS} 2010 workshop on Low-rank Methods for Large-scale Machine Learning},
  editor    = {Mahoney, Michael and Talwalkar, Ameet and Mohri, Mehryar and Gretton, Arthur},
  year      = {2010},
  url       = {http://www.eecs.berkeley.edu/~ameet/low-rank-nips10/},
  keywords  = {svd, lda, lsi},
  language  = {eng},
}
TY  - CONF
ID  - 914342
AU  - Řehůřek, Radim
PY  - 2010
TI  - Fast and Faster: A Comparison of Two Streamed Matrix Decomposition Algorithms
T2  - NIPS 2010 workshop on Low-rank Methods for Large-scale Machine Learning
KW  - svd
KW  - lda
KW  - lsi
UR  - http://www.eecs.berkeley.edu/~ameet/low-rank-nips10/
N2  - With the explosion of the size of digital datasets, the limiting factor for decomposition algorithms is the number of passes over the input, as the input is often stored out-of-core or even off-site. Moreover, we're only interested in algorithms that operate in constant memory w.r.t. the input size, so that arbitrarily large input can be processed. In this paper, we present a practical comparison of two such algorithms: a distributed method that operates in a single pass over the input vs. a streamed two-pass stochastic algorithm. The experiments track the effect of distributed computing, oversampling and memory trade-offs on the accuracy and performance of the two algorithms. To ensure meaningful results, we choose the input to be a real dataset, namely the whole of the English Wikipedia, in the application settings of Latent Semantic Analysis.
ER  - 
ŘEHŮŘEK, Radim. Fast and Faster: A Comparison of Two Streamed Matrix Decomposition Algorithms. In Michael Mahoney, Ameet Talwalkar, Mehryar Mohri, Arthur Gretton. \textit{NIPS 2010 workshop on Low-rank Methods for Large-scale Machine Learning}. 2010, 7 s.
|