Další formáty:
BibTeX
LaTeX
RIS
@comment{Conference paper published in 2023 in the SISAP proceedings (Springer, Cham). The doi field stores the bare DOI without a resolver prefix, and the editor list holds two separate people.}
@inproceedings{2304817,
  author       = {Míč, Vladimír and Sedmidubský, Jan and Zezula, Pavel},
  title        = {{CRANBERRY}: Memory-Effective Search in {100M} High-Dimensional {CLIP} Vectors},
  booktitle    = {16th International Conference on Similarity Search and Applications ({SISAP})},
  editor       = {Pedreira, Oscar and Estivill-Castro, Vladimir},
  publisher    = {Springer},
  address      = {Cham},
  year         = {2023},
  pages        = {300--308},
  isbn         = {978-3-031-46993-0},
  doi          = {10.1007/978-3-031-46994-7_26},
  url          = {https://link.springer.com/chapter/10.1007/978-3-031-46994-7_26},
  language     = {eng},
  keywords     = {approximate similarity searching, high-dimensional data, indexing, filtering, LAION dataset},
  howpublished = {elektronická verze "online"},
}
TY  - CONF
ID  - 2304817
AU  - Míč, Vladimír
AU  - Sedmidubský, Jan
AU  - Zezula, Pavel
PY  - 2023
TI  - CRANBERRY: Memory-Effective Search in 100M High-Dimensional CLIP Vectors
PB  - Springer
CY  - Cham
SN  - 9783031469930
KW  - approximate similarity searching
KW  - high-dimensional data
KW  - indexing
KW  - filtering
KW  - LAION dataset
UR  - https://link.springer.com/chapter/10.1007/978-3-031-46994-7_26
N2  - Recent advances in cross-modal multimedia data analysis necessarily require efficient similarity search on the scales of hundreds of millions of high-dimensional vectors. We address this task by proposing the CRANBERRY algorithm that specifically combines and tunes several existing similarity search strategies. In particular, the algorithm: (1) employs the Voronoi partitioning to obtain a query-relevant candidate set in constant time, (2) applies filtering techniques to prune the obtained candidates significantly, and (3) re-rank the retained candidate vectors with respect to the query vector. Applied to the dataset of 100 million 768-dimensional vectors, the algorithm evaluates 10NN queries with 90% recall and query latency of 1.2s on average, all with a throughput of 15 queries per second on a server with 56 core-CPU, and 4.7q/sec. on a PC.
ER  - 
MÍČ, Vladimír, Jan SEDMIDUBSKÝ a Pavel ZEZULA. CRANBERRY: Memory-Effective Search in 100M High-Dimensional CLIP Vectors. Online. In Oscar Pedreira, Vladimir Estivill-Castro. \textit{16th International Conference on Similarity Search and Applications (SISAP)}. Cham: Springer, 2023, s.~300--308. ISBN~978-3-031-46993-0. Dostupné z: https://dx.doi.org/10.1007/978-3-031-46994-7\_{}26.
|