Other formats:
BibTeX
LaTeX
RIS
@comment{Conference paper in the DocEng 2010 proceedings. The doi field holds
  the bare identifier (no resolver prefix), the page range uses an en dash, and
  acronyms in the title are brace-protected so that styles which apply sentence
  casing do not lowercase them.}
@inproceedings{891688,
  author       = {Sojka, Petr and Hatlapatka, Radim},
  title        = {Document Engineering for a Digital Library: {PDF} recompression using {JBIG2} and other optimization of {PDF} documents},
  booktitle    = {Proceedings of DocEng 2010 conference},
  publisher    = {ACM},
  address      = {Manchester, UK},
  location     = {Manchester, UK},
  year         = {2010},
  pages        = {3--12},
  isbn         = {978-1-4503-0231-9},
  doi          = {10.1145/1860559.1860563},
  url          = {http://www.fi.muni.cz/usr/sojka/presentations/sojka-hatlapatka-doceng2010.pdf},
  language     = {eng},
  keywords     = {Authoring tools and systems; Categorization; Classification; Document presentation; Representations/Standards; Character recognition; Digital mathematical library; Digitisation workflow},
  howpublished = {elektronická verze "online"},
}
TY  - CONF
ID  - 891688
AU  - Sojka, Petr
AU  - Hatlapatka, Radim
PY  - 2010
TI  - Document Engineering for a Digital Library: PDF recompression using JBIG2 and other optimization of PDF documents
PB  - ACM
CY  - Manchester, UK
SN  - 9781450302319
KW  - Authoring tools and systems
KW  - Categorization
KW  - Classification
KW  - Document presentation
KW  - Representations/Standards
KW  - Character recognition
KW  - Digital mathematical library
KW  - Digitisation workflow
UR  - http://www.fi.muni.cz/usr/sojka/presentations/sojka-hatlapatka-doceng2010.pdf
L2  - http://dx.doi.org/10.1145/1860559.1860563
N2  - Several innovative document transformations and tools developed in the process of building the Digital Mathematical Library DML-CZ http://dml.cz are described. The main result is our new PDF re-compression tool, developed using an enhanced jbig2enc library. Together with pdfsizeopt.py by Péter Szabó, we have managed to decrease PDF storage size and transmission needs by 62%: using both programs we reduced the size of the original already compressed PDFs to 38%. We briefly describe workflow and tools developed for creating the digital library. The batch digital signature stamper, the document similarity metrics which uses four different methods, a [meta]data validation process and math OCR tools represent some of the main [by]products. Such document engineering, together with Google Scholar indexing optimization, have led to the success of serving digitized and born-digital scientific math documents to the public in DML-CZ, and are being employed also in The European Digital Mathematics Library, EuDML.
ER  - 
SOJKA, Petr and Radim HATLAPATKA. Document Engineering for a Digital Library: PDF recompression using JBIG2 and other optimization of PDF documents. Online. In \textit{Proceedings of DocEng 2010 conference}. Manchester, UK: ACM, 2010, p.~3--12. ISBN~978-1-4503-0231-9. Available from: https://dx.doi.org/10.1145/1860559.1860563.
|