[Pkg-javascript-commits] [pdf.js] 86/139: Improve fingerprinting of documents

David Prévot taffit at moszumanska.debian.org
Fri Jan 9 21:18:30 UTC 2015


This is an automated email from the git hooks/post-receive script.

taffit pushed a commit to branch master
in repository pdf.js.

commit aa53319c875205ffe7cd8b41a0280e19445ca5fb
Author: Jordan Thoms <jordan at thoms.net.nz>
Date:   Sun Aug 3 01:19:55 2014 +1200

    Improve fingerprinting of documents
    
    Fixes two issues:
    - #4456: The first 100 bytes are often not unique, as they can be
    filled with standard PDF headers - so we use the first 1024 bytes
    instead. (This may be overkill)
    - Some documents we encountered have an invalid ID in the xref
    trailer (all NUL bytes), which always came out as
    ‘0000000000000000’ - so we detect that and use the MD5 instead.
---
 src/core/core.js | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/core/core.js b/src/core/core.js
index d61aedc..14ceddd 100644
--- a/src/core/core.js
+++ b/src/core/core.js
@@ -280,6 +280,10 @@ var Page = (function PageClosure() {
  * `PDFDocument` objects on the main thread created.
  */
 var PDFDocument = (function PDFDocumentClosure() {
+  var FINGERPRINT_FIRST_BYTES = 1024;
+
+  var EMPTY_FINGERPRINT = '\x00\x00\x00\x00\x00\x00\x00' +
+    '\x00\x00\x00\x00\x00\x00\x00\x00\x00';
   function PDFDocument(pdfManager, arg, password) {
     if (isStream(arg)) {
       init.call(this, pdfManager, arg, password);
@@ -493,14 +497,21 @@ var PDFDocument = (function PDFDocumentClosure() {
     get fingerprint() {
       var xref = this.xref, hash, fileID = '';
 
-      if (xref.trailer.has('ID')) {
+      if (xref.trailer.has('ID') &&
+          xref.trailer.get('ID')[0] !== EMPTY_FINGERPRINT) {
         hash = stringToBytes(xref.trailer.get('ID')[0]);
       } else {
-        hash = calculateMD5(this.stream.bytes.subarray(0, 100), 0, 100);
+        if (this.stream.ensureRange) {
+          this.stream.ensureRange(0,
+            Math.min(FINGERPRINT_FIRST_BYTES, this.stream.end));
+        }
+        hash = calculateMD5(this.stream.bytes.subarray(0,
+          FINGERPRINT_FIRST_BYTES), 0, FINGERPRINT_FIRST_BYTES);
       }
 
       for (var i = 0, n = hash.length; i < n; i++) {
-        fileID += hash[i].toString(16);
+        var hex = hash[i].toString(16);
+        fileID += hex.length === 1 ? '0' + hex : hex;
       }
 
       return shadow(this, 'fingerprint', fileID);

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/pdf.js.git



More information about the Pkg-javascript-commits mailing list