[Pkg-javascript-commits] [pdf.js] 39/109: Make `XRef_indexObjects` more robust against bad PDF files (issue 5752)

David Prévot taffit at moszumanska.debian.org
Fri Sep 25 03:04:15 UTC 2015


This is an automated email from the git hooks/post-receive script.

taffit pushed a commit to branch master
in repository pdf.js.

commit 56a43a3181e4670c75ce977cea851a648e6c1fb5
Author: Jonas Jenwald <jonas.jenwald at gmail.com>
Date:   Fri Aug 21 16:57:08 2015 +0200

    Make `XRef_indexObjects` more robust against bad PDF files (issue 5752)
    
    This patch improves the detection of `xref` in files where it is followed by an arbitrary whitespace character (not just a line-break character).
    It also adds a check for missing whitespace, e.g. `1 0 obj<<`, by ending the token at `<`; this speeds up `readToken` for the PDF file in the referenced issue.
    Finally, the patch also replaces a bunch of magic numbers with suitably named constants.
    
    Fixes 5752.
    
    Also improves 6243, but that one still has remaining problems.
---
 src/core/obj.js              | 17 ++++++++++++-----
 test/pdfs/issue5752.pdf.link |  1 +
 test/test_manifest.json      |  9 +++++++++
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/src/core/obj.js b/src/core/obj.js
index a8bd5d3..161e8ca 100644
--- a/src/core/obj.js
+++ b/src/core/obj.js
@@ -1013,9 +1013,12 @@ var XRef = (function XRefClosure() {
     indexObjects: function XRef_indexObjects() {
       // Simple scan through the PDF content to find objects,
       // trailers and XRef streams.
+      var TAB = 0x9, LF = 0xA, CR = 0xD, SPACE = 0x20;
+      var PERCENT = 0x25, LT = 0x3C;
+
       function readToken(data, offset) {
         var token = '', ch = data[offset];
-        while (ch !== 13 && ch !== 10) {
+        while (ch !== LF && ch !== CR && ch !== LT) {
           if (++offset >= data.length) {
             break;
           }
@@ -1047,6 +1050,9 @@ var XRef = (function XRefClosure() {
       var endobjBytes = new Uint8Array([101, 110, 100, 111, 98, 106]);
       var xrefBytes = new Uint8Array([47, 88, 82, 101, 102]);
 
+      // Clear out any existing entries, since they may be bogus.
+      this.entries.length = 0;
+
       var stream = this.stream;
       stream.pos = 0;
       var buffer = stream.getBytes();
@@ -1054,23 +1060,24 @@ var XRef = (function XRefClosure() {
       var trailers = [], xrefStms = [];
       while (position < length) {
         var ch = buffer[position];
-        if (ch === 32 || ch === 9 || ch === 13 || ch === 10) {
+        if (ch === TAB || ch === LF || ch === CR || ch === SPACE) {
           ++position;
           continue;
         }
-        if (ch === 37) { // %-comment
+        if (ch === PERCENT) { // %-comment
           do {
             ++position;
             if (position >= length) {
               break;
             }
             ch = buffer[position];
-          } while (ch !== 13 && ch !== 10);
+          } while (ch !== LF && ch !== CR);
           continue;
         }
         var token = readToken(buffer, position);
         var m;
-        if (token === 'xref') {
+        if (token.indexOf('xref') === 0 &&
+            (token.length === 4 || /\s/.test(token[4]))) {
           position += skipUntil(buffer, position, trailerBytes);
           trailers.push(position);
           position += skipUntil(buffer, position, startxrefBytes);
diff --git a/test/pdfs/issue5752.pdf.link b/test/pdfs/issue5752.pdf.link
new file mode 100644
index 0000000..0af8c7b
--- /dev/null
+++ b/test/pdfs/issue5752.pdf.link
@@ -0,0 +1 @@
+http://web.archive.org/web/20150821144004/http://222.247.54.152/Fulltext/qkyxlcyjy200504007.pdf
diff --git a/test/test_manifest.json b/test/test_manifest.json
index 9b6a9e8..220074c 100644
--- a/test/test_manifest.json
+++ b/test/test_manifest.json
@@ -1071,6 +1071,15 @@
        "lastPage": 1,
        "type": "eq"
     },
+    {  "id": "issue5752",
+       "file": "pdfs/issue5752.pdf",
+       "md5": "aa20ad7cff71e9481c0cd623ddbff3b7",
+       "rounds": 1,
+       "link": true,
+       "firstPage": 1,
+       "lastPage": 1,
+       "type": "eq"
+    },
     {  "id": "issue2931",
        "file": "pdfs/issue2931.pdf",
        "md5": "ea40940eaf3541b312bda9329167da11",

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/pdf.js.git



More information about the Pkg-javascript-commits mailing list