[Pkg-javascript-commits] [pdf.js] 74/157: Improve robustness of stream parser (invalid length)

David Prévot taffit at moszumanska.debian.org
Tue Aug 11 06:46:38 UTC 2015


This is an automated email from the git hooks/post-receive script.

taffit pushed a commit to branch master
in repository pdf.js.

commit e211c25f069f1eba005a9c2f37c5eea13107c256
Author: Rob Wu <rob at robwu.nl>
Date:   Sat Jul 11 12:15:43 2015 +0200

    Improve robustness of stream parser (invalid length)
    
    When the parser finds a stream, it retrieves the Length from the stream
    dictionary and advances the lexer to the offset as specified in Length.
    If this Length is incorrect, the lexer could end up anywhere.
    
    When the lexer gets into an invalid state, it can throw errors. For
    example, in issue 6108, the lexer ends up inside the stream data. This
    stream uses the ASCIIHexDecode filter, so its data consists entirely of
    ASCII characters, and the lexer interprets it as a command token. Tokens
    cannot be longer than 127 bytes, so eventually 128 bytes are consumed
    and the lexer throws a "Command token too long" error.
    
    Another possible error is "Illegal character: 41" when the lexer happens
    to end up at a ')' due to the length mismatch.
    
    These problems are solved by catching lexer errors and recovering the
    parser via the existing stream length detection branch.
---
 src/core/parser.js      | 17 ++++++++++++++---
 test/pdfs/.gitignore    |  1 +
 test/pdfs/issue6108.pdf | 28 ++++++++++++++++++++++++++++
 test/test_manifest.json |  6 ++++++
 4 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/src/core/parser.js b/src/core/parser.js
index 3fc9034..6c98b08 100644
--- a/src/core/parser.js
+++ b/src/core/parser.js
@@ -53,6 +53,16 @@ var Parser = (function ParserClosure() {
         this.buf2 = this.lexer.getObj();
       }
     },
+    tryShift: function Parser_tryShift() {
+      try {
+        this.shift();
+        return true;
+      } catch (e) {
+        // Upon failure, the caller should reset this.lexer.pos to a known good
+        // state and call this.shift() twice to reset the buffers.
+        return false;
+      }
+    },
     getObj: function Parser_getObj(cipherTransform) {
       var buf1 = this.buf1;
       this.shift();
@@ -426,9 +436,10 @@ var Parser = (function ParserClosure() {
       stream.pos = pos + length;
       lexer.nextChar();
 
-      this.shift(); // '>>'
-      this.shift(); // 'stream'
-      if (!isCmd(this.buf1, 'endstream')) {
+      // Shift '>>' and check whether the new object marks the end of the stream
+      if (this.tryShift() && isCmd(this.buf2, 'endstream')) {
+        this.shift(); // 'stream'
+      } else {
         // bad stream length, scanning for endstream
         stream.pos = pos;
         var SCAN_BLOCK_SIZE = 2048;
diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore
index d428da4..a79dc29 100644
--- a/test/pdfs/.gitignore
+++ b/test/pdfs/.gitignore
@@ -146,3 +146,4 @@
 !issue6068.pdf
 !issue6081.pdf
 !issue6069.pdf
+!issue6108.pdf
diff --git a/test/pdfs/issue6108.pdf b/test/pdfs/issue6108.pdf
new file mode 100644
index 0000000..ccf374c
--- /dev/null
+++ b/test/pdfs/issue6108.pdf
@@ -0,0 +1,28 @@
+%PDF-1.0
+1 0 obj
+<</Type/Catalog/Pages 2 0 R>>
+endobj
+2 0 obj
+<</Type/Pages/Count 1/Kids[3 0 R]/MediaBox [0 0 400 50]>>
+endobj
+3 0 obj
+<</Type/Page/Parent 2 0 R/Resources<</Font<</F1<</Type/Font/Subtype/Type1/BaseFont/Arial>>>>>>/Contents 4 0 R>>
+endobj
+4 0 obj
+<</Length 9/Filter/ASCIIHexDecode>>
+stream
+42542F4631203132205466203230203230205464202841534349494865784465636F64652066696C7465722C20736D616C6C204C656E6774682C2073697A652064696666206973206D6F7265207468616E203133352062797465732920546A204554>
+endstream
+endobj
+xref
+0 5
+0000000000 65535 f 
+0000000009 00000 n 
+0000000054 00000 n 
+0000000127 00000 n 
+0000000254 00000 n 
+trailer
+<</Root 1 0 R/Size 5>>
+startxref
+519
+%%EOF
diff --git a/test/test_manifest.json b/test/test_manifest.json
index c303e9f..43d01e4 100644
--- a/test/test_manifest.json
+++ b/test/test_manifest.json
@@ -2284,5 +2284,11 @@
        "md5": "d0ad8871f4116bca8e39513ffa8b7d8e",
        "rounds": 1,
        "type": "load"
+    },
+    {  "id": "issue6108",
+       "file": "pdfs/issue6108.pdf",
+       "md5": "8961cb55149495989a80bf0487e0f076",
+       "rounds": 1,
+       "type": "load"
     }
 ]

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/pdf.js.git



More information about the Pkg-javascript-commits mailing list