[Pkg-javascript-commits] [pdf.js] 89/139: Fix searching for end of inline (EI) images with ASCII85Decode filters (bug 1077808)

David Prévot taffit at moszumanska.debian.org
Fri Jan 9 21:18:31 UTC 2015


This is an automated email from the git hooks/post-receive script.

taffit pushed a commit to branch master
in repository pdf.js.

commit 184880a751d0011f3df66b45d53bdd48e62aa696
Author: Jonas Jenwald <jonas.jenwald at gmail.com>
Date:   Sun Oct 5 00:12:47 2014 +0200

    Fix searching for end of inline (EI) images with ASCII85Decode filters (bug 1077808)
    
    This patch changes searching for the end of inline image streams to rely on the EOD marker for the filters: ASCII85Decode and ASCIIHexDecode.
---
 src/core/parser.js            | 147 ++++++++++++++++++++++++++++++++----------
 test/pdfs/bug1077808.pdf.link |   1 +
 test/test_manifest.json       |   9 +++
 3 files changed, 124 insertions(+), 33 deletions(-)

diff --git a/src/core/parser.js b/src/core/parser.js
index b606850..212d1d0 100644
--- a/src/core/parser.js
+++ b/src/core/parser.js
@@ -126,31 +126,14 @@ var Parser = (function ParserClosure() {
       // simple object
       return buf1;
     },
-    makeInlineImage: function Parser_makeInlineImage(cipherTransform) {
-      var lexer = this.lexer;
-      var stream = lexer.stream;
-
-      // parse dictionary
-      var dict = new Dict(null);
-      while (!isCmd(this.buf1, 'ID') && !isEOF(this.buf1)) {
-        if (!isName(this.buf1)) {
-          error('Dictionary key must be a name object');
-        }
-
-        var key = this.buf1.name;
-        this.shift();
-        if (isEOF(this.buf1)) {
-          break;
-        }
-        dict.set(key, this.getObj(cipherTransform));
-      }
-
-      // parse image stream
-      var startPos = stream.pos;
-
-      // searching for the /EI\s/
-      var state = 0, ch, i, ii;
-      var E = 0x45, I = 0x49, SPACE = 0x20, NL = 0xA, CR = 0xD;
+    /**
+     * Find the end of the stream by searching for the /EI\s/.
+     * @returns {number} The inline stream length.
+     */
+    findDefaultInlineStreamEnd:
+        function Parser_findDefaultInlineStreamEnd(stream) {
+      var E = 0x45, I = 0x49, SPACE = 0x20, LF = 0xA, CR = 0xD;
+      var startPos = stream.pos, state = 0, ch, i, n, followingBytes;
       while ((ch = stream.getByte()) !== -1) {
         if (state === 0) {
           state = (ch === E) ? 1 : 0;
@@ -158,13 +141,13 @@ var Parser = (function ParserClosure() {
           state = (ch === I) ? 2 : 0;
         } else {
           assert(state === 2);
-          if (ch === SPACE || ch === NL || ch === CR) {
+          if (ch === SPACE || ch === LF || ch === CR) {
             // Let's check the next five bytes are ASCII... just be sure.
-            var n = 5;
-            var followingBytes = stream.peekBytes(n);
+            n = 5;
+            followingBytes = stream.peekBytes(n);
             for (i = 0; i < n; i++) {
               ch = followingBytes[i];
-              if (ch !== NL && ch !== CR && (ch < SPACE || ch > 0x7F)) {
+              if (ch !== LF && ch !== CR && (ch < SPACE || ch > 0x7F)) {
                 // Not a LF, CR, SPACE or any visible ASCII character, i.e.
                 // it's binary stuff. Resetting the state.
                 state = 0;
@@ -172,18 +155,116 @@ var Parser = (function ParserClosure() {
               }
             }
             if (state === 2) {
-              break;  // finished!
+              break;  // Finished!
             }
           } else {
             state = 0;
           }
         }
       }
+      return ((stream.pos - 4) - startPos);
+    },
+    /**
+     * Find the EOD (end-of-data) marker '~>' (i.e. TILDE + GT) of the stream.
+     * @returns {number} The inline stream length.
+     */
+    findASCII85DecodeInlineStreamEnd:
+        function Parser_findASCII85DecodeInlineStreamEnd(stream) {
+      var TILDE = 0x7E, GT = 0x3E;
+      var startPos = stream.pos, ch, length;
+      while ((ch = stream.getByte()) !== -1) {
+        if (ch === TILDE && stream.peekByte() === GT) {
+          stream.skip();
+          break;
+        }
+      }
+      length = stream.pos - startPos;
+      if (ch === -1) {
+        warn('Inline ASCII85Decode image stream: ' +
+             'EOD marker not found, searching for /EI/ instead.');
+        stream.skip(-length); // Reset the stream position.
+        return this.findDefaultInlineStreamEnd(stream);
+      }
+      this.inlineStreamSkipEI(stream);
+      return length;
+    },
+    /**
+     * Find the EOD (end-of-data) marker '>' (i.e. GT) of the stream.
+     * @returns {number} The inline stream length.
+     */
+    findASCIIHexDecodeInlineStreamEnd:
+        function Parser_findASCIIHexDecodeInlineStreamEnd(stream) {
+      var GT = 0x3E;
+      var startPos = stream.pos, ch, length;
+      while ((ch = stream.getByte()) !== -1) {
+        if (ch === GT) {
+          break;
+        }
+      }
+      length = stream.pos - startPos;
+      if (ch === -1) {
+        warn('Inline ASCIIHexDecode image stream: ' +
+             'EOD marker not found, searching for /EI/ instead.');
+        stream.skip(-length); // Reset the stream position.
+        return this.findDefaultInlineStreamEnd(stream);
+      }
+      this.inlineStreamSkipEI(stream);
+      return length;
+    },
+    /**
+     * Skip over the /EI/ for streams where we search for an EOD marker.
+     */
+    inlineStreamSkipEI: function Parser_inlineStreamSkipEI(stream) {
+      var E = 0x45, I = 0x49;
+      var state = 0, ch;
+      while ((ch = stream.getByte()) !== -1) {
+        if (state === 0) {
+          state = (ch === E) ? 1 : 0;
+        } else if (state === 1) {
+          state = (ch === I) ? 2 : 0;
+        } else if (state === 2) {
+          break;
+        }
+      }
+    },
+    makeInlineImage: function Parser_makeInlineImage(cipherTransform) {
+      var lexer = this.lexer;
+      var stream = lexer.stream;
 
-      var length = (stream.pos - 4) - startPos;
+      // Parse dictionary.
+      var dict = new Dict(null);
+      while (!isCmd(this.buf1, 'ID') && !isEOF(this.buf1)) {
+        if (!isName(this.buf1)) {
+          error('Dictionary key must be a name object');
+        }
+        var key = this.buf1.name;
+        this.shift();
+        if (isEOF(this.buf1)) {
+          break;
+        }
+        dict.set(key, this.getObj(cipherTransform));
+      }
+
+      // Extract the name of the first (i.e. the current) image filter.
+      var filter = this.fetchIfRef(dict.get('Filter', 'F')), filterName;
+      if (isName(filter)) {
+        filterName = filter.name;
+      } else if (isArray(filter) && isName(filter[0])) {
+        filterName = filter[0].name;
+      }
+
+      // Parse image stream.
+      var startPos = stream.pos, length, i, ii;
+      if (filterName === 'ASCII85Decode' || filterName === 'A85') {
+        length = this.findASCII85DecodeInlineStreamEnd(stream);
+      } else if (filterName === 'ASCIIHexDecode' || filterName === 'AHx') {
+        length = this.findASCIIHexDecodeInlineStreamEnd(stream);
+      } else {
+        length = this.findDefaultInlineStreamEnd(stream);
+      }
       var imageStream = stream.makeSubStream(startPos, length, dict);
 
-      // cache all images below the MAX_LENGTH_TO_CACHE threshold by their
+      // Cache all images below the MAX_LENGTH_TO_CACHE threshold by their
       // adler32 checksum.
       var adler32;
       if (length < MAX_LENGTH_TO_CACHE) {
@@ -193,7 +274,7 @@ var Parser = (function ParserClosure() {
         var a = 1;
         var b = 0;
         for (i = 0, ii = imageBytes.length; i < ii; ++i) {
-          // no modulo required in the loop if imageBytes.length < 5552
+          // No modulo required in the loop if imageBytes.length < 5552.
           a += imageBytes[i] & 0xff;
           b += a;
         }
diff --git a/test/pdfs/bug1077808.pdf.link b/test/pdfs/bug1077808.pdf.link
new file mode 100644
index 0000000..68d1bde
--- /dev/null
+++ b/test/pdfs/bug1077808.pdf.link
@@ -0,0 +1 @@
+https://bug1077808.bugzilla.mozilla.org/attachment.cgi?id=8499998
diff --git a/test/test_manifest.json b/test/test_manifest.json
index 3347aaf..e19ee5a 100644
--- a/test/test_manifest.json
+++ b/test/test_manifest.json
@@ -1524,6 +1524,15 @@
       "type": "eq",
       "about": "Type3 font with negative HScale and font size"
     },
+    {  "id": "bug1077808",
+       "file": "pdfs/bug1077808.pdf",
+       "md5": "4a4bfc27e3fafe2f74e7a4a4cd04b8dc",
+       "rounds": 1,
+       "lastPage": 1,
+       "link": true,
+       "type": "eq",
+       "about": "Inline image with ASCII85Decode filter."
+    },
     {  "id": "bug1108753",
        "file": "pdfs/bug1108753.pdf",
        "md5": "a7aaf92d55b4602afb0ca3d75198b56b",

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/pdf.js.git



More information about the Pkg-javascript-commits mailing list