[Pkg-javascript-commits] [pdf.js] 74/139: Refactor searching for the SOI marker of inline JPEG image streams

David Prévot taffit at moszumanska.debian.org
Fri Jan 9 21:18:28 UTC 2015


This is an automated email from the git hooks/post-receive script.

taffit pushed a commit to branch master
in repository pdf.js.

commit 3e1b5216ac2722e6b834b1c5dc622ff2e12c8faa
Author: Jonas Jenwald <jonas.jenwald at gmail.com>
Date:   Thu Nov 20 21:39:35 2014 +0100

    Refactor searching for the SOI marker of inline JPEG image streams
---
 src/core/parser.js | 16 ----------------
 src/core/stream.js | 11 +++++++++--
 2 files changed, 9 insertions(+), 18 deletions(-)

diff --git a/src/core/parser.js b/src/core/parser.js
index 73eb756..4af57fb 100644
--- a/src/core/parser.js
+++ b/src/core/parser.js
@@ -372,22 +372,6 @@ var Parser = (function ParserClosure() {
           return new LZWStream(stream, maybeLength, earlyChange);
         }
         if (name === 'DCTDecode' || name === 'DCT') {
-          // According to the specification: for inline images, the ID operator
-          // shall be followed by a single whitespace character (unless it uses
-          // ASCII85Decode or ASCIIHexDecode filters).
-          // In practice this only seems to be followed for inline JPEG images,
-          // and generally ignoring the first byte of the stream if it is a
-          // whitespace char can even *cause* issues (e.g. in the CCITTFaxDecode
-          // filters used in issue2984.pdf).
-          // Hence when the first byte of the stream of an inline JPEG image is
-          // a whitespace character, we thus simply skip over it.
-          if (isCmd(this.buf1, 'ID')) {
-            var firstByte = stream.peekByte();
-            if (firstByte === 0x0A /* LF */ || firstByte === 0x0D /* CR */ ||
-                firstByte === 0x20 /* SPACE */) {
-              stream.skip();
-            }
-          }
           xrefStreamStats[StreamType.DCT] = true;
           return new JpegStream(stream, maybeLength, stream.dict, this.xref);
         }
diff --git a/src/core/stream.js b/src/core/stream.js
index a842956..4145b41 100644
--- a/src/core/stream.js
+++ b/src/core/stream.js
@@ -857,8 +857,15 @@ var PredictorStream = (function PredictorStreamClosure() {
  */
 var JpegStream = (function JpegStreamClosure() {
   function JpegStream(stream, maybeLength, dict, xref) {
-    // TODO: per poppler, some images may have 'junk' before that
-    // need to be removed
+    // Some images may contain 'junk' before the SOI (start-of-image) marker.
+    // Note: this seems to mainly affect inline images.
+    var ch;
+    while ((ch = stream.getByte()) !== -1) {
+      if (ch === 0xFF) { // Find the first byte of the SOI marker (0xFFD8).
+        stream.skip(-1); // Reset the stream position to the SOI.
+        break;
+      }
+    }
     this.stream = stream;
     this.maybeLength = maybeLength;
     this.dict = dict;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/pdf.js.git



More information about the Pkg-javascript-commits mailing list