[Pkg-javascript-commits] [pdf.js] 316/414: Parse Type1 font files to determine the various `Length{n}` properties, instead of trusting the PDF file (issue 5686, issue 3928)

David Prévot taffit at moszumanska.debian.org
Tue Jun 28 17:12:35 UTC 2016


This is an automated email from the git hooks/post-receive script.

taffit pushed a commit to branch master
in repository pdf.js.

commit 05cf709f8ef30fbb939f1c1cced3093688988c87
Author: Jonas Jenwald <jonas.jenwald at gmail.com>
Date:   Sat Mar 5 22:32:54 2016 +0100

    Parse Type1 font files to determine the various `Length{n}` properties, instead of trusting the PDF file (issue 5686, issue 3928)
    
    Fixes 5686.
    Fixes 3928.
---
 src/core/evaluator.js   |   2 +
 src/core/fonts.js       | 125 ++++++++++++++++++++++++++++++++++++++++++++++--
 test/pdfs/.gitignore    |   2 +
 test/pdfs/issue3928.pdf | Bin 0 -> 31272 bytes
 test/pdfs/issue5686.pdf | Bin 0 -> 23718 bytes
 test/test_manifest.json |  16 +++++++
 6 files changed, 141 insertions(+), 4 deletions(-)

diff --git a/src/core/evaluator.js b/src/core/evaluator.js
index d080ac6..aa15c5f 100644
--- a/src/core/evaluator.js
+++ b/src/core/evaluator.js
@@ -2197,6 +2197,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
           }
           var length1 = fontFile.dict.get('Length1');
           var length2 = fontFile.dict.get('Length2');
+          var length3 = fontFile.dict.get('Length3');
         }
       }
 
@@ -2207,6 +2208,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
         file: fontFile,
         length1: length1,
         length2: length2,
+        length3: length3,
         loadedName: baseDict.loadedName,
         composite: composite,
         wideChars: composite,
diff --git a/src/core/fonts.js b/src/core/fonts.js
index 7fb4060..4fb0269 100644
--- a/src/core/fonts.js
+++ b/src/core/fonts.js
@@ -54,6 +54,7 @@ var shadow = sharedUtil.shadow;
 var stringToBytes = sharedUtil.stringToBytes;
 var string32 = sharedUtil.string32;
 var warn = sharedUtil.warn;
+var MissingDataException = sharedUtil.MissingDataException;
 var Stream = coreStream.Stream;
 var Lexer = coreParser.Lexer;
 var getGlyphsUnicode = coreGlyphList.getGlyphsUnicode;
@@ -3638,6 +3639,120 @@ var CFFStandardStrings = [
 
 // Type1Font is also a CIDFontType0.
 var Type1Font = (function Type1FontClosure() {
+  function findBlock(streamBytes, signature, startIndex) { // Byte search.
+    var streamBytesLength = streamBytes.length;
+    var signatureLength = signature.length;
+    var scanLength = streamBytesLength - signatureLength; // Scan limit.
+    // Only offsets in [startIndex, scanLength) are tried as match starts.
+    var i = startIndex, j, found = false;
+    while (i < scanLength) {
+      j = 0; // Number of `signature` bytes matched at offset `i`.
+      while (j < signatureLength && streamBytes[i + j] === signature[j]) {
+        j++;
+      }
+      if (j >= signatureLength) { // `signature` found, skip over whitespace.
+        i += j; // Advance past the matched signature.
+        while (i < streamBytesLength && Lexer.isSpace(streamBytes[i])) {
+          i++;
+        }
+        found = true;
+        break;
+      }
+      i++; // No match at this offset, try the next one.
+    }
+    return {
+      found: found, // Whether `signature` was matched.
+      length: i, // Index at which the scan stopped.
+    };
+  }
+
+  function getHeaderBlock(stream, suggestedLength) {
+    var EEXEC_SIGNATURE = [0x65, 0x65, 0x78, 0x65, 0x63]; // ASCII "eexec".
+    // Returns `{ stream, length }` describing the header portion of `stream`.
+    var streamStartPos = stream.pos; // Save the initial stream position.
+    var headerBytes, headerBytesLength, block;
+    try {
+      headerBytes = stream.getBytes(suggestedLength);
+      headerBytesLength = headerBytes.length;
+    } catch (ex) {
+      if (ex instanceof MissingDataException) {
+        throw ex; // Missing data is never ignored; propagate it unchanged.
+      }
+      // Ignore errors if the `suggestedLength` is huge enough that a Uint8Array
+      // cannot hold the result of `getBytes`, and fallback to simply checking
+      // the entire stream (fixes issue3928.pdf).
+    }
+    // NOTE: `headerBytesLength` stays undefined if `getBytes` threw above.
+    if (headerBytesLength === suggestedLength) {
+      // Most of the time `suggestedLength` is correct, so to speed things up we
+      // initially only check the last few bytes to see if the header was found.
+      // Otherwise we (potentially) check the entire stream to prevent errors in
+      // `Type1Parser` (fixes issue5686.pdf).
+      block = findBlock(headerBytes, EEXEC_SIGNATURE,
+                        suggestedLength - 2 * EEXEC_SIGNATURE.length);
+      // Accept only when the match ends exactly at `suggestedLength`.
+      if (block.found && block.length === suggestedLength) {
+        return {
+          stream: new Stream(headerBytes),
+          length: suggestedLength,
+        };
+      }
+    }
+    warn('Invalid "Length1" property in Type1 font -- trying to recover.');
+    stream.pos = streamStartPos; // Reset the stream position.
+    // Fallback: search for the signature from the start, block by block.
+    var SCAN_BLOCK_LENGTH = 2048;
+    var actualLength;
+    while (true) {
+      var scanBytes = stream.peekBytes(SCAN_BLOCK_LENGTH);
+      block = findBlock(scanBytes, EEXEC_SIGNATURE, 0);
+      // A zero `length` means `findBlock` could not advance; stop scanning.
+      if (block.length === 0) {
+        break;
+      }
+      stream.pos += block.length; // Update the stream position.
+
+      if (block.found) {
+        actualLength = stream.pos - streamStartPos; // Recovered header length.
+        break;
+      }
+    }
+    stream.pos = streamStartPos; // Reset the stream position.
+
+    if (actualLength) {
+      return {
+        stream: new Stream(stream.getBytes(actualLength)),
+        length: actualLength,
+      };
+    }
+    warn('Unable to recover "Length1" property in Type1 font -- using as is.');
+    return {
+      stream: new Stream(stream.getBytes(suggestedLength)),
+      length: suggestedLength,
+    };
+  }
+
+  function getEexecBlock(stream, suggestedLength) {
+    // We should ideally parse the eexec block to ensure that `suggestedLength`
+    // is correct, so we don't truncate the block data if it's too small.
+    // However, this would also require checking if the fixed-content portion
+    // exists (using the 'Length3' property), and ensuring that it's valid.
+    //
+    // Given that `suggestedLength` almost always is correct, all the validation
+    // would require a great deal of unnecessary parsing for most fonts.
+    // To save time, we always fetch the entire stream instead, which also avoids
+    // issues if `suggestedLength` is huge (see comment in `getHeaderBlock`).
+    //
+    // NOTE: This means that the function can include the fixed-content portion
+    // in the returned eexec block. In practice this does *not* seem to matter,
+    // since `Type1Parser_extractFontProgram` will skip over any non-commands.
+    var eexecBytes = stream.getBytes(); // `suggestedLength` is not used here.
+    return {
+      stream: new Stream(eexecBytes),
+      length: eexecBytes.length, // Number of bytes actually fetched.
+    };
+  }
+
   function Type1Font(name, file, properties) {
     // Some bad generators embed pfb file as is, we have to strip 6-byte header.
     // Also, length1 and length2 might be off by 6 bytes as well.
@@ -3654,8 +3769,9 @@ var Type1Font = (function Type1FontClosure() {
     }
 
     // Get the data block containing glyphs and subrs informations
-    var headerBlock = new Stream(file.getBytes(headerBlockLength));
-    var headerBlockParser = new Type1Parser(headerBlock);
+    var headerBlock = getHeaderBlock(file, headerBlockLength);
+    headerBlockLength = headerBlock.length;
+    var headerBlockParser = new Type1Parser(headerBlock.stream);
     headerBlockParser.extractFontHeader(properties);
 
     if (pfbHeaderPresent) {
@@ -3665,8 +3781,9 @@ var Type1Font = (function Type1FontClosure() {
     }
 
     // Decrypt the data blocks and retrieve it's content
-    var eexecBlock = new Stream(file.getBytes(eexecBlockLength));
-    var eexecBlockParser = new Type1Parser(eexecBlock, true);
+    var eexecBlock = getEexecBlock(file, eexecBlockLength);
+    eexecBlockLength = eexecBlock.length;
+    var eexecBlockParser = new Type1Parser(eexecBlock.stream, true);
     var data = eexecBlockParser.extractFontProgram();
     for (var info in data.properties) {
       properties[info] = data.properties[info];
diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore
index b675adf..9cdc808 100644
--- a/test/pdfs/.gitignore
+++ b/test/pdfs/.gitignore
@@ -49,6 +49,8 @@
 !issue3207r.pdf
 !issue3263r.pdf
 !issue3879r.pdf
+!issue5686.pdf
+!issue3928.pdf
 !close-path-bug.pdf
 !issue6019.pdf
 !issue6621.pdf
diff --git a/test/pdfs/issue3928.pdf b/test/pdfs/issue3928.pdf
new file mode 100644
index 0000000..f47c146
Binary files /dev/null and b/test/pdfs/issue3928.pdf differ
diff --git a/test/pdfs/issue5686.pdf b/test/pdfs/issue5686.pdf
new file mode 100644
index 0000000..6525d11
Binary files /dev/null and b/test/pdfs/issue5686.pdf differ
diff --git a/test/test_manifest.json b/test/test_manifest.json
index 6113fcd..c0efbe3 100644
--- a/test/test_manifest.json
+++ b/test/test_manifest.json
@@ -1443,6 +1443,22 @@
       "link": false,
       "type": "eq"
     },
+    {  "id": "issue5686",
+       "file": "pdfs/issue5686.pdf",
+       "md5": "78d16b9df07a355ad00d70504a9194f8",
+       "rounds": 1,
+       "link": false,
+       "type": "eq",
+       "about": "Type1 font where Length1/Length2 are slightly incorrect."
+    },
+    {  "id": "issue3928",
+       "file": "pdfs/issue3928.pdf",
+       "md5": "1963493f843e981cbe768b707ef7f08a",
+       "rounds": 1,
+       "link": false,
+       "type": "eq",
+       "about": "Type1 font where Length1/Length2 are several orders of magnitude too large."
+    },
     {  "id": "html5checker",
       "file": "pdfs/html5checker.pdf",
       "md5": "74bbd80d1e7eb5f2951582233ef9ebab",

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/pdf.js.git



More information about the Pkg-javascript-commits mailing list