[Pkg-javascript-commits] [pdf.js] 47/141: Treat fonts with the same font descriptor, encoding and unicode map as aliases

David Prévot taffit at moszumanska.debian.org
Sat Apr 19 22:40:29 UTC 2014


This is an automated email from the git hooks/post-receive script.

taffit pushed a commit to branch master
in repository pdf.js.

commit 79f34b183c8f9872133d1decf0a74c12cbe56a9e
Author: Christian Krebs <chrisk at opera.com>
Date:   Mon Mar 3 18:44:45 2014 +0100

    Treat fonts with the same font descriptor, encoding and unicode map as aliases
    
    Different fonts can point to the same font descriptor
    (see https://github.com/mozilla/pdf.js/issues/4339 for details). With this
    commit, such fonts are treated as aliases if they also have the same encoding
    and the same toUnicode map. The corresponding info is stored on the font
    descriptor. This change must also ensure that aliases always use the same font
    name, because translated fonts can get cleared depending on CLEANUP_TIMEOUT.
---
 make.js                 |   3 +-
 src/core/evaluator.js   |  94 ++++++++++++++++++++++++++++---
 src/core/fonts.js       |   7 +++
 src/core/murmurhash3.js | 146 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/core/obj.js         |   4 ++
 src/worker_loader.js    |   1 +
 6 files changed, 245 insertions(+), 10 deletions(-)

diff --git a/make.js b/make.js
index 62b71ca..6ba805b 100644
--- a/make.js
+++ b/make.js
@@ -349,7 +349,8 @@ target.bundle = function(args) {
     'core/jpx.js',
     'core/jbig2.js',
     'core/bidi.js',
-    'core/cmap.js'
+    'core/cmap.js',
+    'core/murmurhash3.js'
   ];
 
   if (!defines.SINGLE_FILE) {
diff --git a/src/core/evaluator.js b/src/core/evaluator.js
index d8ce942..d4ee454 100644
--- a/src/core/evaluator.js
+++ b/src/core/evaluator.js
@@ -17,11 +17,11 @@
 /* globals assert, assertWellFormed, ColorSpace, DecodeStream, Dict, Encodings,
            error, ErrorFont, Font, FONT_IDENTITY_MATRIX, fontCharsToUnicode,
            FontFlags, ImageKind, info, isArray, isCmd, isDict, isEOF, isName,
-           isNum, isStream, isString, JpegStream, Lexer, Metrics, Name, Parser,
-           Pattern, PDFImage, PDFJS, serifFonts, stdFontMap, symbolsFonts,
-           getTilingPatternIR, warn, Util, Promise, LegacyPromise,
-           RefSetCache, isRef, TextRenderingMode, CMapFactory, OPS,
-           UNSUPPORTED_FEATURES, UnsupportedManager */
+           isNum, isStream, isString, JpegStream, Lexer, Metrics,
+           MurmurHash3_64, Name, Parser, Pattern, PDFImage, PDFJS, serifFonts,
+           stdFontMap, symbolsFonts, getTilingPatternIR, warn, Util, Promise,
+           LegacyPromise, RefSetCache, isRef, TextRenderingMode, CMapFactory,
+           OPS, UNSUPPORTED_FEATURES, UnsupportedManager */
 
 'use strict';
 
@@ -413,6 +413,36 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
       if (!isDict(font)) {
         return errorFont();
       }
+
+      var preEvaluatedFont = this.preEvaluateFont(font, xref);
+      var descriptor = preEvaluatedFont.descriptor;
+      var fontID = fontRef.num + '_' + fontRef.gen;
+      if (isDict(descriptor)) {
+        if (!descriptor.fontAliases) {
+          descriptor.fontAliases = Object.create(null);
+        }
+
+        var fontAliases = descriptor.fontAliases;
+        var hash = preEvaluatedFont.hash;
+        if (fontAliases[hash]) {
+          var aliasFontRef = fontAliases[hash].aliasRef;
+          if (aliasFontRef && this.fontCache.has(aliasFontRef)) {
+            this.fontCache.putAlias(fontRef, aliasFontRef);
+            var cachedFont = this.fontCache.get(fontRef);
+            return cachedFont;
+          }
+        }
+
+        if (!fontAliases[hash]) {
+          fontAliases[hash] = {
+            fontID: Font.getFontID()
+          };
+        }
+
+        fontAliases[hash].aliasRef = fontRef;
+        fontID = fontAliases[hash].fontID;
+      }
+
       // Workaround for bad PDF generators that don't reference fonts
       // properly, i.e. by not using an object identifier.
       // Check if the fontRef is a Dict (as opposed to a standard object),
@@ -426,12 +456,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
       // Keep track of each font we translated so the caller can
       // load them asynchronously before calling display on a page.
       font.loadedName = 'g_font_' + (fontRefIsDict ?
-        fontName.replace(/\W/g, '') : (fontRef.num + '_' + fontRef.gen));
+        fontName.replace(/\W/g, '') : fontID);
 
       if (!font.translated) {
         var translated;
         try {
-          translated = this.translateFont(font, xref);
+          translated = this.translateFont(preEvaluatedFont, xref);
         } catch (e) {
           UnsupportedManager.notify(UNSUPPORTED_FEATURES.font);
           translated = new ErrorFont(e instanceof Error ? e.message : e);
@@ -1127,7 +1157,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
       return widths;
     },
 
-    translateFont: function PartialEvaluator_translateFont(dict, xref) {
+    preEvaluateFont: function PartialEvaluator_preEvaluateFont(dict, xref) {
       var baseDict = dict;
       var type = dict.get('Subtype');
       assertWellFormed(isName(type), 'invalid font Subtype');
@@ -1148,9 +1178,55 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
         assertWellFormed(isName(type), 'invalid font Subtype');
         composite = true;
       }
-      var maxCharIndex = (composite ? 0xFFFF : 0xFF);
 
       var descriptor = dict.get('FontDescriptor');
+      if (descriptor) {
+        var hash = new MurmurHash3_64();
+        var encoding = baseDict.getRaw('Encoding');
+        if (isName(encoding)) {
+          hash.update(encoding.name);
+        } else if (isRef(encoding)) {
+          hash.update(encoding.num + '_' + encoding.gen);
+        }
+
+        var toUnicode = dict.get('ToUnicode') || baseDict.get('ToUnicode');
+        if (isStream(toUnicode)) {
+          var stream = toUnicode.str || toUnicode;
+          var uint8array = stream.buffer ?
+            new Uint8Array(stream.buffer.buffer, 0, stream.bufferLength) :
+            new Uint8Array(stream.bytes.buffer,
+                           stream.start, stream.end - stream.start);
+          hash.update(uint8array);
+
+        } else if (isName(toUnicode)) {
+          hash.update(toUnicode.name);
+        }
+
+        var widths = dict.get('Widths') || baseDict.get('Widths');
+        if (widths) {
+          var uint8array = new Uint8Array(new Uint32Array(widths).buffer);
+          hash.update(uint8array);
+        }
+      }
+
+      return {
+        descriptor: descriptor,
+        dict: dict,
+        baseDict: baseDict,
+        composite: composite,
+        hash: hash ? hash.hexdigest() : ''
+      };
+    },
+
+    translateFont: function PartialEvaluator_translateFont(preEvaluatedFont,
+                                                           xref) {
+      var baseDict = preEvaluatedFont.baseDict;
+      var dict = preEvaluatedFont.dict;
+      var composite = preEvaluatedFont.composite;
+      var descriptor = preEvaluatedFont.descriptor;
+      var type = dict.get('Subtype');
+      var maxCharIndex = (composite ? 0xFFFF : 0xFF);
+
       if (!descriptor) {
         if (type.name == 'Type3') {
           // FontDescriptor is only required for Type3 fonts when the document
diff --git a/src/core/fonts.js b/src/core/fonts.js
index e828073..9a76e88 100644
--- a/src/core/fonts.js
+++ b/src/core/fonts.js
@@ -2328,6 +2328,13 @@ var Font = (function FontClosure() {
     this.loading = true;
   }
 
+  Font.getFontID = (function () {
+    var ID = 1;
+    return function Font_getFontID() {
+      return String(ID++);
+    };
+  })();
+
   function int16(b0, b1) {
     return (b0 << 8) + b1;
   }
diff --git a/src/core/murmurhash3.js b/src/core/murmurhash3.js
new file mode 100644
index 0000000..c2e7163
--- /dev/null
+++ b/src/core/murmurhash3.js
@@ -0,0 +1,146 @@
+/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
+
+/* Copyright 2014 Opera Software ASA
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *
+ * Based on https://code.google.com/p/smhasher/wiki/MurmurHash3.
+ * Hashes roughly 100 KB per millisecond on i7 3.4 GHz.
+ */
+
+'use strict';
+
+var MurmurHash3_64 = (function MurmurHash3_64Closure (seed) {
+  // Workaround for missing math precision in JS.
+  var MASK_HIGH = 0xffff0000;
+  var MASK_LOW = 0xffff;
+
+  function MurmurHash3_64 (seed) {
+    var SEED = 0xc3d2e1f0;
+    this.h1 = seed ? seed & 0xffffffff : SEED;
+    this.h2 = seed ? seed & 0xffffffff : SEED;
+  }
+
+  MurmurHash3_64.prototype = {
+    update: function MurmurHash3_64_update(input) {
+      if (typeof input == 'string') {
+        var data = new Uint8Array(input.length * 2);
+        var length = 0;
+        for (var i = 0; i < input.length; i++) {
+          var code = input.charCodeAt(i);
+          if (code <= 0xff) {
+            data[length++] = code;
+          }
+          else {
+            data[length++] = code >>> 8;
+            data[length++] = code & 0xff;
+          }
+        }
+      } else {
+        if (!(input instanceof Uint8Array)) {
+          throw new Error('Wrong data format in MurmurHash3_64_update. ' +
+                          'Input must be a string or Uint8Array');
+        }
+        data = input;
+        length = data.length;
+      }
+
+      var blockCounts = length >> 2;
+      var tailLength = length - blockCounts * 4;
+      var dataUint32 = new Uint32Array(data.buffer, 0, blockCounts);
+      var k1 = 0;
+      var k2 = 0;
+      var h1 = this.h1;
+      var h2 = this.h2;
+      var C1 = 0xcc9e2d51;
+      var C2 = 0x1b873593;
+      var C1_LOW = C1 & MASK_LOW;
+      var C2_LOW = C2 & MASK_LOW;
+
+      for (var i = 0; i < blockCounts; i++) {
+        if (i & 1) {
+          k1 = dataUint32[i];
+          k1 = (k1 * C1 & MASK_HIGH) | (k1 * C1_LOW & MASK_LOW);
+          k1 = k1 << 15 | k1 >>> 17;
+          k1 = (k1 * C2 & MASK_HIGH) | (k1 * C2_LOW & MASK_LOW);
+          h1 ^= k1;
+          h1 = h1 << 13 | h1 >>> 19;
+          h1 = h1 * 5 + 0xe6546b64;
+        } else {
+          k2 = dataUint32[i];
+          k2 = (k2 * C1 & MASK_HIGH) | (k2 * C1_LOW & MASK_LOW);
+          k2 = k2 << 15 | k2 >>> 17;
+          k2 = (k2 * C2 & MASK_HIGH) | (k2 * C2_LOW & MASK_LOW);
+          h2 ^= k2;
+          h2 = h2 << 13 | h2 >>> 19;
+          h2 = h2 * 5 + 0xe6546b64;
+        }
+      }
+
+      k1 = 0;
+
+      switch (tailLength) {
+        case 3:
+          k1 ^= data[blockCounts * 4 + 2] << 16;
+          /* falls through */
+        case 2:
+          k1 ^= data[blockCounts * 4 + 1] << 8;
+          /* falls through */
+        case 1:
+          k1 ^= data[blockCounts * 4];
+          /* falls through */
+        k1 = (k1 * C1 & MASK_HIGH) | (k1 * C1_LOW & MASK_LOW);
+        k1 = k1 << 15 | k1 >>> 17;
+        k1 = (k1 * C2 & MASK_HIGH) | (k1 * C2_LOW & MASK_LOW);
+        if (blockCounts & 1) {
+          h1 ^= k1;
+        } else {
+          h2 ^= k1;
+        }
+      }
+
+      this.h1 = h1;
+      this.h2 = h2;
+      return this;
+    },
+
+    hexdigest: function MurmurHash3_64_hexdigest () {
+      var h1 = this.h1;
+      var h2 = this.h2;
+
+      h1 ^= h2 >>> 1;
+      h1 = (h1 * 0xed558ccd & MASK_HIGH) | (h1 * 0x8ccd & MASK_LOW);
+      h2 = (h2 * 0xff51afd7 & MASK_HIGH) |
+           (((h2 << 16 | h1 >>> 16) * 0xafd7ed55 & MASK_HIGH) >>> 16);
+      h1 ^= h2 >>> 1;
+      h1 = (h1 * 0x1a85ec53 & MASK_HIGH) | (h1 * 0xec53 & MASK_LOW);
+      h2 = (h2 * 0xc4ceb9fe & MASK_HIGH) |
+           (((h2 << 16 | h1 >>> 16) * 0xb9fe1a85 & MASK_HIGH) >>> 16);
+      h1 ^= h2 >>> 1;
+
+      for (var i = 0, arr = [h1, h2], str = ''; i < arr.length; i++) {
+        var hex = (arr[i] >>> 0).toString(16);
+        while (hex.length < 8) {
+          hex = '0' + hex;
+        }
+        str += hex;
+      }
+
+      return str;
+    }
+  };
+
+  return MurmurHash3_64;
+})();
diff --git a/src/core/obj.js b/src/core/obj.js
index ab4bb04..59c8dbf 100644
--- a/src/core/obj.js
+++ b/src/core/obj.js
@@ -271,6 +271,10 @@ var RefSetCache = (function RefSetCacheClosure() {
       this.dict['R' + ref.num + '.' + ref.gen] = obj;
     },
 
+    putAlias: function RefSetCache_putAlias(ref, aliasRef) {
+      this.dict['R' + ref.num + '.' + ref.gen] = this.get(aliasRef);
+    },
+
     forEach: function RefSetCache_forEach(fn, thisArg) {
       for (var i in this.dict) {
         fn.call(thisArg, this.dict[i]);
diff --git a/src/worker_loader.js b/src/worker_loader.js
index 3333c30..7ee5e9d 100644
--- a/src/worker_loader.js
+++ b/src/worker_loader.js
@@ -52,6 +52,7 @@ var otherFiles = [
   'core/jpx.js',
   'core/jbig2.js',
   'core/bidi.js',
+  'core/murmurhash3.js',
   '../external/jpgjs/jpg.js'
 ];
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/pdf.js.git



More information about the Pkg-javascript-commits mailing list