[Pkg-javascript-commits] [pdf.js] 62/246: Represent cid chars using integers, not strings.

David Prévot taffit at moszumanska.debian.org
Sun Sep 7 15:36:25 UTC 2014


This is an automated email from the git hooks/post-receive script.

taffit pushed a commit to branch master
in repository pdf.js.

commit adf58ed6870302fd87a8a0fa25f24c0e4e0716e9
Author: Nicholas Nethercote <nnethercote at mozilla.com>
Date:   Thu Jul 31 23:46:37 2014 -0700

    Represent cid chars using integers, not strings.
    
    cid chars are 16-bit unsigned integers. Currently we convert them to
    single-char strings when inserting them into the CMap, and then convert
    them back to integers when extracting them from the CMap. This patch
    changes CMap so that cid chars stay in integer format throughout, saving
    both time and space.
    
    When loading the PDF from issue #4580, this change reduces peak RSS from
    ~600 MiB to ~370 MiB. It also reduces overall running time on that PDF
    by ~26%, going from 724 ms to 533 ms.
---
 src/core/cmap.js       | 54 ++++++++++++++++++++++++++++++--------------------
 src/core/fonts.js      | 15 +++++++-------
 test/unit/cmap_spec.js |  6 +++---
 3 files changed, 42 insertions(+), 33 deletions(-)

diff --git a/src/core/cmap.js b/src/core/cmap.js
index 446b11e..4ffeb77 100644
--- a/src/core/cmap.js
+++ b/src/core/cmap.js
@@ -199,6 +199,10 @@ var CMap = (function CMapClosure() {
     // where nBytePairs are ranges e.g. [low1, high1, low2, high2, ...]
     this.codespaceRanges = [[], [], [], []];
     this.numCodespaceRanges = 0;
+    // Map entries have one of two forms.
+    // - cid chars are 16-bit unsigned integers, stored as integers.
+    // - bf chars are variable-length byte sequences, stored as strings, with
+    //   one byte per character.
     this._map = [];
     this.vertical = false;
     this.useCMap = null;
@@ -210,18 +214,23 @@ var CMap = (function CMapClosure() {
       this.numCodespaceRanges++;
     },
 
-    mapRange: function(low, high, dstLow) {
+    mapCidRange: function(low, high, dstLow) {
+      while (low <= high) {
+        this._map[low++] = dstLow++;
+      }
+    },
+
+    mapBfRange: function(low, high, dstLow) {
       var lastByte = dstLow.length - 1;
       while (low <= high) {
-        this._map[low] = dstLow;
+        this._map[low++] = dstLow;
         // Only the last byte has to be incremented.
         dstLow = dstLow.substr(0, lastByte) +
                  String.fromCharCode(dstLow.charCodeAt(lastByte) + 1);
-        ++low;
       }
     },
 
-    mapRangeToArray: function(low, high, array) {
+    mapBfRangeToArray: function(low, high, array) {
       var i = 0, ii = array.length;
       while (low <= high && i < ii) {
         this._map[low] = array[i++];
@@ -229,6 +238,7 @@ var CMap = (function CMapClosure() {
       }
     },
 
+    // This is used for both bf and cid chars.
     mapOne: function(src, dst) {
       this._map[src] = dst;
     },
@@ -302,7 +312,7 @@ var IdentityCMap = (function IdentityCMapClosure() {
     CMap.call(this);
     this.vertical = vertical;
     this.addCodespaceRange(n, 0, 0xffff);
-    this.mapRange(0, 0xffff, '\u0000');
+    this.mapCidRange(0, 0xffff, 0);
   }
   Util.inherit(IdentityCMap, CMap, {});
 
@@ -522,7 +532,7 @@ var BinaryCMapReader = (function BinaryCMapReaderClosure() {
         case 2: // cidchar
           stream.readHex(char, dataSize);
           code = stream.readNumber();
-          cMap.mapOne(hexToInt(char, dataSize), String.fromCharCode(code));
+          cMap.mapOne(hexToInt(char, dataSize), code);
           for (i = 1; i < subitemsCount; i++) {
             incHex(char, dataSize);
             if (!sequence) {
@@ -530,7 +540,7 @@ var BinaryCMapReader = (function BinaryCMapReaderClosure() {
               addHex(char, tmp, dataSize);
             }
             code = stream.readSigned() + (code + 1);
-            cMap.mapOne(hexToInt(char, dataSize), String.fromCharCode(code));
+            cMap.mapOne(hexToInt(char, dataSize), code);
           }
           break;
         case 3: // cidrange
@@ -538,8 +548,8 @@ var BinaryCMapReader = (function BinaryCMapReaderClosure() {
           stream.readHexNumber(end, dataSize);
           addHex(end, start, dataSize);
           code = stream.readNumber();
-          cMap.mapRange(hexToInt(start, dataSize), hexToInt(end, dataSize),
-                        String.fromCharCode(code));
+          cMap.mapCidRange(hexToInt(start, dataSize), hexToInt(end, dataSize),
+                           code);
           for (i = 1; i < subitemsCount; i++) {
             incHex(end, dataSize);
             if (!sequence) {
@@ -551,8 +561,8 @@ var BinaryCMapReader = (function BinaryCMapReaderClosure() {
             stream.readHexNumber(end, dataSize);
             addHex(end, start, dataSize);
             code = stream.readNumber();
-            cMap.mapRange(hexToInt(start, dataSize), hexToInt(end, dataSize),
-                          String.fromCharCode(code));
+            cMap.mapCidRange(hexToInt(start, dataSize), hexToInt(end, dataSize),
+                             code);
           }
           break;
         case 4: // bfchar
@@ -578,9 +588,9 @@ var BinaryCMapReader = (function BinaryCMapReaderClosure() {
           stream.readHexNumber(end, ucs2DataSize);
           addHex(end, start, ucs2DataSize);
           stream.readHex(charCode, dataSize);
-          cMap.mapRange(hexToInt(start, ucs2DataSize),
-                        hexToInt(end, ucs2DataSize),
-                        hexToStr(charCode, dataSize));
+          cMap.mapBfRange(hexToInt(start, ucs2DataSize),
+                          hexToInt(end, ucs2DataSize),
+                          hexToStr(charCode, dataSize));
           for (i = 1; i < subitemsCount; i++) {
             incHex(end, ucs2DataSize);
             if (!sequence) {
@@ -592,9 +602,9 @@ var BinaryCMapReader = (function BinaryCMapReaderClosure() {
             stream.readHexNumber(end, ucs2DataSize);
             addHex(end, start, ucs2DataSize);
             stream.readHex(charCode, dataSize);
-            cMap.mapRange(hexToInt(start, ucs2DataSize),
-                          hexToInt(end, ucs2DataSize),
-                          hexToStr(charCode, dataSize));
+            cMap.mapBfRange(hexToInt(start, ucs2DataSize),
+                            hexToInt(end, ucs2DataSize),
+                            hexToStr(charCode, dataSize));
           }
           break;
         default:
@@ -675,7 +685,7 @@ var CMapFactory = (function CMapFactoryClosure() {
       obj = lexer.getObj();
       if (isInt(obj) || isString(obj)) {
         var dstLow = isInt(obj) ? String.fromCharCode(obj) : obj;
-        cMap.mapRange(low, high, dstLow);
+        cMap.mapBfRange(low, high, dstLow);
       } else if (isCmd(obj, '[')) {
         obj = lexer.getObj();
         var array = [];
@@ -683,7 +693,7 @@ var CMapFactory = (function CMapFactoryClosure() {
           array.push(obj);
           obj = lexer.getObj();
         }
-        cMap.mapRangeToArray(low, high, array);
+        cMap.mapBfRangeToArray(low, high, array);
       } else {
         break;
       }
@@ -704,7 +714,7 @@ var CMapFactory = (function CMapFactoryClosure() {
       var src = strToInt(obj);
       obj = lexer.getObj();
       expectInt(obj);
-      var dst = String.fromCharCode(obj);
+      var dst = obj;
       cMap.mapOne(src, dst);
     }
   }
@@ -725,8 +735,8 @@ var CMapFactory = (function CMapFactoryClosure() {
       var high = strToInt(obj);
       obj = lexer.getObj();
       expectInt(obj);
-      var dstLow = String.fromCharCode(obj);
-      cMap.mapRange(low, high, dstLow);
+      var dstLow = obj;
+      cMap.mapCidRange(low, high, dstLow);
     }
   }
 
diff --git a/src/core/fonts.js b/src/core/fonts.js
index 383465b..de69f5c 100644
--- a/src/core/fonts.js
+++ b/src/core/fonts.js
@@ -3899,8 +3899,7 @@ var Font = (function FontClosure() {
         var cidToGidMap = properties.cidToGidMap || [];
         var cidToGidMapLength = cidToGidMap.length;
         properties.cMap.forEach(function(charCode, cid) {
-          assert(cid.length === 1, 'Max size of CID is 65,535');
-          cid = cid.charCodeAt(0);
+          assert(cid <= 0xffff, 'Max size of CID is 65,535');
           var glyphId = -1;
           if (cidToGidMapLength === 0) {
             glyphId = charCode;
@@ -4370,10 +4369,10 @@ var Font = (function FontClosure() {
         var cMap = properties.cMap;
         toUnicode = [];
         cMap.forEach(function(charcode, cid) {
-          assert(cid.length === 1, 'Max size of CID is 65,535');
+          assert(cid <= 0xffff, 'Max size of CID is 65,535');
           // e) Map the CID obtained in step (a) according to the CMap obtained
           // in step (d), producing a Unicode value.
-          var ucs2 = ucs2CMap.lookup(cid.charCodeAt(0));
+          var ucs2 = ucs2CMap.lookup(cid);
           if (ucs2) {
             toUnicode[charcode] =
               String.fromCharCode((ucs2.charCodeAt(0) << 8) +
@@ -4415,7 +4414,7 @@ var Font = (function FontClosure() {
         var charcode = 0;
         if (this.composite) {
           if (this.cMap.contains(glyphUnicode)) {
-            charcode = this.cMap.lookup(glyphUnicode).charCodeAt(0);
+            charcode = this.cMap.lookup(glyphUnicode);
           }
         }
         // ... via toUnicode map
@@ -4444,7 +4443,7 @@ var Font = (function FontClosure() {
 
       var widthCode = charcode;
       if (this.cMap && this.cMap.contains(charcode)) {
-        widthCode = this.cMap.lookup(charcode).charCodeAt(0);
+        widthCode = this.cMap.lookup(charcode);
       }
       width = this.widths[widthCode];
       width = isNum(width) ? width : this.defaultWidth;
@@ -5626,8 +5625,8 @@ var CFFFont = (function CFFFontClosure() {
           // If the font is actually a CID font then we should use the charset
           // to map CIDs to GIDs.
           for (glyphId = 0; glyphId < charsets.length; glyphId++) {
-            var cidString = String.fromCharCode(charsets[glyphId]);
-            var charCode = properties.cMap.charCodeOf(cidString);
+            var cid = charsets[glyphId];
+            var charCode = properties.cMap.charCodeOf(cid);
             charCodeToGlyphId[charCode] = glyphId;
           }
         } else {
diff --git a/test/unit/cmap_spec.js b/test/unit/cmap_spec.js
index d690adb..2a4fb9c 100644
--- a/test/unit/cmap_spec.js
+++ b/test/unit/cmap_spec.js
@@ -44,7 +44,7 @@ describe('cmap', function() {
               'endcidchar\n';
     var stream = new StringStream(str);
     var cmap = CMapFactory.create(stream);
-    expect(cmap.lookup(0x14)).toEqual(String.fromCharCode(0x00));
+    expect(cmap.lookup(0x14)).toEqual(0x00);
     expect(cmap.lookup(0x15)).toBeUndefined();
   });
   it('parses begincidrange', function() {
@@ -54,8 +54,8 @@ describe('cmap', function() {
     var stream = new StringStream(str);
     var cmap = CMapFactory.create(stream);
     expect(cmap.lookup(0x15)).toBeUndefined();
-    expect(cmap.lookup(0x16)).toEqual(String.fromCharCode(0x00));
-    expect(cmap.lookup(0x1B)).toEqual(String.fromCharCode(0x05));
+    expect(cmap.lookup(0x16)).toEqual(0x00);
+    expect(cmap.lookup(0x1B)).toEqual(0x05);
     expect(cmap.lookup(0x1C)).toBeUndefined();
   });
   it('decodes codespace ranges', function() {

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/pdf.js.git



More information about the Pkg-javascript-commits mailing list