[Pkg-javascript-commits] [pdf.js] 269/414: Convert `uniXXXX` glyph names to proper ones when building the `charCodeToGlyphId` map for TrueType fonts (bug 1132849, issue 6893, issue 6894)

David Prévot taffit at moszumanska.debian.org
Tue Jun 28 17:12:29 UTC 2016


This is an automated email from the git hooks/post-receive script.

taffit pushed a commit to branch master
in repository pdf.js.

commit dfe9015a43b82595655419ef496e27263cef5da8
Author: Jonas Jenwald <jonas.jenwald at gmail.com>
Date:   Mon Mar 7 20:56:15 2016 +0100

    Convert `uniXXXX` glyph names to proper ones when building the `charCodeToGlyphId` map for TrueType fonts (bug 1132849, issue 6893, issue 6894)
    
    This patch adds a `getUnicodeForGlyph` helper function, which is used to recover Unicode values for non-standard glyph names.
    
    Some PDF generators, e.g. Scribus PDF, use improper `uniXXXX` glyph names, which break the glyph mapping. We can avoid this by converting them to "standard" glyph names instead.
    
    Fixes https://bugzilla.mozilla.org/show_bug.cgi?id=1132849.
    Fixes 6893.
    Fixes 6894.
---
 src/core/fonts.js        |  59 +++++++++++++++++++++++++++++++++++++++--------
 src/core/unicode.js      |  31 +++++++++++++++++++++++++
 test/pdfs/.gitignore     |   2 ++
 test/pdfs/bug1132849.pdf | Bin 0 -> 72583 bytes
 test/pdfs/issue6894.pdf  | Bin 0 -> 1372448 bytes
 test/test_manifest.json  |  14 +++++++++++
 6 files changed, 96 insertions(+), 10 deletions(-)

diff --git a/src/core/fonts.js b/src/core/fonts.js
index 78ff4b1..5944325 100644
--- a/src/core/fonts.js
+++ b/src/core/fonts.js
@@ -80,6 +80,7 @@ var getSupplementalGlyphMapForArialBlack =
   coreStandardFonts.getSupplementalGlyphMapForArialBlack;
 var getUnicodeRangeFor = coreUnicode.getUnicodeRangeFor;
 var mapSpecialUnicodeValues = coreUnicode.mapSpecialUnicodeValues;
+var getUnicodeForGlyph = coreUnicode.getUnicodeForGlyph;
 
 // Unicode Private Use Area
 var PRIVATE_USE_OFFSET_START = 0xE000;
@@ -465,7 +466,7 @@ var ProblematicCharRanges = new Int32Array([
  */
 var Font = (function FontClosure() {
   function Font(name, file, properties) {
-    var charCode, glyphName, fontChar;
+    var charCode, glyphName, unicode, fontChar;
 
     this.name = name;
     this.loadedName = properties.loadedName;
@@ -609,21 +610,25 @@ var Font = (function FontClosure() {
           this.toFontChar[charCode] = fontChar;
         }
       } else if (isStandardFont) {
-        this.toFontChar = [];
         glyphsUnicodeMap = getGlyphsUnicode();
         for (charCode in properties.defaultEncoding) {
           glyphName = (properties.differences[charCode] ||
                        properties.defaultEncoding[charCode]);
-          this.toFontChar[charCode] = glyphsUnicodeMap[glyphName];
+          unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
+          if (unicode !== -1) {
+            this.toFontChar[charCode] = unicode;
+          }
         }
       } else {
-        var unicodeCharCode, notCidFont = (type.indexOf('CIDFontType') === -1);
         glyphsUnicodeMap = getGlyphsUnicode();
         this.toUnicode.forEach(function(charCode, unicodeCharCode) {
-          if (notCidFont) {
+          if (!this.composite) {
             glyphName = (properties.differences[charCode] ||
                          properties.defaultEncoding[charCode]);
-            unicodeCharCode = (glyphsUnicodeMap[glyphName] || unicodeCharCode);
+            unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
+            if (unicode !== -1) {
+              unicodeCharCode = unicode;
+            }
           }
           this.toFontChar[charCode] = unicodeCharCode;
         }.bind(this));
@@ -722,7 +727,7 @@ var Font = (function FontClosure() {
   function int16(b0, b1) {
     return (b0 << 8) + b1;
   }
- 
+
   function signedInt16(b0, b1) {
     var value = (b0 << 8) + b1;
     return value & (1 << 15) ? value - 0x10000 : value;
@@ -2283,6 +2288,26 @@ var Font = (function FontClosure() {
         return false;
       }
 
+      // Some bad PDF generators, e.g. Scribus PDF, emit glyph names in a
+      // 'uniXXXX' format -- swap those for a standard name with the same
+      // Unicode value. Standard or unrecoverable names are returned as-is.
+      function recoverGlyphName(name, glyphsUnicodeMap) {
+        if (glyphsUnicodeMap[name] === undefined) {
+          var codePoint = getUnicodeForGlyph(name, glyphsUnicodeMap);
+          if (codePoint !== -1) {
+            // Reverse lookup: first enumerated name mapped to this value.
+            for (var candidate in glyphsUnicodeMap) {
+              if (glyphsUnicodeMap[candidate] === codePoint) {
+                return candidate;
+              }
+            }
+          }
+          warn('Unable to recover a standard glyph name for: ' + name);
+        }
+        return name;
+      }
+
+
       if (properties.type === 'CIDFontType2') {
         var cidToGidMap = properties.cidToGidMap || [];
         var isCidToGidMapEmpty = cidToGidMap.length === 0;
@@ -2337,7 +2362,7 @@ var Font = (function FontClosure() {
           }
           var glyphsUnicodeMap = getGlyphsUnicode();
           for (charCode = 0; charCode < 256; charCode++) {
-            var glyphName;
+            var glyphName, standardGlyphName;
             if (this.differences && charCode in this.differences) {
               glyphName = this.differences[charCode];
             } else if (charCode in baseEncoding &&
@@ -2349,13 +2374,16 @@ var Font = (function FontClosure() {
             if (!glyphName) {
               continue;
             }
+            // Ensure that non-standard glyph names are resolved to valid ones.
+            standardGlyphName = recoverGlyphName(glyphName, glyphsUnicodeMap);
+
             var unicodeOrCharCode, isUnicode = false;
             if (cmapPlatformId === 3 && cmapEncodingId === 1) {
-              unicodeOrCharCode = glyphsUnicodeMap[glyphName];
+              unicodeOrCharCode = glyphsUnicodeMap[standardGlyphName];
               isUnicode = true;
             } else if (cmapPlatformId === 1 && cmapEncodingId === 0) {
               // TODO: the encoding needs to be updated with mac os table.
-              unicodeOrCharCode = MacRomanEncoding.indexOf(glyphName);
+              unicodeOrCharCode = MacRomanEncoding.indexOf(standardGlyphName);
             }
 
             var found = false;
@@ -2373,6 +2401,11 @@ var Font = (function FontClosure() {
             if (!found && properties.glyphNames) {
               // Try to map using the post table.
               var glyphId = properties.glyphNames.indexOf(glyphName);
+              // The post table ought to use the same kind of glyph names as the
+              // `differences` array, but check the standard ones as a fallback.
+              if (glyphId === -1 && standardGlyphName !== glyphName) {
+                glyphId = properties.glyphNames.indexOf(standardGlyphName);
+              }
               if (glyphId > 0 && hasGlyph(glyphId, -1, -1)) {
                 charCodeToGlyphId[charCode] = glyphId;
                 found = true;
@@ -2686,6 +2719,12 @@ var Font = (function FontClosure() {
                   code = +glyphName.substr(1);
                 }
                 break;
+              default:
+                // 'uniXXXX'/'uXXXX{XX}' glyphs
+                var unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
+                if (unicode !== -1) {
+                  code = unicode;
+                }
             }
             if (code) {
               // If |baseEncodingName| is one the predefined encodings,
diff --git a/src/core/unicode.js b/src/core/unicode.js
index dca93ba..dcfe925 100644
--- a/src/core/unicode.js
+++ b/src/core/unicode.js
@@ -65,6 +65,36 @@
     return code;
   }
 
+  /**
+   * Maps a glyph name to its Unicode value. Names that are missing from the
+   * map are recovered from the 'uniXXXX'/'uXXXX{XX}' naming schemes when
+   * possible.
+   * @param {string} name - Glyph name to resolve.
+   * @param {Object} glyphsUnicodeMap - Glyph name -> Unicode value lookup.
+   * @returns {number} The Unicode value, or -1 if none could be determined.
+   */
+  function getUnicodeForGlyph(name, glyphsUnicodeMap) {
+    var unicode = glyphsUnicodeMap[name];
+    if (unicode !== undefined) {
+      return unicode;
+    }
+    if (!name || name[0] !== 'u') {
+      return -1;
+    }
+    var nameLen = name.length, hexStr;
+    if (nameLen === 7 && name[1] === 'n' && name[2] === 'i') { // 'uniXXXX'
+      hexStr = name.substr(3);
+    } else if (nameLen >= 5 && nameLen <= 7) { // 'uXXXX{XX}'
+      hexStr = name.substr(1);
+    } else {
+      return -1;
+    }
+    // Accept upper-case hexadecimal digits only. `parseInt` on its own stops
+    // at the first invalid character (and skips a '0X' prefix), which would
+    // turn names such as 'u12G4' or 'u0X12' into false positives.
+    return /^[0-9A-F]+$/.test(hexStr) ? parseInt(hexStr, 16) : -1;
+  }
+
   var UnicodeRanges = [
     { 'begin': 0x0000, 'end': 0x007F }, // Basic Latin
     { 'begin': 0x0080, 'end': 0x00FF }, // Latin-1 Supplement
@@ -1612,4 +1642,5 @@
   exports.reverseIfRtl = reverseIfRtl;
   exports.getUnicodeRangeFor = getUnicodeRangeFor;
   exports.getNormalizedUnicodes = getNormalizedUnicodes;
+  exports.getUnicodeForGlyph = getUnicodeForGlyph;
 }));
diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore
index 5fad85a..8806c03 100644
--- a/test/pdfs/.gitignore
+++ b/test/pdfs/.gitignore
@@ -32,6 +32,8 @@
 !bug1200096.pdf
 !issue5564_reduced.pdf
 !canvas.pdf
+!bug1132849.pdf
+!issue6894.pdf
 !issue5804.pdf
 !ShowText-ShadingPattern.pdf
 !complex_ttf_font.pdf
diff --git a/test/pdfs/bug1132849.pdf b/test/pdfs/bug1132849.pdf
new file mode 100644
index 0000000..1754b50
Binary files /dev/null and b/test/pdfs/bug1132849.pdf differ
diff --git a/test/pdfs/issue6894.pdf b/test/pdfs/issue6894.pdf
new file mode 100644
index 0000000..7220f7c
Binary files /dev/null and b/test/pdfs/issue6894.pdf differ
diff --git a/test/test_manifest.json b/test/test_manifest.json
index 51f03f3..ae7532c 100644
--- a/test/test_manifest.json
+++ b/test/test_manifest.json
@@ -728,6 +728,20 @@
        "rounds": 1,
        "type": "eq"
     },
+    {  "id": "bug1132849",
+       "file": "pdfs/bug1132849.pdf",
+       "md5": "aedfbead1f8feb35cf2e38b279133b47",
+       "rounds": 1,
+       "link": false,
+       "type": "eq"
+    },
+    {  "id": "issue6894",
+       "file": "pdfs/issue6894.pdf",
+       "md5": "bb84f2025c11f23cf436170049f81215",
+       "rounds": 1,
+       "link": false,
+       "type": "eq"
+    },
     {  "id": "personwithdog",
        "file": "pdfs/personwithdog.pdf",
        "md5": "cd68fb2ce00dab97801b3e51495b99e3",

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/pdf.js.git



More information about the Pkg-javascript-commits mailing list