[Pkg-javascript-commits] [pdf.js] 84/141: Heuristics to recognize the unknown glyphs for toUnicode

David Prévot taffit at moszumanska.debian.org
Sat Apr 19 22:40:32 UTC 2014


This is an automated email from the git hooks/post-receive script.

taffit pushed a commit to branch master
in repository pdf.js.

commit 88c1747cc36c69bbce2459d9005573906b1f914e
Author: Yury Delendik <ydelendik at mozilla.com>
Date:   Wed Apr 9 11:17:50 2014 -0500

    Heuristics to recognize the unknown glyphs for toUnicode
---
 src/core/fonts.js | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/src/core/fonts.js b/src/core/fonts.js
index 4d21a50..ee927ab 100644
--- a/src/core/fonts.js
+++ b/src/core/fonts.js
@@ -4193,7 +4193,7 @@ var Font = (function FontClosure() {
         toUnicode: null
       };
       // Section 9.10.2 Mapping Character Codes to Unicode Values
-      if (properties.toUnicode) {
+      if (properties.toUnicode && properties.toUnicode.length !== 0) {
         map.toUnicode = properties.toUnicode;
         return map;
       }
@@ -4217,6 +4217,21 @@ var Font = (function FontClosure() {
           // b) Look up the character name in the Adobe Glyph List (see the
           //    Bibliography) to obtain the corresponding Unicode value.
           if (glyphName === '' || !(glyphName in GlyphsUnicode)) {
+            // (undocumented) c) Few heuristics to recognize unknown glyphs
+            // NOTE: Adobe Reader does not do this step, but OSX Preview does
+            var code;
+            // Gxx glyph
+            if (glyphName.length === 3 &&
+                glyphName[0] === 'G' &&
+                (code = parseInt(glyphName.substr(1), 16))) {
+              toUnicode[charcode] = String.fromCharCode(code);
+            }
+            // Cddd glyph
+            if (glyphName.length >= 3 &&
+                glyphName[0] === 'C' &&
+                (code = +glyphName.substr(1))) {
+              toUnicode[charcode] = String.fromCharCode(code);
+            }
             continue;
           }
           toUnicode[charcode] = String.fromCharCode(GlyphsUnicode[glyphName]);
@@ -5467,17 +5482,19 @@ var CFFFont = (function CFFFontClosure() {
     },
     getGlyphMapping: function CFFFont_getGlyphMapping() {
       var cff = this.cff;
+      var properties = this.properties;
       var charsets = cff.charset.charset;
-      var charCodeToGlyphId = Object.create(null);
+      var charCodeToGlyphId;
       var glyphId;
 
-      if (this.properties.composite) {
-        if (this.cff.isCIDFont) {
+      if (properties.composite) {
+        charCodeToGlyphId = Object.create(null);
+        if (cff.isCIDFont) {
           // If the font is actually a CID font then we should use the charset
           // to map CIDs to GIDs.
           for (glyphId = 0; glyphId < charsets.length; glyphId++) {
             var cidString = String.fromCharCode(charsets[glyphId]);
-            var charCode = this.properties.cMap.map.indexOf(cidString);
+            var charCode = properties.cMap.map.indexOf(cidString);
             charCodeToGlyphId[charCode] = glyphId;
           }
         } else {
@@ -5491,7 +5508,8 @@ var CFFFont = (function CFFFontClosure() {
       }
 
       var encoding = cff.encoding ? cff.encoding.encoding : null;
-      return type1FontGlyphMapping(this.properties, encoding, charsets);
+      charCodeToGlyphId = type1FontGlyphMapping(properties, encoding, charsets);
+      return charCodeToGlyphId;
     }
   };
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/pdf.js.git



More information about the Pkg-javascript-commits mailing list