[Pkg-javascript-commits] [pdf.js] 269/414: Convert `uniXXXX` glyph names to proper ones when building the `charCodeToGlyphId` map for TrueType fonts (bug 1132849, issue 6893, issue 6894)
David Prévot
taffit at moszumanska.debian.org
Tue Jun 28 17:12:29 UTC 2016
This is an automated email from the git hooks/post-receive script.
taffit pushed a commit to branch master
in repository pdf.js.
commit dfe9015a43b82595655419ef496e27263cef5da8
Author: Jonas Jenwald <jonas.jenwald at gmail.com>
Date: Mon Mar 7 20:56:15 2016 +0100
Convert `uniXXXX` glyph names to proper ones when building the `charCodeToGlyphId` map for TrueType fonts (bug 1132849, issue 6893, issue 6894)
This patch adds a `getUnicodeForGlyph` helper function, which is used to recover Unicode values for non-standard glyph names.
Some PDF generators, e.g. Scribus PDF, use improper `uniXXXX` glyph names, which breaks the glyph mapping. We can avoid this by converting them to "standard" glyph names instead.
Fixes https://bugzilla.mozilla.org/show_bug.cgi?id=1132849.
Fixes issue 6893.
Fixes issue 6894.
---
src/core/fonts.js | 59 +++++++++++++++++++++++++++++++++++++++--------
src/core/unicode.js | 31 +++++++++++++++++++++++++
test/pdfs/.gitignore | 2 ++
test/pdfs/bug1132849.pdf | Bin 0 -> 72583 bytes
test/pdfs/issue6894.pdf | Bin 0 -> 1372448 bytes
test/test_manifest.json | 14 +++++++++++
6 files changed, 96 insertions(+), 10 deletions(-)
diff --git a/src/core/fonts.js b/src/core/fonts.js
index 78ff4b1..5944325 100644
--- a/src/core/fonts.js
+++ b/src/core/fonts.js
@@ -80,6 +80,7 @@ var getSupplementalGlyphMapForArialBlack =
coreStandardFonts.getSupplementalGlyphMapForArialBlack;
var getUnicodeRangeFor = coreUnicode.getUnicodeRangeFor;
var mapSpecialUnicodeValues = coreUnicode.mapSpecialUnicodeValues;
+var getUnicodeForGlyph = coreUnicode.getUnicodeForGlyph;
// Unicode Private Use Area
var PRIVATE_USE_OFFSET_START = 0xE000;
@@ -465,7 +466,7 @@ var ProblematicCharRanges = new Int32Array([
*/
var Font = (function FontClosure() {
function Font(name, file, properties) {
- var charCode, glyphName, fontChar;
+ var charCode, glyphName, unicode, fontChar;
this.name = name;
this.loadedName = properties.loadedName;
@@ -609,21 +610,25 @@ var Font = (function FontClosure() {
this.toFontChar[charCode] = fontChar;
}
} else if (isStandardFont) {
- this.toFontChar = [];
glyphsUnicodeMap = getGlyphsUnicode();
for (charCode in properties.defaultEncoding) {
glyphName = (properties.differences[charCode] ||
properties.defaultEncoding[charCode]);
- this.toFontChar[charCode] = glyphsUnicodeMap[glyphName];
+ unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
+ if (unicode !== -1) {
+ this.toFontChar[charCode] = unicode;
+ }
}
} else {
- var unicodeCharCode, notCidFont = (type.indexOf('CIDFontType') === -1);
glyphsUnicodeMap = getGlyphsUnicode();
this.toUnicode.forEach(function(charCode, unicodeCharCode) {
- if (notCidFont) {
+ if (!this.composite) {
glyphName = (properties.differences[charCode] ||
properties.defaultEncoding[charCode]);
- unicodeCharCode = (glyphsUnicodeMap[glyphName] || unicodeCharCode);
+ unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
+ if (unicode !== -1) {
+ unicodeCharCode = unicode;
+ }
}
this.toFontChar[charCode] = unicodeCharCode;
}.bind(this));
@@ -722,7 +727,7 @@ var Font = (function FontClosure() {
function int16(b0, b1) {
return (b0 << 8) + b1;
}
-
+
function signedInt16(b0, b1) {
var value = (b0 << 8) + b1;
return value & (1 << 15) ? value - 0x10000 : value;
@@ -2283,6 +2288,26 @@ var Font = (function FontClosure() {
return false;
}
+ // Some bad PDF generators, e.g. Scribus PDF, include glyph names
+ // in a 'uniXXXX' format -- attempting to recover proper ones.
+ function recoverGlyphName(name, glyphsUnicodeMap) {
+ if (glyphsUnicodeMap[name] !== undefined) {
+ return name;
+ }
+ // The glyph name is non-standard, trying to recover.
+ var unicode = getUnicodeForGlyph(name, glyphsUnicodeMap);
+ if (unicode !== -1) {
+ for (var key in glyphsUnicodeMap) {
+ if (glyphsUnicodeMap[key] === unicode) {
+ return key;
+ }
+ }
+ }
+ warn('Unable to recover a standard glyph name for: ' + name);
+ return name;
+ }
+
+
if (properties.type === 'CIDFontType2') {
var cidToGidMap = properties.cidToGidMap || [];
var isCidToGidMapEmpty = cidToGidMap.length === 0;
@@ -2337,7 +2362,7 @@ var Font = (function FontClosure() {
}
var glyphsUnicodeMap = getGlyphsUnicode();
for (charCode = 0; charCode < 256; charCode++) {
- var glyphName;
+ var glyphName, standardGlyphName;
if (this.differences && charCode in this.differences) {
glyphName = this.differences[charCode];
} else if (charCode in baseEncoding &&
@@ -2349,13 +2374,16 @@ var Font = (function FontClosure() {
if (!glyphName) {
continue;
}
+ // Ensure that non-standard glyph names are resolved to valid ones.
+ standardGlyphName = recoverGlyphName(glyphName, glyphsUnicodeMap);
+
var unicodeOrCharCode, isUnicode = false;
if (cmapPlatformId === 3 && cmapEncodingId === 1) {
- unicodeOrCharCode = glyphsUnicodeMap[glyphName];
+ unicodeOrCharCode = glyphsUnicodeMap[standardGlyphName];
isUnicode = true;
} else if (cmapPlatformId === 1 && cmapEncodingId === 0) {
// TODO: the encoding needs to be updated with mac os table.
- unicodeOrCharCode = MacRomanEncoding.indexOf(glyphName);
+ unicodeOrCharCode = MacRomanEncoding.indexOf(standardGlyphName);
}
var found = false;
@@ -2373,6 +2401,11 @@ var Font = (function FontClosure() {
if (!found && properties.glyphNames) {
// Try to map using the post table.
var glyphId = properties.glyphNames.indexOf(glyphName);
+ // The post table ought to use the same kind of glyph names as the
+ // `differences` array, but check the standard ones as a fallback.
+ if (glyphId === -1 && standardGlyphName !== glyphName) {
+ glyphId = properties.glyphNames.indexOf(standardGlyphName);
+ }
if (glyphId > 0 && hasGlyph(glyphId, -1, -1)) {
charCodeToGlyphId[charCode] = glyphId;
found = true;
@@ -2686,6 +2719,12 @@ var Font = (function FontClosure() {
code = +glyphName.substr(1);
}
break;
+ default:
+ // 'uniXXXX'/'uXXXX{XX}' glyphs
+ var unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
+ if (unicode !== -1) {
+ code = unicode;
+ }
}
if (code) {
// If |baseEncodingName| is one the predefined encodings,
diff --git a/src/core/unicode.js b/src/core/unicode.js
index dca93ba..dcfe925 100644
--- a/src/core/unicode.js
+++ b/src/core/unicode.js
@@ -65,6 +65,36 @@
return code;
}
+ function getUnicodeForGlyph(name, glyphsUnicodeMap) {
+ var unicode = glyphsUnicodeMap[name];
+ if (unicode !== undefined) {
+ return unicode;
+ }
+ if (!name) {
+ return -1;
+ }
+ // Try to recover valid Unicode values from 'uniXXXX'/'uXXXX{XX}' glyphs.
+ if (name[0] === 'u') {
+ var nameLen = name.length, hexStr;
+
+ if (nameLen === 7 && name[1] === 'n' && name[2] === 'i') { // 'uniXXXX'
+ hexStr = name.substr(3);
+ } else if (nameLen >= 5 && nameLen <= 7) { // 'uXXXX{XX}'
+ hexStr = name.substr(1);
+ } else {
+ return -1;
+ }
+ // Check for upper-case hexadecimal characters, to avoid false positives.
+ if (hexStr === hexStr.toUpperCase()) {
+ unicode = parseInt(hexStr, 16);
+ if (unicode >= 0) {
+ return unicode;
+ }
+ }
+ }
+ return -1;
+ }
+
var UnicodeRanges = [
{ 'begin': 0x0000, 'end': 0x007F }, // Basic Latin
{ 'begin': 0x0080, 'end': 0x00FF }, // Latin-1 Supplement
@@ -1612,4 +1642,5 @@
exports.reverseIfRtl = reverseIfRtl;
exports.getUnicodeRangeFor = getUnicodeRangeFor;
exports.getNormalizedUnicodes = getNormalizedUnicodes;
+ exports.getUnicodeForGlyph = getUnicodeForGlyph;
}));
diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore
index 5fad85a..8806c03 100644
--- a/test/pdfs/.gitignore
+++ b/test/pdfs/.gitignore
@@ -32,6 +32,8 @@
!bug1200096.pdf
!issue5564_reduced.pdf
!canvas.pdf
+!bug1132849.pdf
+!issue6894.pdf
!issue5804.pdf
!ShowText-ShadingPattern.pdf
!complex_ttf_font.pdf
diff --git a/test/pdfs/bug1132849.pdf b/test/pdfs/bug1132849.pdf
new file mode 100644
index 0000000..1754b50
Binary files /dev/null and b/test/pdfs/bug1132849.pdf differ
diff --git a/test/pdfs/issue6894.pdf b/test/pdfs/issue6894.pdf
new file mode 100644
index 0000000..7220f7c
Binary files /dev/null and b/test/pdfs/issue6894.pdf differ
diff --git a/test/test_manifest.json b/test/test_manifest.json
index 51f03f3..ae7532c 100644
--- a/test/test_manifest.json
+++ b/test/test_manifest.json
@@ -728,6 +728,20 @@
"rounds": 1,
"type": "eq"
},
+ { "id": "bug1132849",
+ "file": "pdfs/bug1132849.pdf",
+ "md5": "aedfbead1f8feb35cf2e38b279133b47",
+ "rounds": 1,
+ "link": false,
+ "type": "eq"
+ },
+ { "id": "issue6894",
+ "file": "pdfs/issue6894.pdf",
+ "md5": "bb84f2025c11f23cf436170049f81215",
+ "rounds": 1,
+ "link": false,
+ "type": "eq"
+ },
{ "id": "personwithdog",
"file": "pdfs/personwithdog.pdf",
"md5": "cd68fb2ce00dab97801b3e51495b99e3",
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/pdf.js.git
More information about the Pkg-javascript-commits
mailing list