[Pkg-javascript-commits] [pdf.js] 03/161: Use built in CMaps and unify the glyph mapping.
David Prévot
taffit at moszumanska.debian.org
Sat Apr 19 14:16:15 UTC 2014
This is an automated email from the git hooks/post-receive script.
taffit pushed a commit to branch master
in repository pdf.js.
commit b5b94a4af389ef387570cf22662d9fc6bd41417a
Author: Brendan Dahl <brendan.dahl at gmail.com>
Date: Tue Feb 11 10:27:09 2014 -0800
Use built in CMaps and unify the glyph mapping.
---
make.js | 5 +
src/core/cmap.js | 433 ++++++++------
src/core/evaluator.js | 118 ++--
src/core/fonts.js | 1346 ++++++++++++++++---------------------------
src/core/worker.js | 2 +
src/display/api.js | 8 +
src/display/canvas.js | 51 +-
test/driver.js | 1 +
test/font/font_fpgm_spec.js | 4 +-
test/font/font_os2_spec.js | 6 +-
test/font/font_post_spec.js | 7 +-
test/font/font_test.html | 1 +
test/test.py | 5 +
test/unit/cmap_spec.js | 19 +-
test/unit/font_spec.js | 2 +-
web/viewer.js | 5 +
16 files changed, 887 insertions(+), 1126 deletions(-)
diff --git a/make.js b/make.js
index eee9016..a37d378 100644
--- a/make.js
+++ b/make.js
@@ -105,6 +105,7 @@ target.generic = function() {
copy: [
[COMMON_WEB_FILES, GENERIC_DIR + '/web'],
['external/webL10n/l10n.js', GENERIC_DIR + '/web'],
+ ['external/cmaps/', GENERIC_DIR + '/web/cmaps'],
['web/viewer.css', GENERIC_DIR + '/web'],
['web/compatibility.js', GENERIC_DIR + '/web'],
['web/compressed.tracemonkey-pldi-09.pdf', GENERIC_DIR + '/web'],
@@ -489,6 +490,7 @@ target.firefox = function() {
defines: defines,
copy: [
[COMMON_WEB_FILES, FIREFOX_BUILD_CONTENT_DIR + '/web'],
+ ['external/cmaps/', FIREFOX_BUILD_CONTENT_DIR + '/web/cmaps'],
[FIREFOX_EXTENSION_DIR + 'tools/l10n.js',
FIREFOX_BUILD_CONTENT_DIR + '/web'],
['web/default_preferences.js', FIREFOX_BUILD_CONTENT_DIR]
@@ -604,6 +606,7 @@ target.mozcentral = function() {
defines: defines,
copy: [
[COMMON_WEB_FILES, MOZCENTRAL_CONTENT_DIR + '/web'],
+ ['external/cmaps/', MOZCENTRAL_CONTENT_DIR + '/web/cmaps'],
['extensions/firefox/tools/l10n.js', MOZCENTRAL_CONTENT_DIR + '/web'],
['web/default_preferences.js', MOZCENTRAL_CONTENT_DIR]
],
@@ -673,6 +676,7 @@ target.b2g = function() {
var setup = {
defines: defines,
copy: [
+ ['external/cmaps/', B2G_BUILD_CONTENT_DIR + '/web/cmaps'],
['extensions/b2g/images', B2G_BUILD_CONTENT_DIR + '/web'],
['extensions/b2g/viewer.html', B2G_BUILD_CONTENT_DIR + '/web'],
['extensions/b2g/viewer.css', B2G_BUILD_CONTENT_DIR + '/web'],
@@ -713,6 +717,7 @@ target.chromium = function() {
var setup = {
defines: defines,
copy: [
+ ['external/cmaps/', CHROME_BUILD_CONTENT_DIR + '/web/cmaps'],
[COMMON_WEB_FILES, CHROME_BUILD_CONTENT_DIR + '/web'],
[['extensions/chromium/*.json',
'extensions/chromium/*.html',
diff --git a/src/core/cmap.js b/src/core/cmap.js
index aa76128..2b45dad 100644
--- a/src/core/cmap.js
+++ b/src/core/cmap.js
@@ -15,184 +15,199 @@
* limitations under the License.
*/
/* globals Util, isString, isInt, warn, error, isCmd, isEOF, isName, Lexer,
- isStream */
+ isStream, StringStream */
'use strict';
-var CMAP_CODESPACES = {
- 'Adobe-CNS1-0': [[], [0, 14335]],
- 'Adobe-CNS1-1': [[], [0, 17407]],
- 'Adobe-CNS1-2': [[], [0, 17663]],
- 'Adobe-CNS1-3': [[], [0, 18943]],
- 'Adobe-CNS1-4': [[], [0, 19199]],
- 'Adobe-CNS1-5': [[], [0, 19199]],
- 'Adobe-CNS1-6': [[], [0, 19199]],
- 'Adobe-CNS1-UCS2': [[], [0, 65535]],
- 'B5-H': [[0, 128], [41280, 65278]],
- 'B5-V': [[0, 128], [41280, 65278]],
- 'B5pc-H': [[0, 128, 253, 255], [41280, 64766]],
- 'B5pc-V': [[0, 128, 253, 255], [41280, 64766]],
- 'CNS-EUC-H': [[0, 128], [41377, 65278], [],
- [2392957345, 2392981246, 2393022881, 2393046782, 2393088417, 2393112318]],
- 'CNS-EUC-V': [[0, 128], [41377, 65278], [],
- [2392957345, 2392981246, 2393022881, 2393046782, 2393088417, 2393112318]],
- 'CNS1-H': [[], [8481, 32382]],
- 'CNS1-V': [[], [8481, 32382]],
- 'CNS2-H': [[], [8481, 32382]],
- 'CNS2-V': [[], [8481, 32382]],
- 'ETen-B5-H': [[0, 128], [41280, 65278]],
- 'ETen-B5-V': [[0, 128], [41280, 65278]],
- 'ETenms-B5-H': [[0, 128], [41280, 65278]],
- 'ETenms-B5-V': [[0, 128], [41280, 65278]],
- 'ETHK-B5-H': [[0, 128], [34624, 65278]],
- 'ETHK-B5-V': [[0, 128], [34624, 65278]],
- 'HKdla-B5-H': [[0, 128], [41280, 65278]],
- 'HKdla-B5-V': [[0, 128], [41280, 65278]],
- 'HKdlb-B5-H': [[0, 128], [36416, 65278]],
- 'HKdlb-B5-V': [[0, 128], [36416, 65278]],
- 'HKgccs-B5-H': [[0, 128], [35392, 65278]],
- 'HKgccs-B5-V': [[0, 128], [35392, 65278]],
- 'HKm314-B5-H': [[0, 128], [41280, 65278]],
- 'HKm314-B5-V': [[0, 128], [41280, 65278]],
- 'HKm471-B5-H': [[0, 128], [41280, 65278]],
- 'HKm471-B5-V': [[0, 128], [41280, 65278]],
- 'HKscs-B5-H': [[0, 128], [34624, 65278]],
- 'HKscs-B5-V': [[0, 128], [34624, 65278]],
- 'UniCNS-UCS2-H': [[], [0, 55295, 57344, 65535]],
- 'UniCNS-UCS2-V': [[], [0, 55295, 57344, 65535]],
- 'UniCNS-UTF16-H': [[], [0, 55295, 57344, 65535], [],
- [3623934976, 3690979327]],
- 'UniCNS-UTF16-V': [[], [0, 55295, 57344, 65535], [],
- [3623934976, 3690979327]],
- 'Adobe-GB1-0': [[], [0, 7935]],
- 'Adobe-GB1-1': [[], [0, 9983]],
- 'Adobe-GB1-2': [[], [0, 22271]],
- 'Adobe-GB1-3': [[], [0, 22527]],
- 'Adobe-GB1-4': [[], [0, 29183]],
- 'Adobe-GB1-5': [[], [0, 30463]],
- 'Adobe-GB1-UCS2': [[], [0, 65535]],
- 'GB-EUC-H': [[0, 128], [41377, 65278]],
- 'GB-EUC-V': [[0, 128], [41377, 65278]],
- 'GB-H': [[], [8481, 32382]],
- 'GB-V': [[], [8481, 32382]],
- 'GBK-EUC-H': [[0, 128], [33088, 65278]],
- 'GBK-EUC-V': [[0, 128], [33088, 65278]],
- 'GBK2K-H': [[0, 127], [33088, 65278], [], [2167439664, 4265213497]],
- 'GBK2K-V': [[0, 127], [33088, 65278], [], [2167439664, 4265213497]],
- 'GBKp-EUC-H': [[0, 128], [33088, 65278]],
- 'GBKp-EUC-V': [[0, 128], [33088, 65278]],
- 'GBpc-EUC-H': [[0, 128, 253, 255], [41377, 64766]],
- 'GBpc-EUC-V': [[0, 128, 253, 255], [41377, 64766]],
- 'GBT-EUC-H': [[0, 128], [41377, 65278]],
- 'GBT-EUC-V': [[0, 128], [41377, 65278]],
- 'GBT-H': [[], [8481, 32382]],
- 'GBT-V': [[], [8481, 32382]],
- 'GBTpc-EUC-H': [[0, 128, 253, 255], [41377, 64766]],
- 'GBTpc-EUC-V': [[0, 128, 253, 255], [41377, 64766]],
- 'UniGB-UCS2-H': [[], [0, 55295, 57344, 65535]],
- 'UniGB-UCS2-V': [[], [0, 55295, 57344, 65535]],
- 'UniGB-UTF16-H': [[], [0, 55295, 57344, 65535], [], [3623934976, 3690979327]],
- 'UniGB-UTF16-V': [[], [0, 55295, 57344, 65535], [], [3623934976, 3690979327]],
- '78-EUC-H': [[0, 128], [36512, 36575, 41377, 65278]],
- '78-EUC-V': [[0, 128], [36512, 36575, 41377, 65278]],
- '78-H': [[], [8481, 32382]],
- '78-RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
- '78-RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
- '78-V': [[], [8481, 32382]],
- '78ms-RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
- '78ms-RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
- '83pv-RKSJ-H': [[0, 128, 160, 223, 253, 255], [33088, 40956, 57408, 64764]],
- '90ms-RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
- '90ms-RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
- '90msp-RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
- '90msp-RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
- '90pv-RKSJ-H': [[0, 128, 160, 223, 253, 255], [33088, 40956, 57408, 64764]],
- '90pv-RKSJ-V': [[0, 128, 160, 223, 253, 255], [33088, 40956, 57408, 64764]],
- 'Add-H': [[], [8481, 32382]],
- 'Add-RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
- 'Add-RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
- 'Add-V': [[], [8481, 32382]],
- 'Adobe-Japan1-0': [[], [0, 8447]],
- 'Adobe-Japan1-1': [[], [0, 8447]],
- 'Adobe-Japan1-2': [[], [0, 8959]],
- 'Adobe-Japan1-3': [[], [0, 9471]],
- 'Adobe-Japan1-4': [[], [0, 15615]],
- 'Adobe-Japan1-5': [[], [0, 20479]],
- 'Adobe-Japan1-6': [[], [0, 23295]],
- 'Adobe-Japan1-UCS2': [[], [0, 65535]],
- 'Adobe-Japan2-0': [[], [0, 6143]],
- 'EUC-H': [[0, 128], [36512, 36575, 41377, 65278]],
- 'EUC-V': [[0, 128], [36512, 36575, 41377, 65278]],
- 'Ext-H': [[], [8481, 32382]],
- 'Ext-RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
- 'Ext-RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
- 'Ext-V': [[], [8481, 32382]],
- 'H': [[], [8481, 32382]],
- 'Hankaku': [[0, 255], []],
- 'Hiragana': [[0, 255], []],
- 'Hojo-EUC-H': [[], [], [9413025, 9436926], []],
- 'Hojo-EUC-V': [[], [], [9413025, 9436926], []],
- 'Hojo-H': [[], [8481, 32382]],
- 'Hojo-V': [[], [8481, 32382]],
- 'Katakana': [[0, 255], []],
- 'NWP-H': [[], [8481, 32382]],
- 'NWP-V': [[], [8481, 32382]],
- 'RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
- 'RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
- 'Roman': [[0, 255], []],
- 'UniHojo-UCS2-H': [[], [0, 55295, 57344, 65535]],
- 'UniHojo-UCS2-V': [[], [0, 55295, 57344, 65535]],
- 'UniHojo-UTF16-H': [[], [0, 55295, 57344, 65535], [],
- [3623934976, 3690979327]],
- 'UniHojo-UTF16-V': [[], [0, 55295, 57344, 65535], [],
- [3623934976, 3690979327]],
- 'UniJIS-UCS2-H': [[], [0, 55295, 57344, 65535]],
- 'UniJIS-UCS2-HW-H': [[], [0, 55295, 57344, 65535]],
- 'UniJIS-UCS2-HW-V': [[], [0, 55295, 57344, 65535]],
- 'UniJIS-UCS2-V': [[], [0, 55295, 57344, 65535]],
- 'UniJIS-UTF16-H': [[], [0, 55295, 57344, 65535], [],
- [3623934976, 3690979327]],
- 'UniJIS-UTF16-V': [[], [0, 55295, 57344, 65535], [],
- [3623934976, 3690979327]],
- 'UniJISPro-UCS2-HW-V': [[], [0, 55295, 57344, 65535]],
- 'UniJISPro-UCS2-V': [[], [0, 55295, 57344, 65535]],
- 'V': [[], [8481, 32382]],
- 'WP-Symbol': [[0, 255], []],
- 'Adobe-Korea1-0': [[], [0, 9471]],
- 'Adobe-Korea1-1': [[], [0, 18175]],
- 'Adobe-Korea1-2': [[], [0, 18431]],
- 'Adobe-Korea1-UCS2': [[], [0, 65535]],
- 'KSC-EUC-H': [[0, 128], [41377, 65278]],
- 'KSC-EUC-V': [[0, 128], [41377, 65278]],
- 'KSC-H': [[], [8481, 32382]],
- 'KSC-Johab-H': [[0, 128], [33857, 54270, 55345, 57086, 57393, 63998]],
- 'KSC-Johab-V': [[0, 128], [33857, 54270, 55345, 57086, 57393, 63998]],
- 'KSC-V': [[], [8481, 32382]],
- 'KSCms-UHC-H': [[0, 128], [33089, 65278]],
- 'KSCms-UHC-HW-H': [[0, 128], [33089, 65278]],
- 'KSCms-UHC-HW-V': [[0, 128], [33089, 65278]],
- 'KSCms-UHC-V': [[0, 128], [33089, 65278]],
- 'KSCpc-EUC-H': [[0, 132, 254, 255], [41281, 65022]],
- 'KSCpc-EUC-V': [[0, 132, 254, 255], [41281, 65022]],
- 'UniKS-UCS2-H': [[], [0, 55295, 57344, 65535]],
- 'UniKS-UCS2-V': [[], [0, 55295, 57344, 65535]],
- 'UniKS-UTF16-H': [[], [0, 55295, 57344, 65535], [], [3623934976, 3690979327]],
- 'UniKS-UTF16-V': [[], [0, 55295, 57344, 65535], [], [3623934976, 3690979327]]
-};
+var BUILT_IN_CMAPS = [
+// << Start unicode maps.
+'Adobe-GB1-UCS2',
+'Adobe-CNS1-UCS2',
+'Adobe-Japan1-UCS2',
+'Adobe-Korea1-UCS2',
+// >> End unicode maps.
+'78-EUC-H',
+'78-EUC-V',
+'78-H',
+'78-RKSJ-H',
+'78-RKSJ-V',
+'78-V',
+'78ms-RKSJ-H',
+'78ms-RKSJ-V',
+'83pv-RKSJ-H',
+'90ms-RKSJ-H',
+'90ms-RKSJ-V',
+'90msp-RKSJ-H',
+'90msp-RKSJ-V',
+'90pv-RKSJ-H',
+'90pv-RKSJ-V',
+'Add-H',
+'Add-RKSJ-H',
+'Add-RKSJ-V',
+'Add-V',
+'Adobe-CNS1-0',
+'Adobe-CNS1-1',
+'Adobe-CNS1-2',
+'Adobe-CNS1-3',
+'Adobe-CNS1-4',
+'Adobe-CNS1-5',
+'Adobe-CNS1-6',
+'Adobe-GB1-0',
+'Adobe-GB1-1',
+'Adobe-GB1-2',
+'Adobe-GB1-3',
+'Adobe-GB1-4',
+'Adobe-GB1-5',
+'Adobe-Japan1-0',
+'Adobe-Japan1-1',
+'Adobe-Japan1-2',
+'Adobe-Japan1-3',
+'Adobe-Japan1-4',
+'Adobe-Japan1-5',
+'Adobe-Japan1-6',
+'Adobe-Korea1-0',
+'Adobe-Korea1-1',
+'Adobe-Korea1-2',
+'B5-H',
+'B5-V',
+'B5pc-H',
+'B5pc-V',
+'CNS-EUC-H',
+'CNS-EUC-V',
+'CNS1-H',
+'CNS1-V',
+'CNS2-H',
+'CNS2-V',
+'ETHK-B5-H',
+'ETHK-B5-V',
+'ETen-B5-H',
+'ETen-B5-V',
+'ETenms-B5-H',
+'ETenms-B5-V',
+'EUC-H',
+'EUC-V',
+'Ext-H',
+'Ext-RKSJ-H',
+'Ext-RKSJ-V',
+'Ext-V',
+'GB-EUC-H',
+'GB-EUC-V',
+'GB-H',
+'GB-V',
+'GBK-EUC-H',
+'GBK-EUC-V',
+'GBK2K-H',
+'GBK2K-V',
+'GBKp-EUC-H',
+'GBKp-EUC-V',
+'GBT-EUC-H',
+'GBT-EUC-V',
+'GBT-H',
+'GBT-V',
+'GBTpc-EUC-H',
+'GBTpc-EUC-V',
+'GBpc-EUC-H',
+'GBpc-EUC-V',
+'H',
+'HKdla-B5-H',
+'HKdla-B5-V',
+'HKdlb-B5-H',
+'HKdlb-B5-V',
+'HKgccs-B5-H',
+'HKgccs-B5-V',
+'HKm314-B5-H',
+'HKm314-B5-V',
+'HKm471-B5-H',
+'HKm471-B5-V',
+'HKscs-B5-H',
+'HKscs-B5-V',
+'Hankaku',
+'Hiragana',
+'KSC-EUC-H',
+'KSC-EUC-V',
+'KSC-H',
+'KSC-Johab-H',
+'KSC-Johab-V',
+'KSC-V',
+'KSCms-UHC-H',
+'KSCms-UHC-HW-H',
+'KSCms-UHC-HW-V',
+'KSCms-UHC-V',
+'KSCpc-EUC-H',
+'KSCpc-EUC-V',
+'Katakana',
+'NWP-H',
+'NWP-V',
+'RKSJ-H',
+'RKSJ-V',
+'Roman',
+'UniCNS-UCS2-H',
+'UniCNS-UCS2-V',
+'UniCNS-UTF16-H',
+'UniCNS-UTF16-V',
+'UniCNS-UTF32-H',
+'UniCNS-UTF32-V',
+'UniCNS-UTF8-H',
+'UniCNS-UTF8-V',
+'UniGB-UCS2-H',
+'UniGB-UCS2-V',
+'UniGB-UTF16-H',
+'UniGB-UTF16-V',
+'UniGB-UTF32-H',
+'UniGB-UTF32-V',
+'UniGB-UTF8-H',
+'UniGB-UTF8-V',
+'UniJIS-UCS2-H',
+'UniJIS-UCS2-HW-H',
+'UniJIS-UCS2-HW-V',
+'UniJIS-UCS2-V',
+'UniJIS-UTF16-H',
+'UniJIS-UTF16-V',
+'UniJIS-UTF32-H',
+'UniJIS-UTF32-V',
+'UniJIS-UTF8-H',
+'UniJIS-UTF8-V',
+'UniJIS2004-UTF16-H',
+'UniJIS2004-UTF16-V',
+'UniJIS2004-UTF32-H',
+'UniJIS2004-UTF32-V',
+'UniJIS2004-UTF8-H',
+'UniJIS2004-UTF8-V',
+'UniJISPro-UCS2-HW-V',
+'UniJISPro-UCS2-V',
+'UniJISPro-UTF8-V',
+'UniJISX0213-UTF32-H',
+'UniJISX0213-UTF32-V',
+'UniJISX02132004-UTF32-H',
+'UniJISX02132004-UTF32-V',
+'UniKS-UCS2-H',
+'UniKS-UCS2-V',
+'UniKS-UTF16-H',
+'UniKS-UTF16-V',
+'UniKS-UTF32-H',
+'UniKS-UTF32-V',
+'UniKS-UTF8-H',
+'UniKS-UTF8-V',
+'V',
+'WP-Symbol'];
// CMap, not to be confused with TrueType's cmap.
var CMap = (function CMapClosure() {
- function CMap() {
+ function CMap(builtInCMap) {
// Codespace ranges are stored as follows:
// [[1BytePairs], [2BytePairs], [3BytePairs], [4BytePairs]]
// where nBytePairs are ranges e.g. [low1, high1, low2, high2, ...]
this.codespaceRanges = [[], [], [], []];
+ this.numCodespaceRanges = 0;
this.map = [];
this.vertical = false;
+ this.useCMap = null;
+ this.builtInCMap = builtInCMap;
}
CMap.prototype = {
addCodespaceRange: function(n, low, high) {
this.codespaceRanges[n - 1].push(low, high);
+ this.numCodespaceRanges++;
},
mapRange: function(low, high, dstLow) {
@@ -395,17 +410,33 @@ var CMapFactory = (function CMapFactoryClosure() {
error('Invalid codespace range.');
}
- function parseCmap(cMap, lexer) {
+ function parseWMode(cMap, lexer) {
+ var obj = lexer.getObj();
+ if (isInt(obj)) {
+ cMap.vertical = !!obj;
+ }
+ }
+
+ function parseCMap(cMap, lexer, builtInCMapUrl, useCMap) {
+ var previous;
+ var embededUseCMap;
objLoop: while (true) {
var obj = lexer.getObj();
if (isEOF(obj)) {
break;
+ } else if (isName(obj)) {
+ if (obj.name === 'WMode') {
+ parseWMode(cMap, lexer);
+ }
+ previous = obj;
} else if (isCmd(obj)) {
switch (obj.cmd) {
- case 'endcMap':
+ case 'endcmap':
break objLoop;
- case 'usecMap':
- // TODO
+ case 'usecmap':
+ if (isName(previous)) {
+ embededUseCMap = previous.name;
+ }
break;
case 'begincodespacerange':
parseCodespaceRange(cMap, lexer);
@@ -425,30 +456,66 @@ var CMapFactory = (function CMapFactoryClosure() {
}
}
}
+
+ if (!useCMap && embededUseCMap) {
+ // Load the usecmap definition from the file only if there wasn't one
+ // specified.
+ useCMap = embededUseCMap;
+ }
+ if (useCMap) {
+ cMap.useCMap = createBuiltInCMap(useCMap, builtInCMapUrl);
+ // If there aren't any code space ranges defined clone all the parent ones
+ // into this cMap.
+ if (cMap.numCodespaceRanges === 0) {
+ var useCodespaceRanges = cMap.useCMap.codespaceRanges;
+ for (var i = 0; i < useCodespaceRanges.length; i++) {
+ cMap.codespaceRanges[i] = useCodespaceRanges[i].slice();
+ }
+ cMap.numCodespaceRanges = cMap.useCMap.numCodespaceRanges;
+ }
+ // Merge the map into the current one, making sure not to override
+ // any previously defined entries.
+ for (var key in cMap.useCMap.map) {
+ if (key in cMap.map) {
+ continue;
+ }
+ cMap.map[key] = cMap.useCMap.map[key];
+ }
+ }
+ }
+
+ function createBuiltInCMap(name, builtInCMapUrl) {
+ if (name === 'Identity-H') {
+ return new IdentityCMap(false, 2);
+ } else if (name === 'Identity-V') {
+ return new IdentityCMap(true, 2);
+ }
+ if (BUILT_IN_CMAPS.indexOf(name) === -1) {
+ error('Unknown cMap name: ' + name);
+ }
+
+ var request = new XMLHttpRequest();
+ var url = builtInCMapUrl + name;
+ request.open('GET', url, false);
+ request.send(null);
+ if (request.status === 0 && /^https?:/i.test(url)) {
+ error('Unable to get cMap at: ' + url);
+ }
+ var cMap = new CMap(true);
+ var lexer = new Lexer(new StringStream(request.responseText));
+ parseCMap(cMap, lexer, builtInCMapUrl, null);
+ return cMap;
}
+
return {
- create: function (encoding) {
+ create: function (encoding, builtInCMapUrl, useCMap) {
if (isName(encoding)) {
- switch (encoding.name) {
- case 'Identity-H':
- return new IdentityCMap(false, 2);
- case 'Identity-V':
- return new IdentityCMap(true, 2);
- default:
- if (encoding.name in CMAP_CODESPACES) {
- // XXX: Temporary hack so the correct amount of bytes are read in
- // CMap.readCharCode.
- var cMap = new CMap();
- cMap.codespaceRanges = CMAP_CODESPACES[encoding.name];
- return cMap;
- }
- return null;
- }
+ return createBuiltInCMap(encoding.name, builtInCMapUrl);
} else if (isStream(encoding)) {
var cMap = new CMap();
var lexer = new Lexer(encoding);
try {
- parseCmap(cMap, lexer);
+ parseCMap(cMap, lexer, builtInCMapUrl, useCMap);
} catch (e) {
warn('Invalid CMap data. ' + e);
}
diff --git a/src/core/evaluator.js b/src/core/evaluator.js
index 2e85cbd..fd68580 100644
--- a/src/core/evaluator.js
+++ b/src/core/evaluator.js
@@ -828,41 +828,21 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}
// Based on 9.6.6 of the spec the encoding can come from multiple places
- // but should be prioritized in the following order:
- // 1. Encoding dictionary
- // 2. Encoding within font file (Type1 or Type1C)
- // 3. Default (depends on font type)
- // Differences applied to the above.
- // Note: we don't fill in the encoding from the font file(2) here but use
- // the flag overridableEncoding to signal that the font can override the
- // encoding if it has one built in.
- var overridableEncoding = true;
- var hasEncoding = false;
- var flags = properties.flags;
+ // and depends on the font type. The base encoding and differences are
+ // read here, but the encoding that is actually used is chosen during
+ // glyph mapping in the font.
+ // TODO: Loading the built in encoding in the font would allow the
+ // differences to be merged in here and not require us to hold on to them.
var differences = [];
- var baseEncoding = properties.type === 'TrueType' ?
- Encodings.WinAnsiEncoding :
- Encodings.StandardEncoding;
- // The Symbolic attribute can be misused for regular fonts
- // Heuristic: we have to check if the font is a standard one and has
- // Symbolic font name
- if (!!(flags & FontFlags.Symbolic)) {
- baseEncoding = !properties.file && /Symbol/i.test(properties.name) ?
- Encodings.SymbolSetEncoding : Encodings.MacRomanEncoding;
- }
+ var baseEncodingName = null;
if (dict.has('Encoding')) {
var encoding = dict.get('Encoding');
if (isDict(encoding)) {
- var baseName = encoding.get('BaseEncoding');
- if (baseName) {
- overridableEncoding = false;
- hasEncoding = true;
- baseEncoding = Encodings[baseName.name];
- }
-
+ baseEncodingName = encoding.get('BaseEncoding');
+ baseEncodingName = isName(baseEncodingName) ? baseEncodingName.name :
+ null;
// Load the differences between the base and original
if (encoding.has('Differences')) {
- hasEncoding = true;
var diffEncoding = encoding.get('Differences');
var index = 0;
for (var j = 0, jj = diffEncoding.length; j < jj; j++) {
@@ -874,38 +854,44 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}
}
} else if (isName(encoding)) {
- overridableEncoding = false;
- hasEncoding = true;
- var currentEncoding = Encodings[encoding.name];
-
- // Some bad PDF files contain fonts whose encoding name is not among
- // the predefined encodings, causing baseEncoding to be undefined.
- // In this case, fallback to using the baseEncoding as defined above
- // and let the font override the encoding if one is available.
- if (currentEncoding) {
- baseEncoding = currentEncoding;
- } else {
- overridableEncoding = true;
- }
+ baseEncodingName = encoding.name;
} else {
error('Encoding is not a Name nor a Dict');
}
+ // According to table 114 if the encoding is a named encoding it must be
+ // one of these predefined encodings.
+ if ((baseEncodingName !== 'MacRomanEncoding' &&
+ baseEncodingName !== 'MacExpertEncoding' &&
+ baseEncodingName !== 'WinAnsiEncoding')) {
+ baseEncodingName = null;
+ }
+ }
+
+ if (baseEncodingName) {
+ properties.defaultEncoding = Encodings[baseEncodingName].slice();
+ } else {
+ var encoding = properties.type === 'TrueType' ?
+ Encodings.WinAnsiEncoding :
+ Encodings.StandardEncoding;
+ // The Symbolic attribute can be misused for regular fonts
+ // Heuristic: we also have to check if the font is a standard one
+ if (!!(properties.flags & FontFlags.Symbolic)) {
+ encoding = !properties.file && /Symbol/i.test(properties.name) ?
+ Encodings.SymbolSetEncoding : Encodings.MacRomanEncoding;
+ }
+ properties.defaultEncoding = encoding;
}
properties.differences = differences;
- properties.baseEncoding = baseEncoding;
- properties.hasEncoding = hasEncoding;
- properties.overridableEncoding = overridableEncoding;
+ properties.baseEncodingName = baseEncodingName;
+ properties.dict = dict;
},
- readToUnicode: function PartialEvaluator_readToUnicode(toUnicode, xref,
- properties) {
+ readToUnicode: function PartialEvaluator_readToUnicode(toUnicode) {
var cmapObj = toUnicode;
var charToUnicode = [];
if (isName(cmapObj)) {
- var isIdentityMap = cmapObj.name.substr(0, 9) == 'Identity-';
- if (!isIdentityMap)
- error('ToUnicode file cmap translation not implemented');
+ return CMapFactory.create(cmapObj).map;
} else if (isStream(cmapObj)) {
var cmap = CMapFactory.create(cmapObj).map;
// Convert UTF-16BE
@@ -927,7 +913,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
});
return cmap;
}
- return charToUnicode;
+ return null;
},
readCidToGidMap: function PartialEvaluator_readCidToGidMap(cidToGidStream) {
// Extract the encoding from the CIDToGIDMap
@@ -1006,7 +992,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
if (isName(baseFontName)) {
var metrics = this.getBaseFontMetrics(baseFontName.name);
- glyphsWidths = metrics.widths;
+ glyphsWidths = this.buildCharCodeToWidth(metrics.widths,
+ properties);
defaultWidth = metrics.defaultWidth;
}
}
@@ -1074,6 +1061,25 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
};
},
+ buildCharCodeToWidth: function PartialEvaluator_bulildCharCodeToWidth(
+ widthsByGlyphName, properties) {
+ var widths = Object.create(null);
+ var differences = properties.differences;
+ var encoding = properties.defaultEncoding;
+ for (var charCode = 0; charCode < 256; charCode++) {
+ if (charCode in differences &&
+ widthsByGlyphName[differences[charCode]]) {
+ widths[charCode] = widthsByGlyphName[differences[charCode]];
+ continue;
+ }
+ if (charCode in encoding && widthsByGlyphName[encoding[charCode]]) {
+ widths[charCode] = widthsByGlyphName[encoding[charCode]];
+ continue;
+ }
+ }
+ return widths;
+ },
+
translateFont: function PartialEvaluator_translateFont(dict,
xref) {
var baseDict = dict;
@@ -1135,6 +1141,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
lastChar: maxCharIndex
};
this.extractDataStructures(dict, dict, xref, properties);
+ properties.widths = this.buildCharCodeToWidth(metrics.widths,
+ properties);
return new Font(baseFontName, null, properties);
}
@@ -1212,12 +1220,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
var cidEncoding = baseDict.get('Encoding');
if (isName(cidEncoding)) {
properties.cidEncoding = cidEncoding.name;
- properties.vertical = /-V$/.test(cidEncoding.name);
}
- properties.cmap = CMapFactory.create(cidEncoding);
+ properties.cMap = CMapFactory.create(cidEncoding, PDFJS.cMapUrl, null);
+ properties.vertical = properties.cMap.vertical;
}
- this.extractWidths(dict, xref, descriptor, properties);
this.extractDataStructures(dict, baseDict, xref, properties);
+ this.extractWidths(dict, xref, descriptor, properties);
if (type.name === 'Type3') {
properties.coded = true;
diff --git a/src/core/fonts.js b/src/core/fonts.js
index 1d92856..e6bf1f6 100644
--- a/src/core/fonts.js
+++ b/src/core/fonts.js
@@ -18,14 +18,15 @@
ExpertSubsetCharset, FileReaderSync, GlyphsUnicode,
info, isArray, isNum, ISOAdobeCharset, Stream,
stringToBytes, TextDecoder, warn, Lexer, Util,
- FONT_IDENTITY_MATRIX, FontRendererFactory, shadow, isString */
+ FONT_IDENTITY_MATRIX, FontRendererFactory, shadow, isString,
+ IdentityCMap, Name, CMapFactory, PDFJS */
'use strict';
// Unicode Private Use Area
-var CMAP_GLYPH_OFFSET = 0xE000;
-var GLYPH_AREA_SIZE = 0x1900;
-var SYMBOLIC_FONT_GLYPH_OFFSET = 0xF000;
+var PRIVATE_USE_OFFSET_START = 0xE000;
+var PRIVATE_USE_OFFSET_END = 0xF8FF;
+var SKIP_PRIVATE_USE_RANGE_F000_TO_F01F = false;
// PDF Glyph Space Units are one Thousandth of a TextSpace Unit
// except for Type 3 fonts
@@ -425,37 +426,6 @@ var symbolsFonts = {
'Dingbats': true, 'Symbol': true, 'ZapfDingbats': true
};
-var CMapConverterList = {
- 'H': jis7ToUnicode,
- 'V': jis7ToUnicode,
- 'EUC-H': eucjpToUnicode,
- 'EUC-V': eucjpToUnicode,
- '83pv-RKSJ-H': sjis83pvToUnicode,
- '90pv-RKSJ-H': sjis90pvToUnicode,
- '90ms-RKSJ-H': sjisToUnicode,
- '90ms-RKSJ-V': sjisToUnicode,
- '90msp-RKSJ-H': sjisToUnicode,
- '90msp-RKSJ-V': sjisToUnicode,
- 'GBK-EUC-H': gbkToUnicode,
- 'GBKp-EUC-H': gbkToUnicode,
- 'B5pc-H': big5ToUnicode,
- 'ETenms-B5-H': big5ToUnicode,
- 'ETenms-B5-V': big5ToUnicode,
-};
-
-// CMaps using Hankaku (Halfwidth) Latin glyphs instead of proportional one.
-// We need to distinguish them to get correct widths from CIDFont dicts.
-var HalfwidthCMaps = {
- 'H': true,
- 'V': true,
- 'EUC-H': true,
- 'EUC-V': true,
- '90ms-RKSJ-H': true,
- '90ms-RKSJ-V': true,
- 'UniJIS-UCS2-HW-H': true,
- 'UniJIS-UCS2-HW-V': true
-};
-
// Glyph map for well-known standard fonts. Sometimes Ghostscript uses CID fonts
// but does not embed the CID to GID mapping. The mapping is incomplete for all
// glyphs, but common for some set of the standard fonts.
@@ -526,75 +496,13 @@ var GlyphMapForStandardFonts = {
'3316': 578, '3379': 42785, '3393': 1159, '3416': 8377
};
-var decodeBytes;
-if (typeof TextDecoder !== 'undefined') {
- // The encodings supported by TextDecoder can be found at:
- // http://encoding.spec.whatwg.org/#concept-encoding-get
- decodeBytes = function(bytes, encoding, fatal) {
- return new TextDecoder(encoding, {fatal: !!fatal}).decode(bytes);
- };
-} else if (typeof FileReaderSync !== 'undefined') {
- decodeBytes = function(bytes, encoding) {
- return new FileReaderSync().readAsText(new Blob([bytes]), encoding);
- };
-} else {
- // Clear the list so that decodeBytes will never be called.
- CMapConverterList = {};
-}
-
-function jis7ToUnicode(str) {
- var bytes = stringToBytes(str);
- var length = bytes.length;
- for (var i = 0; i < length; ++i) {
- bytes[i] |= 0x80;
- }
- return decodeBytes(bytes, 'euc-jp');
-}
-
-function eucjpToUnicode(str) {
- return decodeBytes(stringToBytes(str), 'euc-jp');
-}
-
-function sjisToUnicode(str) {
- return decodeBytes(stringToBytes(str), 'shift_jis');
-}
-
-function sjis83pvToUnicode(str) {
- var bytes = stringToBytes(str);
- try {
- // TODO: 83pv has incompatible mappings in ed40..ee9c range.
- return decodeBytes(bytes, 'shift_jis', true);
- } catch (e) {
- warn('Unsupported 83pv character found');
- // Just retry without checking errors for now.
- return decodeBytes(bytes, 'shift_jis');
- }
-}
-
-function sjis90pvToUnicode(str) {
- var bytes = stringToBytes(str);
- try {
- // TODO: 90pv has incompatible mappings in 8740..879c and eb41..ee9c.
- return decodeBytes(bytes, 'shift_jis', true);
- } catch (e) {
- warn('Unsupported 90pv character found');
- // Just retry without checking errors for now.
- return decodeBytes(bytes, 'shift_jis');
- }
-}
-
-function gbkToUnicode(str) {
- return decodeBytes(stringToBytes(str), 'gbk');
-}
-
-function big5ToUnicode(str) {
- return decodeBytes(stringToBytes(str), 'big5');
-}
-
// Some characters, e.g. copyrightserif, mapped to the private use area and
// might not be displayed using standard fonts. Mapping/hacking well-known chars
// to the similar equivalents in the normal characters range.
-function mapPrivateUseChars(code) {
+function mapSpecialUnicodeValues(code) {
+ if (code >= 0xFFF0 && code <= 0xFFFF) { // Specials unicode block.
+ return 0;
+ }
switch (code) {
case 0xF8E9: // copyrightsans
case 0xF6D9: // copyrightserif
@@ -787,12 +695,6 @@ function isRTLRangeFor(value) {
return false;
}
-function isSpecialUnicode(unicode) {
- return (unicode <= 0x1F || (unicode >= 127 && unicode < GLYPH_AREA_SIZE)) ||
- (unicode >= CMAP_GLYPH_OFFSET &&
- unicode < CMAP_GLYPH_OFFSET + GLYPH_AREA_SIZE);
-}
-
// The normalization table is obtained by filtering the Unicode characters
// database with <compat> entries.
var NormalizedUnicodes = {
@@ -2251,19 +2153,25 @@ var Font = (function FontClosure() {
this.defaultWidth = properties.defaultWidth;
this.composite = properties.composite;
this.wideChars = properties.wideChars;
- this.hasEncoding = properties.hasEncoding;
- this.cmap = properties.cmap;
+ this.cMap = properties.cMap;
this.ascent = properties.ascent / PDF_GLYPH_SPACE_UNITS;
this.descent = properties.descent / PDF_GLYPH_SPACE_UNITS;
-
this.fontMatrix = properties.fontMatrix;
+
+ var unicode = this.buildToUnicode(properties);
+ this.toUnicode = properties.toUnicode = unicode.toUnicode;
+ this.isIdentityUnicode = properties.isIdentityUnicode = unicode.isIdentity;
+
+ this.toFontChar = [];
+
if (properties.type == 'Type3') {
- this.encoding = properties.baseEncoding;
+ for (var charCode = 0; charCode < 256; charCode++) {
+ this.toFontChar[charCode] = this.differences[charCode] ||
+ properties.defaultEncoding[charCode];
+ }
return;
}
- // Trying to fix encoding using glyph CIDSystemInfo.
- this.loadCidToUnicode(properties);
this.cidEncoding = properties.cidEncoding;
this.vertical = properties.vertical;
if (this.vertical) {
@@ -2271,14 +2179,8 @@ var Font = (function FontClosure() {
this.defaultVMetrics = properties.defaultVMetrics;
}
- if (properties.toUnicode && properties.toUnicode.length > 0)
- this.toUnicode = properties.toUnicode;
- else
- this.rebuildToUnicode(properties);
-
- this.toFontChar = this.buildToFontChar(this.toUnicode);
-
if (!file) {
+ this.missingFile = true;
// The file data is not specified. Trying to fix the font name
// to be used with the canvas.font.
var fontName = name.replace(/[,_]/g, '-');
@@ -2295,9 +2197,6 @@ var Font = (function FontClosure() {
// if at least one width is present, remeasure all chars when exists
this.remeasure = Object.keys(this.widths).length > 0;
-
- this.encoding = properties.baseEncoding;
- this.noUnicodeAdaptation = true;
if (isStandardFont && type === 'CIDFontType2' &&
properties.cidEncoding.indexOf('Identity-') === 0) {
// Standard fonts might be embedded as CID font without glyph mapping.
@@ -2308,6 +2207,10 @@ var Font = (function FontClosure() {
}
this.toFontChar = map;
this.toUnicode = map;
+ } else {
+ for (var charCode in this.toUnicode) {
+ this.toFontChar[charCode] = this.toUnicode[charCode].charCodeAt(0);
+ }
}
this.loadedName = fontName.split('-')[0];
this.loading = false;
@@ -2368,11 +2271,6 @@ var Font = (function FontClosure() {
this.loading = true;
}
- var numFonts = 0;
- function getUniqueName() {
- return 'pdfFont' + numFonts++;
- }
-
function stringToArray(str) {
var array = [];
for (var i = 0, ii = str.length; i < ii; ++i)
@@ -2483,26 +2381,95 @@ var Font = (function FontClosure() {
file.virtualOffset += data.length;
}
- function getRanges(glyphs, deltas) {
+ /**
+ * Rebuilds the char code to glyph ID map by trying to replace the char codes
+ * with their unicode value. It also moves char codes that are in known
+ * problematic locations.
+ * @return {Object} Two properties:
+ * 'toFontChar' - maps original char codes(the value that will be read
+ * from commands such as show text) to the char codes that will be used in the
+ * font that we build
+ * 'charCodeToGlyphId' - maps the new font char codes to glyph ids
+ */
+ function adjustMapping(charCodeToGlyphId, properties) {
+ var toUnicode = properties.toUnicode;
+ var isSymbolic = !!(properties.flags & FontFlags.Symbolic);
+ var isIdentityUnicode = properties.isIdentityUnicode;
+ var newMap = Object.create(null);
+ var toFontChar = [];
+ var usedCharCodes = [];
+ var usedFontCharCodes = [];
+ var nextAvailableFontCharCode = PRIVATE_USE_OFFSET_START;
+ for (var originalCharCode in charCodeToGlyphId) {
+ originalCharCode |= 0;
+ var glyphId = charCodeToGlyphId[originalCharCode];
+ var fontCharCode = originalCharCode;
+ // First try to map the value to a unicode position if a non identity map
+ // was created.
+ if (!isIdentityUnicode && originalCharCode in toUnicode) {
+ var unicode = toUnicode[fontCharCode];
+ // TODO: Try to map ligatures to the correct spot.
+ if (unicode.length === 1) {
+ fontCharCode = unicode.charCodeAt(0);
+ }
+ }
+ // Try to move control characters, special characters and already mapped
+ // characters to the private use area since they will not be drawn by
+ // canvas if left in their current position. Also, move characters if the
+ // font was symbolic and there is only an identity unicode map since the
+ // characters probably aren't in the correct position (fixes an issue
+ // with firefox and thuluthfont).
+ if ((fontCharCode in usedFontCharCodes ||
+ fontCharCode <= 0x1f || // Control chars
+ fontCharCode === 0x7F || // Control char
+ fontCharCode === 0xAD || // Soft hyphen
+ (fontCharCode >= 0x80 && fontCharCode <= 0x9F) || // Control chars
+ (isSymbolic && isIdentityUnicode)) &&
+ nextAvailableFontCharCode <= PRIVATE_USE_OFFSET_END) { // Room left.
+ // Loop to try and find a free spot in the private use area.
+ do {
+ fontCharCode = nextAvailableFontCharCode++;
+
+ if (SKIP_PRIVATE_USE_RANGE_F000_TO_F01F && fontCharCode === 0xF000) {
+ fontCharCode = 0xF020;
+ nextAvailableFontCharCode = fontCharCode + 1;
+ }
+
+ } while (fontCharCode in usedFontCharCodes &&
+ nextAvailableFontCharCode <= PRIVATE_USE_OFFSET_END);
+ }
+
+ newMap[fontCharCode] = glyphId;
+ toFontChar[originalCharCode] = fontCharCode;
+ usedFontCharCodes[fontCharCode] = true;
+ }
+ return {
+ toFontChar: toFontChar,
+ charCodeToGlyphId: newMap
+ };
+ }
+
+ function getRanges(glyphs) {
// Array.sort() sorts by characters, not numerically, so convert to an
// array of characters.
var codes = [];
- var length = glyphs.length;
- for (var n = 0; n < length; ++n)
- codes.push({ unicode: glyphs[n].unicode, code: n });
+ for (var charCode in glyphs) {
+ codes.push({ fontCharCode: charCode | 0, glyphId: glyphs[charCode] });
+ }
codes.sort(function fontGetRangesSort(a, b) {
- return a.unicode - b.unicode;
+ return a.fontCharCode - b.fontCharCode;
});
// Split the sorted codes into ranges.
var ranges = [];
+ var length = codes.length;
for (var n = 0; n < length; ) {
- var start = codes[n].unicode;
- var codeIndices = [deltas ? deltas[codes[n].code] : codes[n].code + 1];
+ var start = codes[n].fontCharCode;
+ var codeIndices = [codes[n].glyphId];
++n;
var end = start;
- while (n < length && end + 1 == codes[n].unicode) {
- codeIndices.push(deltas ? deltas[codes[n].code] : codes[n].code + 1);
+ while (n < length && end + 1 == codes[n].fontCharCode) {
+ codeIndices.push(codes[n].glyphId);
++end;
++n;
if (end === 0xFFFF) { break; }
@@ -2513,9 +2480,8 @@ var Font = (function FontClosure() {
return ranges;
}
- function createCmapTable(glyphs, deltas) {
- var ranges = getRanges(glyphs, deltas);
-
+ function createCmapTable(glyphs) {
+ var ranges = getRanges(glyphs);
var numTables = ranges[ranges.length - 1][1] > 0xFFFF ? 2 : 1;
var cmap = '\x00\x00' + // version
string16(numTables) + // numTables
@@ -2677,8 +2643,8 @@ var Font = (function FontClosure() {
var lastCharIndex = 0;
if (charstrings) {
- for (var i = 0; i < charstrings.length; ++i) {
- var code = charstrings[i].unicode;
+ for (var code in charstrings) {
+ code |= 0;
if (firstCharIndex > code || !firstCharIndex)
firstCharIndex = code;
if (lastCharIndex < code)
@@ -2840,37 +2806,6 @@ var Font = (function FontClosure() {
return nameTable;
}
- // Normalize the charcodes in the cmap table into unicode values
- // that will work with the (3, 1) cmap table we will write out.
- function cmapCharcodeToUnicode(charcode, symbolic, platformId, encodingId) {
- var unicode;
- if (symbolic) {
- // These codes will be shifted into the range
- // SYMBOLIC_FONT_GLYPH_OFFSET to (SYMBOLIC_FONT_GLYPH_OFFSET + 0xFF)
- // so that they are not in the control character range that could
- // be displayed as spaces by browsers.
- if (platformId === 3 && encodingId === 0 ||
- platformId === 1 && encodingId === 0) {
- unicode = SYMBOLIC_FONT_GLYPH_OFFSET | (charcode & 0xFF);
- }
- } else {
- if (platformId === 3 && encodingId === 1) {
- // A (3, 1) table is alredy unicode (Microsoft Unicode format)
- unicode = charcode;
- } else if (platformId === 1 && encodingId === 0) {
- // TODO(mack): Should apply the changes to convert the
- // MacRomanEncoding to Mac OS Roman encoding in 9.6.6.4
- // table 115 of the pdf spec
- var glyphName = Encodings.MacRomanEncoding[charcode];
- if (glyphName) {
- unicode = GlyphsUnicode[glyphName];
- }
- }
- }
- return unicode;
- }
-
-
Font.prototype = {
name: null,
font: null,
@@ -2934,34 +2869,11 @@ var Font = (function FontClosure() {
};
}
- function createGlyphNameMap(glyphs, ids, properties) {
- var glyphNames = properties.glyphNames;
- if (!glyphNames) {
- properties.glyphNameMap = {};
- return;
- }
- var glyphsLength = glyphs.length;
- var glyphNameMap = {};
- var encoding = [];
- for (var i = 0; i < glyphsLength; ++i) {
- var glyphName = glyphNames[ids[i]];
- if (!glyphName)
- continue;
- var unicode = glyphs[i].unicode;
- glyphNameMap[glyphName] = unicode;
- var code = glyphs[i].code;
- encoding[code] = glyphName;
- }
- properties.glyphNameMap = glyphNameMap;
- if (properties.overridableEncoding)
- properties.baseEncoding = encoding;
- }
-
/**
* Read the appropriate subtable from the cmap according to 9.6.6.4 from
* PDF spec
*/
- function readCmapTable(cmap, font, hasEncoding, isSymbolicFont) {
+ function readCmapTable(cmap, font, isSymbolicFont) {
var start = (font.start ? font.start : 0) + cmap.offset;
font.pos = start;
@@ -2969,73 +2881,36 @@ var Font = (function FontClosure() {
var numTables = int16(font.getBytes(2));
var potentialTable;
- var foundPreferredTable;
- // There's an order of preference in terms of which cmap subtable we
- // want to use. So scan through them to find our preferred table.
+ var canBreak = false;
+ // There's an order of preference in terms of which cmap subtable to
+ // use:
+ // - non-symbolic fonts the preference is a 3,1 table then a 1,0 table
+ // - symbolic fonts the preference is a 3,0 table then a 1,0 table
+ // The following takes advantage of the fact that the tables are sorted
+ // to work.
for (var i = 0; i < numTables; i++) {
var platformId = int16(font.getBytes(2));
var encodingId = int16(font.getBytes(2));
var offset = int32(font.getBytes(4));
var useTable = false;
- var canBreak = false;
- // The following block implements the following from the spec:
- //
- // When the font has no Encoding entry, or the font descriptor’s
- // Symbolic flag is set (in which case the Encoding entry
- // is ignored), this shall occur:
- // - If the font contains a (3, 0) subtable, the range of
- // - Otherwise, the (1, 0) subtable will be used.
- // Otherwise, if the font does have an encoding:
- // - Use the (3, 1) cmap subtable
- // - Otherwise, use the (1, 0) subtable if present
- //
- // The following diverges slightly from the above spec in order
- // to handle the case that hasEncoding and isSymbolicFont are both
- // true. In this, based on the ordering of the rules in the spec,
- // my interpretation is that we should be acting as if the font is
- // symbolic.
- //
- // However, in this case, the test pdf 'preistabelle.pdf'
- // is interpreting this case as a non-symbolic font. In this case
- // though, 'presitabelle.pdf' does contain a (3, 1) table and does
- // not contain a (3, 0) table which indicates it is non-symbolic.
- //
- // Thus, I am using this heurisitic of looking at which table is
- // found to truly determine whether or not the font is symbolic.
- // That is, if the specific symbolic/non-symbolic font specific
- // tables (3, 0) or (3, 1) is found, that information is used for
- // deciding if the font is symbolic or not.
- //
- // TODO(mack): This section needs some more thought on whether the
- // heuristic is good enough. For now, it passes all the regression
- // tests.
- if (isSymbolicFont && platformId === 3 && encodingId === 0) {
+ if (platformId == 1 && encodingId === 0) {
useTable = true;
- canBreak = true;
- foundPreferredTable = true;
- } else if (hasEncoding && platformId === 3 && encodingId === 1) {
+ // Continue the loop since there still may be a higher priority
+ // table.
+ } else if (!isSymbolicFont && platformId === 3 && encodingId === 1) {
useTable = true;
canBreak = true;
- foundPreferredTable = true;
- // Update the isSymbolicFont based on this heuristic
- isSymbolicFont = false;
- } else if (platformId === 1 && encodingId === 0 &&
- !foundPreferredTable) {
- useTable = true;
- foundPreferredTable = true;
- } else if (!potentialTable) {
- // We will use an arbitrary table if we cannot find a preferred
- // table
+ } else if (isSymbolicFont && platformId === 3 && encodingId === 0) {
useTable = true;
+ canBreak = true;
}
if (useTable) {
potentialTable = {
platformId: platformId,
encodingId: encodingId,
- offset: offset,
- isSymbolicFont: isSymbolicFont
+ offset: offset
};
}
if (canBreak) {
@@ -3044,16 +2919,8 @@ var Font = (function FontClosure() {
}
if (!potentialTable) {
- error('Could not find a cmap table');
- return;
- }
-
- if (!foundPreferredTable) {
- warn('Did not find a cmap of suitable format. Interpreting (' +
- potentialTable.platformId + ', ' + potentialTable.encodingId +
- ') as (3, 1) table');
- potentialTable.platformId = 3;
- potentialTable.encodingId = 1;
+ warn('Could not find a preferred cmap table.');
+ return [];
}
font.pos = start + potentialTable.offset;
@@ -3072,7 +2939,7 @@ var Font = (function FontClosure() {
continue;
}
mappings.push({
- charcode: j,
+ charCode: j,
glyphId: index
});
}
@@ -3132,7 +2999,7 @@ var Font = (function FontClosure() {
continue;
}
mappings.push({
- charcode: j,
+ charCode: j,
glyphId: glyphId
});
}
@@ -3150,10 +3017,10 @@ var Font = (function FontClosure() {
var ids = [];
for (var j = 0; j < entryCount; j++) {
var glyphId = int16(font.getBytes(2));
- var charcode = firstCode + j;
+ var charCode = firstCode + j;
mappings.push({
- charcode: charcode,
+ charCode: charCode,
glyphId: glyphId
});
}
@@ -3163,10 +3030,10 @@ var Font = (function FontClosure() {
// removing duplicate entries
mappings.sort(function (a, b) {
- return a.charcode - b.charcode;
+ return a.charCode - b.charCode;
});
for (var i = 1; i < mappings.length; i++) {
- if (mappings[i - 1].charcode === mappings[i].charcode) {
+ if (mappings[i - 1].charCode === mappings[i].charCode) {
mappings.splice(i, 1);
i--;
}
@@ -3175,7 +3042,6 @@ var Font = (function FontClosure() {
return {
platformId: potentialTable.platformId,
encodingId: potentialTable.encodingId,
- isSymbolicFont: potentialTable.isSymbolicFont,
mappings: mappings,
hasShortCmap: hasShortCmap
};
@@ -3901,229 +3767,121 @@ var Font = (function FontClosure() {
}
}
- var glyphs, ids;
+ var charCodeToGlyphId = [];
if (properties.type == 'CIDFontType2') {
- // Replace the old CMAP table with a shiny new one
- // Type2 composite fonts map characters directly to glyphs so the cmap
- // table must be replaced.
- // canvas fillText will reencode some characters even if the font has a
- // glyph at that position - e.g. newline is converted to a space and
- // U+00AD (soft hyphen) is not drawn.
- // So, offset all the glyphs by 0xFF to avoid these cases and use
- // the encoding to map incoming characters to the new glyph positions
- if (!tables.cmap) {
- tables.cmap = {
- tag: 'cmap',
- data: null
- };
- }
-
var cidToGidMap = properties.cidToGidMap || [];
- var gidToCidMap = [0];
- if (cidToGidMap.length > 0) {
- for (var j = cidToGidMap.length - 1; j >= 0; j--) {
- var gid = cidToGidMap[j];
- if (gid)
- gidToCidMap[gid] = j;
+ var cMap = properties.cMap.map;
+ for (var charCode in cMap) {
+ charCode |= 0;
+ var cid = cMap[charCode];
+ assert(cid.length === 1, 'Max size of CID is 65,535');
+ cid = cid.charCodeAt(0);
+ var glyphId = -1;
+ if (cidToGidMap.length === 0) {
+ glyphId = charCode;
+ } else if (cid in cidToGidMap) {
+ glyphId = cidToGidMap[cid];
}
- // filling the gaps using CID above the CIDs currently used in font
- var nextCid = cidToGidMap.length;
- for (var i = 1; i < numGlyphs; i++) {
- if (!gidToCidMap[i])
- gidToCidMap[i] = nextCid++;
- }
- } else {
- for (var i = 1; i < numGlyphs; i++) {
- gidToCidMap[i] = i;
- }
- if (dupFirstEntry) {
- gidToCidMap[numGlyphs - 1] = 0;
+ if (glyphId >= 0 && glyphId < numGlyphs) {
+ charCodeToGlyphId[charCode] = glyphId;
}
}
-
- glyphs = [];
- ids = [];
-
- var usedUnicodes = [];
- var unassignedUnicodeItems = [];
- var toFontChar = this.cidToFontChar || this.toFontChar;
- for (var i = 1; i < numGlyphs; i++) {
- var cid = gidToCidMap[i];
- var unicode = toFontChar[cid];
- if (!unicode || typeof unicode !== 'number' ||
- isSpecialUnicode(unicode) || unicode in usedUnicodes) {
- unassignedUnicodeItems.push(i);
- continue;
- }
- usedUnicodes[unicode] = true;
- glyphs.push({ unicode: unicode, code: cid });
- ids.push(i);
- }
-
- // unassigned codepoints will never be used for non-Identity CMap
- // because the input will be Unicode
- if (!this.cidToFontChar) {
- // trying to fit as many unassigned symbols as we can
- // in the range allocated for the user defined symbols
- var unusedUnicode = CMAP_GLYPH_OFFSET;
- for (var j = 0, jj = unassignedUnicodeItems.length; j < jj; j++) {
- var i = unassignedUnicodeItems[j];
- var cid = gidToCidMap[i];
- while (unusedUnicode in usedUnicodes)
- unusedUnicode++;
- if (unusedUnicode >= CMAP_GLYPH_OFFSET + GLYPH_AREA_SIZE)
- break;
- var unicode = unusedUnicode++;
- this.toFontChar[cid] = unicode;
- usedUnicodes[unicode] = true;
- glyphs.push({ unicode: unicode, code: cid });
- ids.push(i);
- }
+ if (dupFirstEntry) {
+ charCodeToGlyphId[0] = numGlyphs - 1;
}
} else {
- this.useToFontChar = true;
// Most of the following logic in this code branch is based on the
// 9.6.6.4 of the PDF spec.
-
- // TODO(mack):
- // We are using this.hasEncoding to mean that the encoding is either
- // MacRomanEncoding or WinAnsiEncoding (following spec in 9.6.6.4),
- // but this.hasEncoding is currently true for any encodings on the
- // Encodings object (e.g. MacExpertEncoding). So should consider using
- // better check for this.
- var cmapTable = readCmapTable(tables.cmap, font, this.hasEncoding,
- this.isSymbolicFont);
-
- // TODO(mack): If the (3, 0) cmap table used, then the font is
- // symbolic. The range of charcodes in the cmap table should be
- // one of the following:
- // -> 0x0000 - 0x00FF
- // -> 0xF000 - 0xF0FF
- // -> 0xF100 - 0xF1FF
- // -> 0xF200 - 0xF2FF
- // If it is not, we should change not consider this a symbolic font
- this.isSymbolicFont = cmapTable.isSymbolicFont;
-
+ var cmapTable = readCmapTable(tables.cmap, font, this.isSymbolicFont);
var cmapPlatformId = cmapTable.platformId;
var cmapEncodingId = cmapTable.encodingId;
var cmapMappings = cmapTable.mappings;
var cmapMappingsLength = cmapMappings.length;
- var glyphs = [];
- var ids = [];
- for (var i = 0; i < cmapMappingsLength; ++i) {
- var cmapMapping = cmapMappings[i];
- var charcode = cmapMapping.charcode;
- var unicode = cmapCharcodeToUnicode(charcode, this.isSymbolicFont,
- cmapPlatformId, cmapEncodingId);
-
- if (!unicode) {
- // TODO(mack): gotta check if skipping mappings where we cannot find
- // a unicode is the correct behaviour
- continue;
- }
- glyphs.push({
- code: charcode,
- unicode: unicode
- });
- ids.push(cmapMapping.glyphId);
- }
-
- var hasShortCmap = cmapTable.hasShortCmap;
- var toFontChar = this.toFontChar;
-
- if (hasShortCmap && ids.length == numGlyphs) {
- // Fixes the short cmap tables -- some generators use incorrect
- // glyph id.
- for (var i = 0, ii = ids.length; i < ii; i++) {
- ids[i] = i;
- }
- }
-
- // Rewrite the whole toFontChar dictionary with a new one using the
- // information from the mappings in the cmap table.
- var newToFontChar = [];
- if (this.isSymbolicFont) {
- for (var i = 0, ii = glyphs.length; i < ii; i++) {
- var glyph = glyphs[i];
- // For (3, 0) cmap tables:
- // The charcode key being stored in toFontChar is the lower byte
- // of the two-byte charcodes of the cmap table since according to
- // the spec: 'each byte from the string shall be prepended with the
- // high byte of the range [of charcodes in the cmap table], to form
- // a two-byte character, which shall be used to select the
- // associated glyph description from the subtable'.
- //
- // For (1, 0) cmap tables:
- // 'single bytes from the string shall be used to look up the
- // associated glyph descriptions from the subtable'. This means
- // charcodes in the cmap will be single bytes, so no-op since
- // glyph.code & 0xFF === glyph.code
- newToFontChar[glyph.code & 0xFF] = glyph.unicode;
+ var hasEncoding = properties.differences.length ||
+ !!properties.baseEncodingName;
+
+ // The spec seems to imply that if the font is symbolic the encoding
+ // should be ignored, this doesn't appear to work for 'preistabelle.pdf'
+ // where the font is symbolic and it has an encoding.
+ if (hasEncoding &&
+ (cmapPlatformId === 3 && cmapEncodingId === 1 ||
+ cmapPlatformId === 1 && cmapEncodingId === 0)) {
+ var baseEncoding = [];
+ if (properties.baseEncodingName === 'MacRomanEncoding' ||
+ properties.baseEncodingName === 'WinAnsiEncoding') {
+ baseEncoding = Encodings[properties.baseEncodingName];
}
- } else {
-
- var encoding = properties.baseEncoding;
- var differences = properties.differences;
-
- // TODO(mack): check if it is necessary to shift control characters
- // for non-symbolic fonts so that browsers dont't render them using
- // space characters
-
- var glyphCodeMapping = cmapTable.glyphCodeMapping;
- for (var charcode = 0; charcode < encoding.length; ++charcode) {
- if (!encoding.hasOwnProperty(charcode)) {
+ for (var charCode = 0; charCode < 256; charCode++) {
+ var glyphName;
+ if (this.differences && charCode in this.differences) {
+ glyphName = this.differences[charCode];
+ } else if (charCode in baseEncoding &&
+ baseEncoding[charCode] !== '') {
+ glyphName = baseEncoding[charCode];
+ } else {
+ glyphName = Encodings.StandardEncoding[charCode];
+ }
+ if (!glyphName) {
continue;
}
-
- // Since the cmap table that we will be writing out is a (3, 1)
- // unicode table, in this section we will rewrites the charcodes
- // in the pdf into unicodes
-
- var glyphName = encoding[charcode];
- // A nonsymbolic font should not have a Differences array, but
- // if it does have one, we should still use it
- if (charcode in differences) {
- glyphName = differences[charcode];
+ var unicodeOrCharCode;
+ if (cmapPlatformId === 3 && cmapEncodingId === 1) {
+ unicodeOrCharCode = GlyphsUnicode[glyphName];
+ } else if (cmapPlatformId === 1 && cmapEncodingId === 0) {
+ // TODO: the encoding needs to be updated with mac os table.
+ unicodeOrCharCode = Encodings.MacRomanEncoding.indexOf(glyphName);
}
- // Finally, any undefined entries in the table shall be filled
- // using StandardEncoding
- if (!glyphName) {
- glyphName = Encodings.StandardEncoding[charcode];
+ var found = false;
+ for (var i = 0; i < cmapMappingsLength; ++i) {
+ if (cmapMappings[i].charCode === unicodeOrCharCode) {
+ charCodeToGlyphId[charCode] = cmapMappings[i].glyphId;
+ found = true;
+ break;
+ }
}
-
- // TODO(mack): Handle the case that the glyph name cannot be
- // mapped as specified, in which case the glyph name shall be
- // looked up in the font program's 'post' table (if one is
- // present) and the associated glyph id shall be used.
- //
- // For now, we're just using the '.notdef' glyph name in this
- // case.
- glyphName = glyphName || '.notdef';
-
- var unicode = GlyphsUnicode[glyphName];
- newToFontChar[charcode] = unicode;
+ if (!found && properties.glyphNames) {
+ // Try to map using the post table. There are currently no known
+ // pdfs that this fixes.
+ var glyphId = properties.glyphNames.indexOf(glyphName);
+ if (glyphId > 0) {
+ charCodeToGlyphId[charCode] = glyphId;
+ }
+ }
+ }
+ } else {
+ // For (3, 0) cmap tables:
+ // The charcode key being stored in charCodeToGlyphId is the lower
+ // byte of the two-byte charcodes of the cmap table since according to
+ // the spec: 'each byte from the string shall be prepended with the
+ // high byte of the range [of charcodes in the cmap table], to form
+ // a two-byte character, which shall be used to select the
+ // associated glyph description from the subtable'.
+ //
+ // For (1, 0) cmap tables:
+ // 'single bytes from the string shall be used to look up the
+ // associated glyph descriptions from the subtable'. This means
+ // charcodes in the cmap will be single bytes, so no-op since
+ // glyph.charCode & 0xFF === glyph.charCode
+ for (var i = 0; i < cmapMappingsLength; ++i) {
+ var charCode = cmapMappings[i].charCode & 0xFF;
+ charCodeToGlyphId[charCode] = cmapMappings[i].glyphId;
}
}
- this.toFontChar = toFontChar = newToFontChar;
-
- createGlyphNameMap(glyphs, ids, properties);
- this.glyphNameMap = properties.glyphNameMap;
}
- if (glyphs.length === 0) {
+ if (charCodeToGlyphId.length === 0) {
// defines at least one glyph
- glyphs.push({ unicode: 0xF000, code: 0xF000, glyph: '.notdef' });
- ids.push(0);
+ charCodeToGlyphId[0] = 0;
}
// Converting glyphs and ids into font's cmap table
- tables.cmap.data = createCmapTable(glyphs, ids);
- var unicodeIsEnabled = [];
- for (var i = 0, ii = glyphs.length; i < ii; i++) {
- unicodeIsEnabled[glyphs[i].unicode] = true;
- }
- this.unicodeIsEnabled = unicodeIsEnabled;
+ var newMapping = adjustMapping(charCodeToGlyphId, properties);
+ this.toFontChar = newMapping.toFontChar;
+ tables.cmap = {
+ tag: 'cmap',
+ data: createCmapTable(newMapping.charCodeToGlyphId)
+ };
if (!tables['OS/2'] || !validateOS2Table(tables['OS/2'])) {
// extract some more font properties from the OpenType head and
@@ -4138,7 +3896,9 @@ var Font = (function FontClosure() {
tables['OS/2'] = {
tag: 'OS/2',
- data: stringToArray(createOS2Table(properties, glyphs, override))
+ data: stringToArray(createOS2Table(properties,
+ newMapping.charCodeToGlyphId,
+ override))
};
}
@@ -4222,63 +3982,49 @@ var Font = (function FontClosure() {
createOpenTypeHeader('\x4F\x54\x54\x4F', otf, 9);
- var charstrings = font.charstrings;
- properties.fixedPitch = isFixedPitch(charstrings);
+ properties.fixedPitch = false; //isFixedPitch(charstrings);
- var glyphNameMap = {};
- for (var i = 0; i < charstrings.length; ++i) {
- var charstring = charstrings[i];
- glyphNameMap[charstring.glyph] = charstring.unicode;
- }
- this.glyphNameMap = glyphNameMap;
+ var mapping = font.getGlyphMapping(properties);
+ var newMapping = adjustMapping(mapping, properties);
+ this.toFontChar = newMapping.toFontChar;
+ var numGlyphs = font.numGlyphs;
var seacs = font.seacs;
- if (SEAC_ANALYSIS_ENABLED && seacs) {
- var seacMap = [];
+ if (SEAC_ANALYSIS_ENABLED && seacs && seacs.length) {
var matrix = properties.fontMatrix || FONT_IDENTITY_MATRIX;
- for (var i = 0; i < charstrings.length; ++i) {
- var charstring = charstrings[i];
- var seac = seacs[charstring.gid];
- if (!seac) {
- continue;
- }
+ var charset = font.getCharset();
+ var charCodeToGlyphId = mapping;
+ var toFontChar = newMapping.toFontChar;
+ var seacs = font.seacs;
+ var seacMap = Object.create(null);
+ var glyphIdToCharCode = Object.create(null);
+ for (var charCode in charCodeToGlyphId) {
+ glyphIdToCharCode[charCodeToGlyphId[charCode]] = charCode | 0;
+ }
+ for (var glyphId in seacs) {
+ glyphId |= 0;
+ var seac = seacs[glyphId];
var baseGlyphName = Encodings.StandardEncoding[seac[2]];
- var baseUnicode = glyphNameMap[baseGlyphName];
var accentGlyphName = Encodings.StandardEncoding[seac[3]];
- var accentUnicode = glyphNameMap[accentGlyphName];
- if (!baseUnicode || !accentUnicode) {
+ var baseGlyphId = charset.indexOf(baseGlyphName);
+ var accentGlyphId = charset.indexOf(accentGlyphName);
+ if (baseGlyphId < 0 || accentGlyphId < 0) {
continue;
}
var accentOffset = {
x: seac[0] * matrix[0] + seac[1] * matrix[2] + matrix[4],
y: seac[0] * matrix[1] + seac[1] * matrix[3] + matrix[5]
};
- seacMap[charstring.unicode] = {
- baseUnicode: baseUnicode,
- accentUnicode: accentUnicode,
+ var charCode = glyphIdToCharCode[glyphId];
+ seacMap[charCode] = {
+ baseFontCharCode: toFontChar[glyphIdToCharCode[baseGlyphId]],
+ accentFontCharCode: toFontChar[glyphIdToCharCode[accentGlyphId]],
accentOffset: accentOffset
};
}
properties.seacMap = seacMap;
}
- if (properties.overridableEncoding && (properties.subtype == 'Type1C' ||
- properties.subtype == 'CIDFontType0C')) {
- var encoding = [];
- for (var i = 0; i < charstrings.length; ++i) {
- var charstring = charstrings[i];
- encoding[charstring.code] = charstring.glyph;
- }
- properties.baseEncoding = encoding;
- }
- if (properties.subtype == 'CIDFontType0C') {
- var toFontChar = [];
- for (var i = 0; i < charstrings.length; ++i) {
- var charstring = charstrings[i];
- toFontChar[charstring.code] = charstring.unicode;
- }
- this.toFontChar = toFontChar;
- }
var unitsPerEm = 1 / (properties.fontMatrix || FONT_IDENTITY_MATRIX)[0];
var fields = {
@@ -4286,11 +4032,11 @@ var Font = (function FontClosure() {
'CFF ': font.data,
// OS/2 and Windows Specific metrics
- 'OS/2': stringToArray(createOS2Table(properties, charstrings)),
+ 'OS/2': stringToArray(createOS2Table(properties,
+ newMapping.charCodeToGlyphId)),
// Character to glyphs mapping
- 'cmap': createCmapTable(charstrings.slice(),
- ('glyphIds' in font) ? font.glyphIds : null),
+ 'cmap': createCmapTable(newMapping.charCodeToGlyphId),
// Font header
'head': (function fontFieldsHead() {
@@ -4334,14 +4080,17 @@ var Font = (function FontClosure() {
'\x00\x00' + // -reserved-
'\x00\x00' + // -reserved-
'\x00\x00' + // metricDataFormat
- string16(charstrings.length + 1)); // Number of HMetrics
+ string16(numGlyphs + 1)); // Number of HMetrics
})(),
// Horizontal metrics
'hmtx': (function fontFieldsHmtx() {
+ var charstrings = font.charstrings;
var hmtx = '\x00\x00\x00\x00'; // Fake .notdef
- for (var i = 0, ii = charstrings.length; i < ii; i++) {
- var charstring = charstrings[i];
+ for (var i = 0, ii = numGlyphs; i < ii; i++) {
+ // TODO: For CFF fonts the width should technically match the x in
+ // the glyph, but it doesn't seem to matter.
+ var charstring = charstrings ? charstrings[i] : {};
var width = 'width' in charstring ? charstring.width : 0;
hmtx += string16(width) + string16(0);
}
@@ -4352,7 +4101,7 @@ var Font = (function FontClosure() {
'maxp': (function fontFieldsMaxp() {
return stringToArray(
'\x00\x00\x50\x00' + // Version number
- string16(charstrings.length + 1)); // Num of glyphs
+ string16(numGlyphs + 1)); // Num of glyphs
})(),
// Naming tables
@@ -4373,124 +4122,103 @@ var Font = (function FontClosure() {
return stringToArray(otf.file);
},
- buildToFontChar: function Font_buildToFontChar(toUnicode) {
- var result = [];
- var unusedUnicode = CMAP_GLYPH_OFFSET;
- for (var i = 0, ii = toUnicode.length; i < ii; i++) {
- var unicode = toUnicode[i];
- var fontCharCode = typeof unicode === 'object' ? unusedUnicode++ :
- unicode;
- if (typeof unicode !== 'undefined') {
- if (isString(fontCharCode) && fontCharCode.length === 1) {
- fontCharCode = fontCharCode.charCodeAt(0);
- }
- result[i] = fontCharCode;
- }
+ /**
+ * Builds a char code to unicode map based on section 9.10 of the spec.
+ * @param {Object} properties Font properties object.
+ * @return {Object} Has two properties: 'toUnicode' which maps char codes to
+ * unicode (string) values and 'isIdentity' which is true if an identity map
+ * is used.
+ */
+ buildToUnicode: function Font_buildToUnicode(properties) {
+ var map = {
+ isIdentity: false,
+ toUnicode: null
+ };
+ // Section 9.10.2 Mapping Character Codes to Unicode Values
+ if (properties.toUnicode) {
+ map.toUnicode = properties.toUnicode;
+ return map;
}
- return result;
- },
-
- rebuildToUnicode: function Font_rebuildToUnicode(properties) {
- var firstChar = properties.firstChar, lastChar = properties.lastChar;
- var map = [];
- var toUnicode = this.toUnicode || this.cidToUnicode;
- if (toUnicode) {
- var isIdentityMap = toUnicode.length === 0;
- for (var i = firstChar, ii = lastChar; i <= ii; i++) {
- // TODO missing map the character according font's CMap
- map[i] = isIdentityMap ? String.fromCharCode(i) : toUnicode[i];
+ // According to the spec if the font is a simple font we should only map
+ // to unicode if the base encoding is MacRoman, MacExpert, or WinAnsi or
+ // the differences array only contains adobe standard or symbol set names,
+ // in practice it seems better to always try to create a toUnicode
+ // map based on the default encoding.
+ if (!properties.composite /* is simple font */) {
+ var toUnicode = [];
+ var encoding = properties.defaultEncoding.slice();
+ // Merge in the differences array.
+ var differences = properties.differences;
+ for (var charcode in differences) {
+ encoding[charcode] = differences[charcode];
}
- } else {
- for (var i = firstChar, ii = lastChar; i <= ii; i++) {
- var glyph = properties.differences[i];
- if (!glyph)
- glyph = properties.baseEncoding[i];
- if (!!glyph && (glyph in GlyphsUnicode))
- map[i] = String.fromCharCode(GlyphsUnicode[glyph]);
- }
- }
- this.toUnicode = map;
- },
-
- loadCidToUnicode: function Font_loadCidToUnicode(properties) {
- if (!properties.cidSystemInfo)
- return;
-
- var cidToUnicodeMap = [], unicodeToCIDMap = [];
- this.cidToUnicode = cidToUnicodeMap;
- this.unicodeToCID = unicodeToCIDMap;
-
- var cidEncoding = properties.cidEncoding;
- if (properties.toUnicode) {
- if (cidEncoding && cidEncoding.indexOf('Identity-') !== 0) {
- warn('Need to create a reverse mapping from \'ToUnicode\' CMap');
+ for (var charcode in encoding) {
+ // a) Map the character code to a character name.
+ var glyphName = encoding[charcode];
+ // b) Look up the character name in the Adobe Glyph List (see the
+ // Bibliography) to obtain the corresponding Unicode value.
+ if (glyphName === '' || !(glyphName in GlyphsUnicode)) {
+ continue;
+ }
+ toUnicode[charcode] = String.fromCharCode(GlyphsUnicode[glyphName]);
}
- return; // 'ToUnicode' CMap will be used
+ map.toUnicode = toUnicode;
+ return map;
}
-
- var cidSystemInfo = properties.cidSystemInfo;
- var cidToUnicode;
- if (cidSystemInfo) {
- cidToUnicode = CIDToUnicodeMaps[
- cidSystemInfo.registry + '-' + cidSystemInfo.ordering];
- }
-
- if (!cidToUnicode)
- return; // identity encoding
-
- var overwrite = HalfwidthCMaps[cidEncoding];
- var cid = 1, i, j, k, ii;
- for (i = 0, ii = cidToUnicode.length; i < ii; ++i) {
- var unicode = cidToUnicode[i];
- if (isArray(unicode)) {
- var length = unicode.length;
- for (j = 0; j < length; j++) {
- cidToUnicodeMap[cid] = k = unicode[j];
- if (!unicodeToCIDMap[k] || overwrite) {
- unicodeToCIDMap[k] = cid;
- }
- }
- cid++;
- } else if (typeof unicode === 'object') {
- var fillLength = unicode.f;
- if (fillLength) {
- k = unicode.c;
- for (j = 0; j < fillLength; ++j) {
- cidToUnicodeMap[cid] = k;
- if (!unicodeToCIDMap[k] || overwrite) {
- unicodeToCIDMap[k] = cid;
- }
- cid++;
- k++;
- }
- } else
- cid += unicode.s;
- } else if (unicode) {
- cidToUnicodeMap[cid] = unicode;
- if (!unicodeToCIDMap[unicode] || overwrite) {
- unicodeToCIDMap[unicode] = cid;
+ // If the font is a composite font that uses one of the predefined CMaps
+ // listed in Table 118 (except Identity-H and Identity-V) or whose
+ // descendant CIDFont uses the Adobe-GB1, Adobe-CNS1, Adobe-Japan1, or
+ // Adobe-Korea1 character collection:
+ if (properties.composite && (
+ (properties.cMap.builtInCMap &&
+ !(properties.cMap instanceof IdentityCMap)) ||
+ (properties.cidSystemInfo.registry === 'Adobe' &&
+ (properties.cidSystemInfo.ordering === 'GB1' ||
+ properties.cidSystemInfo.ordering === 'CNS1' ||
+ properties.cidSystemInfo.ordering === 'Japan1' ||
+ properties.cidSystemInfo.ordering === 'Korea1')))) {
+ // Then:
+ // a) Map the character code to a character identifier (CID) according
+ // to the font’s CMap.
+ // b) Obtain the registry and ordering of the character collection used
+ // by the font’s CMap (for example, Adobe and Japan1) from its
+ // CIDSystemInfo dictionary.
+ var registry = properties.cidSystemInfo.registry;
+ var ordering = properties.cidSystemInfo.ordering;
+ // c) Construct a second CMap name by concatenating the registry and
+ // ordering obtained in step (b) in the format registry-ordering-UCS2
+ // (for example, Adobe-Japan1-UCS2).
+ var ucs2CMapName = new Name(registry + '-' + ordering + '-UCS2');
+ // d) Obtain the CMap with the name constructed in step (c) (available
+ // from the ASN Web site; see the Bibliography).
+ var ucs2CMap = CMapFactory.create(ucs2CMapName, PDFJS.cMapUrl, null);
+ var cMap = properties.cMap;
+ var toUnicode = [];
+ for (var charcode in cMap.map) {
+ var cid = cMap.map[charcode];
+ assert(cid.length === 1, 'Max size of CID is 65,535');
+ // e) Map the CID obtained in step (a) according to the CMap obtained
+ // in step (d), producing a Unicode value.
+ var ucs2 = ucs2CMap.map[cid.charCodeAt(0)];
+ if (!ucs2) {
+ continue;
}
- cid++;
- } else
- cid++;
+ toUnicode[charcode] = String.fromCharCode((ucs2.charCodeAt(0) << 8) +
+ ucs2.charCodeAt(1));
+ }
+ map.toUnicode = toUnicode;
+ return map;
}
- if (!cidEncoding) {
- return;
- }
- if (cidEncoding.indexOf('Identity-') !== 0) {
- // input is already Unicode for non-Identity CMap encodings.
- this.cidToUnicode = [];
- // For CIDFontType2, however, we need cid-to-Unicode conversion
- // to rebuild cmap.
- if (properties.type == 'CIDFontType2') {
- this.cidToFontChar = cidToUnicodeMap;
- }
- } else {
- // We don't have to do reverse conversions if the string is
- // already CID.
- this.unicodeToCID = [];
+ // The viewer's choice, just use an identity map.
+ var toUnicode = [];
+ var firstChar = properties.firstChar, lastChar = properties.lastChar;
+ for (var i = firstChar, ii = lastChar; i <= ii; i++) {
+ toUnicode[i] = String.fromCharCode(i);
}
+ map.isIdentity = true;
+ map.toUnicode = toUnicode;
+ return map;
},
get spaceWidth() {
@@ -4511,8 +4239,11 @@ var Font = (function FontClosure() {
var glyphUnicode = GlyphsUnicode[glyphName];
// finding the charcode via unicodeToCID map
var charcode = 0;
- if (this.composite)
- charcode = this.unicodeToCID[glyphUnicode];
+ if (this.composite) {
+ if (glyphUnicode in this.cMap.map) {
+ charcode = this.cMap.lookup(glyphUnicode).charCodeAt(0);
+ }
+ }
// ... via toUnicode map
if (!charcode && 'toUnicode' in this)
charcode = this.toUnicode.indexOf(glyphUnicode);
@@ -4532,99 +4263,39 @@ var Font = (function FontClosure() {
},
charToGlyph: function Font_charToGlyph(charcode) {
- var fontCharCode, width, operatorList, disabled;
-
- var width = this.widths[charcode];
- var vmetric = this.vmetrics && this.vmetrics[charcode];
-
- switch (this.type) {
- case 'CIDFontType0':
- var cid = this.unicodeToCID[charcode] || charcode;
- if (this.unicodeToCID.length > 0) {
- width = this.widths[cid];
- vmetric = this.vmetrics && this.vmetrics[cid];
- }
- if (this.noUnicodeAdaptation) {
- fontCharCode = this.toFontChar[charcode] || charcode;
- break;
- }
- // CIDFontType0 is not encoded in Unicode.
- fontCharCode = this.toFontChar[cid] || cid;
- break;
- case 'CIDFontType2':
- if (this.unicodeToCID.length > 0) {
- var cid = this.unicodeToCID[charcode] || charcode;
- width = this.widths[cid];
- vmetric = this.vmetrics && this.vmetrics[cid];
- fontCharCode = charcode;
- break;
- }
- fontCharCode = this.toFontChar[charcode] || charcode;
- break;
- case 'MMType1': // XXX at the moment only "standard" fonts are supported
- case 'Type1':
- var glyphName = this.differences[charcode] || this.encoding[charcode];
- if (!isNum(width))
- width = this.widths[glyphName];
- if (this.noUnicodeAdaptation) {
- fontCharCode = mapPrivateUseChars(GlyphsUnicode[glyphName] ||
- charcode);
- break;
- }
- fontCharCode = this.glyphNameMap[glyphName] ||
- GlyphsUnicode[glyphName] || charcode;
- break;
- case 'Type3':
- var glyphName = this.differences[charcode] || this.encoding[charcode];
- operatorList = this.charProcOperatorList[glyphName];
- fontCharCode = charcode;
- break;
- case 'TrueType':
- if (this.useToFontChar) {
- fontCharCode = this.toFontChar[charcode] || charcode;
- break;
- }
- var glyphName = this.differences[charcode] || this.encoding[charcode];
- if (!glyphName)
- glyphName = Encodings.StandardEncoding[charcode];
- if (!isNum(width))
- width = this.widths[glyphName];
- if (this.noUnicodeAdaptation) {
- fontCharCode = GlyphsUnicode[glyphName] || charcode;
- break;
- }
- if (!this.hasEncoding || this.isSymbolicFont) {
- fontCharCode = this.useToFontChar ? this.toFontChar[charcode] :
- charcode;
- break;
- }
-
- // MacRoman encoding address by re-encoding the cmap table
+ var fontCharCode, width, operatorList;
- fontCharCode = glyphName in this.glyphNameMap ?
- this.glyphNameMap[glyphName] : GlyphsUnicode[glyphName];
- break;
- default:
- warn('Unsupported font type: ' + this.type);
- break;
+ var widthCode = charcode;
+ if (this.cMap && charcode in this.cMap.map) {
+ widthCode = this.cMap.map[charcode].charCodeAt(0);
}
+ var width = this.widths[widthCode];
+ width = isNum(width) ? width : this.defaultWidth;
+ var vmetric = this.vmetrics && this.vmetrics[widthCode];
- var unicodeChars = !('toUnicode' in this) ? charcode :
- this.toUnicode[charcode] || charcode;
+ var unicodeChars = this.toUnicode[charcode] || charcode;
if (typeof unicodeChars === 'number') {
unicodeChars = String.fromCharCode(unicodeChars);
}
- width = isNum(width) ? width : this.defaultWidth;
- disabled = this.unicodeIsEnabled ?
- !this.unicodeIsEnabled[fontCharCode] : false;
+ // First try the toFontChar map, if it's not there then try falling
+ // back to the char code.
+ fontCharCode = this.toFontChar[charcode] || charcode;
+ if (this.missingFile) {
+ fontCharCode = mapSpecialUnicodeValues(fontCharCode);
+ }
+
+ if (this.type === 'Type3') {
+ // Font char code in this case is actually a glyph name.
+ operatorList = this.charProcOperatorList[fontCharCode];
+ }
var accent = null;
- if (this.seacMap && this.seacMap[fontCharCode]) {
- var seac = this.seacMap[fontCharCode];
- fontCharCode = seac.baseUnicode;
+ if (this.seacMap && this.seacMap[charcode]) {
+ var seac = this.seacMap[charcode];
+ fontCharCode = seac.baseFontCharCode;
accent = {
- fontChar: String.fromCharCode(seac.accentUnicode),
+ fontChar: String.fromCharCode(seac.accentFontCharCode),
offset: seac.accentOffset
};
}
@@ -4635,7 +4306,6 @@ var Font = (function FontClosure() {
accent: accent,
width: width,
vmetric: vmetric,
- disabled: disabled,
operatorList: operatorList
};
},
@@ -4658,23 +4328,12 @@ var Font = (function FontClosure() {
glyphs = [];
var charsCacheKey = chars;
- var converter;
- var cidEncoding = this.cidEncoding;
- if (cidEncoding) {
- converter = CMapConverterList[cidEncoding];
- if (converter) {
- chars = converter(chars);
- } else if (cidEncoding.indexOf('Uni') !== 0 &&
- cidEncoding.indexOf('Identity-') !== 0) {
- warn('Unsupported CMap: ' + cidEncoding);
- }
- }
- if (!converter && this.cmap) {
+ if (this.cMap) {
var i = 0;
// composite fonts have multi-byte strings convert the string from
// single-byte to multi-byte
while (i < chars.length) {
- var c = this.cmap.readCharCode(chars, i);
+ var c = this.cMap.readCharCode(chars, i);
var charcode = c[0];
var length = c[1];
i += length;
@@ -4686,8 +4345,7 @@ var Font = (function FontClosure() {
glyphs.push(null);
}
}
- }
- else {
+ } else {
for (var i = 0, ii = chars.length; i < ii; ++i) {
var charcode = chars.charCodeAt(i);
var glyph = this.charToGlyph(charcode);
@@ -4722,6 +4380,59 @@ var ErrorFont = (function ErrorFontClosure() {
return ErrorFont;
})();
+/**
+ * Shared logic for building a char code to glyph id mapping for Type1 and
+ * simple CFF fonts. See section 9.6.6.2 of the spec.
+ * @param {Object} properties Font properties object.
+ * @param {Object} builtInEncoding The encoding contained within the actual font
+ * data.
+ * @param {Array} glyphNames Glyph names, where the index is the glyph ID.
+ * @returns {Object} A char code to glyph ID map.
+ */
+function type1FontGlyphMapping(properties, builtInEncoding, glyphNames) {
+ var charCodeToGlyphId = Object.create(null);
+ if (properties.baseEncodingName) {
+ // If a valid base encoding name was used, the mapping is initialized with
+ // that.
+ var baseEncoding = Encodings[properties.baseEncodingName];
+ for (var charCode = 0; charCode < baseEncoding.length; charCode++) {
+ var glyphId = glyphNames.indexOf(baseEncoding[charCode]);
+ if (glyphId >= 0) {
+ charCodeToGlyphId[charCode] = glyphId;
+ }
+ }
+ } else if (!!(properties.flags & FontFlags.Symbolic)) {
+ // For a symbolic font the encoding should be the font's built-in
+ // encoding.
+ for (var charCode in builtInEncoding) {
+ charCodeToGlyphId[charCode] = builtInEncoding[charCode];
+ }
+ } else {
+ // For non-symbolic fonts that don't have a base encoding the standard
+ // encoding should be used.
+ var baseEncoding = Encodings.StandardEncoding;
+ for (var charCode = 0; charCode < baseEncoding.length; charCode++) {
+ var glyphId = glyphNames.indexOf(baseEncoding[charCode]);
+ if (glyphId >= 0) {
+ charCodeToGlyphId[charCode] = glyphId;
+ }
+ }
+ }
+
+ // Lastly, merge in the differences.
+ var differences = properties.differences;
+ if (differences) {
+ for (var charCode in differences) {
+ var glyphName = differences[charCode];
+ var glyphId = glyphNames.indexOf(glyphName);
+ if (glyphId >= 0) {
+ charCodeToGlyphId[charCode] = glyphId;
+ }
+ }
+ }
+ return charCodeToGlyphId;
+}
+
/*
* CharStrings are encoded following the the CharString Encoding sequence
* describe in Chapter 6 of the "Adobe Type1 Font Format" specification.
@@ -5290,11 +5001,11 @@ var Type1Parser = (function Type1ParserClosure() {
output = [14];
}
program.charstrings.push({
- glyph: glyph,
- data: output,
- seac: charString.seac,
+ glyphName: glyph,
+ charstring: output,
+ width: charString.width,
lsb: charString.lsb,
- width: charString.width
+ seac: charString.seac
});
}
@@ -5343,10 +5054,7 @@ var Type1Parser = (function Type1ParserClosure() {
this.getToken(); // read the in 'put'
}
}
- if (properties.overridableEncoding && encoding) {
- properties.baseEncoding = encoding;
- break;
- }
+ properties.builtInEncoding = encoding;
break;
case 'FontBBox':
var fontBBox = this.readNumberArray();
@@ -5469,7 +5177,7 @@ var Type1Font = function Type1Font(name, file, properties) {
for (var info in data.properties)
properties[info] = data.properties[info];
- var charstrings = this.getOrderedCharStrings(data.charstrings, properties);
+ var charstrings = data.charstrings;
var type2Charstrings = this.getType2Charstrings(charstrings);
var subrs = this.getType2Subrs(data.subrs);
@@ -5480,35 +5188,37 @@ var Type1Font = function Type1Font(name, file, properties) {
};
Type1Font.prototype = {
- getOrderedCharStrings: function Type1Font_getOrderedCharStrings(glyphs,
- properties) {
- var charstrings = [];
- var usedUnicodes = [];
- var i, length, glyphName;
- var unusedUnicode = CMAP_GLYPH_OFFSET;
- for (i = 0, length = glyphs.length; i < length; i++) {
- var item = glyphs[i];
- var glyphName = item.glyph;
- var unicode = glyphName in GlyphsUnicode ?
- GlyphsUnicode[glyphName] : unusedUnicode++;
- while (usedUnicodes[unicode]) {
- unicode = unusedUnicode++;
+ get numGlyphs() {
+ return this.charstrings.length;
+ },
+
+ getCharset: function Type1Font_getCharset() {
+ var charset = ['.notdef'];
+ var charstrings = this.charstrings;
+ for (var glyphId = 0; glyphId < charstrings.length; glyphId++) {
+ charset.push(charstrings[glyphId].glyphName);
+ }
+ return charset;
+ },
+
+ getGlyphMapping: function Type1Font_getGlyphMapping(properties) {
+ var charstrings = this.charstrings;
+ var glyphNames = ['.notdef'];
+ for (var glyphId = 0; glyphId < charstrings.length; glyphId++) {
+ glyphNames.push(charstrings[glyphId].glyphName);
+ }
+ var encoding = properties.builtInEncoding;
+ if (encoding) {
+ var builtInEncoding = {};
+ for (var charCode in encoding) {
+ var glyphId = glyphNames.indexOf(encoding[charCode]);
+ if (glyphId >= 0) {
+ builtInEncoding[charCode] = glyphId;
+ }
}
- usedUnicodes[unicode] = true;
- charstrings.push({
- glyph: glyphName,
- unicode: unicode,
- gid: i,
- charstring: item.data,
- width: item.width,
- lsb: item.lsb
- });
}
- charstrings.sort(function charstrings_sort(a, b) {
- return a.unicode - b.unicode;
- });
- return charstrings;
+ return type1FontGlyphMapping(properties, builtInEncoding, glyphNames);
},
getSeacs: function Type1Font_getSeacs(charstrings) {
@@ -5517,7 +5227,8 @@ Type1Font.prototype = {
for (i = 0, ii = charstrings.length; i < ii; i++) {
var charstring = charstrings[i];
if (charstring.seac) {
- seacMap[i] = charstring.seac;
+ // Offset by 1 for .notdef
+ seacMap[i + 1] = charstring.seac;
}
}
return seacMap;
@@ -5589,11 +5300,11 @@ Type1Font.prototype = {
var count = glyphs.length;
var charsetArray = [0];
for (var i = 0; i < count; i++) {
- var index = CFFStandardStrings.indexOf(charstrings[i].glyph);
- // Some characters like asterikmath && circlecopyrt are
- // missing from the original strings, for the moment let's
- // map them to .notdef and see later if it cause any
- // problems
+ var index = CFFStandardStrings.indexOf(charstrings[i].glyphName);
+ // TODO: Insert the string and correctly map it. Previously it was
+ // thought mapping names that aren't in the standard strings to .notdef
+ // was fine, however in issue818 when mapping them all to .notdef the
+ // adieresis glyph no longer worked.
if (index == -1)
index = 0;
@@ -5660,7 +5371,7 @@ var CFFFont = (function CFFFontClosure() {
var parser = new CFFParser(file, properties);
this.cff = parser.parse();
var compiler = new CFFCompiler(this.cff);
- this.readExtra();
+ this.seacs = this.cff.seacs;
try {
this.data = compiler.compile();
} catch (e) {
@@ -5672,111 +5383,38 @@ var CFFFont = (function CFFFontClosure() {
}
CFFFont.prototype = {
- readExtra: function CFFFont_readExtra() {
- // charstrings contains info about glyphs (one element per glyph
- // containing mappings for {unicode, width})
- var charstrings = this.getCharStrings();
-
- // create the mapping between charstring and glyph id
- var glyphIds = [];
- for (var i = 0, ii = charstrings.length; i < ii; i++)
- glyphIds.push(charstrings[i].gid);
-
- this.charstrings = charstrings;
- this.glyphIds = glyphIds;
- this.seacs = this.cff.seacs;
+ get numGlyphs() {
+ return this.cff.charStrings.count;
+ },
+ getCharset: function CFFFont_getCharset() {
+ return this.cff.charset.charset;
},
- getCharStrings: function CFFFont_getCharStrings() {
+ getGlyphMapping: function CFFFont_getGlyphMapping() {
var cff = this.cff;
var charsets = cff.charset.charset;
- var encoding = cff.encoding ? cff.encoding.encoding : null;
- var charstrings = [];
- var unicodeUsed = [];
- var unassignedUnicodeItems = [];
- var inverseEncoding = [];
- var gidStart = 0;
- if (charsets[0] === '.notdef') {
- gidStart = 1;
- }
- // According to section 9.7.4.2 CIDFontType0C glyph selection should be
- // handled differently.
- if (this.properties.subtype === 'CIDFontType0C') {
+ var charCodeToGlyphId = Object.create(null);
+
+ if (this.properties.composite) {
if (this.cff.isCIDFont) {
// If the font is actually a CID font then we should use the charset
// to map CIDs to GIDs.
- inverseEncoding = charsets;
+ for (var glyphId = 0; glyphId < charsets.length; glyphId++) {
+ var cidString = String.fromCharCode(charsets[glyphId]);
+ var charCode = this.properties.cMap.map.indexOf(cidString);
+ charCodeToGlyphId[charCode] = glyphId;
+ }
} else {
// If it is NOT actually a CID font then CIDs should be mapped
// directly to GIDs.
- inverseEncoding = [];
- for (var i = 0, ii = cff.charStrings.count; i < ii; i++) {
- inverseEncoding.push(i);
- }
- // Use the identity map for charsets as well.
- charsets = inverseEncoding;
- }
- } else {
- for (var charcode in encoding) {
- var gid = encoding[charcode];
- if (gid in inverseEncoding) {
- // Glyphs can be multiply-encoded if there was an encoding
- // supplement. Convert to an array and append the charcode.
- var previousCharcode = inverseEncoding[gid];
- if (!isArray(previousCharcode)) {
- inverseEncoding[gid] = [previousCharcode];
- }
- inverseEncoding[gid].push(charcode | 0);
- } else {
- inverseEncoding[gid] = charcode | 0;
+ for (var glyphId = 0; glyphId < cff.charStrings.count; glyphId++) {
+ charCodeToGlyphId[glyphId] = glyphId;
}
}
+ return charCodeToGlyphId;
}
- for (var i = gidStart, ii = charsets.length; i < ii; i++) {
- var glyph = charsets[i];
-
- var codes = inverseEncoding[i];
- if (!isArray(codes)) {
- codes = [codes];
- }
-
- for (var j = 0; j < codes.length; j++) {
- var code = codes[j];
-
- if (!code || isSpecialUnicode(code)) {
- unassignedUnicodeItems.push(i, code);
- continue;
- }
- charstrings.push({
- unicode: code,
- code: code,
- gid: i,
- glyph: glyph
- });
- unicodeUsed[code] = true;
- }
- }
-
- var nextUnusedUnicode = CMAP_GLYPH_OFFSET;
- for (var j = 0, jj = unassignedUnicodeItems.length; j < jj; j += 2) {
- var i = unassignedUnicodeItems[j];
- // giving unicode value anyway
- while (nextUnusedUnicode in unicodeUsed)
- nextUnusedUnicode++;
- var unicode = nextUnusedUnicode++;
- charstrings.push({
- unicode: unicode,
- code: unassignedUnicodeItems[j + 1] || 0,
- gid: i,
- glyph: charsets[i]
- });
- }
-
- // sort the array by the unicode value (again)
- charstrings.sort(function getCharStringsSort(a, b) {
- return a.unicode - b.unicode;
- });
- return charstrings;
+ var encoding = cff.encoding ? cff.encoding.encoding : null;
+ return type1FontGlyphMapping(this.properties, encoding, charsets);
}
};
@@ -7163,6 +6801,6 @@ var CFFCompiler = (function CFFCompilerClosure() {
// https://github.com/mozilla/pdf.js/issues/1689
(function checkChromeWindows() {
if (/Windows.*Chrome/.test(navigator.userAgent)) {
- SYMBOLIC_FONT_GLYPH_OFFSET = 0xF100;
+ SKIP_PRIVATE_USE_RANGE_F000_TO_F01F = true;
}
})();
diff --git a/src/core/worker.js b/src/core/worker.js
index b72d545..307e184 100644
--- a/src/core/worker.js
+++ b/src/core/worker.js
@@ -239,6 +239,8 @@ var WorkerMessageHandler = PDFJS.WorkerMessageHandler = {
PDFJS.disableFontFace = data.disableFontFace;
PDFJS.disableCreateObjectURL = data.disableCreateObjectURL;
PDFJS.verbosity = data.verbosity;
+ PDFJS.cMapUrl = data.cMapUrl === undefined ?
+ null : data.cMapUrl;
getPdfManager(data).then(function () {
pdfManager.onLoadedStream().then(function(stream) {
diff --git a/src/display/api.js b/src/display/api.js
index 74f0bec..ece5816 100644
--- a/src/display/api.js
+++ b/src/display/api.js
@@ -30,6 +30,13 @@
PDFJS.maxImageSize = PDFJS.maxImageSize === undefined ? -1 : PDFJS.maxImageSize;
/**
+ * The URL where the predefined Adobe CMaps are located. Include the trailing
+ * slash.
+ * @var {string}
+ */
+PDFJS.cMapUrl = PDFJS.cMapUrl === undefined ? null : PDFJS.cMapUrl;
+
+/**
* By default fonts are converted to OpenType fonts and loaded via font face
* rules. If disabled, the font will be rendered using a built in font renderer
* that constructs the glyphs with primitive path commands.
@@ -917,6 +924,7 @@ var WorkerTransport = (function WorkerTransportClosure() {
source: source,
disableRange: PDFJS.disableRange,
maxImageSize: PDFJS.maxImageSize,
+ cMapUrl: PDFJS.cMapUrl,
disableFontFace: PDFJS.disableFontFace,
disableCreateObjectURL: PDFJS.disableCreateObjectURL,
verbosity: PDFJS.verbosity
diff --git a/src/display/canvas.js b/src/display/canvas.js
index 7381aa6..e1348a4 100644
--- a/src/display/canvas.js
+++ b/src/display/canvas.js
@@ -1383,38 +1383,37 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
var accent = glyph.accent;
var scaledX, scaledY, scaledAccentX, scaledAccentY;
- if (!glyph.disabled) {
- if (vertical) {
- scaledX = vx / fontSizeScale;
- scaledY = (x + vy) / fontSizeScale;
- } else {
- scaledX = x / fontSizeScale;
- scaledY = 0;
- }
- if (font.remeasure && width > 0 && this.isFontSubpixelAAEnabled) {
- // some standard fonts may not have the exact width, trying to
- // rescale per character
- var measuredWidth = ctx.measureText(character).width * 1000 /
- current.fontSize * current.fontSizeScale;
- var characterScaleX = width / measuredWidth;
- restoreNeeded = true;
- ctx.save();
- ctx.scale(characterScaleX, 1);
- scaledX /= characterScaleX;
- if (accent) {
- scaledAccentX /= characterScaleX;
- }
- }
+ if (vertical) {
+ scaledX = vx / fontSizeScale;
+ scaledY = (x + vy) / fontSizeScale;
+ } else {
+ scaledX = x / fontSizeScale;
+ scaledY = 0;
+ }
- this.paintChar(character, scaledX, scaledY);
+ if (font.remeasure && width > 0 && this.isFontSubpixelAAEnabled) {
+ // some standard fonts may not have the exact width, trying to
+ // rescale per character
+ var measuredWidth = ctx.measureText(character).width * 1000 /
+ current.fontSize * current.fontSizeScale;
+ var characterScaleX = width / measuredWidth;
+ restoreNeeded = true;
+ ctx.save();
+ ctx.scale(characterScaleX, 1);
+ scaledX /= characterScaleX;
if (accent) {
- scaledAccentX = scaledX + accent.offset.x / fontSizeScale;
- scaledAccentY = scaledY - accent.offset.y / fontSizeScale;
- this.paintChar(accent.fontChar, scaledAccentX, scaledAccentY);
+ scaledAccentX /= characterScaleX;
}
}
+ this.paintChar(character, scaledX, scaledY);
+ if (accent) {
+ scaledAccentX = scaledX + accent.offset.x / fontSizeScale;
+ scaledAccentY = scaledY - accent.offset.y / fontSizeScale;
+ this.paintChar(accent.fontChar, scaledAccentX, scaledAccentY);
+ }
+
x += charWidth;
canvasWidth += charWidth;
diff --git a/test/driver.js b/test/driver.js
index 3d69dc4..464eb2a 100644
--- a/test/driver.js
+++ b/test/driver.js
@@ -28,6 +28,7 @@
// "firefox-bin: Fatal IO error 12 (Cannot allocate memory) on X server :1."
// PDFJS.disableWorker = true;
PDFJS.enableStats = true;
+PDFJS.cMapUrl = '../external/cmaps/';
var appPath, masterMode, browser, canvas, dummyCanvas, currentTaskIdx,
manifest, stdout;
diff --git a/test/font/font_fpgm_spec.js b/test/font/font_fpgm_spec.js
index 088cfa0..c040899 100644
--- a/test/font/font_fpgm_spec.js
+++ b/test/font/font_fpgm_spec.js
@@ -9,7 +9,9 @@ describe('font_fpgm', function() {
var font = new Font("font", new Stream(font2324), {
loadedName: 'font',
type: 'CIDFontType2',
- differences: []
+ differences: [],
+ defaultEncoding: [],
+ cMap: CMapFactory.create(new Name('Identity-H'))
});
ttx(font.data, function(result) { output = result; });
runs(function() {
diff --git a/test/font/font_os2_spec.js b/test/font/font_os2_spec.js
index 320881b..70e87d0 100644
--- a/test/font/font_os2_spec.js
+++ b/test/font/font_os2_spec.js
@@ -11,7 +11,7 @@ describe('font_post', function() {
loadedName: 'font',
type: 'TrueType',
differences: [],
- baseEncoding: []
+ defaultEncoding: []
});
ttx(font.data, function(result) { output = result; });
runs(function() {
@@ -26,7 +26,9 @@ describe('font_post', function() {
var font = new Font("font", new Stream(font1282), {
loadedName: 'font',
type: 'CIDFontType2',
- differences: []
+ differences: [],
+ defaultEncoding: [],
+ cMap: CMapFactory.create(new Name('Identity-H'))
});
ttx(font.data, function(result) { output = result; });
runs(function() {
diff --git a/test/font/font_post_spec.js b/test/font/font_post_spec.js
index acd07b8..3de7209 100644
--- a/test/font/font_post_spec.js
+++ b/test/font/font_post_spec.js
@@ -12,7 +12,8 @@ describe('font_post', function() {
loadedName: 'font',
type: 'CIDFontType2',
differences: [],
- baseEncoding: []
+ defaultEncoding: [],
+ cMap: CMapFactory.create(new Name('Identity-H'))
});
ttx(font.data, function(result) { output = result; });
runs(function() {
@@ -28,7 +29,7 @@ describe('font_post', function() {
loadedName: 'font',
type: 'TrueType',
differences: [],
- baseEncoding: []
+ defaultEncoding: []
});
ttx(font.data, function(result) { output = result; });
runs(function() {
@@ -44,7 +45,7 @@ describe('font_post', function() {
loadedName: 'font',
type: 'TrueType',
differences: [],
- baseEncoding: []
+ defaultEncoding: []
});
ttx(font.data, function(result) { output = result; });
runs(function() {
diff --git a/test/font/font_test.html b/test/font/font_test.html
index 4fc43a7..12f1819 100644
--- a/test/font/font_test.html
+++ b/test/font/font_test.html
@@ -28,6 +28,7 @@
<script type="text/javascript" src="../../src/core/crypto.js"></script>
<script type="text/javascript" src="../../src/core/pattern.js"></script>
<script type="text/javascript" src="../../src/core/evaluator.js"></script>
+ <script type="text/javascript" src="../../src/core/cmap.js"></script>
<script type="text/javascript" src="../../src/core/fonts.js"></script>
<script type="text/javascript" src="../../src/core/glyphlist.js"></script>
<script type="text/javascript" src="../../src/core/image.js"></script>
diff --git a/test/test.py b/test/test.py
index 51f1d61..8ddbf13 100644
--- a/test/test.py
+++ b/test/test.py
@@ -220,6 +220,11 @@ class TestHandlerBase(BaseHTTPRequestHandler):
self.sendIndex(url.path, url.query)
return
+ pieces = path.split(os.sep);
+ if pieces[len(pieces) - 2] == 'cmaps':
+ self.sendFile(path, '.properties');
+ return
+
if not (prefix == DOC_ROOT
and os.path.isfile(path)
and ext in MIMEs):
diff --git a/test/unit/cmap_spec.js b/test/unit/cmap_spec.js
index be0e412..6d8350b 100644
--- a/test/unit/cmap_spec.js
+++ b/test/unit/cmap_spec.js
@@ -1,6 +1,6 @@
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
-/* globals expect, it, describe, StringStream, Lexer, CMapFactory */
+/* globals expect, it, describe, StringStream, Lexer, CMapFactory, Name */
'use strict';
@@ -82,5 +82,22 @@ describe('cmap', function() {
expect(c[0]).toEqual(0x8EA1A1A1);
expect(c[1]).toEqual(4);
});
+ it('read usecmap', function() {
+ var str = '/Adobe-Japan1-1 usecmap\n';
+ var stream = new StringStream(str);
+ var cmap = CMapFactory.create(stream, null, '../../external/cmaps/');
+ expect(cmap.useCMap).toBeDefined();
+ });
+ it('parses wmode', function() {
+ var str = '/WMode 1 def\n';
+ var stream = new StringStream(str);
+ var cmap = CMapFactory.create(stream);
+ expect(cmap.vertical).toEqual(true);
+ });
+ it('loads built in cmap', function() {
+ var cmap = CMapFactory.create(new Name('Adobe-Japan1-1'),
+ '../../external/cmaps/',
+ null);
+ });
});
diff --git a/test/unit/font_spec.js b/test/unit/font_spec.js
index 4efccb9..aa50cc5 100644
--- a/test/unit/font_spec.js
+++ b/test/unit/font_spec.js
@@ -386,7 +386,7 @@ describe('font', function() {
var parser = new Type1Parser(stream);
var props = { overridableEncoding: true };
var program = parser.extractFontHeader(props);
- expect(props.baseEncoding[33]).toEqual('arrowright');
+ expect(props.builtInEncoding[33]).toEqual('arrowright');
});
});
});
diff --git a/web/viewer.js b/web/viewer.js
index a7b4d25..a1dc43a 100644
--- a/web/viewer.js
+++ b/web/viewer.js
@@ -62,6 +62,11 @@ PDFJS.imageResourcesPath = './images/';
//#if (FIREFOX || MOZCENTRAL || B2G || GENERIC || CHROME)
//PDFJS.workerSrc = '../build/pdf.worker.js';
//#endif
+//#if !PRODUCTION
+PDFJS.cMapUrl = '../external/cmaps/';
+//#else
+//PDFJS.cMapUrl = '../web/cmaps/';
+//#endif
var mozL10n = document.mozL10n || document.webL10n;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/pdf.js.git
More information about the Pkg-javascript-commits
mailing list