[Pkg-javascript-commits] [pdf.js] 03/161: Use built in CMaps and unify the glyph mapping.

David Prévot taffit at moszumanska.debian.org
Sat Apr 19 14:16:15 UTC 2014


This is an automated email from the git hooks/post-receive script.

taffit pushed a commit to branch master
in repository pdf.js.

commit b5b94a4af389ef387570cf22662d9fc6bd41417a
Author: Brendan Dahl <brendan.dahl at gmail.com>
Date:   Tue Feb 11 10:27:09 2014 -0800

    Use built in CMaps and unify the glyph mapping.
---
 make.js                     |    5 +
 src/core/cmap.js            |  433 ++++++++------
 src/core/evaluator.js       |  118 ++--
 src/core/fonts.js           | 1346 ++++++++++++++++---------------------------
 src/core/worker.js          |    2 +
 src/display/api.js          |    8 +
 src/display/canvas.js       |   51 +-
 test/driver.js              |    1 +
 test/font/font_fpgm_spec.js |    4 +-
 test/font/font_os2_spec.js  |    6 +-
 test/font/font_post_spec.js |    7 +-
 test/font/font_test.html    |    1 +
 test/test.py                |    5 +
 test/unit/cmap_spec.js      |   19 +-
 test/unit/font_spec.js      |    2 +-
 web/viewer.js               |    5 +
 16 files changed, 887 insertions(+), 1126 deletions(-)

diff --git a/make.js b/make.js
index eee9016..a37d378 100644
--- a/make.js
+++ b/make.js
@@ -105,6 +105,7 @@ target.generic = function() {
     copy: [
       [COMMON_WEB_FILES, GENERIC_DIR + '/web'],
       ['external/webL10n/l10n.js', GENERIC_DIR + '/web'],
+      ['external/cmaps/', GENERIC_DIR + '/web/cmaps'],
       ['web/viewer.css', GENERIC_DIR + '/web'],
       ['web/compatibility.js', GENERIC_DIR + '/web'],
       ['web/compressed.tracemonkey-pldi-09.pdf', GENERIC_DIR + '/web'],
@@ -489,6 +490,7 @@ target.firefox = function() {
     defines: defines,
     copy: [
       [COMMON_WEB_FILES, FIREFOX_BUILD_CONTENT_DIR + '/web'],
+      ['external/cmaps/', FIREFOX_BUILD_CONTENT_DIR + '/web/cmaps'],
       [FIREFOX_EXTENSION_DIR + 'tools/l10n.js',
        FIREFOX_BUILD_CONTENT_DIR + '/web'],
       ['web/default_preferences.js', FIREFOX_BUILD_CONTENT_DIR]
@@ -604,6 +606,7 @@ target.mozcentral = function() {
     defines: defines,
     copy: [
       [COMMON_WEB_FILES, MOZCENTRAL_CONTENT_DIR + '/web'],
+      ['external/cmaps/', MOZCENTRAL_CONTENT_DIR + '/web/cmaps'],
       ['extensions/firefox/tools/l10n.js', MOZCENTRAL_CONTENT_DIR + '/web'],
       ['web/default_preferences.js', MOZCENTRAL_CONTENT_DIR]
     ],
@@ -673,6 +676,7 @@ target.b2g = function() {
   var setup = {
     defines: defines,
     copy: [
+      ['external/cmaps/', B2G_BUILD_CONTENT_DIR + '/web/cmaps'],
       ['extensions/b2g/images', B2G_BUILD_CONTENT_DIR + '/web'],
       ['extensions/b2g/viewer.html', B2G_BUILD_CONTENT_DIR + '/web'],
       ['extensions/b2g/viewer.css', B2G_BUILD_CONTENT_DIR + '/web'],
@@ -713,6 +717,7 @@ target.chromium = function() {
   var setup = {
     defines: defines,
     copy: [
+      ['external/cmaps/', CHROME_BUILD_CONTENT_DIR + '/web/cmaps'],
       [COMMON_WEB_FILES, CHROME_BUILD_CONTENT_DIR + '/web'],
       [['extensions/chromium/*.json',
         'extensions/chromium/*.html',
diff --git a/src/core/cmap.js b/src/core/cmap.js
index aa76128..2b45dad 100644
--- a/src/core/cmap.js
+++ b/src/core/cmap.js
@@ -15,184 +15,199 @@
  * limitations under the License.
  */
 /* globals Util, isString, isInt, warn, error, isCmd, isEOF, isName, Lexer,
-           isStream */
+           isStream, StringStream */
 
 'use strict';
 
-var CMAP_CODESPACES = {
-  'Adobe-CNS1-0': [[], [0, 14335]],
-  'Adobe-CNS1-1': [[], [0, 17407]],
-  'Adobe-CNS1-2': [[], [0, 17663]],
-  'Adobe-CNS1-3': [[], [0, 18943]],
-  'Adobe-CNS1-4': [[], [0, 19199]],
-  'Adobe-CNS1-5': [[], [0, 19199]],
-  'Adobe-CNS1-6': [[], [0, 19199]],
-  'Adobe-CNS1-UCS2': [[], [0, 65535]],
-  'B5-H': [[0, 128], [41280, 65278]],
-  'B5-V': [[0, 128], [41280, 65278]],
-  'B5pc-H': [[0, 128, 253, 255], [41280, 64766]],
-  'B5pc-V': [[0, 128, 253, 255], [41280, 64766]],
-  'CNS-EUC-H': [[0, 128], [41377, 65278], [],
-    [2392957345, 2392981246, 2393022881, 2393046782, 2393088417, 2393112318]],
-  'CNS-EUC-V': [[0, 128], [41377, 65278], [],
-    [2392957345, 2392981246, 2393022881, 2393046782, 2393088417, 2393112318]],
-  'CNS1-H': [[], [8481, 32382]],
-  'CNS1-V': [[], [8481, 32382]],
-  'CNS2-H': [[], [8481, 32382]],
-  'CNS2-V': [[], [8481, 32382]],
-  'ETen-B5-H': [[0, 128], [41280, 65278]],
-  'ETen-B5-V': [[0, 128], [41280, 65278]],
-  'ETenms-B5-H': [[0, 128], [41280, 65278]],
-  'ETenms-B5-V': [[0, 128], [41280, 65278]],
-  'ETHK-B5-H': [[0, 128], [34624, 65278]],
-  'ETHK-B5-V': [[0, 128], [34624, 65278]],
-  'HKdla-B5-H': [[0, 128], [41280, 65278]],
-  'HKdla-B5-V': [[0, 128], [41280, 65278]],
-  'HKdlb-B5-H': [[0, 128], [36416, 65278]],
-  'HKdlb-B5-V': [[0, 128], [36416, 65278]],
-  'HKgccs-B5-H': [[0, 128], [35392, 65278]],
-  'HKgccs-B5-V': [[0, 128], [35392, 65278]],
-  'HKm314-B5-H': [[0, 128], [41280, 65278]],
-  'HKm314-B5-V': [[0, 128], [41280, 65278]],
-  'HKm471-B5-H': [[0, 128], [41280, 65278]],
-  'HKm471-B5-V': [[0, 128], [41280, 65278]],
-  'HKscs-B5-H': [[0, 128], [34624, 65278]],
-  'HKscs-B5-V': [[0, 128], [34624, 65278]],
-  'UniCNS-UCS2-H': [[], [0, 55295, 57344, 65535]],
-  'UniCNS-UCS2-V': [[], [0, 55295, 57344, 65535]],
-  'UniCNS-UTF16-H': [[], [0, 55295, 57344, 65535], [],
-    [3623934976, 3690979327]],
-  'UniCNS-UTF16-V': [[], [0, 55295, 57344, 65535], [],
-    [3623934976, 3690979327]],
-  'Adobe-GB1-0': [[], [0, 7935]],
-  'Adobe-GB1-1': [[], [0, 9983]],
-  'Adobe-GB1-2': [[], [0, 22271]],
-  'Adobe-GB1-3': [[], [0, 22527]],
-  'Adobe-GB1-4': [[], [0, 29183]],
-  'Adobe-GB1-5': [[], [0, 30463]],
-  'Adobe-GB1-UCS2': [[], [0, 65535]],
-  'GB-EUC-H': [[0, 128], [41377, 65278]],
-  'GB-EUC-V': [[0, 128], [41377, 65278]],
-  'GB-H': [[], [8481, 32382]],
-  'GB-V': [[], [8481, 32382]],
-  'GBK-EUC-H': [[0, 128], [33088, 65278]],
-  'GBK-EUC-V': [[0, 128], [33088, 65278]],
-  'GBK2K-H': [[0, 127], [33088, 65278], [], [2167439664, 4265213497]],
-  'GBK2K-V': [[0, 127], [33088, 65278], [], [2167439664, 4265213497]],
-  'GBKp-EUC-H': [[0, 128], [33088, 65278]],
-  'GBKp-EUC-V': [[0, 128], [33088, 65278]],
-  'GBpc-EUC-H': [[0, 128, 253, 255], [41377, 64766]],
-  'GBpc-EUC-V': [[0, 128, 253, 255], [41377, 64766]],
-  'GBT-EUC-H': [[0, 128], [41377, 65278]],
-  'GBT-EUC-V': [[0, 128], [41377, 65278]],
-  'GBT-H': [[], [8481, 32382]],
-  'GBT-V': [[], [8481, 32382]],
-  'GBTpc-EUC-H': [[0, 128, 253, 255], [41377, 64766]],
-  'GBTpc-EUC-V': [[0, 128, 253, 255], [41377, 64766]],
-  'UniGB-UCS2-H': [[], [0, 55295, 57344, 65535]],
-  'UniGB-UCS2-V': [[], [0, 55295, 57344, 65535]],
-  'UniGB-UTF16-H': [[], [0, 55295, 57344, 65535], [], [3623934976, 3690979327]],
-  'UniGB-UTF16-V': [[], [0, 55295, 57344, 65535], [], [3623934976, 3690979327]],
-  '78-EUC-H': [[0, 128], [36512, 36575, 41377, 65278]],
-  '78-EUC-V': [[0, 128], [36512, 36575, 41377, 65278]],
-  '78-H': [[], [8481, 32382]],
-  '78-RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
-  '78-RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
-  '78-V': [[], [8481, 32382]],
-  '78ms-RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
-  '78ms-RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
-  '83pv-RKSJ-H': [[0, 128, 160, 223, 253, 255], [33088, 40956, 57408, 64764]],
-  '90ms-RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
-  '90ms-RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
-  '90msp-RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
-  '90msp-RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
-  '90pv-RKSJ-H': [[0, 128, 160, 223, 253, 255], [33088, 40956, 57408, 64764]],
-  '90pv-RKSJ-V': [[0, 128, 160, 223, 253, 255], [33088, 40956, 57408, 64764]],
-  'Add-H': [[], [8481, 32382]],
-  'Add-RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
-  'Add-RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
-  'Add-V': [[], [8481, 32382]],
-  'Adobe-Japan1-0': [[], [0, 8447]],
-  'Adobe-Japan1-1': [[], [0, 8447]],
-  'Adobe-Japan1-2': [[], [0, 8959]],
-  'Adobe-Japan1-3': [[], [0, 9471]],
-  'Adobe-Japan1-4': [[], [0, 15615]],
-  'Adobe-Japan1-5': [[], [0, 20479]],
-  'Adobe-Japan1-6': [[], [0, 23295]],
-  'Adobe-Japan1-UCS2': [[], [0, 65535]],
-  'Adobe-Japan2-0': [[], [0, 6143]],
-  'EUC-H': [[0, 128], [36512, 36575, 41377, 65278]],
-  'EUC-V': [[0, 128], [36512, 36575, 41377, 65278]],
-  'Ext-H': [[], [8481, 32382]],
-  'Ext-RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
-  'Ext-RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
-  'Ext-V': [[], [8481, 32382]],
-  'H': [[], [8481, 32382]],
-  'Hankaku': [[0, 255], []],
-  'Hiragana': [[0, 255], []],
-  'Hojo-EUC-H': [[], [], [9413025, 9436926], []],
-  'Hojo-EUC-V': [[], [], [9413025, 9436926], []],
-  'Hojo-H': [[], [8481, 32382]],
-  'Hojo-V': [[], [8481, 32382]],
-  'Katakana': [[0, 255], []],
-  'NWP-H': [[], [8481, 32382]],
-  'NWP-V': [[], [8481, 32382]],
-  'RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
-  'RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]],
-  'Roman': [[0, 255], []],
-  'UniHojo-UCS2-H': [[], [0, 55295, 57344, 65535]],
-  'UniHojo-UCS2-V': [[], [0, 55295, 57344, 65535]],
-  'UniHojo-UTF16-H': [[], [0, 55295, 57344, 65535], [],
-    [3623934976, 3690979327]],
-  'UniHojo-UTF16-V': [[], [0, 55295, 57344, 65535], [],
-    [3623934976, 3690979327]],
-  'UniJIS-UCS2-H': [[], [0, 55295, 57344, 65535]],
-  'UniJIS-UCS2-HW-H': [[], [0, 55295, 57344, 65535]],
-  'UniJIS-UCS2-HW-V': [[], [0, 55295, 57344, 65535]],
-  'UniJIS-UCS2-V': [[], [0, 55295, 57344, 65535]],
-  'UniJIS-UTF16-H': [[], [0, 55295, 57344, 65535], [],
-    [3623934976, 3690979327]],
-  'UniJIS-UTF16-V': [[], [0, 55295, 57344, 65535], [],
-    [3623934976, 3690979327]],
-  'UniJISPro-UCS2-HW-V': [[], [0, 55295, 57344, 65535]],
-  'UniJISPro-UCS2-V': [[], [0, 55295, 57344, 65535]],
-  'V': [[], [8481, 32382]],
-  'WP-Symbol': [[0, 255], []],
-  'Adobe-Korea1-0': [[], [0, 9471]],
-  'Adobe-Korea1-1': [[], [0, 18175]],
-  'Adobe-Korea1-2': [[], [0, 18431]],
-  'Adobe-Korea1-UCS2': [[], [0, 65535]],
-  'KSC-EUC-H': [[0, 128], [41377, 65278]],
-  'KSC-EUC-V': [[0, 128], [41377, 65278]],
-  'KSC-H': [[], [8481, 32382]],
-  'KSC-Johab-H': [[0, 128], [33857, 54270, 55345, 57086, 57393, 63998]],
-  'KSC-Johab-V': [[0, 128], [33857, 54270, 55345, 57086, 57393, 63998]],
-  'KSC-V': [[], [8481, 32382]],
-  'KSCms-UHC-H': [[0, 128], [33089, 65278]],
-  'KSCms-UHC-HW-H': [[0, 128], [33089, 65278]],
-  'KSCms-UHC-HW-V': [[0, 128], [33089, 65278]],
-  'KSCms-UHC-V': [[0, 128], [33089, 65278]],
-  'KSCpc-EUC-H': [[0, 132, 254, 255], [41281, 65022]],
-  'KSCpc-EUC-V': [[0, 132, 254, 255], [41281, 65022]],
-  'UniKS-UCS2-H': [[], [0, 55295, 57344, 65535]],
-  'UniKS-UCS2-V': [[], [0, 55295, 57344, 65535]],
-  'UniKS-UTF16-H': [[], [0, 55295, 57344, 65535], [], [3623934976, 3690979327]],
-  'UniKS-UTF16-V': [[], [0, 55295, 57344, 65535], [], [3623934976, 3690979327]]
-};
+var BUILT_IN_CMAPS = [
+// << Start unicode maps.
+'Adobe-GB1-UCS2',
+'Adobe-CNS1-UCS2',
+'Adobe-Japan1-UCS2',
+'Adobe-Korea1-UCS2',
+// >> End unicode maps.
+'78-EUC-H',
+'78-EUC-V',
+'78-H',
+'78-RKSJ-H',
+'78-RKSJ-V',
+'78-V',
+'78ms-RKSJ-H',
+'78ms-RKSJ-V',
+'83pv-RKSJ-H',
+'90ms-RKSJ-H',
+'90ms-RKSJ-V',
+'90msp-RKSJ-H',
+'90msp-RKSJ-V',
+'90pv-RKSJ-H',
+'90pv-RKSJ-V',
+'Add-H',
+'Add-RKSJ-H',
+'Add-RKSJ-V',
+'Add-V',
+'Adobe-CNS1-0',
+'Adobe-CNS1-1',
+'Adobe-CNS1-2',
+'Adobe-CNS1-3',
+'Adobe-CNS1-4',
+'Adobe-CNS1-5',
+'Adobe-CNS1-6',
+'Adobe-GB1-0',
+'Adobe-GB1-1',
+'Adobe-GB1-2',
+'Adobe-GB1-3',
+'Adobe-GB1-4',
+'Adobe-GB1-5',
+'Adobe-Japan1-0',
+'Adobe-Japan1-1',
+'Adobe-Japan1-2',
+'Adobe-Japan1-3',
+'Adobe-Japan1-4',
+'Adobe-Japan1-5',
+'Adobe-Japan1-6',
+'Adobe-Korea1-0',
+'Adobe-Korea1-1',
+'Adobe-Korea1-2',
+'B5-H',
+'B5-V',
+'B5pc-H',
+'B5pc-V',
+'CNS-EUC-H',
+'CNS-EUC-V',
+'CNS1-H',
+'CNS1-V',
+'CNS2-H',
+'CNS2-V',
+'ETHK-B5-H',
+'ETHK-B5-V',
+'ETen-B5-H',
+'ETen-B5-V',
+'ETenms-B5-H',
+'ETenms-B5-V',
+'EUC-H',
+'EUC-V',
+'Ext-H',
+'Ext-RKSJ-H',
+'Ext-RKSJ-V',
+'Ext-V',
+'GB-EUC-H',
+'GB-EUC-V',
+'GB-H',
+'GB-V',
+'GBK-EUC-H',
+'GBK-EUC-V',
+'GBK2K-H',
+'GBK2K-V',
+'GBKp-EUC-H',
+'GBKp-EUC-V',
+'GBT-EUC-H',
+'GBT-EUC-V',
+'GBT-H',
+'GBT-V',
+'GBTpc-EUC-H',
+'GBTpc-EUC-V',
+'GBpc-EUC-H',
+'GBpc-EUC-V',
+'H',
+'HKdla-B5-H',
+'HKdla-B5-V',
+'HKdlb-B5-H',
+'HKdlb-B5-V',
+'HKgccs-B5-H',
+'HKgccs-B5-V',
+'HKm314-B5-H',
+'HKm314-B5-V',
+'HKm471-B5-H',
+'HKm471-B5-V',
+'HKscs-B5-H',
+'HKscs-B5-V',
+'Hankaku',
+'Hiragana',
+'KSC-EUC-H',
+'KSC-EUC-V',
+'KSC-H',
+'KSC-Johab-H',
+'KSC-Johab-V',
+'KSC-V',
+'KSCms-UHC-H',
+'KSCms-UHC-HW-H',
+'KSCms-UHC-HW-V',
+'KSCms-UHC-V',
+'KSCpc-EUC-H',
+'KSCpc-EUC-V',
+'Katakana',
+'NWP-H',
+'NWP-V',
+'RKSJ-H',
+'RKSJ-V',
+'Roman',
+'UniCNS-UCS2-H',
+'UniCNS-UCS2-V',
+'UniCNS-UTF16-H',
+'UniCNS-UTF16-V',
+'UniCNS-UTF32-H',
+'UniCNS-UTF32-V',
+'UniCNS-UTF8-H',
+'UniCNS-UTF8-V',
+'UniGB-UCS2-H',
+'UniGB-UCS2-V',
+'UniGB-UTF16-H',
+'UniGB-UTF16-V',
+'UniGB-UTF32-H',
+'UniGB-UTF32-V',
+'UniGB-UTF8-H',
+'UniGB-UTF8-V',
+'UniJIS-UCS2-H',
+'UniJIS-UCS2-HW-H',
+'UniJIS-UCS2-HW-V',
+'UniJIS-UCS2-V',
+'UniJIS-UTF16-H',
+'UniJIS-UTF16-V',
+'UniJIS-UTF32-H',
+'UniJIS-UTF32-V',
+'UniJIS-UTF8-H',
+'UniJIS-UTF8-V',
+'UniJIS2004-UTF16-H',
+'UniJIS2004-UTF16-V',
+'UniJIS2004-UTF32-H',
+'UniJIS2004-UTF32-V',
+'UniJIS2004-UTF8-H',
+'UniJIS2004-UTF8-V',
+'UniJISPro-UCS2-HW-V',
+'UniJISPro-UCS2-V',
+'UniJISPro-UTF8-V',
+'UniJISX0213-UTF32-H',
+'UniJISX0213-UTF32-V',
+'UniJISX02132004-UTF32-H',
+'UniJISX02132004-UTF32-V',
+'UniKS-UCS2-H',
+'UniKS-UCS2-V',
+'UniKS-UTF16-H',
+'UniKS-UTF16-V',
+'UniKS-UTF32-H',
+'UniKS-UTF32-V',
+'UniKS-UTF8-H',
+'UniKS-UTF8-V',
+'V',
+'WP-Symbol'];
 
 // CMap, not to be confused with TrueType's cmap.
 var CMap = (function CMapClosure() {
-  function CMap() {
+  function CMap(builtInCMap) {
     // Codespace ranges are stored as follows:
     // [[1BytePairs], [2BytePairs], [3BytePairs], [4BytePairs]]
     // where nBytePairs are ranges e.g. [low1, high1, low2, high2, ...]
     this.codespaceRanges = [[], [], [], []];
+    this.numCodespaceRanges = 0;
     this.map = [];
     this.vertical = false;
+    this.useCMap = null;
+    this.builtInCMap = builtInCMap;
   }
   CMap.prototype = {
     addCodespaceRange: function(n, low, high) {
       this.codespaceRanges[n - 1].push(low, high);
+      this.numCodespaceRanges++;
     },
 
     mapRange: function(low, high, dstLow) {
@@ -395,17 +410,33 @@ var CMapFactory = (function CMapFactoryClosure() {
     error('Invalid codespace range.');
   }
 
-  function parseCmap(cMap, lexer) {
+  function parseWMode(cMap, lexer) {
+    var obj = lexer.getObj();
+    if (isInt(obj)) {
+      cMap.vertical = !!obj;
+    }
+  }
+
+  function parseCMap(cMap, lexer, builtInCMapUrl, useCMap) {
+    var previous;
+    var embededUseCMap;
     objLoop: while (true) {
       var obj = lexer.getObj();
       if (isEOF(obj)) {
         break;
+      } else if (isName(obj)) {
+        if (obj.name === 'WMode') {
+          parseWMode(cMap, lexer);
+        }
+        previous = obj;
       } else if (isCmd(obj)) {
         switch (obj.cmd) {
-          case 'endcMap':
+          case 'endcmap':
             break objLoop;
-          case 'usecMap':
-            // TODO
+          case 'usecmap':
+            if (isName(previous)) {
+              embededUseCMap = previous.name;
+            }
             break;
           case 'begincodespacerange':
             parseCodespaceRange(cMap, lexer);
@@ -425,30 +456,66 @@ var CMapFactory = (function CMapFactoryClosure() {
         }
       }
     }
+
+    if (!useCMap && embededUseCMap) {
+      // Load the usecmap definition from the file only if there wasn't one
+      // specified.
+      useCMap = embededUseCMap;
+    }
+    if (useCMap) {
+      cMap.useCMap = createBuiltInCMap(useCMap, builtInCMapUrl);
+      // If there aren't any code space ranges defined, clone all the parent
+      // ones into this cMap.
+      if (cMap.numCodespaceRanges === 0) {
+        var useCodespaceRanges = cMap.useCMap.codespaceRanges;
+        for (var i = 0; i < useCodespaceRanges.length; i++) {
+          cMap.codespaceRanges[i] = useCodespaceRanges[i].slice();
+        }
+        cMap.numCodespaceRanges = cMap.useCMap.numCodespaceRanges;
+      }
+      // Merge the map into the current one, making sure not to override
+      // any previously defined entries.
+      for (var key in cMap.useCMap.map) {
+        if (key in cMap.map) {
+          continue;
+        }
+        cMap.map[key] = cMap.useCMap.map[key];
+      }
+    }
+  }
+
+  function createBuiltInCMap(name, builtInCMapUrl) {
+    if (name === 'Identity-H') {
+      return new IdentityCMap(false, 2);
+    } else if (name === 'Identity-V') {
+      return new IdentityCMap(true, 2);
+    }
+    if (BUILT_IN_CMAPS.indexOf(name) === -1) {
+      error('Unknown cMap name: ' + name);
+    }
+
+    var request = new XMLHttpRequest();
+    var url = builtInCMapUrl + name;
+    request.open('GET', url, false);
+    request.send(null);
+    if (request.status === 0 && /^https?:/i.test(url)) {
+      error('Unable to get cMap at: ' + url);
+    }
+    var cMap = new CMap(true);
+    var lexer = new Lexer(new StringStream(request.responseText));
+    parseCMap(cMap, lexer, builtInCMapUrl, null);
+    return cMap;
   }
+
   return {
-    create: function (encoding) {
+    create: function (encoding, builtInCMapUrl, useCMap) {
       if (isName(encoding)) {
-        switch (encoding.name) {
-          case 'Identity-H':
-            return new IdentityCMap(false, 2);
-          case 'Identity-V':
-            return new IdentityCMap(true, 2);
-          default:
-            if (encoding.name in CMAP_CODESPACES) {
-              // XXX: Temporary hack so the correct amount of bytes are read in
-              // CMap.readCharCode.
-              var cMap = new CMap();
-              cMap.codespaceRanges = CMAP_CODESPACES[encoding.name];
-              return cMap;
-            }
-            return null;
-        }
+        return createBuiltInCMap(encoding.name, builtInCMapUrl);
       } else if (isStream(encoding)) {
         var cMap = new CMap();
         var lexer = new Lexer(encoding);
         try {
-          parseCmap(cMap, lexer);
+          parseCMap(cMap, lexer, builtInCMapUrl, useCMap);
         } catch (e) {
           warn('Invalid CMap data. ' + e);
         }
diff --git a/src/core/evaluator.js b/src/core/evaluator.js
index 2e85cbd..fd68580 100644
--- a/src/core/evaluator.js
+++ b/src/core/evaluator.js
@@ -828,41 +828,21 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
       }
 
       // Based on 9.6.6 of the spec the encoding can come from multiple places
-      // but should be prioritized in the following order:
-      // 1. Encoding dictionary
-      // 2. Encoding within font file (Type1 or Type1C)
-      // 3. Default (depends on font type)
-      // Differences applied to the above.
-      // Note: we don't fill in the encoding from the font file(2) here but use
-      // the flag overridableEncoding to signal that the font can override the
-      // encoding if it has one built in.
-      var overridableEncoding = true;
-      var hasEncoding = false;
-      var flags = properties.flags;
+      // and depends on the font type. The base encoding and differences are
+      // read here, but the encoding that is actually used is chosen during
+      // glyph mapping in the font.
+      // TODO: Loading the built in encoding in the font would allow the
+      // differences to be merged in here and not require us to hold on to it.
       var differences = [];
-      var baseEncoding = properties.type === 'TrueType' ?
-                          Encodings.WinAnsiEncoding :
-                          Encodings.StandardEncoding;
-      // The Symbolic attribute can be misused for regular fonts
-      // Heuristic: we have to check if the font is a standard one and has
-      // Symbolic font name
-      if (!!(flags & FontFlags.Symbolic)) {
-        baseEncoding = !properties.file && /Symbol/i.test(properties.name) ?
-          Encodings.SymbolSetEncoding : Encodings.MacRomanEncoding;
-      }
+      var baseEncodingName = null;
       if (dict.has('Encoding')) {
         var encoding = dict.get('Encoding');
         if (isDict(encoding)) {
-          var baseName = encoding.get('BaseEncoding');
-          if (baseName) {
-            overridableEncoding = false;
-            hasEncoding = true;
-            baseEncoding = Encodings[baseName.name];
-          }
-
+          baseEncodingName = encoding.get('BaseEncoding');
+          baseEncodingName = isName(baseEncodingName) ? baseEncodingName.name :
+            null;
           // Load the differences between the base and original
           if (encoding.has('Differences')) {
-            hasEncoding = true;
             var diffEncoding = encoding.get('Differences');
             var index = 0;
             for (var j = 0, jj = diffEncoding.length; j < jj; j++) {
@@ -874,38 +854,44 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
             }
           }
         } else if (isName(encoding)) {
-          overridableEncoding = false;
-          hasEncoding = true;
-          var currentEncoding = Encodings[encoding.name];
-
-          // Some bad PDF files contain fonts whose encoding name is not among
-          // the predefined encodings, causing baseEncoding to be undefined.
-          // In this case, fallback to using the baseEncoding as defined above
-          // and let the font override the encoding if one is available.
-          if (currentEncoding) {
-            baseEncoding = currentEncoding;
-          } else {
-            overridableEncoding = true;
-          }
+          baseEncodingName = encoding.name;
         } else {
           error('Encoding is not a Name nor a Dict');
         }
+        // According to table 114 if the encoding is a named encoding it must be
+        // one of these predefined encodings.
+        if ((baseEncodingName !== 'MacRomanEncoding' &&
+             baseEncodingName !== 'MacExpertEncoding' &&
+             baseEncodingName !== 'WinAnsiEncoding')) {
+          baseEncodingName = null;
+        }
+      }
+
+      if (baseEncodingName) {
+        properties.defaultEncoding = Encodings[baseEncodingName].slice();
+      } else {
+        var encoding = properties.type === 'TrueType' ?
+                Encodings.WinAnsiEncoding :
+                Encodings.StandardEncoding;
+        // The Symbolic attribute can be misused for regular fonts
+        // Heuristic: we also have to check if the font is a standard one
+        if (!!(properties.flags & FontFlags.Symbolic)) {
+          encoding = !properties.file && /Symbol/i.test(properties.name) ?
+            Encodings.SymbolSetEncoding : Encodings.MacRomanEncoding;
+        }
+        properties.defaultEncoding = encoding;
       }
 
       properties.differences = differences;
-      properties.baseEncoding = baseEncoding;
-      properties.hasEncoding = hasEncoding;
-      properties.overridableEncoding = overridableEncoding;
+      properties.baseEncodingName = baseEncodingName;
+      properties.dict = dict;
     },
 
-    readToUnicode: function PartialEvaluator_readToUnicode(toUnicode, xref,
-                                                           properties) {
+    readToUnicode: function PartialEvaluator_readToUnicode(toUnicode) {
       var cmapObj = toUnicode;
       var charToUnicode = [];
       if (isName(cmapObj)) {
-        var isIdentityMap = cmapObj.name.substr(0, 9) == 'Identity-';
-        if (!isIdentityMap)
-          error('ToUnicode file cmap translation not implemented');
+        return CMapFactory.create(cmapObj).map;
       } else if (isStream(cmapObj)) {
         var cmap = CMapFactory.create(cmapObj).map;
         // Convert UTF-16BE
@@ -927,7 +913,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
         });
         return cmap;
       }
-      return charToUnicode;
+      return null;
     },
     readCidToGidMap: function PartialEvaluator_readCidToGidMap(cidToGidStream) {
       // Extract the encoding from the CIDToGIDMap
@@ -1006,7 +992,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
           if (isName(baseFontName)) {
             var metrics = this.getBaseFontMetrics(baseFontName.name);
 
-            glyphsWidths = metrics.widths;
+            glyphsWidths = this.buildCharCodeToWidth(metrics.widths,
+                                                     properties);
             defaultWidth = metrics.defaultWidth;
           }
         }
@@ -1074,6 +1061,25 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
       };
     },
 
+    buildCharCodeToWidth: function PartialEvaluator_bulildCharCodeToWidth(
+                            widthsByGlyphName, properties) {
+      var widths = Object.create(null);
+      var differences = properties.differences;
+      var encoding = properties.defaultEncoding;
+      for (var charCode = 0; charCode < 256; charCode++) {
+        if (charCode in differences &&
+            widthsByGlyphName[differences[charCode]]) {
+          widths[charCode] = widthsByGlyphName[differences[charCode]];
+          continue;
+        }
+        if (charCode in encoding && widthsByGlyphName[encoding[charCode]]) {
+          widths[charCode] = widthsByGlyphName[encoding[charCode]];
+          continue;
+        }
+      }
+      return widths;
+    },
+
     translateFont: function PartialEvaluator_translateFont(dict,
                                                            xref) {
       var baseDict = dict;
@@ -1135,6 +1141,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
             lastChar: maxCharIndex
           };
           this.extractDataStructures(dict, dict, xref, properties);
+          properties.widths = this.buildCharCodeToWidth(metrics.widths,
+                                                        properties);
 
           return new Font(baseFontName, null, properties);
         }
@@ -1212,12 +1220,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
         var cidEncoding = baseDict.get('Encoding');
         if (isName(cidEncoding)) {
           properties.cidEncoding = cidEncoding.name;
-          properties.vertical = /-V$/.test(cidEncoding.name);
         }
-        properties.cmap = CMapFactory.create(cidEncoding);
+        properties.cMap = CMapFactory.create(cidEncoding, PDFJS.cMapUrl, null);
+        properties.vertical = properties.cMap.vertical;
       }
-      this.extractWidths(dict, xref, descriptor, properties);
       this.extractDataStructures(dict, baseDict, xref, properties);
+      this.extractWidths(dict, xref, descriptor, properties);
 
       if (type.name === 'Type3') {
         properties.coded = true;
diff --git a/src/core/fonts.js b/src/core/fonts.js
index 1d92856..e6bf1f6 100644
--- a/src/core/fonts.js
+++ b/src/core/fonts.js
@@ -18,14 +18,15 @@
            ExpertSubsetCharset, FileReaderSync, GlyphsUnicode,
            info, isArray, isNum, ISOAdobeCharset, Stream,
            stringToBytes, TextDecoder, warn, Lexer, Util,
-           FONT_IDENTITY_MATRIX, FontRendererFactory, shadow, isString */
+           FONT_IDENTITY_MATRIX, FontRendererFactory, shadow, isString,
+           IdentityCMap, Name, CMapFactory, PDFJS */
 
 'use strict';
 
 // Unicode Private Use Area
-var CMAP_GLYPH_OFFSET = 0xE000;
-var GLYPH_AREA_SIZE = 0x1900;
-var SYMBOLIC_FONT_GLYPH_OFFSET = 0xF000;
+var PRIVATE_USE_OFFSET_START = 0xE000;
+var PRIVATE_USE_OFFSET_END = 0xF8FF;
+var SKIP_PRIVATE_USE_RANGE_F000_TO_F01F = false;
 
 // PDF Glyph Space Units are one Thousandth of a TextSpace Unit
 // except for Type 3 fonts
@@ -425,37 +426,6 @@ var symbolsFonts = {
   'Dingbats': true, 'Symbol': true, 'ZapfDingbats': true
 };
 
-var CMapConverterList = {
-  'H': jis7ToUnicode,
-  'V': jis7ToUnicode,
-  'EUC-H': eucjpToUnicode,
-  'EUC-V': eucjpToUnicode,
-  '83pv-RKSJ-H': sjis83pvToUnicode,
-  '90pv-RKSJ-H': sjis90pvToUnicode,
-  '90ms-RKSJ-H': sjisToUnicode,
-  '90ms-RKSJ-V': sjisToUnicode,
-  '90msp-RKSJ-H': sjisToUnicode,
-  '90msp-RKSJ-V': sjisToUnicode,
-  'GBK-EUC-H': gbkToUnicode,
-  'GBKp-EUC-H': gbkToUnicode,
-  'B5pc-H': big5ToUnicode,
-  'ETenms-B5-H': big5ToUnicode,
-  'ETenms-B5-V': big5ToUnicode,
-};
-
-// CMaps using Hankaku (Halfwidth) Latin glyphs instead of proportional one.
-// We need to distinguish them to get correct widths from CIDFont dicts.
-var HalfwidthCMaps = {
-  'H': true,
-  'V': true,
-  'EUC-H': true,
-  'EUC-V': true,
-  '90ms-RKSJ-H': true,
-  '90ms-RKSJ-V': true,
-  'UniJIS-UCS2-HW-H': true,
-  'UniJIS-UCS2-HW-V': true
-};
-
 // Glyph map for well-known standard fonts. Sometimes Ghostscript uses CID fonts
 // but does not embed the CID to GID mapping. The mapping is incomplete for all
 // glyphs, but common for some set of the standard fonts.
@@ -526,75 +496,13 @@ var GlyphMapForStandardFonts = {
   '3316': 578, '3379': 42785, '3393': 1159, '3416': 8377
 };
 
-var decodeBytes;
-if (typeof TextDecoder !== 'undefined') {
-  // The encodings supported by TextDecoder can be found at:
-  // http://encoding.spec.whatwg.org/#concept-encoding-get
-  decodeBytes = function(bytes, encoding, fatal) {
-    return new TextDecoder(encoding, {fatal: !!fatal}).decode(bytes);
-  };
-} else if (typeof FileReaderSync !== 'undefined') {
-  decodeBytes = function(bytes, encoding) {
-    return new FileReaderSync().readAsText(new Blob([bytes]), encoding);
-  };
-} else {
-  // Clear the list so that decodeBytes will never be called.
-  CMapConverterList = {};
-}
-
-function jis7ToUnicode(str) {
-  var bytes = stringToBytes(str);
-  var length = bytes.length;
-  for (var i = 0; i < length; ++i) {
-    bytes[i] |= 0x80;
-  }
-  return decodeBytes(bytes, 'euc-jp');
-}
-
-function eucjpToUnicode(str) {
-  return decodeBytes(stringToBytes(str), 'euc-jp');
-}
-
-function sjisToUnicode(str) {
-  return decodeBytes(stringToBytes(str), 'shift_jis');
-}
-
-function sjis83pvToUnicode(str) {
-  var bytes = stringToBytes(str);
-  try {
-    // TODO: 83pv has incompatible mappings in ed40..ee9c range.
-    return decodeBytes(bytes, 'shift_jis', true);
-  } catch (e) {
-    warn('Unsupported 83pv character found');
-    // Just retry without checking errors for now.
-    return decodeBytes(bytes, 'shift_jis');
-  }
-}
-
-function sjis90pvToUnicode(str) {
-  var bytes = stringToBytes(str);
-  try {
-    // TODO: 90pv has incompatible mappings in 8740..879c and eb41..ee9c.
-    return decodeBytes(bytes, 'shift_jis', true);
-  } catch (e) {
-    warn('Unsupported 90pv character found');
-    // Just retry without checking errors for now.
-    return decodeBytes(bytes, 'shift_jis');
-  }
-}
-
-function gbkToUnicode(str) {
-  return decodeBytes(stringToBytes(str), 'gbk');
-}
-
-function big5ToUnicode(str) {
-  return decodeBytes(stringToBytes(str), 'big5');
-}
-
 // Some characters, e.g. copyrightserif, mapped to the private use area and
 // might not be displayed using standard fonts. Mapping/hacking well-known chars
 // to the similar equivalents in the normal characters range.
-function mapPrivateUseChars(code) {
+function mapSpecialUnicodeValues(code) {
+  if (code >= 0xFFF0 && code <= 0xFFFF) { // Specials unicode block.
+    return 0;
+  }
   switch (code) {
     case 0xF8E9: // copyrightsans
     case 0xF6D9: // copyrightserif
@@ -787,12 +695,6 @@ function isRTLRangeFor(value) {
   return false;
 }
 
-function isSpecialUnicode(unicode) {
-  return (unicode <= 0x1F || (unicode >= 127 && unicode < GLYPH_AREA_SIZE)) ||
-    (unicode >= CMAP_GLYPH_OFFSET &&
-    unicode < CMAP_GLYPH_OFFSET + GLYPH_AREA_SIZE);
-}
-
 // The normalization table is obtained by filtering the Unicode characters
 // database with <compat> entries.
 var NormalizedUnicodes = {
@@ -2251,19 +2153,25 @@ var Font = (function FontClosure() {
     this.defaultWidth = properties.defaultWidth;
     this.composite = properties.composite;
     this.wideChars = properties.wideChars;
-    this.hasEncoding = properties.hasEncoding;
-    this.cmap = properties.cmap;
+    this.cMap = properties.cMap;
     this.ascent = properties.ascent / PDF_GLYPH_SPACE_UNITS;
     this.descent = properties.descent / PDF_GLYPH_SPACE_UNITS;
-
     this.fontMatrix = properties.fontMatrix;
+
+    var unicode = this.buildToUnicode(properties);
+    this.toUnicode = properties.toUnicode = unicode.toUnicode;
+    this.isIdentityUnicode = properties.isIdentityUnicode = unicode.isIdentity;
+
+    this.toFontChar = [];
+
     if (properties.type == 'Type3') {
-      this.encoding = properties.baseEncoding;
+      for (var charCode = 0; charCode < 256; charCode++) {
+        this.toFontChar[charCode] = this.differences[charCode] ||
+                                    properties.defaultEncoding[charCode];
+      }
       return;
     }
 
-    // Trying to fix encoding using glyph CIDSystemInfo.
-    this.loadCidToUnicode(properties);
     this.cidEncoding = properties.cidEncoding;
     this.vertical = properties.vertical;
     if (this.vertical) {
@@ -2271,14 +2179,8 @@ var Font = (function FontClosure() {
       this.defaultVMetrics = properties.defaultVMetrics;
     }
 
-    if (properties.toUnicode && properties.toUnicode.length > 0)
-      this.toUnicode = properties.toUnicode;
-    else
-      this.rebuildToUnicode(properties);
-
-    this.toFontChar = this.buildToFontChar(this.toUnicode);
-
     if (!file) {
+      this.missingFile = true;
       // The file data is not specified. Trying to fix the font name
       // to be used with the canvas.font.
       var fontName = name.replace(/[,_]/g, '-');
@@ -2295,9 +2197,6 @@ var Font = (function FontClosure() {
 
       // if at least one width is present, remeasure all chars when exists
       this.remeasure = Object.keys(this.widths).length > 0;
-
-      this.encoding = properties.baseEncoding;
-      this.noUnicodeAdaptation = true;
       if (isStandardFont && type === 'CIDFontType2' &&
           properties.cidEncoding.indexOf('Identity-') === 0) {
         // Standard fonts might be embedded as CID font without glyph mapping.
@@ -2308,6 +2207,10 @@ var Font = (function FontClosure() {
         }
         this.toFontChar = map;
         this.toUnicode = map;
+      } else {
+        for (var charCode in this.toUnicode) {
+          this.toFontChar[charCode] = this.toUnicode[charCode].charCodeAt(0);
+        }
       }
       this.loadedName = fontName.split('-')[0];
       this.loading = false;
@@ -2368,11 +2271,6 @@ var Font = (function FontClosure() {
     this.loading = true;
   }
 
-  var numFonts = 0;
-  function getUniqueName() {
-    return 'pdfFont' + numFonts++;
-  }
-
   function stringToArray(str) {
     var array = [];
     for (var i = 0, ii = str.length; i < ii; ++i)
@@ -2483,26 +2381,95 @@ var Font = (function FontClosure() {
     file.virtualOffset += data.length;
   }
 
-  function getRanges(glyphs, deltas) {
+  /**
+   * Rebuilds the char code to glyph ID map by trying to replace the char codes
+   * with their unicode value. It also moves char codes that are in known
+   * problematic locations.
+   * @return {Object} Two properties:
+   * 'toFontChar' - maps original char codes (the value that will be read
+   * from commands such as show text) to the char codes that will be used in the
+   * font that we build
+   * 'charCodeToGlyphId' - maps the new font char codes to glyph ids
+   */
+  function adjustMapping(charCodeToGlyphId, properties) {
+    var toUnicode = properties.toUnicode;
+    var isSymbolic = !!(properties.flags & FontFlags.Symbolic);
+    var isIdentityUnicode = properties.isIdentityUnicode;
+    var newMap = Object.create(null);
+    var toFontChar = [];
+    var usedCharCodes = [];
+    var usedFontCharCodes = [];
+    var nextAvailableFontCharCode = PRIVATE_USE_OFFSET_START;
+    for (var originalCharCode in charCodeToGlyphId) {
+      originalCharCode |= 0;
+      var glyphId = charCodeToGlyphId[originalCharCode];
+      var fontCharCode = originalCharCode;
+      // First try to map the value to a unicode position if a non identity map
+      // was created.
+      if (!isIdentityUnicode && originalCharCode in toUnicode) {
+        var unicode = toUnicode[fontCharCode];
+        // TODO: Try to map ligatures to the correct spot.
+        if (unicode.length === 1) {
+          fontCharCode = unicode.charCodeAt(0);
+        }
+      }
+      // Try to move control characters, special characters and already mapped
+      // characters to the private use area since they will not be drawn by
+      // canvas if left in their current position. Also, move characters if the
+      // font was symbolic and there is only an identity unicode map since the
+      // characters probably aren't in the correct position (fixes an issue
+      // with firefox and thuluthfont).
+      if ((fontCharCode in usedFontCharCodes ||
+           fontCharCode <= 0x1f || // Control chars
+           fontCharCode === 0x7F || // Control char
+           fontCharCode === 0xAD || // Soft hyphen
+           (fontCharCode >= 0x80 && fontCharCode <= 0x9F) || // Control chars
+           (isSymbolic && isIdentityUnicode)) &&
+          nextAvailableFontCharCode <= PRIVATE_USE_OFFSET_END) { // Room left.
+        // Loop to try and find a free spot in the private use area.
+        do {
+          fontCharCode = nextAvailableFontCharCode++;
+
+          if (SKIP_PRIVATE_USE_RANGE_F000_TO_F01F && fontCharCode === 0xF000) {
+            fontCharCode = 0xF020;
+            nextAvailableFontCharCode = fontCharCode + 1;
+          }
+
+        } while (fontCharCode in usedFontCharCodes &&
+                 nextAvailableFontCharCode <= PRIVATE_USE_OFFSET_END);
+      }
+
+      newMap[fontCharCode] = glyphId;
+      toFontChar[originalCharCode] = fontCharCode;
+      usedFontCharCodes[fontCharCode] = true;
+    }
+    return {
+      toFontChar: toFontChar,
+      charCodeToGlyphId: newMap
+    };
+  }
+
+  function getRanges(glyphs) {
     // Array.sort() sorts by characters, not numerically, so convert to an
     // array of characters.
     var codes = [];
-    var length = glyphs.length;
-    for (var n = 0; n < length; ++n)
-      codes.push({ unicode: glyphs[n].unicode, code: n });
+    for (var charCode in glyphs) {
+      codes.push({ fontCharCode: charCode | 0, glyphId: glyphs[charCode] });
+    }
     codes.sort(function fontGetRangesSort(a, b) {
-      return a.unicode - b.unicode;
+      return a.fontCharCode - b.fontCharCode;
     });
 
     // Split the sorted codes into ranges.
     var ranges = [];
+    var length = codes.length;
     for (var n = 0; n < length; ) {
-      var start = codes[n].unicode;
-      var codeIndices = [deltas ? deltas[codes[n].code] : codes[n].code + 1];
+      var start = codes[n].fontCharCode;
+      var codeIndices = [codes[n].glyphId];
       ++n;
       var end = start;
-      while (n < length && end + 1 == codes[n].unicode) {
-        codeIndices.push(deltas ? deltas[codes[n].code] : codes[n].code + 1);
+      while (n < length && end + 1 == codes[n].fontCharCode) {
+        codeIndices.push(codes[n].glyphId);
         ++end;
         ++n;
         if (end === 0xFFFF) { break; }
@@ -2513,9 +2480,8 @@ var Font = (function FontClosure() {
     return ranges;
   }
 
-  function createCmapTable(glyphs, deltas) {
-    var ranges = getRanges(glyphs, deltas);
-
+  function createCmapTable(glyphs) {
+    var ranges = getRanges(glyphs);
     var numTables = ranges[ranges.length - 1][1] > 0xFFFF ? 2 : 1;
     var cmap = '\x00\x00' + // version
                string16(numTables) +  // numTables
@@ -2677,8 +2643,8 @@ var Font = (function FontClosure() {
     var lastCharIndex = 0;
 
     if (charstrings) {
-      for (var i = 0; i < charstrings.length; ++i) {
-        var code = charstrings[i].unicode;
+      for (var code in charstrings) {
+        code |= 0;
         if (firstCharIndex > code || !firstCharIndex)
           firstCharIndex = code;
         if (lastCharIndex < code)
@@ -2840,37 +2806,6 @@ var Font = (function FontClosure() {
     return nameTable;
   }
 
-  // Normalize the charcodes in the cmap table into unicode values
-  // that will work with the (3, 1) cmap table we will write out.
-  function cmapCharcodeToUnicode(charcode, symbolic, platformId, encodingId) {
-    var unicode;
-    if (symbolic) {
-      // These codes will be shifted into the range
-      // SYMBOLIC_FONT_GLYPH_OFFSET to (SYMBOLIC_FONT_GLYPH_OFFSET + 0xFF)
-      // so that they are not in the control character range that could
-      // be displayed as spaces by browsers.
-      if (platformId === 3 && encodingId === 0 ||
-          platformId === 1 && encodingId === 0) {
-        unicode = SYMBOLIC_FONT_GLYPH_OFFSET | (charcode & 0xFF);
-      }
-    } else {
-      if (platformId === 3 && encodingId === 1) {
-        // A (3, 1) table is alredy unicode (Microsoft Unicode format)
-        unicode = charcode;
-      } else if (platformId === 1 && encodingId === 0) {
-        // TODO(mack): Should apply the changes to convert the
-        // MacRomanEncoding to Mac OS Roman encoding in 9.6.6.4
-        // table 115 of the pdf spec
-        var glyphName = Encodings.MacRomanEncoding[charcode];
-        if (glyphName) {
-          unicode = GlyphsUnicode[glyphName];
-        }
-      }
-    }
-    return unicode;
-  }
-
-
   Font.prototype = {
     name: null,
     font: null,
@@ -2934,34 +2869,11 @@ var Font = (function FontClosure() {
         };
       }
 
-      function createGlyphNameMap(glyphs, ids, properties) {
-        var glyphNames = properties.glyphNames;
-        if (!glyphNames) {
-          properties.glyphNameMap = {};
-          return;
-        }
-        var glyphsLength = glyphs.length;
-        var glyphNameMap = {};
-        var encoding = [];
-        for (var i = 0; i < glyphsLength; ++i) {
-          var glyphName = glyphNames[ids[i]];
-          if (!glyphName)
-            continue;
-          var unicode = glyphs[i].unicode;
-          glyphNameMap[glyphName] = unicode;
-          var code = glyphs[i].code;
-          encoding[code] = glyphName;
-        }
-        properties.glyphNameMap = glyphNameMap;
-        if (properties.overridableEncoding)
-          properties.baseEncoding = encoding;
-      }
-
       /**
        * Read the appropriate subtable from the cmap according to 9.6.6.4 from
        * PDF spec
        */
-      function readCmapTable(cmap, font, hasEncoding, isSymbolicFont) {
+      function readCmapTable(cmap, font, isSymbolicFont) {
         var start = (font.start ? font.start : 0) + cmap.offset;
         font.pos = start;
 
@@ -2969,73 +2881,36 @@ var Font = (function FontClosure() {
         var numTables = int16(font.getBytes(2));
 
         var potentialTable;
-        var foundPreferredTable;
-        // There's an order of preference in terms of which cmap subtable we
-        // want to use. So scan through them to find our preferred table.
+        var canBreak = false;
+        // There's an order of preference in terms of which cmap subtable to
+        // use:
+        // - non-symbolic fonts the preference is a 3,1 table then a 1,0 table
+        // - symbolic fonts the preference is a 3,0 table then a 1,0 table
+        // The following takes advantage of the fact that the tables are sorted
+        // to work.
         for (var i = 0; i < numTables; i++) {
           var platformId = int16(font.getBytes(2));
           var encodingId = int16(font.getBytes(2));
           var offset = int32(font.getBytes(4));
           var useTable = false;
-          var canBreak = false;
 
-          // The following block implements the following from the spec:
-          //
-          //   When the font has no Encoding entry, or the font descriptor’s
-          //   Symbolic flag is set (in which case the Encoding entry
-          //   is ignored), this shall occur:
-          //      - If the font contains a (3, 0) subtable, the range of
-          //      - Otherwise, the (1, 0) subtable will be used.
-          //   Otherwise, if the font does have an encoding:
-          //      - Use the (3, 1) cmap subtable
-          //      - Otherwise, use the (1, 0) subtable if present
-          //
-          // The following diverges slightly from the above spec in order
-          // to handle the case that hasEncoding and isSymbolicFont are both
-          // true. In this, based on the ordering of the rules in the spec,
-          // my interpretation is that we should be acting as if the font is
-          // symbolic.
-          //
-          // However, in this case, the test pdf 'preistabelle.pdf'
-          // is interpreting this case as a non-symbolic font. In this case
-          // though, 'presitabelle.pdf' does contain a (3, 1) table and does
-          // not contain a (3, 0) table which indicates it is non-symbolic.
-          //
-          // Thus, I am using this heurisitic of looking at which table is
-          // found to truly determine whether or not the font is symbolic.
-          // That is, if the specific symbolic/non-symbolic font specific
-          // tables (3, 0) or (3, 1) is found, that information is used for
-          // deciding if the font is symbolic or not.
-          //
-          // TODO(mack): This section needs some more thought on whether the
-          // heuristic is good enough. For now, it passes all the regression
-          // tests.
-          if (isSymbolicFont && platformId === 3 && encodingId === 0) {
+          if (platformId == 1 && encodingId === 0) {
             useTable = true;
-            canBreak = true;
-            foundPreferredTable = true;
-          } else if (hasEncoding && platformId === 3 && encodingId === 1) {
+            // Continue the loop since there still may be a higher priority
+            // table.
+          } else if (!isSymbolicFont && platformId === 3 && encodingId === 1) {
             useTable = true;
             canBreak = true;
-            foundPreferredTable = true;
-            // Update the isSymbolicFont based on this heuristic
-            isSymbolicFont = false;
-          } else if (platformId === 1 && encodingId === 0 &&
-              !foundPreferredTable) {
-            useTable = true;
-            foundPreferredTable = true;
-          } else if (!potentialTable) {
-            // We will use an arbitrary table if we cannot find a preferred
-            // table
+          } else if (isSymbolicFont && platformId === 3 && encodingId === 0) {
             useTable = true;
+            canBreak = true;
           }
 
           if (useTable) {
             potentialTable = {
               platformId: platformId,
               encodingId: encodingId,
-              offset: offset,
-              isSymbolicFont: isSymbolicFont
+              offset: offset
             };
           }
           if (canBreak) {
@@ -3044,16 +2919,8 @@ var Font = (function FontClosure() {
         }
 
         if (!potentialTable) {
-          error('Could not find a cmap table');
-          return;
-        }
-
-        if (!foundPreferredTable) {
-          warn('Did not find a cmap of suitable format. Interpreting (' +
-               potentialTable.platformId + ', ' + potentialTable.encodingId +
-               ') as (3, 1) table');
-          potentialTable.platformId = 3;
-          potentialTable.encodingId = 1;
+          warn('Could not find a preferred cmap table.');
+          return [];
         }
 
         font.pos = start + potentialTable.offset;
@@ -3072,7 +2939,7 @@ var Font = (function FontClosure() {
               continue;
             }
             mappings.push({
-              charcode: j,
+              charCode: j,
               glyphId: index
             });
           }
@@ -3132,7 +2999,7 @@ var Font = (function FontClosure() {
                 continue;
               }
               mappings.push({
-                charcode: j,
+                charCode: j,
                 glyphId: glyphId
               });
             }
@@ -3150,10 +3017,10 @@ var Font = (function FontClosure() {
           var ids = [];
           for (var j = 0; j < entryCount; j++) {
             var glyphId = int16(font.getBytes(2));
-            var charcode = firstCode + j;
+            var charCode = firstCode + j;
 
             mappings.push({
-              charcode: charcode,
+              charCode: charCode,
               glyphId: glyphId
             });
           }
@@ -3163,10 +3030,10 @@ var Font = (function FontClosure() {
 
         // removing duplicate entries
         mappings.sort(function (a, b) {
-          return a.charcode - b.charcode;
+          return a.charCode - b.charCode;
         });
         for (var i = 1; i < mappings.length; i++) {
-          if (mappings[i - 1].charcode === mappings[i].charcode) {
+          if (mappings[i - 1].charCode === mappings[i].charCode) {
             mappings.splice(i, 1);
             i--;
           }
@@ -3175,7 +3042,6 @@ var Font = (function FontClosure() {
         return {
           platformId: potentialTable.platformId,
           encodingId: potentialTable.encodingId,
-          isSymbolicFont: potentialTable.isSymbolicFont,
           mappings: mappings,
           hasShortCmap: hasShortCmap
         };
@@ -3901,229 +3767,121 @@ var Font = (function FontClosure() {
         }
       }
 
-      var glyphs, ids;
+      var charCodeToGlyphId = [];
       if (properties.type == 'CIDFontType2') {
-        // Replace the old CMAP table with a shiny new one
-        // Type2 composite fonts map characters directly to glyphs so the cmap
-        // table must be replaced.
-        // canvas fillText will reencode some characters even if the font has a
-        // glyph at that position - e.g. newline is converted to a space and
-        // U+00AD (soft hyphen) is not drawn.
-        // So, offset all the glyphs by 0xFF to avoid these cases and use
-        // the encoding to map incoming characters to the new glyph positions
-        if (!tables.cmap) {
-          tables.cmap = {
-            tag: 'cmap',
-            data: null
-          };
-        }
-
         var cidToGidMap = properties.cidToGidMap || [];
-        var gidToCidMap = [0];
-        if (cidToGidMap.length > 0) {
-          for (var j = cidToGidMap.length - 1; j >= 0; j--) {
-            var gid = cidToGidMap[j];
-            if (gid)
-              gidToCidMap[gid] = j;
+        var cMap = properties.cMap.map;
+        for (var charCode in cMap) {
+          charCode |= 0;
+          var cid = cMap[charCode];
+          assert(cid.length === 1, 'Max size of CID is 65,535');
+          cid = cid.charCodeAt(0);
+          var glyphId = -1;
+          if (cidToGidMap.length === 0) {
+            glyphId = charCode;
+          } else if (cid in cidToGidMap) {
+            glyphId = cidToGidMap[cid];
           }
-          // filling the gaps using CID above the CIDs currently used in font
-          var nextCid = cidToGidMap.length;
-          for (var i = 1; i < numGlyphs; i++) {
-            if (!gidToCidMap[i])
-              gidToCidMap[i] = nextCid++;
-          }
-        } else {
-          for (var i = 1; i < numGlyphs; i++) {
-            gidToCidMap[i] = i;
-          }
-          if (dupFirstEntry) {
-            gidToCidMap[numGlyphs - 1] = 0;
+          if (glyphId >= 0 && glyphId < numGlyphs) {
+            charCodeToGlyphId[charCode] = glyphId;
           }
         }
-
-        glyphs = [];
-        ids = [];
-
-        var usedUnicodes = [];
-        var unassignedUnicodeItems = [];
-        var toFontChar = this.cidToFontChar || this.toFontChar;
-        for (var i = 1; i < numGlyphs; i++) {
-          var cid = gidToCidMap[i];
-          var unicode = toFontChar[cid];
-          if (!unicode || typeof unicode !== 'number' ||
-              isSpecialUnicode(unicode) || unicode in usedUnicodes) {
-            unassignedUnicodeItems.push(i);
-            continue;
-          }
-          usedUnicodes[unicode] = true;
-          glyphs.push({ unicode: unicode, code: cid });
-          ids.push(i);
-        }
-
-        // unassigned codepoints will never be used for non-Identity CMap
-        // because the input will be Unicode
-        if (!this.cidToFontChar) {
-          // trying to fit as many unassigned symbols as we can
-          // in the range allocated for the user defined symbols
-          var unusedUnicode = CMAP_GLYPH_OFFSET;
-          for (var j = 0, jj = unassignedUnicodeItems.length; j < jj; j++) {
-            var i = unassignedUnicodeItems[j];
-            var cid = gidToCidMap[i];
-            while (unusedUnicode in usedUnicodes)
-              unusedUnicode++;
-            if (unusedUnicode >= CMAP_GLYPH_OFFSET + GLYPH_AREA_SIZE)
-              break;
-            var unicode = unusedUnicode++;
-            this.toFontChar[cid] = unicode;
-            usedUnicodes[unicode] = true;
-            glyphs.push({ unicode: unicode, code: cid });
-            ids.push(i);
-          }
+        if (dupFirstEntry) {
+          charCodeToGlyphId[0] = numGlyphs - 1;
         }
       } else {
-        this.useToFontChar = true;
         // Most of the following logic in this code branch is based on the
         // 9.6.6.4 of the PDF spec.
-
-        // TODO(mack):
-        // We are using this.hasEncoding to mean that the encoding is either
-        // MacRomanEncoding or WinAnsiEncoding (following spec in 9.6.6.4),
-        // but this.hasEncoding is currently true for any encodings on the
-        // Encodings object (e.g. MacExpertEncoding). So should consider using
-        // better check for this.
-        var cmapTable = readCmapTable(tables.cmap, font, this.hasEncoding,
-            this.isSymbolicFont);
-
-        // TODO(mack): If the (3, 0) cmap table used, then the font is
-        // symbolic. The range of charcodes in the cmap table should be
-        // one of the following:
-        //   -> 0x0000 - 0x00FF
-        //   -> 0xF000 - 0xF0FF
-        //   -> 0xF100 - 0xF1FF
-        //   -> 0xF200 - 0xF2FF
-        // If it is not, we should change not consider this a symbolic font
-        this.isSymbolicFont = cmapTable.isSymbolicFont;
-
+        var cmapTable = readCmapTable(tables.cmap, font, this.isSymbolicFont);
         var cmapPlatformId = cmapTable.platformId;
         var cmapEncodingId = cmapTable.encodingId;
         var cmapMappings = cmapTable.mappings;
         var cmapMappingsLength = cmapMappings.length;
-        var glyphs = [];
-        var ids = [];
-        for (var i = 0; i < cmapMappingsLength; ++i) {
-          var cmapMapping = cmapMappings[i];
-          var charcode = cmapMapping.charcode;
-          var unicode = cmapCharcodeToUnicode(charcode, this.isSymbolicFont,
-              cmapPlatformId, cmapEncodingId);
-
-          if (!unicode) {
-            // TODO(mack): gotta check if skipping mappings where we cannot find
-            // a unicode is the correct behaviour
-            continue;
-          }
-          glyphs.push({
-            code: charcode,
-            unicode: unicode
-          });
-          ids.push(cmapMapping.glyphId);
-        }
-
-        var hasShortCmap = cmapTable.hasShortCmap;
-        var toFontChar = this.toFontChar;
-
-        if (hasShortCmap && ids.length == numGlyphs) {
-          // Fixes the short cmap tables -- some generators use incorrect
-          // glyph id.
-          for (var i = 0, ii = ids.length; i < ii; i++) {
-            ids[i] = i;
-          }
-        }
-
-        // Rewrite the whole toFontChar dictionary with a new one using the
-        // information from the mappings in the cmap table.
-        var newToFontChar = [];
-        if (this.isSymbolicFont) {
-          for (var i = 0, ii = glyphs.length; i < ii; i++) {
-            var glyph = glyphs[i];
-            // For (3, 0) cmap tables:
-            // The charcode key being stored in toFontChar is the lower byte
-            // of the two-byte charcodes of the cmap table since according to
-            // the spec: 'each byte from the string shall be prepended with the
-            // high byte of the range [of charcodes in the cmap table], to form
-            // a two-byte character, which shall be used to select the
-            // associated glyph description from the subtable'.
-            //
-            // For (1, 0) cmap tables:
-            // 'single bytes from the string shall be used to look up the
-            // associated glyph descriptions from the subtable'. This means
-            // charcodes in the cmap will be single bytes, so no-op since
-            // glyph.code & 0xFF === glyph.code
-            newToFontChar[glyph.code & 0xFF] = glyph.unicode;
+        var hasEncoding = properties.differences.length ||
+                          !!properties.baseEncodingName;
+
+        // The spec seems to imply that if the font is symbolic the encoding
+        // should be ignored, but this doesn't appear to work for
+        // 'preistabelle.pdf' where the font is symbolic and has an encoding.
+        if (hasEncoding &&
+            (cmapPlatformId === 3 && cmapEncodingId === 1 ||
+             cmapPlatformId === 1 && cmapEncodingId === 0)) {
+          var baseEncoding = [];
+          if (properties.baseEncodingName === 'MacRomanEncoding' ||
+              properties.baseEncodingName === 'WinAnsiEncoding') {
+            baseEncoding = Encodings[properties.baseEncodingName];
           }
-        } else {
-
-          var encoding = properties.baseEncoding;
-          var differences = properties.differences;
-
-          // TODO(mack): check if it is necessary to shift control characters
-          // for non-symbolic fonts so that browsers dont't render them using
-          // space characters
-
-          var glyphCodeMapping = cmapTable.glyphCodeMapping;
-          for (var charcode = 0; charcode < encoding.length; ++charcode) {
-            if (!encoding.hasOwnProperty(charcode)) {
+          for (var charCode = 0; charCode < 256; charCode++) {
+            var glyphName;
+            if (this.differences && charCode in this.differences) {
+              glyphName = this.differences[charCode];
+            } else if (charCode in baseEncoding &&
+                       baseEncoding[charCode] !== '') {
+              glyphName = baseEncoding[charCode];
+            } else {
+              glyphName = Encodings.StandardEncoding[charCode];
+            }
+            if (!glyphName) {
               continue;
             }
-
-            // Since the cmap table that we will be writing out is a (3, 1)
-            // unicode table, in this section we will rewrites the charcodes
-            // in the pdf into unicodes
-
-            var glyphName = encoding[charcode];
-            // A nonsymbolic font should not have a Differences array, but
-            // if it does have one, we should still use it
-            if (charcode in differences) {
-              glyphName = differences[charcode];
+            var unicodeOrCharCode;
+            if (cmapPlatformId === 3 && cmapEncodingId === 1) {
+              unicodeOrCharCode = GlyphsUnicode[glyphName];
+            } else if (cmapPlatformId === 1 && cmapEncodingId === 0) {
+              // TODO: the encoding needs to be updated with mac os table.
+              unicodeOrCharCode = Encodings.MacRomanEncoding.indexOf(glyphName);
             }
 
-            // Finally, any undefined entries in the table shall be filled
-            // using StandardEncoding
-            if (!glyphName) {
-              glyphName = Encodings.StandardEncoding[charcode];
+            var found = false;
+            for (var i = 0; i < cmapMappingsLength; ++i) {
+              if (cmapMappings[i].charCode === unicodeOrCharCode) {
+                charCodeToGlyphId[charCode] = cmapMappings[i].glyphId;
+                found = true;
+                break;
+              }
             }
-
-            // TODO(mack): Handle the case that the glyph name cannot be
-            // mapped as specified, in which case the glyph name shall be
-            // looked up in the font program's 'post' table (if one is
-            // present) and the associated glyph id shall be used.
-            //
-            // For now, we're just using the '.notdef' glyph name in this
-            // case.
-            glyphName = glyphName || '.notdef';
-
-            var unicode = GlyphsUnicode[glyphName];
-            newToFontChar[charcode] = unicode;
+            if (!found && properties.glyphNames) {
+              // Try to map using the post table. There are currently no known
+              // pdfs that this fixes.
+              var glyphId = properties.glyphNames.indexOf(glyphName);
+              if (glyphId > 0) {
+                charCodeToGlyphId[charCode] = glyphId;
+              }
+            }
+          }
+        } else {
+          // For (3, 0) cmap tables:
+          // The charcode key being stored in charCodeToGlyphId is the lower
+          // byte of the two-byte charcodes of the cmap table since according to
+          // the spec: 'each byte from the string shall be prepended with the
+          // high byte of the range [of charcodes in the cmap table], to form
+          // a two-byte character, which shall be used to select the
+          // associated glyph description from the subtable'.
+          //
+          // For (1, 0) cmap tables:
+          // 'single bytes from the string shall be used to look up the
+          // associated glyph descriptions from the subtable'. This means
+          // charcodes in the cmap will be single bytes, so no-op since
+          // glyph.charCode & 0xFF === glyph.charCode
+          for (var i = 0; i < cmapMappingsLength; ++i) {
+            var charCode = cmapMappings[i].charCode & 0xFF;
+            charCodeToGlyphId[charCode] = cmapMappings[i].glyphId;
           }
         }
-        this.toFontChar = toFontChar = newToFontChar;
-
-        createGlyphNameMap(glyphs, ids, properties);
-        this.glyphNameMap = properties.glyphNameMap;
       }
 
-      if (glyphs.length === 0) {
+      if (charCodeToGlyphId.length === 0) {
         // defines at least one glyph
-        glyphs.push({ unicode: 0xF000, code: 0xF000, glyph: '.notdef' });
-        ids.push(0);
+        charCodeToGlyphId[0] = 0;
       }
 
       // Converting glyphs and ids into font's cmap table
-      tables.cmap.data = createCmapTable(glyphs, ids);
-      var unicodeIsEnabled = [];
-      for (var i = 0, ii = glyphs.length; i < ii; i++) {
-        unicodeIsEnabled[glyphs[i].unicode] = true;
-      }
-      this.unicodeIsEnabled = unicodeIsEnabled;
+      var newMapping = adjustMapping(charCodeToGlyphId, properties);
+      this.toFontChar = newMapping.toFontChar;
+      tables.cmap = {
+        tag: 'cmap',
+        data: createCmapTable(newMapping.charCodeToGlyphId)
+      };
 
       if (!tables['OS/2'] || !validateOS2Table(tables['OS/2'])) {
         // extract some more font properties from the OpenType head and
@@ -4138,7 +3896,9 @@ var Font = (function FontClosure() {
 
         tables['OS/2'] = {
           tag: 'OS/2',
-          data: stringToArray(createOS2Table(properties, glyphs, override))
+          data: stringToArray(createOS2Table(properties,
+                                             newMapping.charCodeToGlyphId,
+                                             override))
         };
       }
 
@@ -4222,63 +3982,49 @@ var Font = (function FontClosure() {
 
       createOpenTypeHeader('\x4F\x54\x54\x4F', otf, 9);
 
-      var charstrings = font.charstrings;
-      properties.fixedPitch = isFixedPitch(charstrings);
+      properties.fixedPitch = false; //isFixedPitch(charstrings);
 
-      var glyphNameMap = {};
-      for (var i = 0; i < charstrings.length; ++i) {
-        var charstring = charstrings[i];
-        glyphNameMap[charstring.glyph] = charstring.unicode;
-      }
-      this.glyphNameMap = glyphNameMap;
+      var mapping = font.getGlyphMapping(properties);
+      var newMapping = adjustMapping(mapping, properties);
+      this.toFontChar = newMapping.toFontChar;
+      var numGlyphs = font.numGlyphs;
 
       var seacs = font.seacs;
-      if (SEAC_ANALYSIS_ENABLED && seacs) {
-        var seacMap = [];
+      if (SEAC_ANALYSIS_ENABLED && seacs && seacs.length) {
         var matrix = properties.fontMatrix || FONT_IDENTITY_MATRIX;
-        for (var i = 0; i < charstrings.length; ++i) {
-          var charstring = charstrings[i];
-          var seac = seacs[charstring.gid];
-          if (!seac) {
-            continue;
-          }
+        var charset = font.getCharset();
+        var charCodeToGlyphId = mapping;
+        var toFontChar = newMapping.toFontChar;
+        var seacs = font.seacs;
+        var seacMap = Object.create(null);
+        var glyphIdToCharCode = Object.create(null);
+        for (var charCode in charCodeToGlyphId) {
+          glyphIdToCharCode[charCodeToGlyphId[charCode]] = charCode | 0;
+        }
+        for (var glyphId in seacs) {
+          glyphId |= 0;
+          var seac = seacs[glyphId];
           var baseGlyphName = Encodings.StandardEncoding[seac[2]];
-          var baseUnicode = glyphNameMap[baseGlyphName];
           var accentGlyphName = Encodings.StandardEncoding[seac[3]];
-          var accentUnicode = glyphNameMap[accentGlyphName];
-          if (!baseUnicode || !accentUnicode) {
+          var baseGlyphId = charset.indexOf(baseGlyphName);
+          var accentGlyphId = charset.indexOf(accentGlyphName);
+          if (baseGlyphId < 0 || accentGlyphId < 0) {
             continue;
           }
           var accentOffset = {
             x: seac[0] * matrix[0] + seac[1] * matrix[2] + matrix[4],
             y: seac[0] * matrix[1] + seac[1] * matrix[3] + matrix[5]
           };
-          seacMap[charstring.unicode] = {
-            baseUnicode: baseUnicode,
-            accentUnicode: accentUnicode,
+          var charCode = glyphIdToCharCode[glyphId];
+          seacMap[charCode] = {
+            baseFontCharCode: toFontChar[glyphIdToCharCode[baseGlyphId]],
+            accentFontCharCode: toFontChar[glyphIdToCharCode[accentGlyphId]],
             accentOffset: accentOffset
           };
         }
         properties.seacMap = seacMap;
       }
 
-      if (properties.overridableEncoding && (properties.subtype == 'Type1C' ||
-          properties.subtype == 'CIDFontType0C')) {
-        var encoding = [];
-        for (var i = 0; i < charstrings.length; ++i) {
-          var charstring = charstrings[i];
-          encoding[charstring.code] = charstring.glyph;
-        }
-        properties.baseEncoding = encoding;
-      }
-      if (properties.subtype == 'CIDFontType0C') {
-        var toFontChar = [];
-        for (var i = 0; i < charstrings.length; ++i) {
-          var charstring = charstrings[i];
-          toFontChar[charstring.code] = charstring.unicode;
-        }
-        this.toFontChar = toFontChar;
-      }
       var unitsPerEm = 1 / (properties.fontMatrix || FONT_IDENTITY_MATRIX)[0];
 
       var fields = {
@@ -4286,11 +4032,11 @@ var Font = (function FontClosure() {
         'CFF ': font.data,
 
         // OS/2 and Windows Specific metrics
-        'OS/2': stringToArray(createOS2Table(properties, charstrings)),
+        'OS/2': stringToArray(createOS2Table(properties,
+                                             newMapping.charCodeToGlyphId)),
 
         // Character to glyphs mapping
-        'cmap': createCmapTable(charstrings.slice(),
-                                ('glyphIds' in font) ? font.glyphIds : null),
+        'cmap': createCmapTable(newMapping.charCodeToGlyphId),
 
         // Font header
         'head': (function fontFieldsHead() {
@@ -4334,14 +4080,17 @@ var Font = (function FontClosure() {
               '\x00\x00' + // -reserved-
               '\x00\x00' + // -reserved-
               '\x00\x00' + // metricDataFormat
-              string16(charstrings.length + 1)); // Number of HMetrics
+              string16(numGlyphs + 1)); // Number of HMetrics
         })(),
 
         // Horizontal metrics
         'hmtx': (function fontFieldsHmtx() {
+          var charstrings = font.charstrings;
           var hmtx = '\x00\x00\x00\x00'; // Fake .notdef
-          for (var i = 0, ii = charstrings.length; i < ii; i++) {
-            var charstring = charstrings[i];
+          for (var i = 0, ii = numGlyphs; i < ii; i++) {
+            // TODO: For CFF fonts the width should technically match the x in
+            // the glyph, but it doesn't seem to matter.
+            var charstring = charstrings ? charstrings[i] : {};
             var width = 'width' in charstring ? charstring.width : 0;
             hmtx += string16(width) + string16(0);
           }
@@ -4352,7 +4101,7 @@ var Font = (function FontClosure() {
         'maxp': (function fontFieldsMaxp() {
           return stringToArray(
               '\x00\x00\x50\x00' + // Version number
-             string16(charstrings.length + 1)); // Num of glyphs
+             string16(numGlyphs + 1)); // Num of glyphs
         })(),
 
         // Naming tables
@@ -4373,124 +4122,103 @@ var Font = (function FontClosure() {
       return stringToArray(otf.file);
     },
 
-    buildToFontChar: function Font_buildToFontChar(toUnicode) {
-      var result = [];
-      var unusedUnicode = CMAP_GLYPH_OFFSET;
-      for (var i = 0, ii = toUnicode.length; i < ii; i++) {
-        var unicode = toUnicode[i];
-        var fontCharCode = typeof unicode === 'object' ? unusedUnicode++ :
-          unicode;
-        if (typeof unicode !== 'undefined') {
-          if (isString(fontCharCode) && fontCharCode.length === 1) {
-            fontCharCode = fontCharCode.charCodeAt(0);
-          }
-          result[i] = fontCharCode;
-        }
+    /**
+     * Builds a char code to unicode map based on section 9.10 of the spec.
+     * @param {Object} properties Font properties object.
+     * @return {Object} Has two properties: 'toUnicode' which maps char codes to
+     * unicode (string) values and 'isIdentity' which is true if an identity map
+     * is used.
+     */
+    buildToUnicode: function Font_buildToUnicode(properties) {
+      var map = {
+        isIdentity: false,
+        toUnicode: null
+      };
+      // Section 9.10.2 Mapping Character Codes to Unicode Values
+      if (properties.toUnicode) {
+        map.toUnicode = properties.toUnicode;
+        return map;
       }
-      return result;
-    },
-
-    rebuildToUnicode: function Font_rebuildToUnicode(properties) {
-      var firstChar = properties.firstChar, lastChar = properties.lastChar;
-      var map = [];
-      var toUnicode = this.toUnicode || this.cidToUnicode;
-      if (toUnicode) {
-        var isIdentityMap = toUnicode.length === 0;
-        for (var i = firstChar, ii = lastChar; i <= ii; i++) {
-          // TODO missing map the character according font's CMap
-          map[i] = isIdentityMap ? String.fromCharCode(i) : toUnicode[i];
+      // According to the spec if the font is a simple font we should only map
+      // to unicode if the base encoding is MacRoman, MacExpert, or WinAnsi or
+      // the differences array only contains adobe standard or symbol set names,
+      // in practice it seems better to always try to create a toUnicode
+      // map based on the default encoding.
+      if (!properties.composite /* is simple font */) {
+        var toUnicode = [];
+        var encoding = properties.defaultEncoding.slice();
+        // Merge in the differences array.
+        var differences = properties.differences;
+        for (var charcode in differences) {
+          encoding[charcode] = differences[charcode];
         }
-      } else {
-        for (var i = firstChar, ii = lastChar; i <= ii; i++) {
-          var glyph = properties.differences[i];
-          if (!glyph)
-            glyph = properties.baseEncoding[i];
-          if (!!glyph && (glyph in GlyphsUnicode))
-            map[i] = String.fromCharCode(GlyphsUnicode[glyph]);
-        }
-      }
-      this.toUnicode = map;
-    },
-
-    loadCidToUnicode: function Font_loadCidToUnicode(properties) {
-      if (!properties.cidSystemInfo)
-        return;
-
-      var cidToUnicodeMap = [], unicodeToCIDMap = [];
-      this.cidToUnicode = cidToUnicodeMap;
-      this.unicodeToCID = unicodeToCIDMap;
-
-      var cidEncoding = properties.cidEncoding;
-      if (properties.toUnicode) {
-        if (cidEncoding && cidEncoding.indexOf('Identity-') !== 0) {
-          warn('Need to create a reverse mapping from \'ToUnicode\' CMap');
+        for (var charcode in encoding) {
+          // a) Map the character code to a character name.
+          var glyphName = encoding[charcode];
+          // b) Look up the character name in the Adobe Glyph List (see the
+          //    Bibliography) to obtain the corresponding Unicode value.
+          if (glyphName === '' || !(glyphName in GlyphsUnicode)) {
+            continue;
+          }
+          toUnicode[charcode] = String.fromCharCode(GlyphsUnicode[glyphName]);
         }
-        return; // 'ToUnicode' CMap will be used
+        map.toUnicode = toUnicode;
+        return map;
       }
-
-      var cidSystemInfo = properties.cidSystemInfo;
-      var cidToUnicode;
-      if (cidSystemInfo) {
-        cidToUnicode = CIDToUnicodeMaps[
-          cidSystemInfo.registry + '-' + cidSystemInfo.ordering];
-      }
-
-      if (!cidToUnicode)
-        return; // identity encoding
-
-      var overwrite = HalfwidthCMaps[cidEncoding];
-      var cid = 1, i, j, k, ii;
-      for (i = 0, ii = cidToUnicode.length; i < ii; ++i) {
-        var unicode = cidToUnicode[i];
-        if (isArray(unicode)) {
-          var length = unicode.length;
-          for (j = 0; j < length; j++) {
-            cidToUnicodeMap[cid] = k = unicode[j];
-            if (!unicodeToCIDMap[k] || overwrite) {
-              unicodeToCIDMap[k] = cid;
-            }
-          }
-          cid++;
-        } else if (typeof unicode === 'object') {
-          var fillLength = unicode.f;
-          if (fillLength) {
-            k = unicode.c;
-            for (j = 0; j < fillLength; ++j) {
-              cidToUnicodeMap[cid] = k;
-              if (!unicodeToCIDMap[k] || overwrite) {
-                unicodeToCIDMap[k] = cid;
-              }
-              cid++;
-              k++;
-            }
-          } else
-            cid += unicode.s;
-        } else if (unicode) {
-          cidToUnicodeMap[cid] = unicode;
-          if (!unicodeToCIDMap[unicode] || overwrite) {
-            unicodeToCIDMap[unicode] = cid;
+      // If the font is a composite font that uses one of the predefined CMaps
+      // listed in Table 118 (except Identity–H and Identity–V) or whose
+      // descendant CIDFont uses the Adobe-GB1, Adobe-CNS1, Adobe-Japan1, or
+      // Adobe-Korea1 character collection:
+      if (properties.composite && (
+           (properties.cMap.builtInCMap &&
+            !(properties.cMap instanceof IdentityCMap)) ||
+           (properties.cidSystemInfo.registry === 'Adobe' &&
+             (properties.cidSystemInfo.ordering === 'GB1' ||
+              properties.cidSystemInfo.ordering === 'CNS1' ||
+              properties.cidSystemInfo.ordering === 'Japan1' ||
+              properties.cidSystemInfo.ordering === 'Korea1')))) {
+        // Then:
+        // a) Map the character code to a character identifier (CID) according
+        // to the font’s CMap.
+        // b) Obtain the registry and ordering of the character collection used
+        // by the font’s CMap (for example, Adobe and Japan1) from its
+        // CIDSystemInfo dictionary.
+        var registry = properties.cidSystemInfo.registry;
+        var ordering = properties.cidSystemInfo.ordering;
+        // c) Construct a second CMap name by concatenating the registry and
+        // ordering obtained in step (b) in the format registry–ordering–UCS2
+        // (for example, Adobe–Japan1–UCS2).
+        var ucs2CMapName = new Name(registry + '-' + ordering + '-UCS2');
+        // d) Obtain the CMap with the name constructed in step (c) (available
+        // from the ASN Web site; see the Bibliography).
+        var ucs2CMap = CMapFactory.create(ucs2CMapName, PDFJS.cMapUrl, null);
+        var cMap = properties.cMap;
+        var toUnicode = [];
+        for (var charcode in cMap.map) {
+          var cid = cMap.map[charcode];
+          assert(cid.length === 1, 'Max size of CID is 65,535');
+          // e) Map the CID obtained in step (a) according to the CMap obtained
+          // in step (d), producing a Unicode value.
+          var ucs2 = ucs2CMap.map[cid.charCodeAt(0)];
+          if (!ucs2) {
+            continue;
           }
-          cid++;
-        } else
-          cid++;
+          toUnicode[charcode] = String.fromCharCode((ucs2.charCodeAt(0) << 8) +
+                                                    ucs2.charCodeAt(1));
+        }
+        map.toUnicode = toUnicode;
+        return map;
       }
 
-      if (!cidEncoding) {
-        return;
-      }
-      if (cidEncoding.indexOf('Identity-') !== 0) {
-        // input is already Unicode for non-Identity CMap encodings.
-        this.cidToUnicode = [];
-        // For CIDFontType2, however, we need cid-to-Unicode conversion
-        // to rebuild cmap.
-        if (properties.type == 'CIDFontType2') {
-          this.cidToFontChar = cidToUnicodeMap;
-        }
-      } else {
-        // We don't have to do reverse conversions if the string is
-        // already CID.
-        this.unicodeToCID = [];
+      // The viewer's choice, just use an identity map.
+      var toUnicode = [];
+      var firstChar = properties.firstChar, lastChar = properties.lastChar;
+      for (var i = firstChar, ii = lastChar; i <= ii; i++) {
+        toUnicode[i] = String.fromCharCode(i);
       }
+      map.isIdentity = true;
+      map.toUnicode = toUnicode;
+      return map;
     },
 
     get spaceWidth() {
@@ -4511,8 +4239,11 @@ var Font = (function FontClosure() {
         var glyphUnicode = GlyphsUnicode[glyphName];
         // finding the charcode via unicodeToCID map
         var charcode = 0;
-        if (this.composite)
-          charcode = this.unicodeToCID[glyphUnicode];
+        if (this.composite) {
+          if (glyphUnicode in this.cMap.map) {
+            charcode = this.cMap.lookup(glyphUnicode).charCodeAt(0);
+          }
+        }
         // ... via toUnicode map
         if (!charcode && 'toUnicode' in this)
           charcode = this.toUnicode.indexOf(glyphUnicode);
@@ -4532,99 +4263,39 @@ var Font = (function FontClosure() {
     },
 
     charToGlyph: function Font_charToGlyph(charcode) {
-      var fontCharCode, width, operatorList, disabled;
-
-      var width = this.widths[charcode];
-      var vmetric = this.vmetrics && this.vmetrics[charcode];
-
-      switch (this.type) {
-        case 'CIDFontType0':
-          var cid = this.unicodeToCID[charcode] || charcode;
-          if (this.unicodeToCID.length > 0) {
-            width = this.widths[cid];
-            vmetric = this.vmetrics && this.vmetrics[cid];
-          }
-          if (this.noUnicodeAdaptation) {
-            fontCharCode = this.toFontChar[charcode] || charcode;
-            break;
-          }
-          // CIDFontType0 is not encoded in Unicode.
-          fontCharCode = this.toFontChar[cid] || cid;
-          break;
-        case 'CIDFontType2':
-          if (this.unicodeToCID.length > 0) {
-            var cid = this.unicodeToCID[charcode] || charcode;
-            width = this.widths[cid];
-            vmetric = this.vmetrics && this.vmetrics[cid];
-            fontCharCode = charcode;
-            break;
-          }
-          fontCharCode = this.toFontChar[charcode] || charcode;
-          break;
-        case 'MMType1': // XXX at the moment only "standard" fonts are supported
-        case 'Type1':
-          var glyphName = this.differences[charcode] || this.encoding[charcode];
-          if (!isNum(width))
-            width = this.widths[glyphName];
-          if (this.noUnicodeAdaptation) {
-            fontCharCode = mapPrivateUseChars(GlyphsUnicode[glyphName] ||
-              charcode);
-            break;
-          }
-          fontCharCode = this.glyphNameMap[glyphName] ||
-            GlyphsUnicode[glyphName] || charcode;
-          break;
-        case 'Type3':
-          var glyphName = this.differences[charcode] || this.encoding[charcode];
-          operatorList = this.charProcOperatorList[glyphName];
-          fontCharCode = charcode;
-          break;
-        case 'TrueType':
-          if (this.useToFontChar) {
-            fontCharCode = this.toFontChar[charcode] || charcode;
-            break;
-          }
-          var glyphName = this.differences[charcode] || this.encoding[charcode];
-          if (!glyphName)
-            glyphName = Encodings.StandardEncoding[charcode];
-          if (!isNum(width))
-            width = this.widths[glyphName];
-          if (this.noUnicodeAdaptation) {
-            fontCharCode = GlyphsUnicode[glyphName] || charcode;
-            break;
-          }
-          if (!this.hasEncoding || this.isSymbolicFont) {
-            fontCharCode = this.useToFontChar ? this.toFontChar[charcode] :
-              charcode;
-            break;
-          }
-
-          // MacRoman encoding address by re-encoding the cmap table
+      var fontCharCode, width, operatorList;
 
-          fontCharCode = glyphName in this.glyphNameMap ?
-            this.glyphNameMap[glyphName] : GlyphsUnicode[glyphName];
-          break;
-        default:
-          warn('Unsupported font type: ' + this.type);
-          break;
+      var widthCode = charcode;
+      if (this.cMap && charcode in this.cMap.map) {
+        widthCode = this.cMap.map[charcode].charCodeAt(0);
       }
+      var width = this.widths[widthCode];
+      width = isNum(width) ? width : this.defaultWidth;
+      var vmetric = this.vmetrics && this.vmetrics[widthCode];
 
-      var unicodeChars = !('toUnicode' in this) ? charcode :
-        this.toUnicode[charcode] || charcode;
+      var unicodeChars = this.toUnicode[charcode] || charcode;
       if (typeof unicodeChars === 'number') {
         unicodeChars = String.fromCharCode(unicodeChars);
       }
 
-      width = isNum(width) ? width : this.defaultWidth;
-      disabled = this.unicodeIsEnabled ?
-        !this.unicodeIsEnabled[fontCharCode] : false;
+      // First try the toFontChar map, if it's not there then try falling
+      // back to the char code.
+      fontCharCode = this.toFontChar[charcode] || charcode;
+      if (this.missingFile) {
+        fontCharCode = mapSpecialUnicodeValues(fontCharCode);
+      }
+
+      if (this.type === 'Type3') {
+        // Font char code in this case is actually a glyph name.
+        operatorList = this.charProcOperatorList[fontCharCode];
+      }
 
       var accent = null;
-      if (this.seacMap && this.seacMap[fontCharCode]) {
-        var seac = this.seacMap[fontCharCode];
-        fontCharCode = seac.baseUnicode;
+      if (this.seacMap && this.seacMap[charcode]) {
+        var seac = this.seacMap[charcode];
+        fontCharCode = seac.baseFontCharCode;
         accent = {
-          fontChar: String.fromCharCode(seac.accentUnicode),
+          fontChar: String.fromCharCode(seac.accentFontCharCode),
           offset: seac.accentOffset
         };
       }
@@ -4635,7 +4306,6 @@ var Font = (function FontClosure() {
         accent: accent,
         width: width,
         vmetric: vmetric,
-        disabled: disabled,
         operatorList: operatorList
       };
     },
@@ -4658,23 +4328,12 @@ var Font = (function FontClosure() {
       glyphs = [];
       var charsCacheKey = chars;
 
-      var converter;
-      var cidEncoding = this.cidEncoding;
-      if (cidEncoding) {
-        converter = CMapConverterList[cidEncoding];
-        if (converter) {
-          chars = converter(chars);
-        } else if (cidEncoding.indexOf('Uni') !== 0 &&
-                   cidEncoding.indexOf('Identity-') !== 0) {
-          warn('Unsupported CMap: ' + cidEncoding);
-        }
-      }
-      if (!converter && this.cmap) {
+      if (this.cMap) {
         var i = 0;
         // composite fonts have multi-byte strings convert the string from
         // single-byte to multi-byte
         while (i < chars.length) {
-          var c = this.cmap.readCharCode(chars, i);
+          var c = this.cMap.readCharCode(chars, i);
           var charcode = c[0];
           var length = c[1];
           i += length;
@@ -4686,8 +4345,7 @@ var Font = (function FontClosure() {
             glyphs.push(null);
           }
         }
-      }
-      else {
+      } else {
         for (var i = 0, ii = chars.length; i < ii; ++i) {
           var charcode = chars.charCodeAt(i);
           var glyph = this.charToGlyph(charcode);
@@ -4722,6 +4380,59 @@ var ErrorFont = (function ErrorFontClosure() {
   return ErrorFont;
 })();
 
+/**
+ * Shared logic for building a char code to glyph ID mapping for Type1 and
+ * simple CFF fonts. See section 9.6.6.2 of the spec.
+ * @param {Object} properties Font properties object.
+ * @param {Object} builtInEncoding The encoding contained within the actual font
+ * data; its values are used directly as glyph IDs.
+ * @param {Array} glyphNames Glyph names, where the index is the glyph ID.
+ * @returns {Object} A char code to glyph ID map.
+ */
+function type1FontGlyphMapping(properties, builtInEncoding, glyphNames) {
+  var charCodeToGlyphId = Object.create(null);
+  if (properties.baseEncodingName) {
+    // If a valid base encoding name was used, the mapping is initialized with
+    // that. Names that are not found in glyphNames are left unmapped.
+    var baseEncoding = Encodings[properties.baseEncodingName];
+    for (var charCode = 0; charCode < baseEncoding.length; charCode++) {
+      var glyphId = glyphNames.indexOf(baseEncoding[charCode]);
+      if (glyphId >= 0) {
+        charCodeToGlyphId[charCode] = glyphId;
+      }
+    }
+  } else if (!!(properties.flags & FontFlags.Symbolic)) {
+    // For a symbolic font the encoding should be the font's built-in
+    // encoding.
+    for (var charCode in builtInEncoding) {
+      charCodeToGlyphId[charCode] = builtInEncoding[charCode];
+    }
+  } else {
+    // For non-symbolic fonts that don't have a base encoding the standard
+    // encoding should be used.
+    var baseEncoding = Encodings.StandardEncoding;
+    for (var charCode = 0; charCode < baseEncoding.length; charCode++) {
+      var glyphId = glyphNames.indexOf(baseEncoding[charCode]);
+      if (glyphId >= 0) {
+        charCodeToGlyphId[charCode] = glyphId;
+      }
+    }
+  }
+
+  // Lastly, merge in the differences, which override the mappings above.
+  var differences = properties.differences;
+  if (differences) {
+    for (var charCode in differences) {
+      var glyphName = differences[charCode];
+      var glyphId = glyphNames.indexOf(glyphName);
+      if (glyphId >= 0) {
+        charCodeToGlyphId[charCode] = glyphId;
+      }
+    }
+  }
+  return charCodeToGlyphId;
+}
+
 /*
  * CharStrings are encoded following the the CharString Encoding sequence
  * describe in Chapter 6 of the "Adobe Type1 Font Format" specification.
@@ -5290,11 +5001,11 @@ var Type1Parser = (function Type1ParserClosure() {
           output = [14];
         }
         program.charstrings.push({
-          glyph: glyph,
-          data: output,
-          seac: charString.seac,
+          glyphName: glyph,
+          charstring: output,
+          width: charString.width,
           lsb: charString.lsb,
-          width: charString.width
+          seac: charString.seac
         });
       }
 
@@ -5343,10 +5054,7 @@ var Type1Parser = (function Type1ParserClosure() {
                 this.getToken(); // read the in 'put'
               }
             }
-            if (properties.overridableEncoding && encoding) {
-              properties.baseEncoding = encoding;
-              break;
-            }
+            properties.builtInEncoding = encoding;
             break;
           case 'FontBBox':
             var fontBBox = this.readNumberArray();
@@ -5469,7 +5177,7 @@ var Type1Font = function Type1Font(name, file, properties) {
   for (var info in data.properties)
     properties[info] = data.properties[info];
 
-  var charstrings = this.getOrderedCharStrings(data.charstrings, properties);
+  var charstrings = data.charstrings;
   var type2Charstrings = this.getType2Charstrings(charstrings);
   var subrs = this.getType2Subrs(data.subrs);
 
@@ -5480,35 +5188,37 @@ var Type1Font = function Type1Font(name, file, properties) {
 };
 
 Type1Font.prototype = {
-  getOrderedCharStrings: function Type1Font_getOrderedCharStrings(glyphs,
-                                                            properties) {
-    var charstrings = [];
-    var usedUnicodes = [];
-    var i, length, glyphName;
-    var unusedUnicode = CMAP_GLYPH_OFFSET;
-    for (i = 0, length = glyphs.length; i < length; i++) {
-      var item = glyphs[i];
-      var glyphName = item.glyph;
-      var unicode = glyphName in GlyphsUnicode ?
-        GlyphsUnicode[glyphName] : unusedUnicode++;
-      while (usedUnicodes[unicode]) {
-        unicode = unusedUnicode++;
+  get numGlyphs() {
+    return this.charstrings.length;
+  },
+
+  getCharset: function Type1Font_getCharset() {
+    var charset = ['.notdef'];
+    var charstrings = this.charstrings;
+    for (var glyphId = 0; glyphId < charstrings.length; glyphId++) {
+      charset.push(charstrings[glyphId].glyphName);
+    }
+    return charset;
+  },
+
+  getGlyphMapping: function Type1Font_getGlyphMapping(properties) {
+    var charstrings = this.charstrings;
+    var glyphNames = ['.notdef'];
+    for (var glyphId = 0; glyphId < charstrings.length; glyphId++) {
+      glyphNames.push(charstrings[glyphId].glyphName);
+    }
+    var encoding = properties.builtInEncoding;
+    if (encoding) {
+      var builtInEncoding = {};
+      for (var charCode in encoding) {
+        var glyphId = glyphNames.indexOf(encoding[charCode]);
+        if (glyphId >= 0) {
+          builtInEncoding[charCode] = glyphId;
+        }
       }
-      usedUnicodes[unicode] = true;
-      charstrings.push({
-        glyph: glyphName,
-        unicode: unicode,
-        gid: i,
-        charstring: item.data,
-        width: item.width,
-        lsb: item.lsb
-      });
     }
 
-    charstrings.sort(function charstrings_sort(a, b) {
-      return a.unicode - b.unicode;
-    });
-    return charstrings;
+    return type1FontGlyphMapping(properties, builtInEncoding, glyphNames);
   },
 
   getSeacs: function Type1Font_getSeacs(charstrings) {
@@ -5517,7 +5227,8 @@ Type1Font.prototype = {
     for (i = 0, ii = charstrings.length; i < ii; i++) {
       var charstring = charstrings[i];
       if (charstring.seac) {
-        seacMap[i] = charstring.seac;
+        // Offset by 1 for .notdef
+        seacMap[i + 1] = charstring.seac;
       }
     }
     return seacMap;
@@ -5589,11 +5300,11 @@ Type1Font.prototype = {
     var count = glyphs.length;
     var charsetArray = [0];
     for (var i = 0; i < count; i++) {
-      var index = CFFStandardStrings.indexOf(charstrings[i].glyph);
-      // Some characters like asterikmath && circlecopyrt are
-      // missing from the original strings, for the moment let's
-      // map them to .notdef and see later if it cause any
-      // problems
+      var index = CFFStandardStrings.indexOf(charstrings[i].glyphName);
+      // TODO: Insert the string and correctly map it.  Previously it was
+      // thought mapping names that aren't in the standard strings to .notdef
+      // was fine, however in issue818 when mapping them all to .notdef the
+      // adieresis glyph no longer worked.
       if (index == -1)
         index = 0;
 
@@ -5660,7 +5371,7 @@ var CFFFont = (function CFFFontClosure() {
     var parser = new CFFParser(file, properties);
     this.cff = parser.parse();
     var compiler = new CFFCompiler(this.cff);
-    this.readExtra();
+    this.seacs = this.cff.seacs;
     try {
       this.data = compiler.compile();
     } catch (e) {
@@ -5672,111 +5383,38 @@ var CFFFont = (function CFFFontClosure() {
   }
 
   CFFFont.prototype = {
-    readExtra: function CFFFont_readExtra() {
-      // charstrings contains info about glyphs (one element per glyph
-      // containing mappings for {unicode, width})
-      var charstrings = this.getCharStrings();
-
-      // create the mapping between charstring and glyph id
-      var glyphIds = [];
-      for (var i = 0, ii = charstrings.length; i < ii; i++)
-        glyphIds.push(charstrings[i].gid);
-
-      this.charstrings = charstrings;
-      this.glyphIds = glyphIds;
-      this.seacs = this.cff.seacs;
+    get numGlyphs() {
+      return this.cff.charStrings.count;
+    },
+    getCharset: function CFFFont_getCharset() {
+      return this.cff.charset.charset;
     },
-    getCharStrings: function CFFFont_getCharStrings() {
+    getGlyphMapping: function CFFFont_getGlyphMapping() {
       var cff = this.cff;
       var charsets = cff.charset.charset;
-      var encoding = cff.encoding ? cff.encoding.encoding : null;
-      var charstrings = [];
-      var unicodeUsed = [];
-      var unassignedUnicodeItems = [];
-      var inverseEncoding = [];
-      var gidStart = 0;
-      if (charsets[0] === '.notdef') {
-        gidStart = 1;
-      }
-      // According to section 9.7.4.2 CIDFontType0C glyph selection should be
-      // handled differently.
-      if (this.properties.subtype === 'CIDFontType0C') {
+      var charCodeToGlyphId = Object.create(null);
+
+      if (this.properties.composite) {
         if (this.cff.isCIDFont) {
           // If the font is actually a CID font then we should use the charset
           // to map CIDs to GIDs.
-          inverseEncoding = charsets;
+          for (var glyphId = 0; glyphId < charsets.length; glyphId++) {
+            var cidString = String.fromCharCode(charsets[glyphId]);
+            var charCode = this.properties.cMap.map.indexOf(cidString);
+            charCodeToGlyphId[charCode] = glyphId;
+          }
         } else {
           // If it is NOT actually a CID font then CIDs should be mapped
           // directly to GIDs.
-          inverseEncoding = [];
-          for (var i = 0, ii = cff.charStrings.count; i < ii; i++) {
-            inverseEncoding.push(i);
-          }
-          // Use the identity map for charsets as well.
-          charsets = inverseEncoding;
-        }
-      } else {
-        for (var charcode in encoding) {
-          var gid = encoding[charcode];
-          if (gid in inverseEncoding) {
-            // Glyphs can be multiply-encoded if there was an encoding
-            // supplement. Convert to an array and append the charcode.
-            var previousCharcode = inverseEncoding[gid];
-            if (!isArray(previousCharcode)) {
-              inverseEncoding[gid] = [previousCharcode];
-            }
-            inverseEncoding[gid].push(charcode | 0);
-          } else {
-            inverseEncoding[gid] = charcode | 0;
+          for (var glyphId = 0; glyphId < cff.charStrings.count; glyphId++) {
+            charCodeToGlyphId[glyphId] = glyphId;
           }
         }
+        return charCodeToGlyphId;
       }
 
-      for (var i = gidStart, ii = charsets.length; i < ii; i++) {
-        var glyph = charsets[i];
-
-        var codes = inverseEncoding[i];
-        if (!isArray(codes)) {
-          codes = [codes];
-        }
-
-        for (var j = 0; j < codes.length; j++) {
-          var code = codes[j];
-
-          if (!code || isSpecialUnicode(code)) {
-            unassignedUnicodeItems.push(i, code);
-            continue;
-          }
-          charstrings.push({
-            unicode: code,
-            code: code,
-            gid: i,
-            glyph: glyph
-          });
-          unicodeUsed[code] = true;
-        }
-      }
-
-      var nextUnusedUnicode = CMAP_GLYPH_OFFSET;
-      for (var j = 0, jj = unassignedUnicodeItems.length; j < jj; j += 2) {
-        var i = unassignedUnicodeItems[j];
-        // giving unicode value anyway
-        while (nextUnusedUnicode in unicodeUsed)
-          nextUnusedUnicode++;
-        var unicode = nextUnusedUnicode++;
-        charstrings.push({
-          unicode: unicode,
-          code: unassignedUnicodeItems[j + 1] || 0,
-          gid: i,
-          glyph: charsets[i]
-        });
-      }
-
-      // sort the array by the unicode value (again)
-      charstrings.sort(function getCharStringsSort(a, b) {
-        return a.unicode - b.unicode;
-      });
-      return charstrings;
+      var encoding = cff.encoding ? cff.encoding.encoding : null;
+      return type1FontGlyphMapping(this.properties, encoding, charsets);
     }
   };
 
@@ -7163,6 +6801,6 @@ var CFFCompiler = (function CFFCompilerClosure() {
 // https://github.com/mozilla/pdf.js/issues/1689
 (function checkChromeWindows() {
   if (/Windows.*Chrome/.test(navigator.userAgent)) {
-    SYMBOLIC_FONT_GLYPH_OFFSET = 0xF100;
+    SKIP_PRIVATE_USE_RANGE_F000_TO_F01F = true;
   }
 })();
diff --git a/src/core/worker.js b/src/core/worker.js
index b72d545..307e184 100644
--- a/src/core/worker.js
+++ b/src/core/worker.js
@@ -239,6 +239,8 @@ var WorkerMessageHandler = PDFJS.WorkerMessageHandler = {
       PDFJS.disableFontFace = data.disableFontFace;
       PDFJS.disableCreateObjectURL = data.disableCreateObjectURL;
       PDFJS.verbosity = data.verbosity;
+      PDFJS.cMapUrl = data.cMapUrl === undefined ?
+                           null : data.cMapUrl;
 
       getPdfManager(data).then(function () {
         pdfManager.onLoadedStream().then(function(stream) {
diff --git a/src/display/api.js b/src/display/api.js
index 74f0bec..ece5816 100644
--- a/src/display/api.js
+++ b/src/display/api.js
@@ -30,6 +30,13 @@
 PDFJS.maxImageSize = PDFJS.maxImageSize === undefined ? -1 : PDFJS.maxImageSize;
 
 /**
+ * The URL where the predefined Adobe CMaps are located. Include the trailing
+ * slash.
+ * @var {string}
+ */
+PDFJS.cMapUrl = PDFJS.cMapUrl === undefined ? null : PDFJS.cMapUrl;
+
+/**
  * By default fonts are converted to OpenType fonts and loaded via font face
  * rules. If disabled, the font will be rendered using a built in font renderer
  * that constructs the glyphs with primitive path commands.
@@ -917,6 +924,7 @@ var WorkerTransport = (function WorkerTransportClosure() {
         source: source,
         disableRange: PDFJS.disableRange,
         maxImageSize: PDFJS.maxImageSize,
+        cMapUrl: PDFJS.cMapUrl,
         disableFontFace: PDFJS.disableFontFace,
         disableCreateObjectURL: PDFJS.disableCreateObjectURL,
         verbosity: PDFJS.verbosity
diff --git a/src/display/canvas.js b/src/display/canvas.js
index 7381aa6..e1348a4 100644
--- a/src/display/canvas.js
+++ b/src/display/canvas.js
@@ -1383,38 +1383,37 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
           var accent = glyph.accent;
 
           var scaledX, scaledY, scaledAccentX, scaledAccentY;
-          if (!glyph.disabled) {
-            if (vertical) {
-              scaledX = vx / fontSizeScale;
-              scaledY = (x + vy) / fontSizeScale;
-            } else {
-              scaledX = x / fontSizeScale;
-              scaledY = 0;
-            }
 
-            if (font.remeasure && width > 0 && this.isFontSubpixelAAEnabled) {
-              // some standard fonts may not have the exact width, trying to
-              // rescale per character
-              var measuredWidth = ctx.measureText(character).width * 1000 /
-                current.fontSize * current.fontSizeScale;
-              var characterScaleX = width / measuredWidth;
-              restoreNeeded = true;
-              ctx.save();
-              ctx.scale(characterScaleX, 1);
-              scaledX /= characterScaleX;
-              if (accent) {
-                scaledAccentX /= characterScaleX;
-              }
-            }
+          if (vertical) {
+            scaledX = vx / fontSizeScale;
+            scaledY = (x + vy) / fontSizeScale;
+          } else {
+            scaledX = x / fontSizeScale;
+            scaledY = 0;
+          }
 
-            this.paintChar(character, scaledX, scaledY);
+          if (font.remeasure && width > 0 && this.isFontSubpixelAAEnabled) {
+            // some standard fonts may not have the exact width, trying to
+            // rescale per character
+            var measuredWidth = ctx.measureText(character).width * 1000 /
+              current.fontSize * current.fontSizeScale;
+            var characterScaleX = width / measuredWidth;
+            restoreNeeded = true;
+            ctx.save();
+            ctx.scale(characterScaleX, 1);
+            scaledX /= characterScaleX;
             if (accent) {
-              scaledAccentX = scaledX + accent.offset.x / fontSizeScale;
-              scaledAccentY = scaledY - accent.offset.y / fontSizeScale;
-              this.paintChar(accent.fontChar, scaledAccentX, scaledAccentY);
+              scaledAccentX /= characterScaleX;
             }
           }
 
+          this.paintChar(character, scaledX, scaledY);
+          if (accent) {
+            scaledAccentX = scaledX + accent.offset.x / fontSizeScale;
+            scaledAccentY = scaledY - accent.offset.y / fontSizeScale;
+            this.paintChar(accent.fontChar, scaledAccentX, scaledAccentY);
+          }
+
           x += charWidth;
 
           canvasWidth += charWidth;
diff --git a/test/driver.js b/test/driver.js
index 3d69dc4..464eb2a 100644
--- a/test/driver.js
+++ b/test/driver.js
@@ -28,6 +28,7 @@
 //   "firefox-bin: Fatal IO error 12 (Cannot allocate memory) on X server :1."
 // PDFJS.disableWorker = true;
 PDFJS.enableStats = true;
+PDFJS.cMapUrl = '../external/cmaps/';
 
 var appPath, masterMode, browser, canvas, dummyCanvas, currentTaskIdx,
     manifest, stdout;
diff --git a/test/font/font_fpgm_spec.js b/test/font/font_fpgm_spec.js
index 088cfa0..c040899 100644
--- a/test/font/font_fpgm_spec.js
+++ b/test/font/font_fpgm_spec.js
@@ -9,7 +9,9 @@ describe('font_fpgm', function() {
       var font = new Font("font", new Stream(font2324), {
         loadedName: 'font',
         type: 'CIDFontType2',
-        differences: []
+        differences: [],
+        defaultEncoding: [],
+        cMap: CMapFactory.create(new Name('Identity-H'))
       });
       ttx(font.data, function(result) { output = result; });
       runs(function() {
diff --git a/test/font/font_os2_spec.js b/test/font/font_os2_spec.js
index 320881b..70e87d0 100644
--- a/test/font/font_os2_spec.js
+++ b/test/font/font_os2_spec.js
@@ -11,7 +11,7 @@ describe('font_post', function() {
         loadedName: 'font',
         type: 'TrueType',
         differences: [],
-        baseEncoding: []
+        defaultEncoding: []
       });
       ttx(font.data, function(result) { output = result; });
       runs(function() {
@@ -26,7 +26,9 @@ describe('font_post', function() {
       var font = new Font("font", new Stream(font1282), {
         loadedName: 'font',
         type: 'CIDFontType2',
-        differences: []
+        differences: [],
+        defaultEncoding: [],
+        cMap: CMapFactory.create(new Name('Identity-H'))
       });
       ttx(font.data, function(result) { output = result; });
       runs(function() {
diff --git a/test/font/font_post_spec.js b/test/font/font_post_spec.js
index acd07b8..3de7209 100644
--- a/test/font/font_post_spec.js
+++ b/test/font/font_post_spec.js
@@ -12,7 +12,8 @@ describe('font_post', function() {
         loadedName: 'font',
         type: 'CIDFontType2',
         differences: [],
-        baseEncoding: []
+        defaultEncoding: [],
+        cMap: CMapFactory.create(new Name('Identity-H'))
       });
       ttx(font.data, function(result) { output = result; });
       runs(function() {
@@ -28,7 +29,7 @@ describe('font_post', function() {
         loadedName: 'font',
         type: 'TrueType',
         differences: [],
-        baseEncoding: []
+        defaultEncoding: []
       });
       ttx(font.data, function(result) { output = result; });
       runs(function() {
@@ -44,7 +45,7 @@ describe('font_post', function() {
         loadedName: 'font',
         type: 'TrueType',
         differences: [],
-        baseEncoding: []
+        defaultEncoding: []
       });
       ttx(font.data, function(result) { output = result; });
       runs(function() {
diff --git a/test/font/font_test.html b/test/font/font_test.html
index 4fc43a7..12f1819 100644
--- a/test/font/font_test.html
+++ b/test/font/font_test.html
@@ -28,6 +28,7 @@
   <script type="text/javascript" src="../../src/core/crypto.js"></script>
   <script type="text/javascript" src="../../src/core/pattern.js"></script>
   <script type="text/javascript" src="../../src/core/evaluator.js"></script>
+  <script type="text/javascript" src="../../src/core/cmap.js"></script>
   <script type="text/javascript" src="../../src/core/fonts.js"></script>
   <script type="text/javascript" src="../../src/core/glyphlist.js"></script>
   <script type="text/javascript" src="../../src/core/image.js"></script>
diff --git a/test/test.py b/test/test.py
index 51f1d61..8ddbf13 100644
--- a/test/test.py
+++ b/test/test.py
@@ -220,6 +220,11 @@ class TestHandlerBase(BaseHTTPRequestHandler):
             self.sendIndex(url.path, url.query)
             return
 
+        pieces = path.split(os.sep);
+        if pieces[len(pieces) - 2] == 'cmaps':
+            self.sendFile(path, '.properties');
+            return
+
         if not (prefix == DOC_ROOT
                 and os.path.isfile(path)
                 and ext in MIMEs):
diff --git a/test/unit/cmap_spec.js b/test/unit/cmap_spec.js
index be0e412..6d8350b 100644
--- a/test/unit/cmap_spec.js
+++ b/test/unit/cmap_spec.js
@@ -1,6 +1,6 @@
 /* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
-/* globals expect, it, describe, StringStream, Lexer, CMapFactory */
+/* globals expect, it, describe, StringStream, Lexer, CMapFactory, Name */
 
 'use strict';
 
@@ -82,5 +82,22 @@ describe('cmap', function() {
     expect(c[0]).toEqual(0x8EA1A1A1);
     expect(c[1]).toEqual(4);
   });
+  it('read usecmap', function() {
+    var str = '/Adobe-Japan1-1 usecmap\n';
+    var stream = new StringStream(str);
+    var cmap = CMapFactory.create(stream, null, '../../external/cmaps/');
+    expect(cmap.useCMap).toBeDefined();
+  });
+  it('parses wmode', function() {
+    var str = '/WMode 1 def\n';
+    var stream = new StringStream(str);
+    var cmap = CMapFactory.create(stream);
+    expect(cmap.vertical).toEqual(true);
+  });
+  it('loads built in cmap', function() {
+    var cmap = CMapFactory.create(new Name('Adobe-Japan1-1'),
+                                  '../../external/cmaps/',
+                                  null);
+  });
 });
 
diff --git a/test/unit/font_spec.js b/test/unit/font_spec.js
index 4efccb9..aa50cc5 100644
--- a/test/unit/font_spec.js
+++ b/test/unit/font_spec.js
@@ -386,7 +386,7 @@ describe('font', function() {
       var parser = new Type1Parser(stream);
       var props = { overridableEncoding: true };
       var program = parser.extractFontHeader(props);
-      expect(props.baseEncoding[33]).toEqual('arrowright');
+      expect(props.builtInEncoding[33]).toEqual('arrowright');
     });
   });
 });
diff --git a/web/viewer.js b/web/viewer.js
index a7b4d25..a1dc43a 100644
--- a/web/viewer.js
+++ b/web/viewer.js
@@ -62,6 +62,11 @@ PDFJS.imageResourcesPath = './images/';
 //#if (FIREFOX || MOZCENTRAL || B2G || GENERIC || CHROME)
 //PDFJS.workerSrc = '../build/pdf.worker.js';
 //#endif
+//#if !PRODUCTION
+PDFJS.cMapUrl = '../external/cmaps/';
+//#else
+//PDFJS.cMapUrl = '../web/cmaps/';
+//#endif
 
 var mozL10n = document.mozL10n || document.webL10n;
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/pdf.js.git



More information about the Pkg-javascript-commits mailing list