[Pkg-javascript-commits] [pdf.js] 85/115: Fix the `charCodeOf` method in `IdentityToUnicodeMap` in order to prevent text selection from breaking

David Prévot taffit at moszumanska.debian.org
Wed Dec 16 20:03:19 UTC 2015


This is an automated email from the git hooks/post-receive script.

taffit pushed a commit to branch master
in repository pdf.js.

commit 4810b7b8fc69780ed3e9d181faa5228f98a14637
Author: Jonas Jenwald <jonas.jenwald at gmail.com>
Date:   Sat Dec 5 12:22:09 2015 +0100

    Fix the `charCodeOf` method in `IdentityToUnicodeMap` in order to prevent text selection from breaking
    
    After PR 6590, `font.spaceWidth` is now called in more cases than before (in `PartialEvaluator_getTextContent`), which exposed an underlying issue with `IdentityToUnicodeMap_charCodeOf` throwing an error.
    This breaks text selection in some PDF files found in the wild; hence this patch replaces the `error` call with an actual implementation (modelled after `IdentityCMap_charCodeOf`).
---
 src/core/fonts.js                             |  6 +--
 test/pdfs/.gitignore                          |  1 +
 test/pdfs/IdentityToUnicodeMap_charCodeOf.pdf | 68 +++++++++++++++++++++++++++
 test/test_manifest.json                       |  7 +++
 4 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/src/core/fonts.js b/src/core/fonts.js
index a13a7ea..45a984e 100644
--- a/src/core/fonts.js
+++ b/src/core/fonts.js
@@ -15,7 +15,7 @@
 /* globals FONT_IDENTITY_MATRIX, FontType, warn, GlyphsUnicode, error, string32,
            readUint32, Stream, FontRendererFactory, shadow, stringToBytes,
            bytesToString, info, assert, IdentityCMap, Name, CMapFactory, PDFJS,
-           isNum, Lexer, isArray, ISOAdobeCharset, ExpertCharset,
+           isNum, Lexer, isArray, ISOAdobeCharset, ExpertCharset, isInt,
            ExpertSubsetCharset, Util, DingbatsGlyphsUnicode */
 
 'use strict';
@@ -2256,7 +2256,7 @@ var IdentityToUnicodeMap = (function IdentityToUnicodeMapClosure() {
     },
 
     charCodeOf: function (v) {
-      error('should not call .charCodeOf');
+      return (isInt(v) && v >= this.firstChar && v <= this.lastChar) ? v : -1;
     }
   };
 
@@ -4700,7 +4700,7 @@ var Font = (function FontClosure() {
           }
         }
         // ... via toUnicode map
-        if (!charcode && 'toUnicode' in this) {
+        if (!charcode && this.toUnicode) {
           charcode = this.toUnicode.charCodeOf(glyphUnicode);
         }
         // setting it to unicode if negative or undefined
diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore
index 49ec4be..35dac7b 100644
--- a/test/pdfs/.gitignore
+++ b/test/pdfs/.gitignore
@@ -179,6 +179,7 @@
 !issue5549.pdf
 !issue5475.pdf
 !annotation-border-styles.pdf
+!IdentityToUnicodeMap_charCodeOf.pdf
 !issue5481.pdf
 !issue5567.pdf
 !issue5701.pdf
diff --git a/test/pdfs/IdentityToUnicodeMap_charCodeOf.pdf b/test/pdfs/IdentityToUnicodeMap_charCodeOf.pdf
new file mode 100644
index 0000000..01daa8b
--- /dev/null
+++ b/test/pdfs/IdentityToUnicodeMap_charCodeOf.pdf
@@ -0,0 +1,68 @@
+%PDF-1.7
+%����
+1 0 obj 
+<<
+/Pages 2 0 R
+/Type /Catalog
+>>
+endobj 
+2 0 obj 
+<<
+/Kids [3 0 R]
+/Count 1
+/Type /Pages
+>>
+endobj 
+3 0 obj 
+<<
+/Parent 2 0 R
+/MediaBox [0 0 200 50]
+/Resources 
+<<
+/Font 
+<<
+/F1 4 0 R
+>>
+>>
+/Contents 5 0 R
+/Type /Page
+>>
+endobj 
+4 0 obj 
+<<
+/BaseFont /Times-Roman
+/Subtype /Type1
+/ToUnicode /Identity-H
+/Encoding /WinAnsiEncoding
+/Type /Font
+>>
+endobj 
+5 0 obj 
+<<
+/Length 37
+>>
+stream
+BT
+10 20 TD
+/F1 20 Tf
+(ABCdef) Tj
+ET
+
+endstream 
+endobj xref
+0 6
+0000000000 65535 f 
+0000000015 00000 n 
+0000000066 00000 n 
+0000000125 00000 n 
+0000000254 00000 n 
+0000000378 00000 n 
+trailer
+
+<<
+/Root 1 0 R
+/Size 6
+>>
+startxref
+467
+%%EOF
diff --git a/test/test_manifest.json b/test/test_manifest.json
index 1ead7b4..3e04c71 100644
--- a/test/test_manifest.json
+++ b/test/test_manifest.json
@@ -1453,6 +1453,13 @@
       "lastPage": 1,
       "type": "eq"
     },
+    {  "id": "IdentityToUnicodeMap_charCodeOf",
+       "file": "pdfs/IdentityToUnicodeMap_charCodeOf.pdf",
+       "md5": "da030686418c5e37d889127a05dafb83",
+       "rounds": 1,
+       "link": false,
+       "type": "text"
+    },
     {  "id": "bug894572",
        "file": "pdfs/bug894572.pdf",
        "md5": "e54a6b0451939f685ed37e3d46e16158",

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/pdf.js.git



More information about the Pkg-javascript-commits mailing list