[Pkg-javascript-commits] [pdf.js] 39/109: Make `XRef_indexObjects` more robust against bad PDF files (issue 5752)
David Prévot
taffit at moszumanska.debian.org
Fri Sep 25 03:04:15 UTC 2015
This is an automated email from the git hooks/post-receive script.
taffit pushed a commit to branch master
in repository pdf.js.
commit 56a43a3181e4670c75ce977cea851a648e6c1fb5
Author: Jonas Jenwald <jonas.jenwald at gmail.com>
Date: Fri Aug 21 16:57:08 2015 +0200
Make `XRef_indexObjects` more robust against bad PDF files (issue 5752)
This patch improves the detection of `xref` in files where it is followed by an arbitrary whitespace character (not just a line-breaking char).
It also adds a check for missing whitespace, e.g. `1 0 obj<<`, to speed up `readToken` for the PDF file in the referenced issue.
Finally, the patch also replaces a bunch of magic numbers with suitably named constants.
Fixes issue 5752.
Also improves issue 6243, but some problems remain with that file.
---
src/core/obj.js | 17 ++++++++++++-----
test/pdfs/issue5752.pdf.link | 1 +
test/test_manifest.json | 9 +++++++++
3 files changed, 22 insertions(+), 5 deletions(-)
diff --git a/src/core/obj.js b/src/core/obj.js
index a8bd5d3..161e8ca 100644
--- a/src/core/obj.js
+++ b/src/core/obj.js
@@ -1013,9 +1013,12 @@ var XRef = (function XRefClosure() {
indexObjects: function XRef_indexObjects() {
// Simple scan through the PDF content to find objects,
// trailers and XRef streams.
+ var TAB = 0x9, LF = 0xA, CR = 0xD, SPACE = 0x20;
+ var PERCENT = 0x25, LT = 0x3C;
+
function readToken(data, offset) {
var token = '', ch = data[offset];
- while (ch !== 13 && ch !== 10) {
+ while (ch !== LF && ch !== CR && ch !== LT) {
if (++offset >= data.length) {
break;
}
@@ -1047,6 +1050,9 @@ var XRef = (function XRefClosure() {
var endobjBytes = new Uint8Array([101, 110, 100, 111, 98, 106]);
var xrefBytes = new Uint8Array([47, 88, 82, 101, 102]);
+ // Clear out any existing entries, since they may be bogus.
+ this.entries.length = 0;
+
var stream = this.stream;
stream.pos = 0;
var buffer = stream.getBytes();
@@ -1054,23 +1060,24 @@ var XRef = (function XRefClosure() {
var trailers = [], xrefStms = [];
while (position < length) {
var ch = buffer[position];
- if (ch === 32 || ch === 9 || ch === 13 || ch === 10) {
+ if (ch === TAB || ch === LF || ch === CR || ch === SPACE) {
++position;
continue;
}
- if (ch === 37) { // %-comment
+ if (ch === PERCENT) { // %-comment
do {
++position;
if (position >= length) {
break;
}
ch = buffer[position];
- } while (ch !== 13 && ch !== 10);
+ } while (ch !== LF && ch !== CR);
continue;
}
var token = readToken(buffer, position);
var m;
- if (token === 'xref') {
+ if (token.indexOf('xref') === 0 &&
+ (token.length === 4 || /\s/.test(token[4]))) {
position += skipUntil(buffer, position, trailerBytes);
trailers.push(position);
position += skipUntil(buffer, position, startxrefBytes);
diff --git a/test/pdfs/issue5752.pdf.link b/test/pdfs/issue5752.pdf.link
new file mode 100644
index 0000000..0af8c7b
--- /dev/null
+++ b/test/pdfs/issue5752.pdf.link
@@ -0,0 +1 @@
+http://web.archive.org/web/20150821144004/http://222.247.54.152/Fulltext/qkyxlcyjy200504007.pdf
diff --git a/test/test_manifest.json b/test/test_manifest.json
index 9b6a9e8..220074c 100644
--- a/test/test_manifest.json
+++ b/test/test_manifest.json
@@ -1071,6 +1071,15 @@
"lastPage": 1,
"type": "eq"
},
+ { "id": "issue5752",
+ "file": "pdfs/issue5752.pdf",
+ "md5": "aa20ad7cff71e9481c0cd623ddbff3b7",
+ "rounds": 1,
+ "link": true,
+ "firstPage": 1,
+ "lastPage": 1,
+ "type": "eq"
+ },
{ "id": "issue2931",
"file": "pdfs/issue2931.pdf",
"md5": "ea40940eaf3541b312bda9329167da11",
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/pdf.js.git
More information about the Pkg-javascript-commits mailing list