[Pkg-javascript-commits] [pdf.js] 35/139: fixes caching of inline images during parsing.

David Prévot taffit at moszumanska.debian.org
Fri Jan 9 21:18:24 UTC 2015


This is an automated email from the git hooks/post-receive script.

taffit pushed a commit to branch master
in repository pdf.js.

commit 970c048d50335658f1646b88acd2bf17d9c88230
Author: Fabian Lange <lange.fabian at gmail.com>
Date:   Sun Oct 26 17:03:44 2014 +0100

    fixes caching of inline images during parsing.
    
    As described in #5444, the evaluator will perform identity checking of
    paintImageMaskXObjects to decide if it can use
    paintImageMaskXObjectRepeat instead of paintImageMaskXObjectGroup.
    
    This can only ever work if the entry is a cache hit. However the
    previous caching implementation was doing a lazy caching, which would
    only consider a image cache worthy if it is repeated.
    Only then the repeated instance would be cached.
    As a result of this the sequence of identical images A1 A2 A3 A4 would
    be seen as A1 A2 A2 A2 by the evaluator, which prevents using the
    "repeat" optimization. Also only the last encountered image is cached,
    so A1 B1 A2 B2, would stay A1 B1 A2 B2.
    
    The new implementation drops the "lazy" init of the cache. The threshold
    for enabling an image to be cached is rather small, so the potential waste
    in storage and adler32 calculation is rather low. It also caches any
    eligible image by its adler32.
    
    The two example from above would now be A1 A1 A1 A1 and A1 B1 A1 B1
    which not only saves temporary storage, but also prevents computing
    identical masks over and over again (which is the main performance impact
    of #2618)
---
 src/core/evaluator.js | 31 ++++++++++++++++++-------------
 src/core/parser.js    | 40 ++++++++++++++++------------------------
 2 files changed, 34 insertions(+), 37 deletions(-)

diff --git a/src/core/evaluator.js b/src/core/evaluator.js
index 753d248..fb0d746 100644
--- a/src/core/evaluator.js
+++ b/src/core/evaluator.js
@@ -171,7 +171,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
     buildPaintImageXObject:
         function PartialEvaluator_buildPaintImageXObject(resources, image,
                                                          inline, operatorList,
-                                                         cacheKey, cache) {
+                                                         cacheKey, imageCache) {
       var self = this;
       var dict = image.dict;
       var w = dict.get('Width', 'W');
@@ -209,9 +209,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
         args = [imgData];
         operatorList.addOp(OPS.paintImageMaskXObject, args);
         if (cacheKey) {
-          cache.key = cacheKey;
-          cache.fn = OPS.paintImageMaskXObject;
-          cache.args = args;
+          imageCache[cacheKey] = {
+            fn: OPS.paintImageMaskXObject,
+            args: args
+          };
         }
         return;
       }
@@ -260,9 +261,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
 
       operatorList.addOp(OPS.paintImageXObject, args);
       if (cacheKey) {
-        cache.key = cacheKey;
-        cache.fn = OPS.paintImageXObject;
-        cache.args = args;
+        imageCache[cacheKey] = {
+          fn: OPS.paintImageXObject,
+          args: args
+        };
       }
     },
 
@@ -656,8 +658,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
               }
               // eagerly compile XForm objects
               var name = args[0].name;
-              if (imageCache.key === name) {
-                operatorList.addOp(imageCache.fn, imageCache.args);
+              if (imageCache[name] !== undefined) {
+                operatorList.addOp(imageCache[name].fn, imageCache[name].args);
                 args = null;
                 continue;
               }
@@ -706,10 +708,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
                 }, reject);
             case OPS.endInlineImage:
               var cacheKey = args[0].cacheKey;
-              if (cacheKey && imageCache.key === cacheKey) {
-                operatorList.addOp(imageCache.fn, imageCache.args);
-                args = null;
-                continue;
+              if (cacheKey) {
+                var cacheEntry = imageCache[cacheKey];
+                if (cacheEntry !== undefined) {
+                  operatorList.addOp(cacheEntry.fn, cacheEntry.args);
+                  args = null;
+                  continue;
+                }
               }
               self.buildPaintImageXObject(resources, args[0], true,
                 operatorList, cacheKey, imageCache);
diff --git a/src/core/parser.js b/src/core/parser.js
index 73eb756..f039ac3 100644
--- a/src/core/parser.js
+++ b/src/core/parser.js
@@ -28,16 +28,14 @@ function isEOF(v) {
   return (v === EOF);
 }
 
+var MAX_LENGTH_TO_CACHE = 1000;
+
 var Parser = (function ParserClosure() {
   function Parser(lexer, allowStreams, xref) {
     this.lexer = lexer;
     this.allowStreams = allowStreams;
     this.xref = xref;
-    this.imageCache = {
-      length: 0,
-      adler32: 0,
-      stream: null
-    };
+    this.imageCache = {};
     this.refill();
   }
 
@@ -185,34 +183,29 @@ var Parser = (function ParserClosure() {
       var length = (stream.pos - 4) - startPos;
       var imageStream = stream.makeSubStream(startPos, length, dict);
 
-      // trying to cache repeat images, first we are trying to "warm up" caching
-      // using length, then comparing adler32
-      var MAX_LENGTH_TO_CACHE = 1000;
-      var cacheImage = false, adler32;
-      if (length < MAX_LENGTH_TO_CACHE && this.imageCache.length === length) {
+      // cache all images below the MAX_LENGTH_TO_CACHE threshold by their
+      // adler32 checksum.
+      var adler32;
+      if (length < MAX_LENGTH_TO_CACHE) {
         var imageBytes = imageStream.getBytes();
         imageStream.reset();
 
         var a = 1;
         var b = 0;
         for (i = 0, ii = imageBytes.length; i < ii; ++i) {
-          a = (a + (imageBytes[i] & 0xff)) % 65521;
-          b = (b + a) % 65521;
+          // no modulo required in the loop if imageBytes.length < 5552
+          a += imageBytes[i] & 0xff;
+          b += a;
         }
-        adler32 = (b << 16) | a;
+        adler32 = ((b % 65521) << 16) | (a % 65521);
 
-        if (this.imageCache.stream && this.imageCache.adler32 === adler32) {
+        if (this.imageCache.adler32 === adler32) {
           this.buf2 = Cmd.get('EI');
           this.shift();
 
-          this.imageCache.stream.reset();
-          return this.imageCache.stream;
+          this.imageCache[adler32].reset();
+          return this.imageCache[adler32];
         }
-        cacheImage = true;
-      }
-      if (!cacheImage && !this.imageCache.stream) {
-        this.imageCache.length = length;
-        this.imageCache.stream = null;
       }
 
       if (cipherTransform) {
@@ -221,10 +214,9 @@ var Parser = (function ParserClosure() {
 
       imageStream = this.filter(imageStream, dict, length);
       imageStream.dict = dict;
-      if (cacheImage) {
+      if (adler32 !== undefined) {
         imageStream.cacheKey = 'inline_' + length + '_' + adler32;
-        this.imageCache.adler32 = adler32;
-        this.imageCache.stream = imageStream;
+        this.imageCache[adler32] = imageStream;
       }
 
       this.buf2 = Cmd.get('EI');

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/pdf.js.git



More information about the Pkg-javascript-commits mailing list