[Pkg-javascript-commits] [pdf.js] 15/161: Estimate the size of decoded streams in advance.

David Prévot taffit at moszumanska.debian.org
Sat Apr 19 14:16:17 UTC 2014


This is an automated email from the git hooks/post-receive script.

taffit pushed a commit to branch master
in repository pdf.js.

commit b3024db67708eee5b9106708406deae87f525eda
Author: Nicholas Nethercote <nnethercote at mozilla.com>
Date:   Mon Mar 10 22:18:30 2014 -0700

    Estimate the size of decoded streams in advance.
    
    When decoding a stream, the decode buffer is often grown multiple times, its
    byte size increasing like so: 512, 1024, 2048, etc. This patch estimates the
    minimum size in advance (using the length of the encoded stream), often
    allowing the smaller sizes to be skipped. It also renames numerous |length|
    variables as |maybeLength| to make it clear that they can be |null|.
    
    I measured this change on eight documents. This change reduces the cumulative
    size of decode buffer allocations by 0--32%, with 10--20% being typical. This
    reduces peak RSS by 10 or 20 MiB for several of them.
---
 src/core/crypto.js       |  4 +--
 src/core/evaluator.js    | 21 +++++++------
 src/core/parser.js       | 36 +++++++++++----------
 src/core/stream.js       | 82 ++++++++++++++++++++++++++++++------------------
 test/unit/stream_spec.js |  2 +-
 5 files changed, 85 insertions(+), 60 deletions(-)

diff --git a/src/core/crypto.js b/src/core/crypto.js
index cc54f6e..dca99a0 100644
--- a/src/core/crypto.js
+++ b/src/core/crypto.js
@@ -431,9 +431,9 @@ var CipherTransform = (function CipherTransformClosure() {
     this.streamCipherConstructor = streamCipherConstructor;
   }
   CipherTransform.prototype = {
-    createStream: function CipherTransform_createStream(stream) {
+    createStream: function CipherTransform_createStream(stream, length) {
       var cipher = new this.streamCipherConstructor();
-      return new DecryptStream(stream,
+      return new DecryptStream(stream, length,
         function cipherTransformDecryptStream(data, finalize) {
           return cipher.decryptBlock(data, finalize);
         }
diff --git a/src/core/evaluator.js b/src/core/evaluator.js
index 2cf00d0..17ed7f3 100644
--- a/src/core/evaluator.js
+++ b/src/core/evaluator.js
@@ -643,7 +643,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
       var preprocessor = new EvaluatorPreprocessor(stream, xref);
       var res = resources;
 
-      var chunk = '';
+      var chunkBuf = [];
       var font = null;
       var charSpace = 0, wordSpace = 0;
       var operation;
@@ -692,37 +692,37 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
               var items = args[0];
               for (var j = 0, jj = items.length; j < jj; j++) {
                 if (typeof items[j] === 'string') {
-                  chunk += fontCharsToUnicode(items[j], font);
+                  chunkBuf.push(fontCharsToUnicode(items[j], font));
                 } else if (items[j] < 0 && font.spaceWidth > 0) {
                   var fakeSpaces = -items[j] / font.spaceWidth;
                   if (fakeSpaces > MULTI_SPACE_FACTOR) {
                     fakeSpaces = Math.round(fakeSpaces);
                     while (fakeSpaces--) {
-                      chunk += ' ';
+                      chunkBuf.push(' ');
                     }
                   } else if (fakeSpaces > SPACE_FACTOR) {
-                    chunk += ' ';
+                    chunkBuf.push(' ');
                   }
                 }
               }
               break;
             case OPS.showText:
-              chunk += fontCharsToUnicode(args[0], font);
+              chunkBuf.push(fontCharsToUnicode(args[0], font));
               break;
             case OPS.nextLineShowText:
               // For search, adding a extra white space for line breaks would be
               // better here, but that causes too much spaces in the
               // text-selection divs.
-              chunk += fontCharsToUnicode(args[0], font);
+              chunkBuf.push(fontCharsToUnicode(args[0], font));
               break;
             case OPS.nextLineSetSpacingShowText:
               // Note comment in "'"
-              chunk += fontCharsToUnicode(args[2], font);
+              chunkBuf.push(fontCharsToUnicode(args[2], font));
               break;
             case OPS.paintXObject:
               // Set the chunk such that the following if won't add something
               // to the state.
-              chunk = '';
+              chunkBuf.length = 0;
 
               if (args[0].code) {
                 break;
@@ -771,7 +771,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
               break;
           } // switch
 
-          if (chunk !== '') {
+          if (chunkBuf.length > 0) {
+            var chunk = chunkBuf.join('');
             var bidiResult = PDFJS.bidi(chunk, -1, font.vertical);
             var bidiText = {
               str: bidiResult.str,
@@ -793,7 +794,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
             bidiText.size = fontHeight;
             bidiTexts.push(bidiText);
 
-            chunk = '';
+            chunkBuf.length = 0;
           }
       } // while
 
diff --git a/src/core/parser.js b/src/core/parser.js
index 28440eb..5d7aba4 100644
--- a/src/core/parser.js
+++ b/src/core/parser.js
@@ -170,7 +170,7 @@ var Parser = (function ParserClosure() {
       var length = (stream.pos - 4) - startPos;
       var imageStream = stream.makeSubStream(startPos, length, dict);
       if (cipherTransform)
-        imageStream = cipherTransform.createStream(imageStream);
+        imageStream = cipherTransform.createStream(imageStream, length);
       imageStream = this.filter(imageStream, dict, length);
       imageStream.dict = dict;
 
@@ -251,7 +251,7 @@ var Parser = (function ParserClosure() {
 
       stream = stream.makeSubStream(pos, length, dict);
       if (cipherTransform)
-        stream = cipherTransform.createStream(stream);
+        stream = cipherTransform.createStream(stream, length);
       stream = this.filter(stream, dict, length);
       stream.dict = dict;
       return stream;
@@ -261,6 +261,8 @@ var Parser = (function ParserClosure() {
       var params = this.fetchIfRef(dict.get('DecodeParms', 'DP'));
       if (isName(filter))
         return this.makeFilter(stream, filter.name, length, params);
+
+      var maybeLength = length;
       if (isArray(filter)) {
         var filterArray = filter;
         var paramsArray = params;
@@ -272,22 +274,23 @@ var Parser = (function ParserClosure() {
           params = null;
           if (isArray(paramsArray) && (i in paramsArray))
             params = paramsArray[i];
-          stream = this.makeFilter(stream, filter.name, length, params);
+          stream = this.makeFilter(stream, filter.name, maybeLength, params);
           // after the first stream the length variable is invalid
-          length = null;
+          maybeLength = null;
         }
       }
       return stream;
     },
-    makeFilter: function Parser_makeFilter(stream, name, length, params) {
+    makeFilter: function Parser_makeFilter(stream, name, maybeLength, params) {
       if (stream.dict.get('Length') === 0) {
         return new NullStream(stream);
       }
       if (name == 'FlateDecode' || name == 'Fl') {
         if (params) {
-          return new PredictorStream(new FlateStream(stream), params);
+          return new PredictorStream(new FlateStream(stream, maybeLength),
+                                     maybeLength, params);
         }
-        return new FlateStream(stream);
+        return new FlateStream(stream, maybeLength);
       }
       if (name == 'LZWDecode' || name == 'LZW') {
         var earlyChange = 1;
@@ -295,30 +298,31 @@ var Parser = (function ParserClosure() {
           if (params.has('EarlyChange'))
             earlyChange = params.get('EarlyChange');
           return new PredictorStream(
-            new LZWStream(stream, earlyChange), params);
+            new LZWStream(stream, maybeLength, earlyChange),
+            maybeLength, params);
         }
-        return new LZWStream(stream, earlyChange);
+        return new LZWStream(stream, maybeLength, earlyChange);
       }
       if (name == 'DCTDecode' || name == 'DCT') {
-        return new JpegStream(stream, length, stream.dict, this.xref);
+        return new JpegStream(stream, maybeLength, stream.dict, this.xref);
       }
       if (name == 'JPXDecode' || name == 'JPX') {
-        return new JpxStream(stream, length, stream.dict);
+        return new JpxStream(stream, maybeLength, stream.dict);
       }
       if (name == 'ASCII85Decode' || name == 'A85') {
-        return new Ascii85Stream(stream);
+        return new Ascii85Stream(stream, maybeLength);
       }
       if (name == 'ASCIIHexDecode' || name == 'AHx') {
-        return new AsciiHexStream(stream);
+        return new AsciiHexStream(stream, maybeLength);
       }
       if (name == 'CCITTFaxDecode' || name == 'CCF') {
-        return new CCITTFaxStream(stream, params);
+        return new CCITTFaxStream(stream, maybeLength, params);
       }
       if (name == 'RunLengthDecode' || name == 'RL') {
-        return new RunLengthStream(stream);
+        return new RunLengthStream(stream, maybeLength);
       }
       if (name == 'JBIG2Decode') {
-        return new Jbig2Stream(stream, length, stream.dict);
+        return new Jbig2Stream(stream, maybeLength, stream.dict);
       }
       warn('filter "' + name + '" not supported yet');
       return stream;
diff --git a/src/core/stream.js b/src/core/stream.js
index 8198795..fe8337b 100644
--- a/src/core/stream.js
+++ b/src/core/stream.js
@@ -98,11 +98,18 @@ var StringStream = (function StringStreamClosure() {
 
 // super class for the decoding streams
 var DecodeStream = (function DecodeStreamClosure() {
-  function DecodeStream() {
+  function DecodeStream(maybeMinBufferLength) {
     this.pos = 0;
     this.bufferLength = 0;
     this.eof = false;
     this.buffer = null;
+    this.minBufferLength = 512;
+    if (maybeMinBufferLength) {
+      // Find the first power of two (>= 512) that is >= maybeMinBufferLength.
+      while (this.minBufferLength < maybeMinBufferLength) {
+        this.minBufferLength *= 2;
+      }
+    }
   }
 
   DecodeStream.prototype = {
@@ -117,7 +124,7 @@ var DecodeStream = (function DecodeStreamClosure() {
       } else {
         current = 0;
       }
-      var size = 512;
+      var size = this.minBufferLength;
       while (size < requested) {
         size *= 2;
       }
@@ -197,7 +204,7 @@ var DecodeStream = (function DecodeStreamClosure() {
 var StreamsSequenceStream = (function StreamsSequenceStreamClosure() {
   function StreamsSequenceStream(streams) {
     this.streams = streams;
-    DecodeStream.call(this);
+    DecodeStream.call(this, /* maybeLength = */ null);
   }
 
   StreamsSequenceStream.prototype = Object.create(DecodeStream.prototype);
@@ -328,7 +335,7 @@ var FlateStream = (function FlateStreamClosure() {
     0x50003, 0x50013, 0x5000b, 0x5001b, 0x50007, 0x50017, 0x5000f, 0x00000
   ]), 5];
 
-  function FlateStream(str) {
+  function FlateStream(str, maybeLength) {
     this.str = str;
     this.dict = str.dict;
 
@@ -346,7 +353,7 @@ var FlateStream = (function FlateStreamClosure() {
     this.codeSize = 0;
     this.codeBuf = 0;
 
-    DecodeStream.call(this);
+    DecodeStream.call(this, maybeLength);
   }
 
   FlateStream.prototype = Object.create(DecodeStream.prototype);
@@ -581,7 +588,7 @@ var FlateStream = (function FlateStreamClosure() {
 })();
 
 var PredictorStream = (function PredictorStreamClosure() {
-  function PredictorStream(str, params) {
+  function PredictorStream(str, maybeLength, params) {
     var predictor = this.predictor = params.get('Predictor') || 1;
 
     if (predictor <= 1)
@@ -604,7 +611,7 @@ var PredictorStream = (function PredictorStreamClosure() {
     this.pixBytes = (colors * bits + 7) >> 3;
     this.rowBytes = (columns * colors * bits + 7) >> 3;
 
-    DecodeStream.call(this);
+    DecodeStream.call(this, maybeLength);
     return this;
   }
 
@@ -774,21 +781,22 @@ var PredictorStream = (function PredictorStreamClosure() {
  * DecodeStreams.
  */
 var JpegStream = (function JpegStreamClosure() {
-  function JpegStream(stream, length, dict, xref) {
+  function JpegStream(stream, maybeLength, dict, xref) {
     // TODO: per poppler, some images may have 'junk' before that
     // need to be removed
     this.stream = stream;
-    this.length = length;
+    this.maybeLength = maybeLength;
     this.dict = dict;
 
-    DecodeStream.call(this);
+    DecodeStream.call(this, maybeLength);
   }
 
   JpegStream.prototype = Object.create(DecodeStream.prototype);
 
   Object.defineProperty(JpegStream.prototype, 'bytes', {
     get: function JpegStream_bytes() {
-      return shadow(this, 'bytes', this.stream.getBytes(this.length));
+      // If this.maybeLength is null, we'll get the entire stream.
+      return shadow(this, 'bytes', this.stream.getBytes(this.maybeLength));
     },
     configurable: true
   });
@@ -841,19 +849,20 @@ var JpegStream = (function JpegStreamClosure() {
  * the stream behaves like all the other DecodeStreams.
  */
 var JpxStream = (function JpxStreamClosure() {
-  function JpxStream(stream, length, dict) {
+  function JpxStream(stream, maybeLength, dict) {
     this.stream = stream;
-    this.length = length;
+    this.maybeLength = maybeLength;
     this.dict = dict;
 
-    DecodeStream.call(this);
+    DecodeStream.call(this, maybeLength);
   }
 
   JpxStream.prototype = Object.create(DecodeStream.prototype);
 
   Object.defineProperty(JpxStream.prototype, 'bytes', {
     get: function JpxStream_bytes() {
-      return shadow(this, 'bytes', this.stream.getBytes(this.length));
+      // If this.maybeLength is null, we'll get the entire stream.
+      return shadow(this, 'bytes', this.stream.getBytes(this.maybeLength));
     },
     configurable: true
   });
@@ -948,19 +957,20 @@ var JpxStream = (function JpxStreamClosure() {
  * the stream behaves like all the other DecodeStreams.
  */
 var Jbig2Stream = (function Jbig2StreamClosure() {
-  function Jbig2Stream(stream, length, dict) {
+  function Jbig2Stream(stream, maybeLength, dict) {
     this.stream = stream;
-    this.length = length;
+    this.maybeLength = maybeLength;
     this.dict = dict;
 
-    DecodeStream.call(this);
+    DecodeStream.call(this, maybeLength);
   }
 
   Jbig2Stream.prototype = Object.create(DecodeStream.prototype);
 
   Object.defineProperty(Jbig2Stream.prototype, 'bytes', {
     get: function Jbig2Stream_bytes() {
-      return shadow(this, 'bytes', this.stream.getBytes(this.length));
+      // If this.maybeLength is null, we'll get the entire stream.
+      return shadow(this, 'bytes', this.stream.getBytes(this.maybeLength));
     },
     configurable: true
   });
@@ -1004,14 +1014,14 @@ var Jbig2Stream = (function Jbig2StreamClosure() {
 })();
 
 var DecryptStream = (function DecryptStreamClosure() {
-  function DecryptStream(str, decrypt) {
+  function DecryptStream(str, maybeLength, decrypt) {
     this.str = str;
     this.dict = str.dict;
     this.decrypt = decrypt;
     this.nextChunk = null;
     this.initialized = false;
 
-    DecodeStream.call(this);
+    DecodeStream.call(this, maybeLength);
   }
 
   var chunkSize = 512;
@@ -1048,12 +1058,17 @@ var DecryptStream = (function DecryptStreamClosure() {
 })();
 
 var Ascii85Stream = (function Ascii85StreamClosure() {
-  function Ascii85Stream(str) {
+  function Ascii85Stream(str, maybeLength) {
     this.str = str;
     this.dict = str.dict;
     this.input = new Uint8Array(5);
 
-    DecodeStream.call(this);
+    // Most streams increase in size when decoded, but Ascii85 streams
+    // typically shrink by ~20%.
+    if (maybeLength) {
+      maybeLength = 0.8 * maybeLength;
+    }
+    DecodeStream.call(this, maybeLength);
   }
 
   Ascii85Stream.prototype = Object.create(DecodeStream.prototype);
@@ -1121,13 +1136,18 @@ var Ascii85Stream = (function Ascii85StreamClosure() {
 })();
 
 var AsciiHexStream = (function AsciiHexStreamClosure() {
-  function AsciiHexStream(str) {
+  function AsciiHexStream(str, maybeLength) {
     this.str = str;
     this.dict = str.dict;
 
     this.firstDigit = -1;
 
-    DecodeStream.call(this);
+    // Most streams increase in size when decoded, but AsciiHex streams shrink
+    // by 50%.
+    if (maybeLength) {
+      maybeLength = 0.5 * maybeLength;
+    }
+    DecodeStream.call(this, maybeLength);
   }
 
   AsciiHexStream.prototype = Object.create(DecodeStream.prototype);
@@ -1178,11 +1198,11 @@ var AsciiHexStream = (function AsciiHexStreamClosure() {
 })();
 
 var RunLengthStream = (function RunLengthStreamClosure() {
-  function RunLengthStream(str) {
+  function RunLengthStream(str, maybeLength) {
     this.str = str;
     this.dict = str.dict;
 
-    DecodeStream.call(this);
+    DecodeStream.call(this, maybeLength);
   }
 
   RunLengthStream.prototype = Object.create(DecodeStream.prototype);
@@ -1650,7 +1670,7 @@ var CCITTFaxStream = (function CCITTFaxStreamClosure() {
     [2, 2], [2, 2], [2, 2], [2, 2]
   ];
 
-  function CCITTFaxStream(str, params) {
+  function CCITTFaxStream(str, maybeLength, params) {
     this.str = str;
     this.dict = str.dict;
 
@@ -1691,7 +1711,7 @@ var CCITTFaxStream = (function CCITTFaxStreamClosure() {
       this.eatBits(1);
     }
 
-    DecodeStream.call(this);
+    DecodeStream.call(this, maybeLength);
   }
 
   CCITTFaxStream.prototype = Object.create(DecodeStream.prototype);
@@ -2186,7 +2206,7 @@ var CCITTFaxStream = (function CCITTFaxStreamClosure() {
 })();
 
 var LZWStream = (function LZWStreamClosure() {
-  function LZWStream(str, earlyChange) {
+  function LZWStream(str, maybeLength, earlyChange) {
     this.str = str;
     this.dict = str.dict;
     this.cachedData = 0;
@@ -2209,7 +2229,7 @@ var LZWStream = (function LZWStreamClosure() {
     }
     this.lzwState = lzwState;
 
-    DecodeStream.call(this);
+    DecodeStream.call(this, maybeLength);
   }
 
   LZWStream.prototype = Object.create(DecodeStream.prototype);
diff --git a/test/unit/stream_spec.js b/test/unit/stream_spec.js
index 05a42d7..443ae6a 100644
--- a/test/unit/stream_spec.js
+++ b/test/unit/stream_spec.js
@@ -30,7 +30,7 @@ describe('stream', function() {
 
       var input = new Stream(new Uint8Array([2, 100, 3, 2, 1, 255, 2, 1, 255]),
         0, 9, dict);
-      var predictor = new PredictorStream(input, dict);
+      var predictor = new PredictorStream(input, /* length = */ 9, dict);
       var result = predictor.getBytes(6);
 
       expect(result).toMatchTypedArray(

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/pdf.js.git



More information about the Pkg-javascript-commits mailing list