[Pkg-javascript-commits] [pdf.js] 98/207: Telemetry for used stream and font types

David Prévot taffit at moszumanska.debian.org
Mon Jul 28 15:36:36 UTC 2014


This is an automated email from the git hooks/post-receive script.

taffit pushed a commit to branch master
in repository pdf.js.

commit 0cd28ebfa35d334d25523ca0994c3cf1b984944f
Author: Yury Delendik <ydelendik at mozilla.com>
Date:   Mon Jun 16 09:52:04 2014 -0500

    Telemetry for used stream and font types
---
 .../firefox/content/PdfJsTelemetry-addon.jsm       |  7 ++++-
 extensions/firefox/content/PdfJsTelemetry.jsm      |  4 +++
 extensions/firefox/content/PdfStreamConverter.jsm  | 35 ++++++++++++++++-----
 src/core/evaluator.js                              | 35 ++++++++++++++++-----
 src/core/fonts.js                                  | 36 ++++++++++++++++++++--
 src/core/obj.js                                    |  8 +++--
 src/core/parser.js                                 | 13 +++++++-
 src/core/worker.js                                 |  6 ++++
 src/display/api.js                                 | 19 ++++++++++++
 src/shared/util.js                                 | 27 ++++++++++++++++
 web/page_view.js                                   |  8 ++++-
 11 files changed, 174 insertions(+), 24 deletions(-)

diff --git a/extensions/firefox/content/PdfJsTelemetry-addon.jsm b/extensions/firefox/content/PdfJsTelemetry-addon.jsm
index c29d5ab..e22903d 100644
--- a/extensions/firefox/content/PdfJsTelemetry-addon.jsm
+++ b/extensions/firefox/content/PdfJsTelemetry-addon.jsm
@@ -31,9 +31,10 @@ Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_FALLBACK_SHOWN", 1, 2, 3,
 Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_DOCUMENT_VERSION", 1, 10, 11, Telemetry.HISTOGRAM_LINEAR);
 Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_DOCUMENT_GENERATOR", 1, 25, 26, Telemetry.HISTOGRAM_LINEAR);
 Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_DOCUMENT_SIZE_KB", 2, 64 * 1024, 20, Telemetry.HISTOGRAM_EXPONENTIAL);
+Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_FONT_TYPES", 1, 19, 20, Telemetry.HISTOGRAM_LINEAR);
 Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_FORM", 1, 2, 3, Telemetry.HISTOGRAM_BOOLEAN);
 Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_PRINT", 1, 2, 3, Telemetry.HISTOGRAM_BOOLEAN);
-Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_STREAM_TYPES", 1, 9, 10, Telemetry.HISTOGRAM_LINEAR);
+Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_STREAM_TYPES", 1, 19, 20, Telemetry.HISTOGRAM_LINEAR);
 Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_TIME_TO_VIEW_MS", 1, 10000, 50, Telemetry.HISTOGRAM_EXPONENTIAL);
 
 
@@ -58,6 +59,10 @@ this.PdfJsTelemetry = {
     let histogram = Telemetry.getAddonHistogram(ADDON_ID, "PDF_VIEWER_DOCUMENT_GENERATOR");
     histogram.add(generatorId);
   },
+  onFontType: function (fontTypeId) {
+    let histogram = Telemetry.getAddonHistogram(ADDON_ID, "PDF_VIEWER_FONT_TYPES");
+    histogram.add(fontTypeId);
+  },
   onForm: function (isAcroform) {
     let histogram = Telemetry.getAddonHistogram(ADDON_ID, "PDF_VIEWER_FORM");
     histogram.add(isAcroform);
diff --git a/extensions/firefox/content/PdfJsTelemetry.jsm b/extensions/firefox/content/PdfJsTelemetry.jsm
index 5d1691d..dd5c0c0 100644
--- a/extensions/firefox/content/PdfJsTelemetry.jsm
+++ b/extensions/firefox/content/PdfJsTelemetry.jsm
@@ -44,6 +44,10 @@ this.PdfJsTelemetry = {
     let histogram = Services.telemetry.getHistogramById("PDF_VIEWER_DOCUMENT_GENERATOR");
     histogram.add(generatorId);
   },
+  onFontType: function (fontTypeId) {
+    let histogram = Services.telemetry.getHistogramById("PDF_VIEWER_FONT_TYPES");
+    histogram.add(fontTypeId);
+  },
   onForm: function (isAcroform) {
     let histogram = Services.telemetry.getHistogramById("PDF_VIEWER_FORM");
     histogram.add(isAcroform);
diff --git a/extensions/firefox/content/PdfStreamConverter.jsm b/extensions/firefox/content/PdfStreamConverter.jsm
index 7e7aed7..03b319e 100644
--- a/extensions/firefox/content/PdfStreamConverter.jsm
+++ b/extensions/firefox/content/PdfStreamConverter.jsm
@@ -247,6 +247,7 @@ function ChromeActions(domWindow, contentDispositionFilename) {
     documentInfo: false,
     firstPageInfo: false,
     streamTypesUsed: [],
+    fontTypesUsed: [],
     startAt: Date.now()
   };
 }
@@ -388,16 +389,34 @@ ChromeActions.prototype = {
           this.telemetryState.firstPageInfo = true;
         }
         break;
-      case 'streamInfo':
-        if (!Array.isArray(probeInfo.streamTypes)) {
+      case 'documentStats':
+        // documentStats can be called several times for one document.
+        // If stream/font types are reported, try not to submit the same
+        // enumeration value multiple times.
+        var documentStats = probeInfo.stats;
+        if (!documentStats || typeof documentStats !== 'object') {
           break;
         }
-        for (var i = 0; i < probeInfo.streamTypes.length; i++) {
-          var streamTypeId = probeInfo.streamTypes[i] | 0;
-          if (streamTypeId >= 0 && streamTypeId < 10 &&
-              !this.telemetryState.streamTypesUsed[streamTypeId]) {
-            PdfJsTelemetry.onStreamType(streamTypeId);
-            this.telemetryState.streamTypesUsed[streamTypeId] = true;
+        var streamTypes = documentStats.streamTypes;
+        if (Array.isArray(streamTypes)) {
+          var STREAM_TYPE_ID_LIMIT = 20;
+          for (var i = 0; i < STREAM_TYPE_ID_LIMIT; i++) {
+            if (streamTypes[i] &&
+                !this.telemetryState.streamTypesUsed[i]) {
+              PdfJsTelemetry.onStreamType(i);
+              this.telemetryState.streamTypesUsed[i] = true;
+            }
+          }
+        }
+        var fontTypes = documentStats.fontTypes;
+        if (Array.isArray(fontTypes)) {
+          var FONT_TYPE_ID_LIMIT = 20;
+          for (var i = 0; i < FONT_TYPE_ID_LIMIT; i++) {
+            if (fontTypes[i] &&
+                !this.telemetryState.fontTypesUsed[i]) {
+              PdfJsTelemetry.onFontType(i);
+              this.telemetryState.fontTypesUsed[i] = true;
+            }
           }
         }
         break;
diff --git a/src/core/evaluator.js b/src/core/evaluator.js
index e353280..0fb8df2 100644
--- a/src/core/evaluator.js
+++ b/src/core/evaluator.js
@@ -22,7 +22,8 @@
            stdFontMap, symbolsFonts, getTilingPatternIR, warn, Util, Promise,
            RefSetCache, isRef, TextRenderingMode, CMapFactory, OPS,
            UNSUPPORTED_FEATURES, UnsupportedManager, NormalizedUnicodes,
-           IDENTITY_MATRIX, reverseIfRtl, createPromiseCapability */
+           IDENTITY_MATRIX, reverseIfRtl, createPromiseCapability,
+           getFontType */
 
 'use strict';
 
@@ -546,11 +547,28 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
       }
 
       translatedPromise.then(function (translatedFont) {
+        if (translatedFont.fontType !== undefined) {
+          var xrefFontStats = xref.stats.fontTypes;
+          xrefFontStats[translatedFont.fontType] = true;
+        }
+
         fontCapability.resolve(new TranslatedFont(font.loadedName,
           translatedFont, font));
       }, function (reason) {
         // TODO fontCapability.reject?
         UnsupportedManager.notify(UNSUPPORTED_FEATURES.font);
+
+        try {
+          // error, but it's still nice to have font type reported
+          var descriptor = preEvaluatedFont.descriptor;
+          var fontFile3 = descriptor && descriptor.get('FontFile3');
+          var subtype = fontFile3 && fontFile3.get('Subtype');
+          var fontType = getFontType(preEvaluatedFont.type,
+                                     subtype && subtype.name);
+          var xrefFontStats = xref.stats.fontTypes;
+          xrefFontStats[fontType] = true;
+        } catch (ex) { }
+
         fontCapability.resolve(new TranslatedFont(font.loadedName,
           new ErrorFont(reason instanceof Error ? reason.message : reason),
           font));
@@ -1542,6 +1560,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
         dict: dict,
         baseDict: baseDict,
         composite: composite,
+        type: type.name,
         hash: hash ? hash.hexdigest() : ''
       };
     },
@@ -1552,16 +1571,16 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
       var dict = preEvaluatedFont.dict;
       var composite = preEvaluatedFont.composite;
       var descriptor = preEvaluatedFont.descriptor;
-      var type = dict.get('Subtype');
+      var type = preEvaluatedFont.type;
       var maxCharIndex = (composite ? 0xFFFF : 0xFF);
       var properties;
 
       if (!descriptor) {
-        if (type.name === 'Type3') {
+        if (type === 'Type3') {
           // FontDescriptor is only required for Type3 fonts when the document
           // is a tagged pdf. Create a barbebones one to get by.
           descriptor = new Dict(null);
-          descriptor.set('FontName', Name.get(type.name));
+          descriptor.set('FontName', Name.get(type));
         } else {
           // Before PDF 1.5 if the font was one of the base 14 fonts, having a
           // FontDescriptor was not required.
@@ -1584,7 +1603,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
                                              FontFlags.Nonsymbolic);
 
           properties = {
-            type: type.name,
+            type: type,
             name: baseFontName,
             widths: metrics.widths,
             defaultWidth: metrics.defaultWidth,
@@ -1617,7 +1636,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
         baseFont = Name.get(baseFont);
       }
 
-      if (type.name !== 'Type3') {
+      if (type !== 'Type3') {
         var fontNameStr = fontName && fontName.name;
         var baseFontStr = baseFont && baseFont.name;
         if (fontNameStr !== baseFontStr) {
@@ -1649,7 +1668,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
       }
 
       properties = {
-        type: type.name,
+        type: type,
         name: fontName.name,
         subtype: subtype,
         file: fontFile,
@@ -1684,7 +1703,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
       this.extractDataStructures(dict, baseDict, xref, properties);
       this.extractWidths(dict, xref, descriptor, properties);
 
-      if (type.name === 'Type3') {
+      if (type === 'Type3') {
         properties.isType3Font = true;
       }
 
diff --git a/src/core/fonts.js b/src/core/fonts.js
index 1e18e23..9f38c85 100644
--- a/src/core/fonts.js
+++ b/src/core/fonts.js
@@ -19,7 +19,7 @@
            isNum, ISOAdobeCharset, Stream, stringToArray, stringToBytes,
            string32, TextDecoder, warn, Lexer, Util, FONT_IDENTITY_MATRIX,
            FontRendererFactory, shadow, isString, IdentityCMap, Name,
-           CMapFactory, PDFJS, readUint32 */
+           CMapFactory, PDFJS, readUint32, FontType */
 
 'use strict';
 
@@ -2117,6 +2117,28 @@ function adjustWidths(properties) {
   properties.defaultWidth *= scale;
 }
 
+function getFontType(type, subtype) {
+  switch (type) {
+    case 'Type1':
+      return subtype === 'Type1C' ? FontType.TYPE1C : FontType.TYPE1;
+    case 'CIDFontType0':
+      return subtype === 'CIDFontType0C' ? FontType.CIDFONTTYPE0C :
+        FontType.CIDFONTTYPE0;
+    case 'OpenType':
+      return FontType.OPENTYPE;
+    case 'TrueType':
+      return FontType.TRUETYPE;
+    case 'CIDFontType2':
+      return FontType.CIDFONTTYPE2;
+    case 'MMType1':
+      return FontType.MMTYPE1;
+    case 'Type0':
+      return FontType.TYPE0;
+    default:
+      return FontType.UNKNOWN;
+  }
+}
+
 var Glyph = (function GlyphClosure() {
   function Glyph(fontChar, unicode, accent, width, vmetric, operatorListId) {
     this.fontChar = fontChar;
@@ -2167,6 +2189,7 @@ var Font = (function FontClosure() {
     this.isMonospace = !!(properties.flags & FontFlags.FixedPitch);
 
     var type = properties.type;
+    var subtype = properties.subtype;
     this.type = type;
 
     this.fallbackName = (this.isMonospace ? 'monospace' :
@@ -2193,6 +2216,7 @@ var Font = (function FontClosure() {
         this.toFontChar[charCode] = (this.differences[charCode] ||
                                      properties.defaultEncoding[charCode]);
       }
+      this.fontType = FontType.TYPE3;
       return;
     }
 
@@ -2260,11 +2284,11 @@ var Font = (function FontClosure() {
       }
       this.loadedName = fontName.split('-')[0];
       this.loading = false;
+      this.fontType = getFontType(type, subtype);
       return;
     }
 
     // Some fonts might use wrong font types for Type1C or CIDFontType0C
-    var subtype = properties.subtype;
     if (subtype == 'Type1C' && (type != 'Type1' && type != 'MMType1')) {
       // Some TrueType fonts by mistake claim Type1C
       if (isTrueTypeFile(file)) {
@@ -2288,7 +2312,7 @@ var Font = (function FontClosure() {
       case 'CIDFontType0':
         this.mimetype = 'font/opentype';
 
-        var cff = (subtype == 'Type1C' || subtype == 'CIDFontType0C') ?
+        var cff = (subtype === 'Type1C' || subtype === 'CIDFontType0C') ?
           new CFFFont(file, properties) : new Type1Font(name, file, properties);
 
         adjustWidths(properties);
@@ -2305,6 +2329,9 @@ var Font = (function FontClosure() {
         // Repair the TrueType file. It is can be damaged in the point of
         // view of the sanitizer
         data = this.checkAndRepair(name, file, properties);
+        if (this.isOpenType) {
+          type = 'OpenType';
+        }
         break;
 
       default:
@@ -2313,6 +2340,7 @@ var Font = (function FontClosure() {
     }
 
     this.data = data;
+    this.fontType = getFontType(type, subtype);
 
     // Transfer some properties again that could change during font conversion
     this.fontMatrix = properties.fontMatrix;
@@ -3752,10 +3780,12 @@ var Font = (function FontClosure() {
         delete tables.fpgm;
         delete tables.prep;
         delete tables['cvt '];
+        this.isOpenType = true;
       } else {
         if (!tables.glyf || !tables.loca) {
           error('Required "glyf" or "loca" tables are not found');
         }
+        this.isOpenType = false;
       }
 
       if (!tables.maxp) {
diff --git a/src/core/obj.js b/src/core/obj.js
index 858e006..af13e5a 100644
--- a/src/core/obj.js
+++ b/src/core/obj.js
@@ -692,6 +692,10 @@ var XRef = (function XRefClosure() {
     // prepare the XRef cache
     this.cache = [];
     this.password = password;
+    this.stats = {
+      streamTypes: [],
+      fontTypes: []
+    };
   }
 
   XRef.prototype = {
@@ -1040,7 +1044,7 @@ var XRef = (function XRefClosure() {
       var dict;
       for (i = 0, ii = trailers.length; i < ii; ++i) {
         stream.pos = trailers[i];
-        var parser = new Parser(new Lexer(stream), true, null);
+        var parser = new Parser(new Lexer(stream), true, this);
         var obj = parser.getObj();
         if (!isCmd(obj, 'trailer')) {
           continue;
@@ -1072,7 +1076,7 @@ var XRef = (function XRefClosure() {
 
           stream.pos = startXRef + stream.start;
 
-          var parser = new Parser(new Lexer(stream), true, null);
+          var parser = new Parser(new Lexer(stream), true, this);
           var obj = parser.getObj();
           var dict;
 
diff --git a/src/core/parser.js b/src/core/parser.js
index e1c1961..22d8c27 100644
--- a/src/core/parser.js
+++ b/src/core/parser.js
@@ -17,7 +17,8 @@
 /* globals Ascii85Stream, AsciiHexStream, CCITTFaxStream, Cmd, Dict, error,
            FlateStream, isArray, isCmd, isDict, isInt, isName, isNum, isRef,
            isString, Jbig2Stream, JpegStream, JpxStream, LZWStream, Name,
-           NullStream, PredictorStream, Ref, RunLengthStream, warn, info */
+           NullStream, PredictorStream, Ref, RunLengthStream, warn, info,
+           StreamType */
 
 'use strict';
 
@@ -343,7 +344,9 @@ var Parser = (function ParserClosure() {
       if (stream.dict.get('Length') === 0) {
         return new NullStream(stream);
       }
+      var xrefStreamStats = this.xref.stats.streamTypes;
       if (name == 'FlateDecode' || name == 'Fl') {
+        xrefStreamStats[StreamType.FLATE] = true;
         if (params) {
           return new PredictorStream(new FlateStream(stream, maybeLength),
                                      maybeLength, params);
@@ -351,6 +354,7 @@ var Parser = (function ParserClosure() {
         return new FlateStream(stream, maybeLength);
       }
       if (name == 'LZWDecode' || name == 'LZW') {
+        xrefStreamStats[StreamType.LZW] = true;
         var earlyChange = 1;
         if (params) {
           if (params.has('EarlyChange')) {
@@ -363,24 +367,31 @@ var Parser = (function ParserClosure() {
         return new LZWStream(stream, maybeLength, earlyChange);
       }
       if (name == 'DCTDecode' || name == 'DCT') {
+        xrefStreamStats[StreamType.DCT] = true;
         return new JpegStream(stream, maybeLength, stream.dict, this.xref);
       }
       if (name == 'JPXDecode' || name == 'JPX') {
+        xrefStreamStats[StreamType.JPX] = true;
         return new JpxStream(stream, maybeLength, stream.dict);
       }
       if (name == 'ASCII85Decode' || name == 'A85') {
+        xrefStreamStats[StreamType.A85] = true;
         return new Ascii85Stream(stream, maybeLength);
       }
       if (name == 'ASCIIHexDecode' || name == 'AHx') {
+        xrefStreamStats[StreamType.AHX] = true;
         return new AsciiHexStream(stream, maybeLength);
       }
       if (name == 'CCITTFaxDecode' || name == 'CCF') {
+        xrefStreamStats[StreamType.CCF] = true;
         return new CCITTFaxStream(stream, maybeLength, params);
       }
       if (name == 'RunLengthDecode' || name == 'RL') {
+        xrefStreamStats[StreamType.RL] = true;
         return new RunLengthStream(stream, maybeLength);
       }
       if (name == 'JBIG2Decode') {
+        xrefStreamStats[StreamType.JBIG] = true;
         return new Jbig2Stream(stream, maybeLength, stream.dict);
       }
       warn('filter "' + name + '" not supported yet');
diff --git a/src/core/worker.js b/src/core/worker.js
index 6d9fba2..d8cdf87 100644
--- a/src/core/worker.js
+++ b/src/core/worker.js
@@ -319,6 +319,12 @@ var WorkerMessageHandler = PDFJS.WorkerMessageHandler = {
       });
     });
 
+    handler.on('GetStats',
+      function wphSetupGetStats(data) {
+        return pdfManager.pdfDocument.xref.stats;
+      }
+    );
+
     handler.on('UpdatePassword', function wphSetupUpdatePassword(data) {
       pdfManager.updatePassword(data);
     });
diff --git a/src/display/api.js b/src/display/api.js
index b5e9039..e1ece26 100644
--- a/src/display/api.js
+++ b/src/display/api.js
@@ -165,6 +165,14 @@ PDFJS.maxCanvasPixels = (PDFJS.maxCanvasPixels === undefined ?
  */
 
 /**
+ * @typedef {Object} PDFDocumentStats
+ * @property {Array} streamTypes - Used stream types in the document (an item
+ *   is set to true if specific stream ID was used in the document).
+ * @property {Array} fontTypes - Used font types in the document (an item is set
+ *   to true if specific font ID was used in the document).
+ */
+
+/**
  * This is the main entry point for loading a PDF and interacting with it.
  * NOTE: If a URL is used to fetch the PDF data a standard XMLHttpRequest(XHR)
  * is used, which means it must follow the same origin rules that any XHR does
@@ -332,6 +340,13 @@ var PDFDocumentProxy = (function PDFDocumentProxyClosure() {
       return this.transport.downloadInfoCapability.promise;
     },
     /**
+     * @returns {Promise} A promise that is resolved with current stats about
+     * document structures (see {@link PDFDocumentStats}).
+     */
+    getStats: function PDFDocumentProxy_getStats() {
+      return this.transport.getStats();
+    },
+    /**
      * Cleans up resources allocated by the document, e.g. created @font-face.
      */
     cleanup: function PDFDocumentProxy_cleanup() {
@@ -1056,6 +1071,10 @@ var WorkerTransport = (function WorkerTransportClosure() {
       });
     },
 
+    getStats: function WorkerTransport_getStats() {
+      return this.messageHandler.sendWithPromise('GetStats', null);
+    },
+
     startCleanup: function WorkerTransport_startCleanup() {
       this.messageHandler.sendWithPromise('Cleanup', null).
         then(function endCleanup() {
diff --git a/src/shared/util.js b/src/shared/util.js
index ae4d85d..3f1facd 100644
--- a/src/shared/util.js
+++ b/src/shared/util.js
@@ -44,6 +44,33 @@ var ImageKind = {
   RGBA_32BPP: 3
 };
 
+var StreamType = {
+  UNKNOWN: 0,
+  FLATE: 1,
+  LZW: 2,
+  DCT: 3,
+  JPX: 4,
+  JBIG: 5,
+  A85: 6,
+  AHX: 7,
+  CCF: 8,
+  RL: 9
+};
+
+var FontType = {
+  UNKNOWN: 0,
+  TYPE1: 1,
+  TYPE1C: 2,
+  CIDFONTTYPE0: 3,
+  CIDFONTTYPE0C: 4,
+  TRUETYPE: 5,
+  CIDFONTTYPE2: 6,
+  TYPE3: 7,
+  OPENTYPE: 8,
+  TYPE0: 9,
+  MMTYPE1: 10
+};
+
 // The global PDFJS object exposes the API
 // In production, it will be declared outside a global wrapper
 // In development, it will be declared here
diff --git a/web/page_view.js b/web/page_view.js
index 856b158..636e477 100644
--- a/web/page_view.js
+++ b/web/page_view.js
@@ -630,7 +630,13 @@ var PageView = function pageView(container, id, scale,
 //    FirefoxCom.request('reportTelemetry', JSON.stringify({
 //      type: 'pageInfo'
 //    }));
-//    // TODO add stream types report here
+//    // It is a good time to report stream and font types
+//    PDFView.pdfDocument.getStats().then(function (stats) {
+//      FirefoxCom.request('reportTelemetry', JSON.stringify({
+//        type: 'documentStats',
+//        stats: stats
+//      }));
+//    });
 //#endif
       callback();
     }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/pdf.js.git



More information about the Pkg-javascript-commits mailing list