[Pkg-javascript-commits] [pdf.js] 98/207: Telemetry for used stream and font types
David Prévot
taffit at moszumanska.debian.org
Mon Jul 28 15:36:36 UTC 2014
This is an automated email from the git hooks/post-receive script.
taffit pushed a commit to branch master
in repository pdf.js.
commit 0cd28ebfa35d334d25523ca0994c3cf1b984944f
Author: Yury Delendik <ydelendik at mozilla.com>
Date: Mon Jun 16 09:52:04 2014 -0500
Telemetry for used stream and font types
---
.../firefox/content/PdfJsTelemetry-addon.jsm | 7 ++++-
extensions/firefox/content/PdfJsTelemetry.jsm | 4 +++
extensions/firefox/content/PdfStreamConverter.jsm | 35 ++++++++++++++++-----
src/core/evaluator.js | 35 ++++++++++++++++-----
src/core/fonts.js | 36 ++++++++++++++++++++--
src/core/obj.js | 8 +++--
src/core/parser.js | 13 +++++++-
src/core/worker.js | 6 ++++
src/display/api.js | 19 ++++++++++++
src/shared/util.js | 27 ++++++++++++++++
web/page_view.js | 8 ++++-
11 files changed, 174 insertions(+), 24 deletions(-)
diff --git a/extensions/firefox/content/PdfJsTelemetry-addon.jsm b/extensions/firefox/content/PdfJsTelemetry-addon.jsm
index c29d5ab..e22903d 100644
--- a/extensions/firefox/content/PdfJsTelemetry-addon.jsm
+++ b/extensions/firefox/content/PdfJsTelemetry-addon.jsm
@@ -31,9 +31,10 @@ Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_FALLBACK_SHOWN", 1, 2, 3,
Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_DOCUMENT_VERSION", 1, 10, 11, Telemetry.HISTOGRAM_LINEAR);
Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_DOCUMENT_GENERATOR", 1, 25, 26, Telemetry.HISTOGRAM_LINEAR);
Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_DOCUMENT_SIZE_KB", 2, 64 * 1024, 20, Telemetry.HISTOGRAM_EXPONENTIAL);
+Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_FONT_TYPES", 1, 19, 20, Telemetry.HISTOGRAM_LINEAR);
Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_FORM", 1, 2, 3, Telemetry.HISTOGRAM_BOOLEAN);
Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_PRINT", 1, 2, 3, Telemetry.HISTOGRAM_BOOLEAN);
-Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_STREAM_TYPES", 1, 9, 10, Telemetry.HISTOGRAM_LINEAR);
+Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_STREAM_TYPES", 1, 19, 20, Telemetry.HISTOGRAM_LINEAR);
Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_TIME_TO_VIEW_MS", 1, 10000, 50, Telemetry.HISTOGRAM_EXPONENTIAL);
@@ -58,6 +59,10 @@ this.PdfJsTelemetry = {
let histogram = Telemetry.getAddonHistogram(ADDON_ID, "PDF_VIEWER_DOCUMENT_GENERATOR");
histogram.add(generatorId);
},
+ onFontType: function (fontTypeId) {
+ let histogram = Telemetry.getAddonHistogram(ADDON_ID, "PDF_VIEWER_FONT_TYPES");
+ histogram.add(fontTypeId);
+ },
onForm: function (isAcroform) {
let histogram = Telemetry.getAddonHistogram(ADDON_ID, "PDF_VIEWER_FORM");
histogram.add(isAcroform);
diff --git a/extensions/firefox/content/PdfJsTelemetry.jsm b/extensions/firefox/content/PdfJsTelemetry.jsm
index 5d1691d..dd5c0c0 100644
--- a/extensions/firefox/content/PdfJsTelemetry.jsm
+++ b/extensions/firefox/content/PdfJsTelemetry.jsm
@@ -44,6 +44,10 @@ this.PdfJsTelemetry = {
let histogram = Services.telemetry.getHistogramById("PDF_VIEWER_DOCUMENT_GENERATOR");
histogram.add(generatorId);
},
+ onFontType: function (fontTypeId) {
+ let histogram = Services.telemetry.getHistogramById("PDF_VIEWER_FONT_TYPES");
+ histogram.add(fontTypeId);
+ },
onForm: function (isAcroform) {
let histogram = Services.telemetry.getHistogramById("PDF_VIEWER_FORM");
histogram.add(isAcroform);
diff --git a/extensions/firefox/content/PdfStreamConverter.jsm b/extensions/firefox/content/PdfStreamConverter.jsm
index 7e7aed7..03b319e 100644
--- a/extensions/firefox/content/PdfStreamConverter.jsm
+++ b/extensions/firefox/content/PdfStreamConverter.jsm
@@ -247,6 +247,7 @@ function ChromeActions(domWindow, contentDispositionFilename) {
documentInfo: false,
firstPageInfo: false,
streamTypesUsed: [],
+ fontTypesUsed: [],
startAt: Date.now()
};
}
@@ -388,16 +389,34 @@ ChromeActions.prototype = {
this.telemetryState.firstPageInfo = true;
}
break;
- case 'streamInfo':
- if (!Array.isArray(probeInfo.streamTypes)) {
+ case 'documentStats':
+ // documentStats can be called several times for one document.
+ // If stream/font types are reported, try not to submit the same
+ // enumeration value multiple times.
+ var documentStats = probeInfo.stats;
+ if (!documentStats || typeof documentStats !== 'object') {
break;
}
- for (var i = 0; i < probeInfo.streamTypes.length; i++) {
- var streamTypeId = probeInfo.streamTypes[i] | 0;
- if (streamTypeId >= 0 && streamTypeId < 10 &&
- !this.telemetryState.streamTypesUsed[streamTypeId]) {
- PdfJsTelemetry.onStreamType(streamTypeId);
- this.telemetryState.streamTypesUsed[streamTypeId] = true;
+ var streamTypes = documentStats.streamTypes;
+ if (Array.isArray(streamTypes)) {
+ var STREAM_TYPE_ID_LIMIT = 20;
+ for (var i = 0; i < STREAM_TYPE_ID_LIMIT; i++) {
+ if (streamTypes[i] &&
+ !this.telemetryState.streamTypesUsed[i]) {
+ PdfJsTelemetry.onStreamType(i);
+ this.telemetryState.streamTypesUsed[i] = true;
+ }
+ }
+ }
+ var fontTypes = documentStats.fontTypes;
+ if (Array.isArray(fontTypes)) {
+ var FONT_TYPE_ID_LIMIT = 20;
+ for (var i = 0; i < FONT_TYPE_ID_LIMIT; i++) {
+ if (fontTypes[i] &&
+ !this.telemetryState.fontTypesUsed[i]) {
+ PdfJsTelemetry.onFontType(i);
+ this.telemetryState.fontTypesUsed[i] = true;
+ }
}
}
break;
diff --git a/src/core/evaluator.js b/src/core/evaluator.js
index e353280..0fb8df2 100644
--- a/src/core/evaluator.js
+++ b/src/core/evaluator.js
@@ -22,7 +22,8 @@
stdFontMap, symbolsFonts, getTilingPatternIR, warn, Util, Promise,
RefSetCache, isRef, TextRenderingMode, CMapFactory, OPS,
UNSUPPORTED_FEATURES, UnsupportedManager, NormalizedUnicodes,
- IDENTITY_MATRIX, reverseIfRtl, createPromiseCapability */
+ IDENTITY_MATRIX, reverseIfRtl, createPromiseCapability,
+ getFontType */
'use strict';
@@ -546,11 +547,28 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}
translatedPromise.then(function (translatedFont) {
+ if (translatedFont.fontType !== undefined) {
+ var xrefFontStats = xref.stats.fontTypes;
+ xrefFontStats[translatedFont.fontType] = true;
+ }
+
fontCapability.resolve(new TranslatedFont(font.loadedName,
translatedFont, font));
}, function (reason) {
// TODO fontCapability.reject?
UnsupportedManager.notify(UNSUPPORTED_FEATURES.font);
+
+ try {
+ // error, but it's still nice to have font type reported
+ var descriptor = preEvaluatedFont.descriptor;
+ var fontFile3 = descriptor && descriptor.get('FontFile3');
+ var subtype = fontFile3 && fontFile3.get('Subtype');
+ var fontType = getFontType(preEvaluatedFont.type,
+ subtype && subtype.name);
+ var xrefFontStats = xref.stats.fontTypes;
+ xrefFontStats[fontType] = true;
+ } catch (ex) { }
+
fontCapability.resolve(new TranslatedFont(font.loadedName,
new ErrorFont(reason instanceof Error ? reason.message : reason),
font));
@@ -1542,6 +1560,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
dict: dict,
baseDict: baseDict,
composite: composite,
+ type: type.name,
hash: hash ? hash.hexdigest() : ''
};
},
@@ -1552,16 +1571,16 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
var dict = preEvaluatedFont.dict;
var composite = preEvaluatedFont.composite;
var descriptor = preEvaluatedFont.descriptor;
- var type = dict.get('Subtype');
+ var type = preEvaluatedFont.type;
var maxCharIndex = (composite ? 0xFFFF : 0xFF);
var properties;
if (!descriptor) {
- if (type.name === 'Type3') {
+ if (type === 'Type3') {
// FontDescriptor is only required for Type3 fonts when the document
// is a tagged pdf. Create a barbebones one to get by.
descriptor = new Dict(null);
- descriptor.set('FontName', Name.get(type.name));
+ descriptor.set('FontName', Name.get(type));
} else {
// Before PDF 1.5 if the font was one of the base 14 fonts, having a
// FontDescriptor was not required.
@@ -1584,7 +1603,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
FontFlags.Nonsymbolic);
properties = {
- type: type.name,
+ type: type,
name: baseFontName,
widths: metrics.widths,
defaultWidth: metrics.defaultWidth,
@@ -1617,7 +1636,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
baseFont = Name.get(baseFont);
}
- if (type.name !== 'Type3') {
+ if (type !== 'Type3') {
var fontNameStr = fontName && fontName.name;
var baseFontStr = baseFont && baseFont.name;
if (fontNameStr !== baseFontStr) {
@@ -1649,7 +1668,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}
properties = {
- type: type.name,
+ type: type,
name: fontName.name,
subtype: subtype,
file: fontFile,
@@ -1684,7 +1703,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
this.extractDataStructures(dict, baseDict, xref, properties);
this.extractWidths(dict, xref, descriptor, properties);
- if (type.name === 'Type3') {
+ if (type === 'Type3') {
properties.isType3Font = true;
}
diff --git a/src/core/fonts.js b/src/core/fonts.js
index 1e18e23..9f38c85 100644
--- a/src/core/fonts.js
+++ b/src/core/fonts.js
@@ -19,7 +19,7 @@
isNum, ISOAdobeCharset, Stream, stringToArray, stringToBytes,
string32, TextDecoder, warn, Lexer, Util, FONT_IDENTITY_MATRIX,
FontRendererFactory, shadow, isString, IdentityCMap, Name,
- CMapFactory, PDFJS, readUint32 */
+ CMapFactory, PDFJS, readUint32, FontType */
'use strict';
@@ -2117,6 +2117,28 @@ function adjustWidths(properties) {
properties.defaultWidth *= scale;
}
+function getFontType(type, subtype) {
+ switch (type) {
+ case 'Type1':
+ return subtype === 'Type1C' ? FontType.TYPE1C : FontType.TYPE1;
+ case 'CIDFontType0':
+ return subtype === 'CIDFontType0C' ? FontType.CIDFONTTYPE0C :
+ FontType.CIDFONTTYPE0;
+ case 'OpenType':
+ return FontType.OPENTYPE;
+ case 'TrueType':
+ return FontType.TRUETYPE;
+ case 'CIDFontType2':
+ return FontType.CIDFONTTYPE2;
+ case 'MMType1':
+ return FontType.MMTYPE1;
+ case 'Type0':
+ return FontType.TYPE0;
+ default:
+ return FontType.UNKNOWN;
+ }
+}
+
var Glyph = (function GlyphClosure() {
function Glyph(fontChar, unicode, accent, width, vmetric, operatorListId) {
this.fontChar = fontChar;
@@ -2167,6 +2189,7 @@ var Font = (function FontClosure() {
this.isMonospace = !!(properties.flags & FontFlags.FixedPitch);
var type = properties.type;
+ var subtype = properties.subtype;
this.type = type;
this.fallbackName = (this.isMonospace ? 'monospace' :
@@ -2193,6 +2216,7 @@ var Font = (function FontClosure() {
this.toFontChar[charCode] = (this.differences[charCode] ||
properties.defaultEncoding[charCode]);
}
+ this.fontType = FontType.TYPE3;
return;
}
@@ -2260,11 +2284,11 @@ var Font = (function FontClosure() {
}
this.loadedName = fontName.split('-')[0];
this.loading = false;
+ this.fontType = getFontType(type, subtype);
return;
}
// Some fonts might use wrong font types for Type1C or CIDFontType0C
- var subtype = properties.subtype;
if (subtype == 'Type1C' && (type != 'Type1' && type != 'MMType1')) {
// Some TrueType fonts by mistake claim Type1C
if (isTrueTypeFile(file)) {
@@ -2288,7 +2312,7 @@ var Font = (function FontClosure() {
case 'CIDFontType0':
this.mimetype = 'font/opentype';
- var cff = (subtype == 'Type1C' || subtype == 'CIDFontType0C') ?
+ var cff = (subtype === 'Type1C' || subtype === 'CIDFontType0C') ?
new CFFFont(file, properties) : new Type1Font(name, file, properties);
adjustWidths(properties);
@@ -2305,6 +2329,9 @@ var Font = (function FontClosure() {
// Repair the TrueType file. It is can be damaged in the point of
// view of the sanitizer
data = this.checkAndRepair(name, file, properties);
+ if (this.isOpenType) {
+ type = 'OpenType';
+ }
break;
default:
@@ -2313,6 +2340,7 @@ var Font = (function FontClosure() {
}
this.data = data;
+ this.fontType = getFontType(type, subtype);
// Transfer some properties again that could change during font conversion
this.fontMatrix = properties.fontMatrix;
@@ -3752,10 +3780,12 @@ var Font = (function FontClosure() {
delete tables.fpgm;
delete tables.prep;
delete tables['cvt '];
+ this.isOpenType = true;
} else {
if (!tables.glyf || !tables.loca) {
error('Required "glyf" or "loca" tables are not found');
}
+ this.isOpenType = false;
}
if (!tables.maxp) {
diff --git a/src/core/obj.js b/src/core/obj.js
index 858e006..af13e5a 100644
--- a/src/core/obj.js
+++ b/src/core/obj.js
@@ -692,6 +692,10 @@ var XRef = (function XRefClosure() {
// prepare the XRef cache
this.cache = [];
this.password = password;
+ this.stats = {
+ streamTypes: [],
+ fontTypes: []
+ };
}
XRef.prototype = {
@@ -1040,7 +1044,7 @@ var XRef = (function XRefClosure() {
var dict;
for (i = 0, ii = trailers.length; i < ii; ++i) {
stream.pos = trailers[i];
- var parser = new Parser(new Lexer(stream), true, null);
+ var parser = new Parser(new Lexer(stream), true, this);
var obj = parser.getObj();
if (!isCmd(obj, 'trailer')) {
continue;
@@ -1072,7 +1076,7 @@ var XRef = (function XRefClosure() {
stream.pos = startXRef + stream.start;
- var parser = new Parser(new Lexer(stream), true, null);
+ var parser = new Parser(new Lexer(stream), true, this);
var obj = parser.getObj();
var dict;
diff --git a/src/core/parser.js b/src/core/parser.js
index e1c1961..22d8c27 100644
--- a/src/core/parser.js
+++ b/src/core/parser.js
@@ -17,7 +17,8 @@
/* globals Ascii85Stream, AsciiHexStream, CCITTFaxStream, Cmd, Dict, error,
FlateStream, isArray, isCmd, isDict, isInt, isName, isNum, isRef,
isString, Jbig2Stream, JpegStream, JpxStream, LZWStream, Name,
- NullStream, PredictorStream, Ref, RunLengthStream, warn, info */
+ NullStream, PredictorStream, Ref, RunLengthStream, warn, info,
+ StreamType */
'use strict';
@@ -343,7 +344,9 @@ var Parser = (function ParserClosure() {
if (stream.dict.get('Length') === 0) {
return new NullStream(stream);
}
+ var xrefStreamStats = this.xref.stats.streamTypes;
if (name == 'FlateDecode' || name == 'Fl') {
+ xrefStreamStats[StreamType.FLATE] = true;
if (params) {
return new PredictorStream(new FlateStream(stream, maybeLength),
maybeLength, params);
@@ -351,6 +354,7 @@ var Parser = (function ParserClosure() {
return new FlateStream(stream, maybeLength);
}
if (name == 'LZWDecode' || name == 'LZW') {
+ xrefStreamStats[StreamType.LZW] = true;
var earlyChange = 1;
if (params) {
if (params.has('EarlyChange')) {
@@ -363,24 +367,31 @@ var Parser = (function ParserClosure() {
return new LZWStream(stream, maybeLength, earlyChange);
}
if (name == 'DCTDecode' || name == 'DCT') {
+ xrefStreamStats[StreamType.DCT] = true;
return new JpegStream(stream, maybeLength, stream.dict, this.xref);
}
if (name == 'JPXDecode' || name == 'JPX') {
+ xrefStreamStats[StreamType.JPX] = true;
return new JpxStream(stream, maybeLength, stream.dict);
}
if (name == 'ASCII85Decode' || name == 'A85') {
+ xrefStreamStats[StreamType.A85] = true;
return new Ascii85Stream(stream, maybeLength);
}
if (name == 'ASCIIHexDecode' || name == 'AHx') {
+ xrefStreamStats[StreamType.AHX] = true;
return new AsciiHexStream(stream, maybeLength);
}
if (name == 'CCITTFaxDecode' || name == 'CCF') {
+ xrefStreamStats[StreamType.CCF] = true;
return new CCITTFaxStream(stream, maybeLength, params);
}
if (name == 'RunLengthDecode' || name == 'RL') {
+ xrefStreamStats[StreamType.RL] = true;
return new RunLengthStream(stream, maybeLength);
}
if (name == 'JBIG2Decode') {
+ xrefStreamStats[StreamType.JBIG] = true;
return new Jbig2Stream(stream, maybeLength, stream.dict);
}
warn('filter "' + name + '" not supported yet');
diff --git a/src/core/worker.js b/src/core/worker.js
index 6d9fba2..d8cdf87 100644
--- a/src/core/worker.js
+++ b/src/core/worker.js
@@ -319,6 +319,12 @@ var WorkerMessageHandler = PDFJS.WorkerMessageHandler = {
});
});
+ handler.on('GetStats',
+ function wphSetupGetStats(data) {
+ return pdfManager.pdfDocument.xref.stats;
+ }
+ );
+
handler.on('UpdatePassword', function wphSetupUpdatePassword(data) {
pdfManager.updatePassword(data);
});
diff --git a/src/display/api.js b/src/display/api.js
index b5e9039..e1ece26 100644
--- a/src/display/api.js
+++ b/src/display/api.js
@@ -165,6 +165,14 @@ PDFJS.maxCanvasPixels = (PDFJS.maxCanvasPixels === undefined ?
*/
/**
+ * @typedef {Object} PDFDocumentStats
+ * @property {Array} streamTypes - Used stream types in the document (an item
+ * is set to true if specific stream ID was used in the document).
+ * @property {Array} fontTypes - Used font types in the document (an item is
+ * set to true if specific font ID was used in the document).
+ */
+
+/**
* This is the main entry point for loading a PDF and interacting with it.
* NOTE: If a URL is used to fetch the PDF data a standard XMLHttpRequest(XHR)
* is used, which means it must follow the same origin rules that any XHR does
@@ -332,6 +340,13 @@ var PDFDocumentProxy = (function PDFDocumentProxyClosure() {
return this.transport.downloadInfoCapability.promise;
},
/**
+ * @returns {Promise} A promise that is resolved with current stats about
+ * document structures (see {@link PDFDocumentStats}).
+ */
+ getStats: function PDFDocumentProxy_getStats() {
+ return this.transport.getStats();
+ },
+ /**
* Cleans up resources allocated by the document, e.g. created @font-face.
*/
cleanup: function PDFDocumentProxy_cleanup() {
@@ -1056,6 +1071,10 @@ var WorkerTransport = (function WorkerTransportClosure() {
});
},
+ getStats: function WorkerTransport_getStats() {
+ return this.messageHandler.sendWithPromise('GetStats', null);
+ },
+
startCleanup: function WorkerTransport_startCleanup() {
this.messageHandler.sendWithPromise('Cleanup', null).
then(function endCleanup() {
diff --git a/src/shared/util.js b/src/shared/util.js
index ae4d85d..3f1facd 100644
--- a/src/shared/util.js
+++ b/src/shared/util.js
@@ -44,6 +44,33 @@ var ImageKind = {
RGBA_32BPP: 3
};
+var StreamType = {
+ UNKNOWN: 0,
+ FLATE: 1,
+ LZW: 2,
+ DCT: 3,
+ JPX: 4,
+ JBIG: 5,
+ A85: 6,
+ AHX: 7,
+ CCF: 8,
+ RL: 9
+};
+
+var FontType = {
+ UNKNOWN: 0,
+ TYPE1: 1,
+ TYPE1C: 2,
+ CIDFONTTYPE0: 3,
+ CIDFONTTYPE0C: 4,
+ TRUETYPE: 5,
+ CIDFONTTYPE2: 6,
+ TYPE3: 7,
+ OPENTYPE: 8,
+ TYPE0: 9,
+ MMTYPE1: 10
+};
+
// The global PDFJS object exposes the API
// In production, it will be declared outside a global wrapper
// In development, it will be declared here
diff --git a/web/page_view.js b/web/page_view.js
index 856b158..636e477 100644
--- a/web/page_view.js
+++ b/web/page_view.js
@@ -630,7 +630,13 @@ var PageView = function pageView(container, id, scale,
// FirefoxCom.request('reportTelemetry', JSON.stringify({
// type: 'pageInfo'
// }));
-// // TODO add stream types report here
+// // It is a good time to report stream and font types
+// PDFView.pdfDocument.getStats().then(function (stats) {
+// FirefoxCom.request('reportTelemetry', JSON.stringify({
+// type: 'documentStats',
+// stats: stats
+// }));
+// });
//#endif
callback();
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/pdf.js.git
More information about the Pkg-javascript-commits
mailing list