[Pkg-javascript-commits] [pdf.js] 140/207: Implement text normalization for page content and queries
David Prévot
taffit at moszumanska.debian.org
Mon Jul 28 15:36:40 UTC 2014
This is an automated email from the git hooks/post-receive script.
taffit pushed a commit to branch master
in repository pdf.js.
commit fbfb9458d65695f3ccfa33be247b14d8e91f933c
Author: Tim van der Meij <timvandermeij at gmail.com>
Date: Fri Jun 20 21:49:16 2014 +0200
Implement text normalization for page content and queries
---
web/pdf_find_controller.js | 27 +++++++++++++++++++++++++--
1 file changed, 25 insertions(+), 2 deletions(-)
diff --git a/web/pdf_find_controller.js b/web/pdf_find_controller.js
index a4c472b..cabf645 100644
--- a/web/pdf_find_controller.js
+++ b/web/pdf_find_controller.js
@@ -43,6 +43,19 @@ var PDFFindController = {
findTimeout: null,
pdfPageSource: null,
integratedFind: false,
+ charactersToNormalize: {
+ '\u2018': '\'', // Left single quotation mark
+ '\u2019': '\'', // Right single quotation mark
+ '\u201A': '\'', // Single low-9 quotation mark
+ '\u201B': '\'', // Single high-reversed-9 quotation mark
+ '\u201C': '"', // Left double quotation mark
+ '\u201D': '"', // Right double quotation mark
+ '\u201E': '"', // Double low-9 quotation mark
+ '\u201F': '"', // Double high-reversed-9 quotation mark
+ '\u00BC': '1/4', // Vulgar fraction one quarter
+ '\u00BD': '1/2', // Vulgar fraction one half
+ '\u00BE': '3/4' // Vulgar fraction three quarters
+ },
initialize: function(options) {
if (typeof PDFFindBar === 'undefined' || PDFFindBar === null) {
@@ -53,6 +66,10 @@ var PDFFindController = {
this.pdfPageSource = options.pdfPageSource;
this.integratedFind = options.integratedFind;
+ // Compile the regular expression for text normalization once
+ var replace = Object.keys(this.charactersToNormalize).join('');
+ this.normalizationRegex = new RegExp('[' + replace + ']', 'g');
+
var events = [
'find',
'findagain',
@@ -76,9 +93,15 @@ var PDFFindController = {
this.active = false;
},
+ normalize: function pdfFindControllerNormalize(text) {
+ return text.replace(this.normalizationRegex, function (ch) {
+ return PDFFindController.charactersToNormalize[ch];
+ });
+ },
+
calcFindMatch: function(pageIndex) {
- var pageContent = this.pageContents[pageIndex];
- var query = this.state.query;
+ var pageContent = this.normalize(this.pageContents[pageIndex]);
+ var query = this.normalize(this.state.query);
var caseSensitive = this.state.caseSensitive;
var queryLen = query.length;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/pdf.js.git
More information about the Pkg-javascript-commits mailing list