[Pkg-javascript-commits] [node-regenerate] 01/09: Imported Upstream version 1.3.0

Julien Puydt julien.puydt at laposte.net
Fri May 27 18:54:12 UTC 2016


This is an automated email from the git hooks/post-receive script.

jpuydt-guest pushed a commit to branch master
in repository node-regenerate.

commit 480a69bcddf271150a9c5968b708749c7fdd238d
Author: Julien Puydt <julien.puydt at laposte.net>
Date:   Fri May 27 20:27:23 2016 +0200

    Imported Upstream version 1.3.0
---
 .travis.yml    |  8 ++++++--
 Gruntfile.js   |  5 +----
 README.md      | 16 ++++++++++++++-
 bower.json     |  2 +-
 component.json |  2 +-
 package.json   |  2 +-
 regenerate.js  | 61 +++++++++++++++++++++++++++++++++++++++++++++++++---------
 tests/tests.js | 28 +++++++++++++++++++++++++++
 8 files changed, 105 insertions(+), 19 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 74f02a3..b8bcdab 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,13 +1,17 @@
 language: node_js
 node_js:
   - "0.10"
+  - "0.12"
+  - "4"
+  - "5"
+  - "6"
 before_script:
   - "npm install -g grunt-cli"
   # Narwhal uses a hardcoded path to openjdk v6, so use that version
   - "sudo apt-get update -qq"
   - "sudo apt-get install -qq openjdk-6-jre"
-  - "PACKAGE=rhino1_7R3; wget http://ftp.mozilla.org/pub/mozilla.org/js/$PACKAGE.zip && sudo unzip $PACKAGE -d /opt/ && rm $PACKAGE.zip"
-  - "PACKAGE=rhino1_7R3; echo -e '#!/bin/sh\\njava -jar /opt/'$PACKAGE'/js.jar $@' | sudo tee /usr/local/bin/rhino && sudo chmod +x /usr/local/bin/rhino"
+  - "PACKAGE=rhino1_7R5; wget https://github.com/mozilla/rhino/releases/download/Rhino1_7R5_RELEASE/$PACKAGE.zip && sudo unzip $PACKAGE -d /opt/ && rm $PACKAGE.zip"
+  - "PACKAGE=rhino1_7R5; echo -e '#!/bin/sh\\njava -jar /opt/'$PACKAGE'/js.jar $@' | sudo tee /usr/local/bin/rhino && sudo chmod +x /usr/local/bin/rhino"
   - "PACKAGE=ringojs-0.11; wget https://github.com/ringo/ringojs/releases/download/v0.11.0/$PACKAGE.zip && sudo unzip $PACKAGE -d /opt/ && rm $PACKAGE.zip"
   - "PACKAGE=ringojs-0.11; sudo ln -s /opt/$PACKAGE/bin/ringo /usr/local/bin/ringo && sudo chmod +x /usr/local/bin/ringo"
   - "PACKAGE=v0.3.2; wget https://github.com/280north/narwhal/archive/$PACKAGE.zip && sudo unzip $PACKAGE -d /opt/ && rm $PACKAGE.zip"
diff --git a/Gruntfile.js b/Gruntfile.js
index 9baae57..17af026 100644
--- a/Gruntfile.js
+++ b/Gruntfile.js
@@ -11,7 +11,7 @@ module.exports = function(grunt) {
 				'command': 'istanbul cover --report "html" --verbose --dir "coverage" "tests/tests.js"'
 			},
 			'cover-coveralls': {
-				'command': 'istanbul cover --verbose --dir "coverage" "tests/tests.js" && cat coverage/lcov.info | coveralls; rm -rf coverage/lcov*'
+				'command': 'istanbul cover --verbose --dir "coverage" "tests/tests.js" && coveralls < coverage/lcov.info; rm -rf -- coverage/lcov*'
 			},
 			'test-narwhal': {
 				'command': 'echo "Testing in Narwhal..."; export NARWHAL_OPTIMIZATION=-1; narwhal "tests/tests.js"'
@@ -19,9 +19,6 @@ module.exports = function(grunt) {
 			'test-phantomjs': {
 				'command': 'echo "Testing in PhantomJS..."; phantomjs "tests/tests.js"'
 			},
-			// Rhino 1.7R4 has a bug that makes it impossible to test in.
-			// https://bugzilla.mozilla.org/show_bug.cgi?id=775566
-			// To test, use Rhino 1.7R3, or wait (heh) for the 1.7R5 release.
 			'test-rhino': {
 				'command': 'echo "Testing in Rhino..."; rhino -opt -1 "tests.js"',
 				'options': {
diff --git a/README.md b/README.md
index d4f3a3f..d194cab 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Regenerate [![Build status](https://travis-ci.org/mathiasbynens/regenerate.svg?branch=master)](https://travis-ci.org/mathiasbynens/regenerate) [![Code coverage status](http://img.shields.io/coveralls/mathiasbynens/regenerate/master.svg)](https://coveralls.io/r/mathiasbynens/regenerate) [![Dependency status](https://gemnasium.com/mathiasbynens/regenerate.svg)](https://gemnasium.com/mathiasbynens/regenerate)
+# Regenerate [![Build status](https://travis-ci.org/mathiasbynens/regenerate.svg?branch=master)](https://travis-ci.org/mathiasbynens/regenerate) [![Code coverage status](https://coveralls.io/repos/mathiasbynens/regenerate/badge.svg)](https://coveralls.io/r/mathiasbynens/regenerate) [![Dependency status](https://gemnasium.com/mathiasbynens/regenerate.svg)](https://gemnasium.com/mathiasbynens/regenerate)
 
 _Regenerate_ is a Unicode-aware regex generator for JavaScript. It allows you to easily generate JavaScript-compatible regular expressions based on a given set of Unicode symbols or code points. (This is trickier than you might think, because of [how JavaScript deals with astral symbols](https://mathiasbynens.be/notes/javascript-unicode).)
 
@@ -243,6 +243,20 @@ lowSurrogates.toString({ 'bmpOnly': true });
 // → '[\\uDC00-\\uDFFF]'
 ```
 
+Note that lone low surrogates cannot be matched accurately using regular expressions in JavaScript. Regenerate’s output takes a best-effort approach, but [there can be false negatives in this regard](https://github.com/mathiasbynens/regenerate/issues/28#issuecomment-72224808).
+
+If the `hasUnicodeFlag` property of the optional `options` object is set to `true`, the output makes use of Unicode code point escapes (`\u{…}`) where applicable. This simplifies the output at the cost of compatibility and portability, since it means the output can only be used as a pattern in a regular expression with [the ES6 `u` flag](https://mathiasbynens.be/notes/es6-unicode-regex) enabled.
+
+```js
+var set = regenerate().addRange(0x0, 0x10FFFF);
+
+set.toString();
+// → '[\\0-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF]'
+
+set.toString({ 'hasUnicodeFlag': true });
+// → '[\\0-\\u{10FFFF}]'
+```
+
 ### `regenerate.prototype.toRegExp(flags = '')`
 
 Returns a regular expression that matches all the symbols mapped to the code points within the set. Optionally, you can pass [flags](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp#Parameters) to be added to the regular expression.
diff --git a/bower.json b/bower.json
index 9ce033e..07ac966 100644
--- a/bower.json
+++ b/bower.json
@@ -1,6 +1,6 @@
 {
 	"name": "regenerate",
-	"version": "1.2.1",
+	"version": "1.3.0",
 	"main": "regenerate.js",
 	"ignore": [
 		"coverage",
diff --git a/component.json b/component.json
index ca116ed..6775753 100644
--- a/component.json
+++ b/component.json
@@ -1,6 +1,6 @@
 {
 	"name": "regenerate",
-	"version": "1.2.1",
+	"version": "1.3.0",
 	"description": "Generate JavaScript-compatible regular expressions based on a given set of Unicode symbols or code points.",
 	"repo": "mathiasbynens/regenerate",
 	"license": "MIT",
diff --git a/package.json b/package.json
index 0ad623c..dd5ece8 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "regenerate",
-	"version": "1.2.1",
+	"version": "1.3.0",
 	"description": "Generate JavaScript-compatible regular expressions based on a given set of Unicode symbols or code points.",
 	"homepage": "https://mths.be/regenerate",
 	"main": "regenerate.js",
diff --git a/regenerate.js b/regenerate.js
index 06de02c..3ed14ad 100644
--- a/regenerate.js
+++ b/regenerate.js
@@ -1,4 +1,4 @@
-/*! https://mths.be/regenerate v1.2.1 by @mathias | MIT license */
+/*! https://mths.be/regenerate v1.3.0 by @mathias | MIT license */
 ;(function(root) {
 
 	// Detect free variables `exports`.
@@ -8,7 +8,7 @@
 	var freeModule = typeof module == 'object' && module &&
 		module.exports == freeExports && module;
 
-	// Detect free variable `global`, from Node.js or Browserified code,
+	// Detect free variable `global`, from Node.js/io.js or Browserified code,
 	// and use it as `root`.
 	var freeGlobal = typeof global == 'object' && global;
 	if (freeGlobal.global === freeGlobal || freeGlobal.window === freeGlobal) {
@@ -30,8 +30,8 @@
 	var LOW_SURROGATE_MIN = 0xDC00;
 	var LOW_SURROGATE_MAX = 0xDFFF;
 
-	// In Regenerate output, `\0` will never be preceded by `\` because we sort
-	// by code point value, so let’s keep this regular expression simple.
+	// In Regenerate output, `\0` is never preceded by `\` because we sort by
+	// code point value, so let’s keep this regular expression simple.
 	var regexNull = /\\x00([^0123456789]|$)/g;
 
 	var object = {};
@@ -580,6 +580,13 @@
 		return string;
 	};
 
+	var codePointToStringUnicode = function(codePoint) {
+		if (codePoint <= 0xFFFF) {
+			return codePointToString(codePoint);
+		}
+		return '\\u{' + codePoint.toString(16).toUpperCase() + '}';
+	};
+
 	var symbolToCodePoint = function(symbol) {
 		var length = symbol.length;
 		var first = symbol.charCodeAt(0);
@@ -623,6 +630,31 @@
 		return '[' + result + ']';
 	};
 
+	var createUnicodeCharacterClasses = function(data) {
+		// Iterate over the data per `(start, end)` pair.
+		var result = '';
+		var index = 0;
+		var start;
+		var end;
+		var length = data.length;
+		if (dataIsSingleton(data)) {
+			return codePointToStringUnicode(data[0]);
+		}
+		while (index < length) {
+			start = data[index];
+			end = data[index + 1] - 1; // Note: the `- 1` makes `end` inclusive.
+			if (start == end) {
+				result += codePointToStringUnicode(start);
+			} else if (start + 1 == end) {
+				result += codePointToStringUnicode(start) + codePointToStringUnicode(end);
+			} else {
+				result += codePointToStringUnicode(start) + '-' + codePointToStringUnicode(end);
+			}
+			index += 2;
+		}
+		return '[' + result + ']';
+	};
+
 	var splitAtBMP = function(data) {
 		// Iterate over the data per `(start, end)` pair.
 		var loneHighSurrogates = [];
@@ -958,7 +990,10 @@
 		return result.join('|');
 	};
 
-	var createCharacterClassesFromData = function(data, bmpOnly) {
+	var createCharacterClassesFromData = function(data, bmpOnly, hasUnicodeFlag) {
+		if (hasUnicodeFlag) {
+			return createUnicodeCharacterClasses(data);
+		}
 		var result = [];
 
 		var parts = splitAtBMP(data);
@@ -999,7 +1034,9 @@
 		}
 		if (hasLoneLowSurrogates) {
 			result.push(
-				// Make sure the low surrogates aren’t part of a surrogate pair.
+				// It is not possible to accurately assert the low surrogates aren’t
+				// part of a surrogate pair, since JavaScript regular expressions do
+				// not support lookbehind.
 				'(?:[^\\uD800-\\uDBFF]|^)' +
 				createBMPCharacterClasses(loneLowSurrogates)
 			);
@@ -1023,7 +1060,7 @@
 		return (new regenerate).add(value);
 	};
 
-	regenerate.version = '1.2.1';
+	regenerate.version = '1.3.0';
 
 	var proto = regenerate.prototype;
 	extend(proto, {
@@ -1120,13 +1157,19 @@
 		'toString': function(options) {
 			var result = createCharacterClassesFromData(
 				this.data,
-				options ? options.bmpOnly : false
+				options ? options.bmpOnly : false,
+				options ? options.hasUnicodeFlag : false
 			);
 			// Use `\0` instead of `\x00` where possible.
 			return result.replace(regexNull, '\\0$1');
 		},
 		'toRegExp': function(flags) {
-			return RegExp(this.toString(), flags || '');
+			var pattern = this.toString(
+				flags && flags.indexOf('u') != -1 ?
+					{ 'hasUnicodeFlag': true } :
+					null
+			);
+			return RegExp(pattern, flags || '');
 		},
 		'valueOf': function() { // Note: `valueOf` is aliased as `toArray`.
 			return dataToArray(this.data);
diff --git a/tests/tests.js b/tests/tests.js
index 1b27d8c..4ace9bb 100644
--- a/tests/tests.js
+++ b/tests/tests.js
@@ -202,6 +202,19 @@
 			/[\0-\xFF\u0201-\u0300]/g,
 			'toRegExp with flags'
 		);
+		var supportsUnicodeFlag = (function() {
+			try {
+				var regex = new RegExp('\\u{1D306}', 'u');
+				return true;
+			} catch (exception) {
+				return false
+			}
+		}());
+		supportsUnicodeFlag && deepEqual(
+			regenerate().addRange(0x0, 0x10FFFF).toRegExp('gu'),
+			new RegExp('[\\0-\\u{10FFFF}]', 'gu'),
+			'toRegExp with `u` flag triggers `hasUnicodeFlag: true`'
+		);
 		raises(
 			function() {
 				regenerate(0x10, 0x1F).removeRange(0x1F, 0x1A).toArray();
@@ -387,6 +400,21 @@
 			'Unmatched low surrogates with `bmpOnly: true`'
 		);
 		equal(
+			regenerate('a', '\xA9', 0x1D306).toString({ 'hasUnicodeFlag': true }),
+			'[a\\xA9\\u{1D306}]',
+			'Various code points with `hasUnicodeFlag: true`'
+		);
+		equal(
+			regenerate().addRange(0x0, 0x10FFFF).toString({ 'hasUnicodeFlag': true }),
+			'[\\0-\\u{10FFFF}]',
+			'All Unicode code points with `hasUnicodeFlag: true`'
+		);
+		equal(
+			regenerate().addRange(0xFFFE, 0x010001).toString({ 'hasUnicodeFlag': true }),
+			'[\\uFFFE-\\u{10001}]',
+			'Range that starts within BMP and ends in astral range with `hasUnicodeFlag: true`'
+		);
+		equal(
 			regenerate(0x0, 0x1, 0x2, 0x3, 0x1D306, 0x1D307, 0x1D308, 0x1D30A).toString(),
 			'[\\0-\\x03]|\\uD834[\\uDF06-\\uDF08\\uDF0A]',
 			'Mixed BMP and astral code points'

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/node-regenerate.git



More information about the Pkg-javascript-commits mailing list