[Pkg-javascript-commits] [node-character-parser] 01/02: Imported Upstream version 1.2.0
Leo Iannacone
l3on-guest at moszumanska.debian.org
Wed Apr 30 20:10:28 UTC 2014
This is an automated email from the git hooks/post-receive script.
l3on-guest pushed a commit to branch master
in repository node-character-parser.
commit 6bad0d161210002d2cb0ef0db9adc10baaf41d67
Author: Leo Iannacone <l3on at ubuntu.com>
Date: Wed Apr 30 21:51:58 2014 +0200
Imported Upstream version 1.2.0
---
.npmignore | 2 +
.travis.yml | 4 ++
LICENSE | 19 +++++
README.md | 142 ++++++++++++++++++++++++++++++++++++++
index.js | 217 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
package.json | 29 ++++++++
test/index.js | 55 +++++++++++++++
7 files changed, 468 insertions(+)
diff --git a/.npmignore b/.npmignore
new file mode 100644
index 0000000..cefaa67
--- /dev/null
+++ b/.npmignore
@@ -0,0 +1,2 @@
+test/
+.travis.yml
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..2ca91f2
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,4 @@
+language: node_js
+node_js:
+ - "0.10"
+ - "0.8"
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..e1b32ec
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2013 Forbes Lindesay
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..b530291
--- /dev/null
+++ b/README.md
@@ -0,0 +1,142 @@
+# character-parser
+
+Parse JavaScript one character at a time to look for snippets in Templates. This is not a validator, it's just designed to allow you to have sections of JavaScript delimited by brackets robustly.
+
+[![Build Status](https://travis-ci.org/ForbesLindesay/character-parser.png?branch=master)](https://travis-ci.org/ForbesLindesay/character-parser)
+
+## Installation
+
+ npm install character-parser
+
+## Usage
+
+Work out how much depth changes:
+
+```js
+var state = parse('foo(arg1, arg2, {\n foo: [a, b\n');
+assert(state.roundDepth === 1);
+assert(state.curlyDepth === 1);
+assert(state.squareDepth === 1);
+parse(' c, d]\n })', state);
+assert(state.squareDepth === 0);
+assert(state.curlyDepth === 0);
+assert(state.roundDepth === 0);
+```
+
+### Bracketed Expressions
+
+Find all the contents of a bracketed expression:
+
+```js
+var section = parser.parseMax('foo="(", bar="}") bing bong');
+assert(section.start === 0);
+assert(section.end === 16);//exclusive end of string
+assert(section.src === 'foo="(", bar="}"');
+
+
+var section = parser.parseMax('{foo="(", bar="}"} bing bong', {start: 1});
+assert(section.start === 1);
+assert(section.end === 17);//exclusive end of string
+assert(section.src === 'foo="(", bar="}"');
+```
+
+The bracketed expression parsing simply parses up to but excluding the first unmatched closed bracket (`)`, `}`, `]`). It is clever enough to ignore brackets in comments or strings.
+
+
+### Custom Delimited Expressions
+
+Find code up to a custom delimiter:
+
+```js
+var section = parser.parseUntil('foo.bar("%>").baz%> bing bong', '%>');
+assert(section.start === 0);
+assert(section.end === 17);//exclusive end of string
+assert(section.src === 'foo.bar("%>").baz');
+
+var section = parser.parseUntil('<%foo.bar("%>").baz%> bing bong', '%>', {start: 2});
+assert(section.start === 2);
+assert(section.end === 19);//exclusive end of string
+assert(section.src === 'foo.bar("%>").baz');
+```
+
+Delimiters are ignored if they are inside strings or comments.
+
+## API
+
+### parse(src, state = defaultState(), options = {start: 0, end: src.length})
+
+Parse a string starting at the index start, and return the state after parsing that string.
+
+If you want to parse one string in multiple sections you should keep passing the resulting state to the next parse operation.
+
+Returns a `State` object.
+
+### parseMax(src, options = {start: 0})
+
+Parses the source until the first unmatched close bracket (any of `)`, `}`, `]`). It returns an object with the structure:
+
+```js
+{
+ start: 0,//index of first character of string
+ end: 13,//index of first character after the end of string
+ src: 'source string'
+}
+```
+
+### parseUntil(src, delimiter, options = {start: 0, includeLineComment: false})
+
+Parses the source until the first occurrence of `delimiter` which is not in a string or a comment. If `includeLineComment` is `true`, it will still count if the delimiter occurs in a line comment, but not in a block comment. It returns an object with the structure:
+
+```js
+{
+ start: 0,//index of first character of string
+ end: 13,//index of first character after the end of string
+ src: 'source string'
+}
+```
+
+### parseChar(character, state = defaultState())
+
+Parses the single character and returns the state. See `parse` for the structure of the returned state object. N.B. character must be a single character not a multi character string.
+
+### defaultState()
+
+Get a default starting state.
+
+### isPunctuator(character)
+
+Returns `true` if `character` represents punctuation in JavaScript.
+
+### isKeyword(name)
+
+Returns `true` if `name` is a keyword in JavaScript.
+
+## State
+
+A state is an object with the following structure
+
+```js
+{
+ lineComment: false, //true if inside a line comment
+ blockComment: false, //true if inside a block comment
+
+ singleQuote: false, //true if inside a single quoted string
+ doubleQuote: false, //true if inside a double quoted string
+ regexp: false, //true if inside a regular expression
+ escaped: false, //true if in a string and the last character was an escape character
+
+ roundDepth: 0, //number of un-closed open `(` brackets
+ curlyDepth: 0, //number of un-closed open `{` brackets
+ squareDepth: 0 //number of un-closed open `[` brackets
+}
+```
+
+It also has the following useful methods:
+
+- `.isString()` returns `true` if the current location is inside a string.
+- `.isComment()` returns `true` if the current location is inside a comment.
+- `.isNesting()` returns `true` if the current location is anything but at the top level, i.e. with no nesting.
+
+## License
+
+MIT
\ No newline at end of file
diff --git a/index.js b/index.js
new file mode 100644
index 0000000..8d0f395
--- /dev/null
+++ b/index.js
@@ -0,0 +1,217 @@
+exports = (module.exports = parse);
+exports.parse = parse;
+function parse(src, state, options) {
+ options = options || {};
+ state = state || exports.defaultState();
+ var start = options.start || 0;
+ var end = options.end || src.length;
+ var index = start;
+ while (index < end) {
+ if (state.roundDepth < 0 || state.curlyDepth < 0 || state.squareDepth < 0) {
+ throw new SyntaxError('Mismatched Bracket: ' + src[index - 1]);
+ }
+ exports.parseChar(src[index++], state);
+ }
+ return state;
+}
+
+exports.parseMax = parseMax;
+function parseMax(src, options) {
+ options = options || {};
+ var start = options.start || 0;
+ var index = start;
+ var state = exports.defaultState();
+ while (state.roundDepth >= 0 && state.curlyDepth >= 0 && state.squareDepth >= 0) {
+ if (index >= src.length) {
+ throw new Error('The end of the string was reached with no closing bracket found.');
+ }
+ exports.parseChar(src[index++], state);
+ }
+ var end = index - 1;
+ return {
+ start: start,
+ end: end,
+ src: src.substring(start, end)
+ };
+}
+
+exports.parseUntil = parseUntil;
+function parseUntil(src, delimiter, options) {
+ options = options || {};
+ var includeLineComment = options.includeLineComment || false;
+ var start = options.start || 0;
+ var index = start;
+ var state = exports.defaultState();
+ while (state.isString() || state.regexp || state.blockComment ||
+ (!includeLineComment && state.lineComment) || !startsWith(src, delimiter, index)) {
+ exports.parseChar(src[index++], state);
+ }
+ var end = index;
+ return {
+ start: start,
+ end: end,
+ src: src.substring(start, end)
+ };
+}
+
+
+exports.parseChar = parseChar;
+/**
+ * Advance the parser state by exactly one character.
+ *
+ * While inside a block comment the previous in-comment character is kept in
+ * `state.lastChar` so that the closing star-slash pair can be recognised.
+ *
+ * @param {string} character - a string of length 1
+ * @param {State} [state] - state to update in place; a fresh default state is
+ *   created when omitted
+ * @returns {State} the updated state
+ * @throws {Error} when `character` is not exactly one character long
+ */
+function parseChar(character, state) {
+  if (character.length !== 1) throw new Error('Character must be a string of length 1');
+  state = state || exports.defaultState();
+  // Characters that open, sit inside, or close a comment are kept out of `history`.
+  var wasComment = state.blockComment || state.lineComment;
+  // `history` is stored newest-first, so index 0 is the previous non-comment character.
+  var lastChar = state.history ? state.history[0] : '';
+  if (state.lineComment) {
+    if (character === '\n') {
+      state.lineComment = false;
+    }
+  } else if (state.blockComment) {
+    if (state.lastChar === '*' && character === '/') {
+      state.blockComment = false;
+    } else {
+      // `history` is frozen inside comments, so remember the previous comment
+      // character separately; without this the comment could never be closed.
+      state.lastChar = character;
+    }
+  } else if (state.singleQuote) {
+    if (character === '\'' && !state.escaped) {
+      state.singleQuote = false;
+    } else if (character === '\\' && !state.escaped) {
+      state.escaped = true;
+    } else {
+      state.escaped = false;
+    }
+  } else if (state.doubleQuote) {
+    if (character === '"' && !state.escaped) {
+      state.doubleQuote = false;
+    } else if (character === '\\' && !state.escaped) {
+      state.escaped = true;
+    } else {
+      state.escaped = false;
+    }
+  } else if (state.regexp) {
+    if (character === '/' && !state.escaped) {
+      state.regexp = false;
+    } else if (character === '\\' && !state.escaped) {
+      state.escaped = true;
+    } else {
+      state.escaped = false;
+    }
+  } else if (lastChar === '/' && character === '/') {
+    // Drop the first slash of the comment opener from the history.
+    state.history = state.history.substr(1);
+    state.lineComment = true;
+  } else if (lastChar === '/' && character === '*') {
+    state.history = state.history.substr(1);
+    state.blockComment = true;
+    // The opening star must not double as the star of the closing pair.
+    state.lastChar = '';
+  } else if (character === '/' && isRegexp(state.history)) {
+    state.regexp = true;
+  } else if (character === '\'') {
+    state.singleQuote = true;
+  } else if (character === '"') {
+    state.doubleQuote = true;
+  } else if (character === '(') {
+    state.roundDepth++;
+  } else if (character === ')') {
+    state.roundDepth--;
+  } else if (character === '{') {
+    state.curlyDepth++;
+  } else if (character === '}') {
+    state.curlyDepth--;
+  } else if (character === '[') {
+    state.squareDepth++;
+  } else if (character === ']') {
+    state.squareDepth--;
+  }
+  if (!state.blockComment && !state.lineComment && !wasComment) state.history = character + state.history;
+  return state;
+}
+
+exports.defaultState = function () { return new State() };
+function State() {
+ this.lineComment = false;
+ this.blockComment = false;
+
+ this.singleQuote = false;
+ this.doubleQuote = false;
+ this.regexp = false;
+ this.escaped = false;
+
+ this.roundDepth = 0;
+ this.curlyDepth = 0;
+ this.squareDepth = 0;
+
+ this.history = ''
+}
+State.prototype.isString = function () {
+ return this.singleQuote || this.doubleQuote;
+}
+State.prototype.isComment = function () {
+ return this.lineComment || this.blockComment;
+}
+State.prototype.isNesting = function () {
+ return this.isString() || this.isComment() || this.regexp || this.roundDepth > 0 || this.curlyDepth > 0 || this.squareDepth > 0
+}
+
+function startsWith(str, start, i) {
+ return str.substr(i || 0, start.length) === start;
+}
+
+exports.isPunctuator = isPunctuator
+function isPunctuator(c) {
+ var code = c.charCodeAt(0)
+
+ switch (code) {
+ case 46: // . dot
+ case 40: // ( open bracket
+ case 41: // ) close bracket
+ case 59: // ; semicolon
+ case 44: // , comma
+ case 123: // { open curly brace
+ case 125: // } close curly brace
+ case 91: // [
+ case 93: // ]
+ case 58: // :
+ case 63: // ?
+ case 126: // ~
+ case 37: // %
+ case 38: // &
+ case 42: // *:
+ case 43: // +
+ case 45: // -
+ case 47: // /
+ case 60: // <
+ case 62: // >
+ case 94: // ^
+ case 124: // |
+ case 33: // !
+ case 61: // =
+ return true;
+ default:
+ return false;
+ }
+}
+exports.isKeyword = isKeyword
+function isKeyword(id) {
+ return (id === 'if') || (id === 'in') || (id === 'do') || (id === 'var') || (id === 'for') || (id === 'new') ||
+ (id === 'try') || (id === 'let') || (id === 'this') || (id === 'else') || (id === 'case') ||
+ (id === 'void') || (id === 'with') || (id === 'enum') || (id === 'while') || (id === 'break') || (id === 'catch') ||
+ (id === 'throw') || (id === 'const') || (id === 'yield') || (id === 'class') || (id === 'super') ||
+ (id === 'return') || (id === 'typeof') || (id === 'delete') || (id === 'switch') || (id === 'export') ||
+ (id === 'import') || (id === 'default') || (id === 'finally') || (id === 'extends') || (id === 'function') ||
+ (id === 'continue') || (id === 'debugger') || (id === 'package') || (id === 'private') || (id === 'interface') ||
+ (id === 'instanceof') || (id === 'implements') || (id === 'protected') || (id === 'public') || (id === 'static') ||
+ (id === 'yield') || (id === 'let');
+}
+
+function isRegexp(history) {
+ //could be start of regexp or divide sign
+
+ history = history.replace(/^\s*/, '');
+
+ //unless its an `if`, `while`, `for` or `with` it's a divide, so we assume it's a divide
+ if (history[0] === ')') return false;
+ //unless it's a function expression, it's a regexp, so we assume it's a regexp
+ if (history[0] === '}') return true;
+ //any punctuation means it's a regexp
+ if (isPunctuator(history[0])) return true;
+ //if the last thing was a keyword then it must be a regexp (e.g. `typeof /foo/`)
+ if (/^\w+\b/.test(history) && isKeyword(/^\w+\b/.exec(history)[0].split('').reverse().join(''))) return true;
+
+ return false;
+}
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..dc7bb9c
--- /dev/null
+++ b/package.json
@@ -0,0 +1,29 @@
+{
+ "name": "character-parser",
+ "version": "1.2.0",
+ "description": "Parse JavaScript one character at a time to look for snippets in Templates. This is not a validator, it's just designed to allow you to have sections of JavaScript delimited by brackets robustly.",
+ "main": "index.js",
+ "scripts": {
+ "test": "mocha -R spec"
+ },
+ "repository": {
+ "type": "git",
+ "url": "https://github.com/ForbesLindesay/character-parser.git"
+ },
+ "keywords": [
+ "parser",
+ "JavaScript",
+ "bracket",
+ "nesting",
+ "comment",
+ "string",
+ "escape",
+ "escaping"
+ ],
+ "author": "ForbesLindesay",
+ "license": "MIT",
+ "devDependencies": {
+ "better-assert": "~1.0.0",
+ "mocha": "~1.9.0"
+ }
+}
\ No newline at end of file
diff --git a/test/index.js b/test/index.js
new file mode 100644
index 0000000..3860427
--- /dev/null
+++ b/test/index.js
@@ -0,0 +1,55 @@
+var assert = require('better-assert');
+var parser = require('../');
+var parse = parser;
+
+it('works out how much depth changes', function () {
+ var state = parse('foo(arg1, arg2, {\n foo: [a, b\n');
+ assert(state.roundDepth === 1);
+ assert(state.curlyDepth === 1);
+ assert(state.squareDepth === 1);
+
+ parse(' c, d]\n })', state);
+ assert(state.squareDepth === 0);
+ assert(state.curlyDepth === 0);
+ assert(state.roundDepth === 0);
+});
+
+it('finds contents of bracketed expressions', function () {
+ var section = parser.parseMax('foo="(", bar="}") bing bong');
+ assert(section.start === 0);
+ assert(section.end === 16);//exclusive end of string
+ assert(section.src = 'foo="(", bar="}"');
+
+ var section = parser.parseMax('{foo="(", bar="}"} bing bong', {start: 1});
+ assert(section.start === 1);
+ assert(section.end === 17);//exclusive end of string
+ assert(section.src = 'foo="(", bar="}"');
+});
+
+it('finds code up to a custom delimiter', function () {
+ var section = parser.parseUntil('foo.bar("%>").baz%> bing bong', '%>');
+ assert(section.start === 0);
+ assert(section.end === 17);//exclusive end of string
+ assert(section.src = 'foo.bar("%>").baz');
+
+ var section = parser.parseUntil('<%foo.bar("%>").baz%> bing bong', '%>', {start: 2});
+ assert(section.start === 2);
+ assert(section.end === 19);//exclusive end of string
+ assert(section.src = 'foo.bar("%>").baz');
+});
+
+describe('regressions', function () {
+ describe('#1', function () {
+ it('parses regular expressions', function () {
+ var section = parser.parseMax('foo=/\\//g, bar="}") bing bong');
+ assert(section.start === 0);
+ assert(section.end === 18);//exclusive end of string
+ assert(section.src = 'foo=/\\//g, bar="}"');
+
+ var section = parser.parseMax('foo = typeof /\\//g, bar="}") bing bong');
+ assert(section.start === 0);
+ //assert(section.end === 18);//exclusive end of string
+ assert(section.src = 'foo = typeof /\\//g, bar="}"');
+ })
+ })
+})
\ No newline at end of file
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/node-character-parser.git
More information about the Pkg-javascript-commits
mailing list