[Pkg-javascript-commits] [node-character-parser] 01/02: Imported Upstream version 1.2.0

Leo Iannacone l3on-guest at moszumanska.debian.org
Wed Apr 30 20:10:28 UTC 2014


This is an automated email from the git hooks/post-receive script.

l3on-guest pushed a commit to branch master
in repository node-character-parser.

commit 6bad0d161210002d2cb0ef0db9adc10baaf41d67
Author: Leo Iannacone <l3on at ubuntu.com>
Date:   Wed Apr 30 21:51:58 2014 +0200

    Imported Upstream version 1.2.0
---
 .npmignore    |   2 +
 .travis.yml   |   4 ++
 LICENSE       |  19 +++++
 README.md     | 142 ++++++++++++++++++++++++++++++++++++++
 index.js      | 217 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 package.json  |  29 ++++++++
 test/index.js |  55 +++++++++++++++
 7 files changed, 468 insertions(+)

diff --git a/.npmignore b/.npmignore
new file mode 100644
index 0000000..cefaa67
--- /dev/null
+++ b/.npmignore
@@ -0,0 +1,2 @@
+test/
+.travis.yml
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..2ca91f2
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,4 @@
+language: node_js
+node_js:
+  - "0.10"
+  - "0.8"
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..e1b32ec
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2013 Forbes Lindesay
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..b530291
--- /dev/null
+++ b/README.md
@@ -0,0 +1,142 @@
+# character-parser
+
+Parse JavaScript one character at a time to look for snippets in Templates.  This is not a validator, it's just designed to allow you to have sections of JavaScript delimited by brackets robustly.
+
+[![Build Status](https://travis-ci.org/ForbesLindesay/character-parser.png?branch=master)](https://travis-ci.org/ForbesLindesay/character-parser)
+
+## Installation
+
+    npm install character-parser
+
+## Usage
+
+Work out how much depth changes:
+
+```js
+var state = parse('foo(arg1, arg2, {\n  foo: [a, b\n');
+assert(state.roundDepth === 1);
+assert(state.curlyDepth === 1);
+assert(state.squareDepth === 1);
+parse('    c, d]\n  })', state);
+assert(state.squareDepth === 0);
+assert(state.curlyDepth === 0);
+assert(state.roundDepth === 0);
+```
+
+### Bracketed Expressions
+
+Find all the contents of a bracketed expression:
+
+```js
+var section = parser.parseMax('foo="(", bar="}") bing bong');
+assert(section.start === 0);
+assert(section.end === 16);//exclusive end of string
+assert(section.src === 'foo="(", bar="}"');
+
+
+var section = parser.parseMax('{foo="(", bar="}"} bing bong', {start: 1});
+assert(section.start === 1);
+assert(section.end === 17);//exclusive end of string
+assert(section.src === 'foo="(", bar="}"');
+```
+
+The bracketed expression parsing simply parses up to but excluding the first unmatched closed bracket (`)`, `}`, `]`).  It is clever enough to ignore brackets in comments or strings.
+
+
+### Custom Delimited Expressions
+
+Find code up to a custom delimiter:
+
+```js
+var section = parser.parseUntil('foo.bar("%>").baz%> bing bong', '%>');
+assert(section.start === 0);
+assert(section.end === 17);//exclusive end of string
+assert(section.src === 'foo.bar("%>").baz');
+
+var section = parser.parseUntil('<%foo.bar("%>").baz%> bing bong', '%>', {start: 2});
+assert(section.start === 2);
+assert(section.end === 19);//exclusive end of string
+assert(section.src === 'foo.bar("%>").baz');
+```
+
+Delimiters are ignored if they are inside strings or comments.
+
+## API
+
+### parse(src, state = defaultState(), options = {start: 0, end: src.length})
+
+Parse a string starting at the index start, and return the state after parsing that string.
+
+If you want to parse one string in multiple sections you should keep passing the resulting state to the next parse operation.
+
+Returns a `State` object.
+
+### parseMax(src, options = {start: 0})
+
+Parses the source until the first unmatched close bracket (any of `)`, `}`, `]`).  It returns an object with the structure:
+
+```js
+{
+  start: 0,//index of first character of string
+  end: 13,//index of first character after the end of string
+  src: 'source string'
+}
+```
+
+### parseUntil(src, delimiter, options = {start: 0, includeLineComment: false})
+
+Parses the source until the first occurrence of `delimiter` which is not in a string or a comment.  If `includeLineComment` is `true`, it will still count if the delimiter occurs in a line comment, but not in a block comment.  It returns an object with the structure:
+
+```js
+{
+  start: 0,//index of first character of string
+  end: 13,//index of first character after the end of string
+  src: 'source string'
+}
+```
+
+### parseChar(character, state = defaultState())
+
+Parses the single character and returns the state.  See `parse` for the structure of the returned state object.  N.B. character must be a single character not a multi character string.
+
+### defaultState()
+
+Get a default starting state.
+
+### isPunctuator(character)
+
+Returns `true` if `character` represents punctuation in JavaScript.
+
+### isKeyword(name)
+
+Returns `true` if `name` is a keyword in JavaScript.
+
+## State
+
+A state is an object with the following structure
+
+```js
+{
+  lineComment: false, //true if inside a line comment
+  blockComment: false, //true if inside a block comment
+
+  singleQuote: false, //true if inside a single quoted string
+  doubleQuote: false, //true if inside a double quoted string
+  regexp:      false, //true if inside a regular expression
+  escaped: false, //true if in a string and the last character was an escape character
+
+  roundDepth: 0, //number of un-closed open `(` brackets
+  curlyDepth: 0, //number of un-closed open `{` brackets
+  squareDepth: 0 //number of un-closed open `[` brackets
+}
+```
+
+It also has the following useful methods:
+
+- `.isString()` returns `true` if the current location is inside a string.
+- `.isComment()` returns `true` if the current location is inside a comment.
+- `.isNesting()` returns `true` if the current location is anything but at the top level, i.e. with no nesting.
+
+## License
+
+MIT
\ No newline at end of file
diff --git a/index.js b/index.js
new file mode 100644
index 0000000..8d0f395
--- /dev/null
+++ b/index.js
@@ -0,0 +1,217 @@
+module.exports = parse;
+exports = module.exports;
+exports.parse = parse;
+/**
+ * Run a slice of `src` through `exports.parseChar`, one character at a time.
+ *
+ * @param {string} src - text to scan
+ * @param {State} [state] - state to continue from (mutated in place); a fresh
+ *   `exports.defaultState()` is used when omitted
+ * @param {Object} [options]
+ * @param {number} [options.start] - index of the first character to scan;
+ *   falsy values mean 0
+ * @param {number} [options.end] - index to stop before; falsy values
+ *   (including 0) mean `src.length`
+ * @returns {State} the state object that was updated
+ * @throws {SyntaxError} if any bracket depth is negative before a character
+ *   is scanned
+ */
+function parse(src, state, options) {
+  var opts = options || {};
+  var current = state || exports.defaultState();
+  var pos = opts.start || 0;
+  var stop = opts.end || src.length;
+  for (; pos < stop; pos++) {
+    // The balance check runs *before* each character, so it reports the
+    // character scanned on the previous iteration.
+    var unbalanced = current.roundDepth < 0 || current.curlyDepth < 0 || current.squareDepth < 0;
+    if (unbalanced) {
+      throw new SyntaxError('Mismatched Bracket: ' + src[pos - 1]);
+    }
+    exports.parseChar(src[pos], current);
+  }
+  return current;
+}
+
+exports.parseMax = parseMax;
+/**
+ * Scan `src` until a closing bracket drives one of the bracket depths below
+ * zero, and return everything before that bracket.
+ *
+ * @param {string} src - text to scan
+ * @param {Object} [options]
+ * @param {number} [options.start] - index to start scanning at; falsy means 0
+ * @returns {{start: number, end: number, src: string}} `end` is the index of
+ *   the unmatched closing bracket (exclusive end of the section) and `src` is
+ *   the text between `start` and `end`
+ * @throws {Error} if the end of `src` is reached before any depth goes negative
+ */
+function parseMax(src, options) {
+  var begin = (options || {}).start || 0;
+  var cursor = begin;
+  var state = exports.defaultState();
+  for (;;) {
+    var balanced = state.roundDepth >= 0 && state.curlyDepth >= 0 && state.squareDepth >= 0;
+    if (!balanced) break;
+    if (cursor >= src.length) {
+      throw new Error('The end of the string was reached with no closing bracket found.');
+    }
+    exports.parseChar(src[cursor], state);
+    cursor += 1;
+  }
+  // `cursor` is one past the bracket that unbalanced the state; step back so
+  // the bracket itself is excluded.
+  var finish = cursor - 1;
+  var section = {
+    start: begin,
+    end: finish,
+    src: src.substring(begin, finish)
+  };
+  return section;
+}
+
+exports.parseUntil = parseUntil;
+/**
+ * Scan `src` until `delimiter` is found outside of strings, regular
+ * expressions and comments, and return everything before it.
+ *
+ * @param {string} src - text to scan
+ * @param {string} delimiter - text that terminates the section
+ * @param {Object} [options]
+ * @param {number} [options.start] - index to start scanning at; falsy means 0
+ * @param {boolean} [options.includeLineComment] - when truthy, a delimiter
+ *   inside a line comment still terminates the section (block comments never
+ *   do)
+ * @returns {{start: number, end: number, src: string}} `end` is the index at
+ *   which the delimiter begins (exclusive end of the section)
+ * @throws {Error} if the end of `src` is reached without finding the delimiter
+ */
+function parseUntil(src, delimiter, options) {
+  options = options || {};
+  var includeLineComment = options.includeLineComment || false;
+  var start = options.start || 0;
+  var index = start;
+  var state = exports.defaultState();
+  while (state.isString() || state.regexp || state.blockComment ||
+         (!includeLineComment && state.lineComment) || !startsWith(src, delimiter, index)) {
+    // Without this guard a missing delimiter reads past the end of `src` and
+    // fails inside parseChar with an unhelpful TypeError.
+    if (index >= src.length) {
+      throw new Error('The end of the string was reached with no closing delimiter found.');
+    }
+    exports.parseChar(src[index++], state);
+  }
+  var end = index;
+  return {
+    start: start,
+    end: end,
+    src: src.substring(start, end)
+  };
+}
+
+
+exports.parseChar = parseChar;
+/**
+ * Update `state` for a single character of JavaScript source.
+ *
+ * Tracks whether the position is inside a line/block comment, a single or
+ * double quoted string, or a regular expression, and otherwise counts
+ * round/curly/square bracket depth. Characters outside comments are prepended
+ * to `state.history`, so `state.history[0]` is the most recent one.
+ *
+ * @param {string} character - exactly one character
+ * @param {State} [state] - state to update in place; a fresh
+ *   `exports.defaultState()` is used when omitted
+ * @returns {State} the updated state
+ * @throws {Error} if `character` is not of length 1
+ */
+function parseChar(character, state) {
+  if (character.length !== 1) throw new Error('Character must be a string of length 1');
+  state = state || exports.defaultState();
+  var wasComment = state.blockComment || state.lineComment;
+  var lastChar = state.history ? state.history[0] : '';
+
+  if (state.regexpStart) {
+    // The previous `/` was provisionally treated as the start of a regexp.
+    // If it is immediately followed by `/` or `*` it was really the start of
+    // a comment, so undo the guess and let the comment branches below run.
+    if (character === '/' || character === '*') {
+      state.regexp = false;
+    }
+    state.regexpStart = false;
+  }
+
+  if (state.lineComment) {
+    if (character === '\n') {
+      state.lineComment = false;
+    }
+  } else if (state.blockComment) {
+    if (state.lastChar === '*' && character === '/') {
+      state.blockComment = false;
+    }
+    // history is not updated inside comments, so the previous comment
+    // character has to be remembered separately to recognise `*/`
+    state.lastChar = character;
+  } else if (state.singleQuote) {
+    if (character === '\'' && !state.escaped) {
+      state.singleQuote = false;
+    } else if (character === '\\' && !state.escaped) {
+      state.escaped = true;
+    } else {
+      state.escaped = false;
+    }
+  } else if (state.doubleQuote) {
+    if (character === '"' && !state.escaped) {
+      state.doubleQuote = false;
+    } else if (character === '\\' && !state.escaped) {
+      state.escaped = true;
+    } else {
+      state.escaped = false;
+    }
+  } else if (state.regexp) {
+    if (character === '/' && !state.escaped) {
+      state.regexp = false;
+    } else if (character === '\\' && !state.escaped) {
+      state.escaped = true;
+    } else {
+      state.escaped = false;
+    }
+  } else if (lastChar === '/' && character === '/') {
+    // drop the first `/` of the `//` from history; comments leave no trace there
+    state.history = state.history.substr(1);
+    state.lineComment = true;
+  } else if (lastChar === '/' && character === '*') {
+    state.history = state.history.substr(1);
+    state.blockComment = true;
+    // the opening `*` must not double as the `*` of a closing `*/` (as in `/*/`)
+    state.lastChar = '';
+  } else if (character === '/' && isRegexp(state.history)) {
+    state.regexp = true;
+    state.regexpStart = true;
+  } else if (character === '\'') {
+    state.singleQuote = true;
+  } else if (character === '"') {
+    state.doubleQuote = true;
+  } else if (character === '(') {
+    state.roundDepth++;
+  } else if (character === ')') {
+    state.roundDepth--;
+  } else if (character === '{') {
+    state.curlyDepth++;
+  } else if (character === '}') {
+    state.curlyDepth--;
+  } else if (character === '[') {
+    state.squareDepth++;
+  } else if (character === ']') {
+    state.squareDepth--;
+  }
+  // Only record characters that are neither inside a comment nor the one
+  // that just closed it.
+  if (!state.blockComment && !state.lineComment && !wasComment) state.history = character + state.history;
+  return state;
+}
+
+/**
+ * Create a fresh parser state: not inside any comment, string or regexp, all
+ * bracket depths zero, and an empty history.
+ *
+ * @returns {State}
+ */
+exports.defaultState = function defaultState() {
+  return new State();
+};
+
+/**
+ * Mutable record of where the parser currently is.
+ * @constructor
+ */
+function State() {
+  // comment flags
+  this.lineComment = false;
+  this.blockComment = false;
+
+  // string / regexp flags; `escaped` is set after an unescaped backslash
+  this.singleQuote = false;
+  this.doubleQuote = false;
+  this.regexp = false;
+  this.escaped = false;
+
+  // number of currently unclosed `(`, `{` and `[`
+  this.roundDepth = 0;
+  this.curlyDepth = 0;
+  this.squareDepth = 0;
+
+  // characters seen outside comments, most recent first
+  this.history = '';
+}
+
+/** @returns {boolean} truthy when inside a single or double quoted string */
+State.prototype.isString = function () {
+  if (this.singleQuote) return this.singleQuote;
+  return this.doubleQuote;
+};
+
+/** @returns {boolean} truthy when inside a line or block comment */
+State.prototype.isComment = function () {
+  if (this.lineComment) return this.lineComment;
+  return this.blockComment;
+};
+
+/**
+ * @returns {boolean} truthy when inside a string, comment or regexp, or when
+ *   any bracket depth is greater than zero
+ */
+State.prototype.isNesting = function () {
+  var inLiteralOrComment = this.isString() || this.isComment() || this.regexp;
+  if (inLiteralOrComment) return inLiteralOrComment;
+  return this.roundDepth > 0 || this.curlyDepth > 0 || this.squareDepth > 0;
+};
+
+/**
+ * Test whether `start` occurs in `str` at offset `i`.
+ *
+ * @param {string} str - text to look in
+ * @param {string} start - text expected at the offset
+ * @param {number} [i] - offset into `str`; falsy means 0
+ * @returns {boolean}
+ */
+function startsWith(str, start, i) {
+  var offset = i || 0;
+  var candidate = str.substr(offset, start.length);
+  return candidate === start;
+}
+
+exports.isPunctuator = isPunctuator
+/**
+ * Report whether the first character of `c` is one of the punctuation
+ * characters this parser recognises.
+ *
+ * @param {string} c - character to test (only `c.charCodeAt(0)` is examined)
+ * @returns {boolean}
+ */
+function isPunctuator(c) {
+  // Character codes for, in order:
+  //   . ( ) ; , { } [ ] : ? ~
+  //   % & * + - / < > ^ | ! =
+  var punctuatorCodes = [
+    46, 40, 41, 59, 44, 123, 125, 91, 93, 58, 63, 126,
+    37, 38, 42, 43, 45, 47, 60, 62, 94, 124, 33, 61
+  ];
+  return punctuatorCodes.indexOf(c.charCodeAt(0)) !== -1;
+}
+exports.isKeyword = isKeyword
+/**
+ * Report whether `id` is one of the JavaScript keywords / reserved words this
+ * parser knows about.
+ *
+ * @param {string} id - identifier to test (compared with strict equality)
+ * @returns {boolean}
+ */
+function isKeyword(id) {
+  var keywords = [
+    'if', 'in', 'do',
+    'var', 'for', 'new', 'try', 'let',
+    'this', 'else', 'case', 'void', 'with', 'enum',
+    'while', 'break', 'catch', 'throw', 'const', 'yield', 'class', 'super',
+    'return', 'typeof', 'delete', 'switch', 'export', 'import', 'public', 'static',
+    'default', 'finally', 'extends', 'package', 'private',
+    'function', 'continue', 'debugger',
+    'interface', 'protected',
+    'instanceof', 'implements'
+  ];
+  return keywords.indexOf(id) !== -1;
+}
+
+/**
+ * Guess whether a `/` that follows the already-parsed text opens a regular
+ * expression literal rather than being a division operator.
+ *
+ * @param {string} history - previously parsed characters, most recent first
+ * @returns {boolean} `true` when the `/` should be treated as the start of a
+ *   regular expression
+ */
+function isRegexp(history) {
+  //could be start of regexp or divide sign
+
+  history = history.replace(/^\s*/, '');
+
+  //nothing but whitespace precedes the `/`, so there is no left operand to divide;
+  //without this guard `isPunctuator(undefined)` below would throw a TypeError
+  if (history === '') return true;
+  //unless its an `if`, `while`, `for` or `with` it's a divide, so we assume it's a divide
+  if (history[0] === ')') return false;
+  //unless it's a function expression, it's a regexp, so we assume it's a regexp
+  if (history[0] === '}') return true;
+  //any punctuation means it's a regexp
+  if (isPunctuator(history[0])) return true;
+  //if the last thing was a keyword then it must be a regexp (e.g. `typeof /foo/`)
+  //history is stored most recent first, so the word has to be reversed before the lookup
+  var word = /^\w+\b/.exec(history);
+  if (word && isKeyword(word[0].split('').reverse().join(''))) return true;
+
+  return false;
+}
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..dc7bb9c
--- /dev/null
+++ b/package.json
@@ -0,0 +1,29 @@
+{
+  "name": "character-parser",
+  "version": "1.2.0",
+  "description": "Parse JavaScript one character at a time to look for snippets in Templates.  This is not a validator, it's just designed to allow you to have sections of JavaScript delimited by brackets robustly.",
+  "main": "index.js",
+  "scripts": {
+    "test": "mocha -R spec"
+  },
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/ForbesLindesay/character-parser.git"
+  },
+  "keywords": [
+    "parser",
+    "JavaScript",
+    "bracket",
+    "nesting",
+    "comment",
+    "string",
+    "escape",
+    "escaping"
+  ],
+  "author": "ForbesLindesay",
+  "license": "MIT",
+  "devDependencies": {
+    "better-assert": "~1.0.0",
+    "mocha": "~1.9.0"
+  }
+}
\ No newline at end of file
diff --git a/test/index.js b/test/index.js
new file mode 100644
index 0000000..3860427
--- /dev/null
+++ b/test/index.js
@@ -0,0 +1,55 @@
+var assert = require('better-assert');
+var parser = require('../');
+// the module itself is the `parse` function; the other helpers hang off it
+var parse = parser;
+
+it('works out how much depth changes', function () {
+  var state = parse('foo(arg1, arg2, {\n  foo: [a, b\n');
+  assert(state.roundDepth === 1);
+  assert(state.curlyDepth === 1);
+  assert(state.squareDepth === 1);
+
+  // continuing with the same state closes the brackets opened above
+  parse('    c, d]\n  })', state);
+  assert(state.squareDepth === 0);
+  assert(state.curlyDepth === 0);
+  assert(state.roundDepth === 0);
+});
+
+it('finds contents of bracketed expressions', function () {
+  var fromStart = parser.parseMax('foo="(", bar="}") bing bong');
+  assert(fromStart.start === 0);
+  assert(fromStart.end === 16);//exclusive end of string
+  assert(fromStart.src === 'foo="(", bar="}"');
+
+  var fromOffset = parser.parseMax('{foo="(", bar="}"} bing bong', {start: 1});
+  assert(fromOffset.start === 1);
+  assert(fromOffset.end === 17);//exclusive end of string
+  assert(fromOffset.src === 'foo="(", bar="}"');
+});
+
+it('finds code up to a custom delimiter', function () {
+  var fromStart = parser.parseUntil('foo.bar("%>").baz%> bing bong', '%>');
+  assert(fromStart.start === 0);
+  assert(fromStart.end === 17);//exclusive end of string
+  assert(fromStart.src === 'foo.bar("%>").baz');
+
+  var fromOffset = parser.parseUntil('<%foo.bar("%>").baz%> bing bong', '%>', {start: 2});
+  assert(fromOffset.start === 2);
+  assert(fromOffset.end === 19);//exclusive end of string
+  assert(fromOffset.src === 'foo.bar("%>").baz');
+});
+
+describe('regressions', function () {
+  describe('#1', function () {
+    it('parses regular expressions', function () {
+      var afterPunctuation = parser.parseMax('foo=/\\//g, bar="}") bing bong');
+      assert(afterPunctuation.start === 0);
+      assert(afterPunctuation.end === 18);//exclusive end of string
+      assert(afterPunctuation.src === 'foo=/\\//g, bar="}"');
+
+      var afterKeyword = parser.parseMax('foo = typeof /\\//g, bar="}") bing bong');
+      assert(afterKeyword.start === 0);
+      assert(afterKeyword.end === 27);//exclusive end of string
+      assert(afterKeyword.src === 'foo = typeof /\\//g, bar="}"');
+    })
+  })
+})
\ No newline at end of file

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/node-character-parser.git



More information about the Pkg-javascript-commits mailing list