[Pkg-javascript-commits] [node-jison-lex] 01/07: Import Upstream version 0.3.4
Sruthi Chandran
srud-guest at moszumanska.debian.org
Sat Oct 15 05:31:01 UTC 2016
This is an automated email from the git hooks/post-receive script.
srud-guest pushed a commit to branch master
in repository node-jison-lex.
commit 6dbaf52160a14f19b061d7fb7be30ab6971c9497
Author: Sruthi <srud at disroot.org>
Date: Sat Oct 15 10:37:56 2016 +0530
Import Upstream version 0.3.4
---
.gitignore | 6 +
README.md | 47 +++
cli.js | 86 +++++
examples/lex.l | 90 +++++
package.json | 42 +++
regexp-lexer.js | 603 +++++++++++++++++++++++++++++
tests/all-tests.js | 4 +
tests/regexplexer.js | 1021 ++++++++++++++++++++++++++++++++++++++++++++++++++
8 files changed, 1899 insertions(+)
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b1a6b49
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+node_modules/
+
+# Editor bak files
+*~
+*.bak
+*.orig
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3d93789
--- /dev/null
+++ b/README.md
@@ -0,0 +1,47 @@
+# jison-lex
+A lexical analyzer generator used by [jison](http://jison.org). It takes a lexical grammar definition (either in JSON or Bison's lexical grammar format) and outputs a JavaScript lexer.
+
+## install
+npm install jison-lex -g
+
+## usage
+```
+Usage: jison-lex [file] [options]
+
+file file containing a lexical grammar
+
+Options:
+ -o FILE, --outfile FILE Filename and base module name of the generated parser
+ -t TYPE, --module-type TYPE The type of module to generate (commonjs, js)
+ --version print version and exit
+```
+
+## programmatic usage
+
+```
+var JisonLex = require('jison-lex');
+
+var grammar = {
+ rules: [
+ ["x", "return 'X';" ],
+ ["y", "return 'Y';" ],
+ ["$", "return 'EOF';" ]
+ ]
+};
+
+// or load from a file
+// var grammar = fs.readFileSync('mylexer.l', 'utf8');
+
+// generate source
+var lexerSource = JisonLex.generate(grammar);
+
+// or create a parser in memory
+var lexer = new JisonLex(grammar);
+lexer.setInput('xyxxy');
+lexer.lex();
+// => 'X'
+lexer.lex();
+// => 'Y'
+```
+## license
+MIT
diff --git a/cli.js b/cli.js
new file mode 100755
index 0000000..8ca2894
--- /dev/null
+++ b/cli.js
@@ -0,0 +1,86 @@
+#!/usr/bin/env node
+
+var version = require('./package.json').version;
+
+var path = require('path');
+var fs = require('fs');
+var lexParser = require('lex-parser');
+var RegExpLexer = require('./regexp-lexer.js');
+
+
+var opts = require("nomnom")
+ .script('jison-lex')
+ .option('file', {
+ flag: true,
+ position: 0,
+ help: 'file containing a lexical grammar'
+ })
+ .option('outfile', {
+ abbr: 'o',
+ metavar: 'FILE',
+ help: 'Filename and base module name of the generated parser'
+ })
+ .option('module-type', {
+ abbr: 't',
+ default: 'commonjs',
+ metavar: 'TYPE',
+ help: 'The type of module to generate (commonjs, js)'
+ })
+ .option('version', {
+ abbr: 'V',
+ flag: true,
+ help: 'print version and exit',
+ callback: function() {
+ return version;
+ }
+ });
+
+exports.main = function (opts) {
+ if (opts.file) {
+ var raw = fs.readFileSync(path.normalize(opts.file), 'utf8'),
+ name = path.basename((opts.outfile||opts.file)).replace(/\..*$/g,'');
+
+ fs.writeFileSync(opts.outfile||(name + '.js'), processGrammar(raw, name));
+ } else {
+ readin(function (raw) {
+ console.log(processGrammar(raw));
+ });
+ }
+};
+
+function processGrammar (file, name) {
+ var grammar;
+ try {
+ grammar = lexParser.parse(file);
+ } catch (e) {
+ try {
+ grammar = JSON.parse(file);
+ } catch (e2) {
+ throw e;
+ }
+ }
+
+ var settings = grammar.options || {};
+ if (!settings.moduleType) settings.moduleType = opts['module-type'];
+ if (!settings.moduleName && name) settings.moduleName = name.replace(/-\w/g, function (match){ return match.charAt(1).toUpperCase(); });
+
+ grammar.options = settings;
+
+ return RegExpLexer.generate(grammar);
+}
+
+function readin (cb) {
+ var stdin = process.openStdin(),
+ data = '';
+
+ stdin.setEncoding('utf8');
+ stdin.addListener('data', function (chunk) {
+ data += chunk;
+ });
+ stdin.addListener('end', function () {
+ cb(data);
+ });
+}
+
+if (require.main === module)
+ exports.main(opts.parse());
diff --git a/examples/lex.l b/examples/lex.l
new file mode 100644
index 0000000..515984d
--- /dev/null
+++ b/examples/lex.l
@@ -0,0 +1,90 @@
+
+NAME [a-zA-Z_][a-zA-Z0-9_-]*
+BR \r\n|\n|\r
+
+%s indented trail rules
+%x code start_condition options conditions action
+
+%%
+
+<action>"/*"(.|\n|\r)*?"*/" return 'ACTION_BODY';
+<action>"//".* return 'ACTION_BODY';
+<action>"/"[^ /]*?['"{}'][^ ]*?"/" return 'ACTION_BODY'; // regexp with braces or quotes (and no spaces)
+<action>\"("\\\\"|'\"'|[^"])*\" return 'ACTION_BODY';
+<action>"'"("\\\\"|"\'"|[^'])*"'" return 'ACTION_BODY';
+<action>[/"'][^{}/"']+ return 'ACTION_BODY';
+<action>[^{}/"']+ return 'ACTION_BODY';
+<action>"{" yy.depth++; return '{'
+<action>"}" yy.depth == 0 ? this.begin('trail') : yy.depth--; return '}'
+
+<conditions>{NAME} return 'NAME';
+<conditions>">" this.popState(); return '>';
+<conditions>"," return ',';
+<conditions>"*" return '*';
+
+<rules>{BR}+ /* */
+<rules>\s+{BR}+ /* */
+<rules>\s+ this.begin('indented')
+<rules>"%%" this.begin('code'); return '%%'
+<rules>[a-zA-Z0-9_]+ return 'CHARACTER_LIT'
+
+<options>{NAME} yy.options[yytext] = true
+<options>{BR}+ this.begin('INITIAL')
+<options>\s+{BR}+ this.begin('INITIAL')
+<options>\s+ /* empty */
+
+<start_condition>{NAME} return 'START_COND'
+<start_condition>{BR}+ this.begin('INITIAL')
+<start_condition>\s+{BR}+ this.begin('INITIAL')
+<start_condition>\s+ /* empty */
+
+<trail>.*{BR}+ this.begin('rules')
+
+<indented>"{" yy.depth = 0; this.begin('action'); return '{'
+<indented>"%{"(.|{BR})*?"%}" this.begin('trail'); yytext = yytext.substr(2, yytext.length-4);return 'ACTION'
+"%{"(.|{BR})*?"%}" yytext = yytext.substr(2, yytext.length-4); return 'ACTION'
+<indented>.+ this.begin('rules'); return 'ACTION'
+
+"/*"(.|\n|\r)*?"*/" /* ignore */
+"//".* /* ignore */
+
+{BR}+ /* */
+\s+ /* */
+{NAME} return 'NAME';
+\"("\\\\"|'\"'|[^"])*\" yytext = yytext.replace(/\\"/g,'"'); return 'STRING_LIT';
+"'"("\\\\"|"\'"|[^'])*"'" yytext = yytext.replace(/\\'/g,"'"); return 'STRING_LIT';
+"|" return '|';
+"["("\\\\"|"\]"|[^\]])*"]" return 'ANY_GROUP_REGEX';
+"(?:" return 'SPECIAL_GROUP';
+"(?=" return 'SPECIAL_GROUP';
+"(?!" return 'SPECIAL_GROUP';
+"(" return '(';
+")" return ')';
+"+" return '+';
+"*" return '*';
+"?" return '?';
+"^" return '^';
+"," return ',';
+"<<EOF>>" return '$';
+"<" this.begin('conditions'); return '<';
+"/!" return '/!';
+"/" return '/';
+"\\"([0-7]{1,3}|[rfntvsSbBwWdD\\*+()${}|[\]\/.^?]|"c"[A-Z]|"x"[0-9A-F]{2}|"u"[a-fA-F0-9]{4}) return 'ESCAPE_CHAR';
+"\\". yytext = yytext.replace(/^\\/g,''); return 'ESCAPE_CHAR';
+"$" return '$';
+"." return '.';
+"%options" yy.options = {}; this.begin('options');
+"%s" this.begin('start_condition'); return 'START_INC';
+"%x" this.begin('start_condition'); return 'START_EXC';
+"%%" this.begin('rules'); return '%%';
+"{"\d+(","\s?\d+|",")?"}" return 'RANGE_REGEX';
+"{"{NAME}"}" return 'NAME_BRACE';
+"{" return '{';
+"}" return '}';
+. /* ignore bad characters */
+<*><<EOF>> return 'EOF';
+
+<code>(.|{BR})+ return 'CODE';
+
+%%
+
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..dea1800
--- /dev/null
+++ b/package.json
@@ -0,0 +1,42 @@
+{
+ "author": "Zach Carter <zach at carter.name> (http://zaa.ch)",
+ "name": "jison-lex",
+ "description": "lexical analyzer generator used by jison",
+ "version": "0.3.4",
+ "keywords": [
+ "jison",
+ "parser",
+ "generator",
+ "lexer",
+ "flex",
+ "tokenizer"
+ ],
+ "repository": {
+ "type": "git",
+ "url": "git://github.com/zaach/jison-lex.git"
+ },
+ "bugs": {
+ "email": "jison at librelist.com",
+ "url": "http://github.com/zaach/jison-lex/issues"
+ },
+ "main": "regexp-lexer",
+ "bin": "cli.js",
+ "engines": {
+ "node": ">=0.4"
+ },
+ "dependencies": {
+ "lex-parser": "0.1.x",
+ "nomnom": "1.5.2"
+ },
+ "devDependencies": {
+ "test": "0.4.4"
+ },
+ "scripts": {
+ "test": "node tests/all-tests.js"
+ },
+ "directories": {
+ "lib": "lib",
+ "tests": "tests"
+ },
+ "homepage": "http://jison.org"
+}
diff --git a/regexp-lexer.js b/regexp-lexer.js
new file mode 100644
index 0000000..cc68c65
--- /dev/null
+++ b/regexp-lexer.js
@@ -0,0 +1,603 @@
+// Basic Lexer implemented using JavaScript regular expressions
+// MIT Licensed
+
+"use strict";
+
+var lexParser = require('lex-parser');
+var version = require('./package.json').version;
+
+// expand macros and convert matchers to RegExp's
+function prepareRules(rules, macros, actions, tokens, startConditions, caseless) {
+ var m,i,k,action,conditions,
+ newRules = [];
+
+ if (macros) {
+ macros = prepareMacros(macros);
+ }
+
+ function tokenNumberReplacement (str, token) {
+ return "return " + (tokens[token] || "'" + token + "'");
+ }
+
+ actions.push('switch($avoiding_name_collisions) {');
+
+ for (i=0;i < rules.length; i++) {
+ if (Object.prototype.toString.apply(rules[i][0]) !== '[object Array]') {
+ // implicit add to all inclusive start conditions
+ for (k in startConditions) {
+ if (startConditions[k].inclusive) {
+ startConditions[k].rules.push(i);
+ }
+ }
+ } else if (rules[i][0][0] === '*') {
+ // Add to ALL start conditions
+ for (k in startConditions) {
+ startConditions[k].rules.push(i);
+ }
+ rules[i].shift();
+ } else {
+ // Add to explicit start conditions
+ conditions = rules[i].shift();
+ for (k=0;k<conditions.length;k++) {
+ startConditions[conditions[k]].rules.push(i);
+ }
+ }
+
+ m = rules[i][0];
+ if (typeof m === 'string') {
+ for (k in macros) {
+ if (macros.hasOwnProperty(k)) {
+ m = m.split("{" + k + "}").join('(' + macros[k] + ')');
+ }
+ }
+ m = new RegExp("^(?:" + m + ")", caseless ? 'i':'');
+ }
+ newRules.push(m);
+ if (typeof rules[i][1] === 'function') {
+ rules[i][1] = String(rules[i][1]).replace(/^\s*function \(\)\s?\{/, '').replace(/\}\s*$/, '');
+ }
+ action = rules[i][1];
+ if (tokens && action.match(/return '[^']+'/)) {
+ action = action.replace(/return '([^']+)'/g, tokenNumberReplacement);
+ }
+ actions.push('case ' + i + ':' + action + '\nbreak;');
+ }
+ actions.push("}");
+
+ return newRules;
+}
+
+// expand macros within macros
+function prepareMacros (macros) {
+ var cont = true,
+ m,i,k,mnew;
+ while (cont) {
+ cont = false;
+ for (i in macros) if (macros.hasOwnProperty(i)) {
+ m = macros[i];
+ for (k in macros) if (macros.hasOwnProperty(k) && i !== k) {
+ mnew = m.split("{" + k + "}").join('(' + macros[k] + ')');
+ if (mnew !== m) {
+ cont = true;
+ macros[i] = mnew;
+ }
+ }
+ }
+ }
+ return macros;
+}
+
+function prepareStartConditions (conditions) {
+ var sc,
+ hash = {};
+ for (sc in conditions) if (conditions.hasOwnProperty(sc)) {
+ hash[sc] = {rules:[],inclusive:!!!conditions[sc]};
+ }
+ return hash;
+}
+
+function buildActions (dict, tokens) {
+ var actions = [dict.actionInclude || '', "var YYSTATE=YY_START;"];
+ var tok;
+ var toks = {};
+
+ for (tok in tokens) {
+ toks[tokens[tok]] = tok;
+ }
+
+ if (dict.options && dict.options.flex) {
+ dict.rules.push([".", "console.log(yytext);"]);
+ }
+
+ this.rules = prepareRules(dict.rules, dict.macros, actions, tokens && toks, this.conditions, this.options["case-insensitive"]);
+ var fun = actions.join("\n");
+ "yytext yyleng yylineno yylloc".split(' ').forEach(function (yy) {
+ fun = fun.replace(new RegExp("\\b(" + yy + ")\\b", "g"), "yy_.$1");
+ });
+
+ return "function anonymous(yy,yy_,$avoiding_name_collisions,YY_START) {" + fun + "\n}";
+}
+
+function RegExpLexer (dict, input, tokens) {
+ var opts = processGrammar(dict, tokens);
+ var source = generateModuleBody(opts);
+ var lexer = eval(source);
+
+ lexer.yy = {};
+ if (input) {
+ lexer.setInput(input);
+ }
+
+ lexer.generate = function () { return generateFromOpts(opts); };
+ lexer.generateModule = function () { return generateModule(opts); };
+ lexer.generateCommonJSModule = function () { return generateCommonJSModule(opts); };
+ lexer.generateAMDModule = function () { return generateAMDModule(opts); };
+
+ return lexer;
+}
+
+RegExpLexer.prototype = {
+ EOF: 1,
+ parseError: function parseError(str, hash) {
+ if (this.yy.parser) {
+ this.yy.parser.parseError(str, hash);
+ } else {
+ throw new Error(str);
+ }
+ },
+
+ // resets the lexer, sets new input
+ setInput: function (input, yy) {
+ this.yy = yy || this.yy || {};
+ this._input = input;
+ this._more = this._backtrack = this.done = false;
+ this.yylineno = this.yyleng = 0;
+ this.yytext = this.matched = this.match = '';
+ this.conditionStack = ['INITIAL'];
+ this.yylloc = {
+ first_line: 1,
+ first_column: 0,
+ last_line: 1,
+ last_column: 0
+ };
+ if (this.options.ranges) {
+ this.yylloc.range = [0,0];
+ }
+ this.offset = 0;
+ return this;
+ },
+
+ // consumes and returns one char from the input
+ input: function () {
+ var ch = this._input[0];
+ this.yytext += ch;
+ this.yyleng++;
+ this.offset++;
+ this.match += ch;
+ this.matched += ch;
+ var lines = ch.match(/(?:\r\n?|\n).*/g);
+ if (lines) {
+ this.yylineno++;
+ this.yylloc.last_line++;
+ } else {
+ this.yylloc.last_column++;
+ }
+ if (this.options.ranges) {
+ this.yylloc.range[1]++;
+ }
+
+ this._input = this._input.slice(1);
+ return ch;
+ },
+
+ // unshifts one char (or a string) into the input
+ unput: function (ch) {
+ var len = ch.length;
+ var lines = ch.split(/(?:\r\n?|\n)/g);
+
+ this._input = ch + this._input;
+ this.yytext = this.yytext.substr(0, this.yytext.length - len);
+ //this.yyleng -= len;
+ this.offset -= len;
+ var oldLines = this.match.split(/(?:\r\n?|\n)/g);
+ this.match = this.match.substr(0, this.match.length - 1);
+ this.matched = this.matched.substr(0, this.matched.length - 1);
+
+ if (lines.length - 1) {
+ this.yylineno -= lines.length - 1;
+ }
+ var r = this.yylloc.range;
+
+ this.yylloc = {
+ first_line: this.yylloc.first_line,
+ last_line: this.yylineno + 1,
+ first_column: this.yylloc.first_column,
+ last_column: lines ?
+ (lines.length === oldLines.length ? this.yylloc.first_column : 0)
+ + oldLines[oldLines.length - lines.length].length - lines[0].length :
+ this.yylloc.first_column - len
+ };
+
+ if (this.options.ranges) {
+ this.yylloc.range = [r[0], r[0] + this.yyleng - len];
+ }
+ this.yyleng = this.yytext.length;
+ return this;
+ },
+
+ // When called from action, caches matched text and appends it on next action
+ more: function () {
+ this._more = true;
+ return this;
+ },
+
+ // When called from action, signals the lexer that this rule fails to match the input, so the next matching rule (regex) should be tested instead.
+ reject: function () {
+ if (this.options.backtrack_lexer) {
+ this._backtrack = true;
+ } else {
+ return this.parseError('Lexical error on line ' + (this.yylineno + 1) + '. You can only invoke reject() in the lexer when the lexer is of the backtracking persuasion (options.backtrack_lexer = true).\n' + this.showPosition(), {
+ text: "",
+ token: null,
+ line: this.yylineno
+ });
+
+ }
+ return this;
+ },
+
+ // retain first n characters of the match
+ less: function (n) {
+ this.unput(this.match.slice(n));
+ },
+
+ // displays already matched input, i.e. for error messages
+ pastInput: function () {
+ var past = this.matched.substr(0, this.matched.length - this.match.length);
+ return (past.length > 20 ? '...':'') + past.substr(-20).replace(/\n/g, "");
+ },
+
+ // displays upcoming input, i.e. for error messages
+ upcomingInput: function () {
+ var next = this.match;
+ if (next.length < 20) {
+ next += this._input.substr(0, 20-next.length);
+ }
+ return (next.substr(0,20) + (next.length > 20 ? '...' : '')).replace(/\n/g, "");
+ },
+
+ // displays the character position where the lexing error occurred, i.e. for error messages
+ showPosition: function () {
+ var pre = this.pastInput();
+ var c = new Array(pre.length + 1).join("-");
+ return pre + this.upcomingInput() + "\n" + c + "^";
+ },
+
+ // test the lexed token: return FALSE when not a match, otherwise return token
+ test_match: function(match, indexed_rule) {
+ var token,
+ lines,
+ backup;
+
+ if (this.options.backtrack_lexer) {
+ // save context
+ backup = {
+ yylineno: this.yylineno,
+ yylloc: {
+ first_line: this.yylloc.first_line,
+ last_line: this.last_line,
+ first_column: this.yylloc.first_column,
+ last_column: this.yylloc.last_column
+ },
+ yytext: this.yytext,
+ match: this.match,
+ matches: this.matches,
+ matched: this.matched,
+ yyleng: this.yyleng,
+ offset: this.offset,
+ _more: this._more,
+ _input: this._input,
+ yy: this.yy,
+ conditionStack: this.conditionStack.slice(0),
+ done: this.done
+ };
+ if (this.options.ranges) {
+ backup.yylloc.range = this.yylloc.range.slice(0);
+ }
+ }
+
+ lines = match[0].match(/(?:\r\n?|\n).*/g);
+ if (lines) {
+ this.yylineno += lines.length;
+ }
+ this.yylloc = {
+ first_line: this.yylloc.last_line,
+ last_line: this.yylineno + 1,
+ first_column: this.yylloc.last_column,
+ last_column: lines ?
+ lines[lines.length - 1].length - lines[lines.length - 1].match(/\r?\n?/)[0].length :
+ this.yylloc.last_column + match[0].length
+ };
+ this.yytext += match[0];
+ this.match += match[0];
+ this.matches = match;
+ this.yyleng = this.yytext.length;
+ if (this.options.ranges) {
+ this.yylloc.range = [this.offset, this.offset += this.yyleng];
+ }
+ this._more = false;
+ this._backtrack = false;
+ this._input = this._input.slice(match[0].length);
+ this.matched += match[0];
+ token = this.performAction.call(this, this.yy, this, indexed_rule, this.conditionStack[this.conditionStack.length - 1]);
+ if (this.done && this._input) {
+ this.done = false;
+ }
+ if (token) {
+ return token;
+ } else if (this._backtrack) {
+ // recover context
+ for (var k in backup) {
+ this[k] = backup[k];
+ }
+ return false; // rule action called reject() implying the next rule should be tested instead.
+ }
+ return false;
+ },
+
+ // return next match in input
+ next: function () {
+ if (this.done) {
+ return this.EOF;
+ }
+ if (!this._input) {
+ this.done = true;
+ }
+
+ var token,
+ match,
+ tempMatch,
+ index;
+ if (!this._more) {
+ this.yytext = '';
+ this.match = '';
+ }
+ var rules = this._currentRules();
+ for (var i = 0; i < rules.length; i++) {
+ tempMatch = this._input.match(this.rules[rules[i]]);
+ if (tempMatch && (!match || tempMatch[0].length > match[0].length)) {
+ match = tempMatch;
+ index = i;
+ if (this.options.backtrack_lexer) {
+ token = this.test_match(tempMatch, rules[i]);
+ if (token !== false) {
+ return token;
+ } else if (this._backtrack) {
+ match = false;
+ continue; // rule action called reject() implying a rule MISmatch.
+ } else {
+ // else: this is a lexer rule which consumes input without producing a token (e.g. whitespace)
+ return false;
+ }
+ } else if (!this.options.flex) {
+ break;
+ }
+ }
+ }
+ if (match) {
+ token = this.test_match(match, rules[index]);
+ if (token !== false) {
+ return token;
+ }
+ // else: this is a lexer rule which consumes input without producing a token (e.g. whitespace)
+ return false;
+ }
+ if (this._input === "") {
+ return this.EOF;
+ } else {
+ return this.parseError('Lexical error on line ' + (this.yylineno + 1) + '. Unrecognized text.\n' + this.showPosition(), {
+ text: "",
+ token: null,
+ line: this.yylineno
+ });
+ }
+ },
+
+ // return next match that has a token
+ lex: function lex () {
+ var r = this.next();
+ if (r) {
+ return r;
+ } else {
+ return this.lex();
+ }
+ },
+
+ // activates a new lexer condition state (pushes the new lexer condition state onto the condition stack)
+ begin: function begin (condition) {
+ this.conditionStack.push(condition);
+ },
+
+ // pop the previously active lexer condition state off the condition stack
+ popState: function popState () {
+ var n = this.conditionStack.length - 1;
+ if (n > 0) {
+ return this.conditionStack.pop();
+ } else {
+ return this.conditionStack[0];
+ }
+ },
+
+ // produce the lexer rule set which is active for the currently active lexer condition state
+ _currentRules: function _currentRules () {
+ if (this.conditionStack.length && this.conditionStack[this.conditionStack.length - 1]) {
+ return this.conditions[this.conditionStack[this.conditionStack.length - 1]].rules;
+ } else {
+ return this.conditions["INITIAL"].rules;
+ }
+ },
+
+ // return the currently active lexer condition state; when an index argument is provided it produces the N-th previous condition state, if available
+ topState: function topState (n) {
+ n = this.conditionStack.length - 1 - Math.abs(n || 0);
+ if (n >= 0) {
+ return this.conditionStack[n];
+ } else {
+ return "INITIAL";
+ }
+ },
+
+ // alias for begin(condition)
+ pushState: function pushState (condition) {
+ this.begin(condition);
+ },
+
+ // return the number of states pushed
+ stateStackSize: function stateStackSize() {
+ return this.conditionStack.length;
+ }
+};
+
+
+// generate lexer source from a grammar
+function generate (dict, tokens) {
+ var opt = processGrammar(dict, tokens);
+
+ return generateFromOpts(opt);
+}
+
+// process the grammar and build final data structures and functions
+function processGrammar(dict, tokens) {
+ var opts = {};
+ if (typeof dict === 'string') {
+ dict = lexParser.parse(dict);
+ }
+ dict = dict || {};
+
+ opts.options = dict.options || {};
+ opts.moduleType = opts.options.moduleType;
+ opts.moduleName = opts.options.moduleName;
+
+ opts.conditions = prepareStartConditions(dict.startConditions);
+ opts.conditions.INITIAL = {rules:[],inclusive:true};
+
+ opts.performAction = buildActions.call(opts, dict, tokens);
+ opts.conditionStack = ['INITIAL'];
+
+ opts.moduleInclude = (dict.moduleInclude || '').trim();
+ return opts;
+}
+
+// Assemble the final source from the processed grammar
+function generateFromOpts (opt) {
+ var code = "";
+
+ if (opt.moduleType === 'commonjs') {
+ code = generateCommonJSModule(opt);
+ } else if (opt.moduleType === 'amd') {
+ code = generateAMDModule(opt);
+ } else {
+ code = generateModule(opt);
+ }
+
+ return code;
+}
+
+function generateModuleBody (opt) {
+ var functionDescriptions = {
+ setInput: "resets the lexer, sets new input",
+ input: "consumes and returns one char from the input",
+ unput: "unshifts one char (or a string) into the input",
+ more: "When called from action, caches matched text and appends it on next action",
+ reject: "When called from action, signals the lexer that this rule fails to match the input, so the next matching rule (regex) should be tested instead.",
+ less: "retain first n characters of the match",
+ pastInput: "displays already matched input, i.e. for error messages",
+ upcomingInput: "displays upcoming input, i.e. for error messages",
+ showPosition: "displays the character position where the lexing error occurred, i.e. for error messages",
+ test_match: "test the lexed token: return FALSE when not a match, otherwise return token",
+ next: "return next match in input",
+ lex: "return next match that has a token",
+ begin: "activates a new lexer condition state (pushes the new lexer condition state onto the condition stack)",
+ popState: "pop the previously active lexer condition state off the condition stack",
+ _currentRules: "produce the lexer rule set which is active for the currently active lexer condition state",
+ topState: "return the currently active lexer condition state; when an index argument is provided it produces the N-th previous condition state, if available",
+ pushState: "alias for begin(condition)",
+ stateStackSize: "return the number of states currently on the stack"
+ };
+ var out = "({\n";
+ var p = [];
+ var descr;
+ for (var k in RegExpLexer.prototype) {
+ if (RegExpLexer.prototype.hasOwnProperty(k) && k.indexOf("generate") === -1) {
+ // copy the function description as a comment before the implementation; supports multi-line descriptions
+ descr = "\n";
+ if (functionDescriptions[k]) {
+ descr += "// " + functionDescriptions[k].replace(/\n/g, "\n\/\/ ") + "\n";
+ }
+ p.push(descr + k + ":" + (RegExpLexer.prototype[k].toString() || '""'));
+ }
+ }
+ out += p.join(",\n");
+
+ if (opt.options) {
+ out += ",\noptions: " + JSON.stringify(opt.options);
+ }
+
+ out += ",\nperformAction: " + String(opt.performAction);
+ out += ",\nrules: [" + opt.rules + "]";
+ out += ",\nconditions: " + JSON.stringify(opt.conditions);
+ out += "\n})";
+
+ return out;
+}
+
+function generateModule(opt) {
+ opt = opt || {};
+
+ var out = "/* generated by jison-lex " + version + " */";
+ var moduleName = opt.moduleName || "lexer";
+
+ out += "\nvar " + moduleName + " = (function(){\nvar lexer = "
+ + generateModuleBody(opt);
+
+ if (opt.moduleInclude) {
+ out += ";\n" + opt.moduleInclude;
+ }
+
+ out += ";\nreturn lexer;\n})();";
+
+ return out;
+}
+
+function generateAMDModule(opt) {
+ var out = "/* generated by jison-lex " + version + " */";
+
+ out += "define([], function(){\nvar lexer = "
+ + generateModuleBody(opt);
+
+ if (opt.moduleInclude) {
+ out += ";\n" + opt.moduleInclude;
+ }
+
+ out += ";\nreturn lexer;"
+ + "\n});";
+
+ return out;
+}
+
+function generateCommonJSModule(opt) {
+ opt = opt || {};
+
+ var out = "";
+ var moduleName = opt.moduleName || "lexer";
+
+ out += generateModule(opt);
+ out += "\nexports.lexer = " + moduleName;
+ out += ";\nexports.lex = function () { return " + moduleName + ".lex.apply(lexer, arguments); };";
+ return out;
+}
+
+RegExpLexer.generate = generate;
+
+module.exports = RegExpLexer;
+
diff --git a/tests/all-tests.js b/tests/all-tests.js
new file mode 100755
index 0000000..8a0a4dd
--- /dev/null
+++ b/tests/all-tests.js
@@ -0,0 +1,4 @@
+exports.testRegExpLexer = require("./regexplexer");
+
+if (require.main === module)
+ process.exit(require("test").run(exports));
diff --git a/tests/regexplexer.js b/tests/regexplexer.js
new file mode 100644
index 0000000..6128c47
--- /dev/null
+++ b/tests/regexplexer.js
@@ -0,0 +1,1021 @@
+var RegExpLexer = require("../regexp-lexer"),
+ assert = require("assert");
+
+exports["test basic matchers"] = function() {
+ var dict = {
+ rules: [
+ ["x", "return 'X';" ],
+ ["y", "return 'Y';" ],
+ ["$", "return 'EOF';" ]
+ ]
+ };
+
+ var input = "xxyx";
+
+ var lexer = new RegExpLexer(dict, input);
+ assert.equal(lexer.lex(), "X");
+ assert.equal(lexer.lex(), "X");
+ assert.equal(lexer.lex(), "Y");
+ assert.equal(lexer.lex(), "X");
+ assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test set yy"] = function() {
+ var dict = {
+ rules: [
+ ["x", "return yy.x;" ],
+ ["y", "return 'Y';" ],
+ ["$", "return 'EOF';" ]
+ ]
+ };
+
+ var input = "xxyx";
+
+ var lexer = new RegExpLexer(dict);
+ lexer.setInput(input, { x: 'EX' });
+ assert.equal(lexer.lex(), "EX");
+};
+
+exports["test set input after"] = function() {
+ var dict = {
+ rules: [
+ ["x", "return 'X';" ],
+ ["y", "return 'Y';" ],
+ ["$", "return 'EOF';" ]
+ ]
+ };
+
+ var input = "xxyx";
+
+ var lexer = new RegExpLexer(dict);
+ lexer.setInput(input);
+
+ assert.equal(lexer.lex(), "X");
+ assert.equal(lexer.lex(), "X");
+ assert.equal(lexer.lex(), "Y");
+ assert.equal(lexer.lex(), "X");
+ assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test unrecognized char"] = function() {
+ var dict = {
+ rules: [
+ ["x", "return 'X';" ],
+ ["y", "return 'Y';" ],
+ ["$", "return 'EOF';" ]
+ ]
+ };
+
+ var input = "xa";
+
+ var lexer = new RegExpLexer(dict, input);
+ assert.equal(lexer.lex(), "X");
+ assert.throws(function(){lexer.lex()}, "bad char");
+};
+
+exports["test macro"] = function() {
+ var dict = {
+ macros: {
+ "digit": "[0-9]"
+ },
+ rules: [
+ ["x", "return 'X';" ],
+ ["y", "return 'Y';" ],
+ ["{digit}+", "return 'NAT';" ],
+ ["$", "return 'EOF';" ]
+ ]
+ };
+
+ var input = "x12234y42";
+
+ var lexer = new RegExpLexer(dict, input);
+ assert.equal(lexer.lex(), "X");
+ assert.equal(lexer.lex(), "NAT");
+ assert.equal(lexer.lex(), "Y");
+ assert.equal(lexer.lex(), "NAT");
+ assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test macro precedence"] = function() {
+ var dict = {
+ macros: {
+ "hex": "[0-9]|[a-f]"
+ },
+ rules: [
+ ["-", "return '-';" ],
+ ["{hex}+", "return 'HEX';" ],
+ ["$", "return 'EOF';" ]
+ ]
+ };
+
+ var input = "129-abfe-42dc-ea12";
+
+ var lexer = new RegExpLexer(dict, input);
+ assert.equal(lexer.lex(), "HEX");
+ assert.equal(lexer.lex(), "-");
+ assert.equal(lexer.lex(), "HEX");
+ assert.equal(lexer.lex(), "-");
+ assert.equal(lexer.lex(), "HEX");
+ assert.equal(lexer.lex(), "-");
+ assert.equal(lexer.lex(), "HEX");
+ assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test nested macros"] = function () {
+ var dict = {
+ macros: {
+ "digit": "[0-9]",
+ "2digit": "{digit}{digit}",
+ "3digit": "{2digit}{digit}"
+ },
+ rules: [
+ ["x", "return 'X';" ],
+ ["y", "return 'Y';" ],
+ ["{3digit}", "return 'NNN';" ],
+ ["{2digit}", "return 'NN';" ],
+ ["{digit}", "return 'N';" ],
+ ["$", "return 'EOF';" ]
+ ]
+ };
+
+ var input = "x1y42y123";
+
+ var lexer = new RegExpLexer(dict, input);
+ assert.equal(lexer.lex(), "X");
+ assert.equal(lexer.lex(), "N");
+ assert.equal(lexer.lex(), "Y");
+ assert.equal(lexer.lex(), "NN");
+ assert.equal(lexer.lex(), "Y");
+ assert.equal(lexer.lex(), "NNN");
+ assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test nested macro precedence"] = function() {
+ var dict = {
+ macros: {
+ "hex": "[0-9]|[a-f]",
+ "col": "#{hex}+"
+ },
+ rules: [
+ ["-", "return '-';" ],
+ ["{col}", "return 'HEX';" ],
+ ["$", "return 'EOF';" ]
+ ]
+ };
+
+ var input = "#129-#abfe-#42dc-#ea12";
+
+ var lexer = new RegExpLexer(dict, input);
+ assert.equal(lexer.lex(), "HEX");
+ assert.equal(lexer.lex(), "-");
+ assert.equal(lexer.lex(), "HEX");
+ assert.equal(lexer.lex(), "-");
+ assert.equal(lexer.lex(), "HEX");
+ assert.equal(lexer.lex(), "-");
+ assert.equal(lexer.lex(), "HEX");
+ assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test action include"] = function() {
+ var dict = {
+ rules: [
+ ["x", "return included ? 'Y' : 'N';" ],
+ ["$", "return 'EOF';" ]
+ ],
+ actionInclude: "var included = true;"
+ };
+
+ var input = "x";
+
+ var lexer = new RegExpLexer(dict, input);
+ assert.equal(lexer.lex(), "Y");
+ assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test ignored"] = function() {
+ var dict = {
+ rules: [
+ ["x", "return 'X';" ],
+ ["y", "return 'Y';" ],
+ ["\\s+", "/* skip whitespace */" ],
+ ["$", "return 'EOF';" ]
+ ]
+ };
+
+ var input = "x x y x";
+
+ var lexer = new RegExpLexer(dict, input);
+ assert.equal(lexer.lex(), "X");
+ assert.equal(lexer.lex(), "X");
+ assert.equal(lexer.lex(), "Y");
+ assert.equal(lexer.lex(), "X");
+ assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test disambiguate"] = function() {
+ var dict = {
+ rules: [
+ ["for\\b", "return 'FOR';" ],
+ ["if\\b", "return 'IF';" ],
+ ["[a-z]+", "return 'IDENTIFIER';" ],
+ ["\\s+", "/* skip whitespace */" ],
+ ["$", "return 'EOF';" ]
+ ]
+ };
+
+ var input = "if forever for for";
+
+ var lexer = new RegExpLexer(dict, input);
+ assert.equal(lexer.lex(), "IF");
+ assert.equal(lexer.lex(), "IDENTIFIER");
+ assert.equal(lexer.lex(), "FOR");
+ assert.equal(lexer.lex(), "FOR");
+ assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test yytext overwrite"] = function() {
+ var dict = {
+ rules: [
+ ["x", "yytext = 'hi der'; return 'X';" ]
+ ]
+ };
+
+ var input = "x";
+
+ var lexer = new RegExpLexer(dict, input);
+ lexer.lex();
+ assert.equal(lexer.yytext, "hi der");
+};
+
+exports["test yylineno"] = function() {
+ var dict = {
+ rules: [
+ ["\\s+", "/* skip whitespace */" ],
+ ["x", "return 'x';" ],
+ ["y", "return 'y';" ]
+ ]
+ };
+
+ var input = "x\nxy\n\n\nx";
+
+ var lexer = new RegExpLexer(dict, input);
+ assert.equal(lexer.yylineno, 0);
+ assert.equal(lexer.lex(), "x");
+ assert.equal(lexer.lex(), "x");
+ assert.equal(lexer.yylineno, 1);
+ assert.equal(lexer.lex(), "y");
+ assert.equal(lexer.yylineno, 1);
+ assert.equal(lexer.lex(), "x");
+ assert.equal(lexer.yylineno, 4);
+};
+
exports["test yylloc"] = function() {
    var dict = {
        rules: [
            ["\\s+", "/* skip whitespace */" ],
            ["x", "return 'x';" ],
            ["y", "return 'y';" ]
        ]
    };

    var lexer = new RegExpLexer(dict, "x\nxy\n\n\nx");

    // [token, first_line, last_line, first_column, last_column];
    // null line entries mean the original test only checked columns there.
    var expected = [
        ["x", null, null, 0, 1],
        ["x", 2, 2, 0, 1],
        ["y", 2, 2, 1, 2],
        ["x", 5, 5, 0, 1]
    ];
    expected.forEach(function(e) {
        assert.equal(lexer.lex(), e[0]);
        if (e[1] !== null) {
            assert.equal(lexer.yylloc.first_line, e[1]);
            assert.equal(lexer.yylloc.last_line, e[2]);
        }
        assert.equal(lexer.yylloc.first_column, e[3]);
        assert.equal(lexer.yylloc.last_column, e[4]);
    });
};
+
// Verifies more(): an action can keep accumulating matched text into yytext
// across successive matches until a terminating condition is met.
exports["test more()"] = function() {
    var dict = {
        rules: [
            ["x", "return 'X';" ],
            // Matches a quote plus everything up to (excluding) the next
            // quote. If the match ends in a backslash, that quote was
            // escaped, so more() glues the next match onto this yytext.
            // NOTE(review): yytext/yyleng appear as free variables because
            // the generator stringifies this function and re-scopes it
            // inside the lexer — presumably they resolve there; the
            // function body must not be renamed or restructured.
            ['"[^"]*', function(){
                if(yytext.charAt(yyleng-1) == '\\') {
                    this.more();
                } else {
                    yytext += this.input(); // swallow end quote
                    return "STRING";
                }
            } ],
            ["$", "return 'EOF';" ]
        ]
    };

    // One string literal containing an escaped quote, surrounded by x's.
    var input = 'x"fgjdrtj\\"sdfsdf"x';

    var lexer = new RegExpLexer(dict, input);
    assert.equal(lexer.lex(), "X");
    assert.equal(lexer.lex(), "STRING");
    assert.equal(lexer.lex(), "X");
    assert.equal(lexer.lex(), "EOF");
};
+
exports["test defined token returns"] = function() {
    // When a token map is supplied, symbolic token names returned by the
    // actions are translated to their numeric ids.
    var dict = {
        rules: [
            ["x", "return 'X';" ],
            ["y", "return 'Y';" ],
            ["$", "return 'EOF';" ]
        ]
    };
    var tokens = {"2":"X", "3":"Y", "4":"EOF"};

    var lexer = new RegExpLexer(dict, "xxyx", tokens);

    [2, 2, 3, 2, 4].forEach(function(id) {
        assert.equal(lexer.lex(), id);
    });
};
+
// Generates standalone lexer source via the static generate() and runs it.
exports["test module generator from constructor"] = function() {
    var dict = {
        rules: [
            ["x", "return 'X';" ],
            ["y", "return 'Y';" ],
            ["$", "return 'EOF';" ]
        ]
    };

    var input = "xxyx";

    var lexerSource = RegExpLexer.generate(dict);
    // The eval'd source introduces a `lexer` binding into this scope —
    // that is why the bare `lexer` below is never declared here.
    eval(lexerSource);
    lexer.setInput(input);

    assert.equal(lexer.lex(), "X");
    assert.equal(lexer.lex(), "X");
    assert.equal(lexer.lex(), "Y");
    assert.equal(lexer.lex(), "X");
    assert.equal(lexer.lex(), "EOF");
};
+
// Same as the previous test but uses the instance method generateModule().
exports["test module generator"] = function() {
    var dict = {
        rules: [
            ["x", "return 'X';" ],
            ["y", "return 'Y';" ],
            ["$", "return 'EOF';" ]
        ]
    };

    var input = "xxyx";

    var lexer_ = new RegExpLexer(dict);
    var lexerSource = lexer_.generateModule();
    // The eval'd module source defines `lexer` in this scope; `lexer_`
    // itself is only used to produce the source, never to lex.
    eval(lexerSource);
    lexer.setInput(input);

    assert.equal(lexer.lex(), "X");
    assert.equal(lexer.lex(), "X");
    assert.equal(lexer.lex(), "Y");
    assert.equal(lexer.lex(), "X");
    assert.equal(lexer.lex(), "EOF");
};
+
// Checks that generated module source round-trips a function-valued action
// (the generator must stringify and re-scope it correctly).
exports["test generator with more complex lexer"] = function() {
    var dict = {
        rules: [
            ["x", "return 'X';" ],
            // Accumulates an escaped-quote string via more(); yytext/yyleng
            // are free variables resolved when the generator re-scopes this
            // stringified function inside the lexer — do not rename them.
            ['"[^"]*', function(){
                if(yytext.charAt(yyleng-1) == '\\') {
                    this.more();
                } else {
                    yytext += this.input(); // swallow end quote
                    return "STRING";
                }
            } ],
            ["$", "return 'EOF';" ]
        ]
    };

    var input = 'x"fgjdrtj\\"sdfsdf"x';

    var lexer_ = new RegExpLexer(dict);
    var lexerSource = lexer_.generateModule();
    // eval'ing the generated source defines `lexer` in this scope.
    eval(lexerSource);
    lexer.setInput(input);

    assert.equal(lexer.lex(), "X");
    assert.equal(lexer.lex(), "STRING");
    assert.equal(lexer.lex(), "X");
    assert.equal(lexer.lex(), "EOF");
};
+
// Runs the CommonJS-flavored generated source against a fake exports object.
exports["test commonjs module generator"] = function() {
    var dict = {
        rules: [
            ["x", "return 'X';" ],
            ["y", "return 'Y';" ],
            ["$", "return 'EOF';" ]
        ]
    };

    var input = "xxyx";

    var lexer_ = new RegExpLexer(dict);
    var lexerSource = lexer_.generateCommonJSModule();
    // Deliberately shadows this module's `exports` so the eval'd CommonJS
    // source populates the local object (exports.lexer, exports.lex)
    // without touching the real test exports.
    var exports = {};
    eval(lexerSource);
    exports.lexer.setInput(input);

    assert.equal(exports.lex(), "X");
    assert.equal(exports.lex(), "X");
    assert.equal(exports.lex(), "Y");
    assert.equal(exports.lex(), "X");
    assert.equal(exports.lex(), "EOF");
};
+
// Runs the AMD-flavored generated source against a stub define().
exports["test amd module generator"] = function() {
    var dict = {
        rules: [
            ["x", "return 'X';" ],
            ["y", "return 'Y';" ],
            ["$", "return 'EOF';" ]
        ]
    };

    var input = "xxyx";

    var lexer_ = new RegExpLexer(dict);
    var lexerSource = lexer_.generateAMDModule();

    // Minimal AMD shim: the eval'd source calls define(deps, factory);
    // we capture the factory's return value as the lexer.
    var lexer;
    var define = function (_, fn) {
        lexer = fn();
    };

    eval(lexerSource);
    lexer.setInput(input);

    assert.equal(lexer.lex(), "X");
    assert.equal(lexer.lex(), "X");
    assert.equal(lexer.lex(), "Y");
    assert.equal(lexer.lex(), "X");
    assert.equal(lexer.lex(), "EOF");
};
+
// End-to-end smoke test: a realistic grammar (the "DJ" teaching language)
// with macros, lexes a whole program without errors. Only the token *types*
// are checked (every token must be a string name), not the token sequence.
exports["test DJ lexer"] = function() {
    var dict = {
    "lex": {
        "macros": {
            "digit": "[0-9]",
            "id": "[a-zA-Z][a-zA-Z0-9]*"
        },

        "rules": [
            ["\\/\\/.*", "/* ignore comment */"],
            ["main\\b", "return 'MAIN';"],
            ["class\\b", "return 'CLASS';"],
            ["extends\\b", "return 'EXTENDS';"],
            ["nat\\b", "return 'NATTYPE';"],
            ["if\\b", "return 'IF';"],
            ["else\\b", "return 'ELSE';"],
            ["for\\b", "return 'FOR';"],
            ["printNat\\b", "return 'PRINTNAT';"],
            ["readNat\\b", "return 'READNAT';"],
            ["this\\b", "return 'THIS';"],
            ["new\\b", "return 'NEW';"],
            ["var\\b", "return 'VAR';"],
            ["null\\b", "return 'NUL';"],
            ["{digit}+", "return 'NATLITERAL';"],
            ["{id}", "return 'ID';"],
            ["==", "return 'EQUALITY';"],
            ["=", "return 'ASSIGN';"],
            ["\\+", "return 'PLUS';"],
            ["-", "return 'MINUS';"],
            ["\\*", "return 'TIMES';"],
            [">", "return 'GREATER';"],
            ["\\|\\|", "return 'OR';"],
            ["!", "return 'NOT';"],
            ["\\.", "return 'DOT';"],
            ["\\{", "return 'LBRACE';"],
            ["\\}", "return 'RBRACE';"],
            ["\\(", "return 'LPAREN';"],
            ["\\)", "return 'RPAREN';"],
            [";", "return 'SEMICOLON';"],
            ["\\s+", "/* skip whitespace */"],
            [".", "print('Illegal character');throw 'Illegal character';"],
            ["$", "return 'ENDOFFILE';"]
        ]
    }
};

    // NOTE(review): "var nat value var nat value;" below looks like an
    // accidental duplication in the fixture — harmless here, since the test
    // only verifies that every token lexes to a string name.
    var input = "class Node extends Object { \
                      var nat value var nat value;\
                      var Node next;\
                      var nat index;\
                  }\
\
                  class List extends Object {\
                      var Node start;\
\
                      Node prepend(Node startNode) {\
                          startNode.next = start;\
                          start = startNode;\
                      }\
\
                      nat find(nat index) {\
                          var nat value;\
                          var Node node;\
\
                          for(node = start;!(node == null);node = node.next){\
                              if(node.index == index){\
                                  value = node.value;\
                              } else { 0; };\
                          };\
\
                          value;\
                      }\
                  }\
\
                  main {\
                      var nat index;\
                      var nat value;\
                      var List list;\
                      var Node startNode;\
\
                      index = readNat();\
                      list = new List;\
\
                      for(0;!(index==0);0){\
                          value = readNat();\
                          startNode = new Node;\
                          startNode.index = index;\
                          startNode.value = value;\
                          list.prepend(startNode);\
                          index = readNat();\
                      };\
\
                      index = readNat();\
\
                      for(0;!(index==0);0){\
                          printNat(list.find(index));\
                          index = readNat();\
                      };\
                  }";

    var lexer = new RegExpLexer(dict.lex);
    lexer.setInput(input);
    var tok;
    // Loop until the lexer's numeric end-of-input sentinel (1) appears —
    // presumably returned after the grammar's own 'ENDOFFILE' token; every
    // token before that must be a string name.
    while (tok = lexer.lex(), tok!==1) {
        assert.equal(typeof tok, "string");
    }
};
+
exports["test instantiation from string"] = function() {
    // The grammar may be supplied in lex-file format instead of JSON.
    var dict = "%%\n'x' {return 'X';}\n'y' {return 'Y';}\n<<EOF>> {return 'EOF';}";

    var lexer = new RegExpLexer(dict);
    lexer.setInput("x");

    ["X", "EOF"].forEach(function(expected) {
        assert.equal(lexer.lex(), expected);
    });
};
+
exports["test inclusive start conditions"] = function() {
    // An inclusive start condition (value 0) keeps unconditioned rules
    // active alongside the TEST-only rules.
    var dict = {
        startConditions: {
            "TEST": 0
        },
        rules: [
            ["enter-test", "this.begin('TEST');" ],
            [["TEST"], "x", "return 'T';" ],
            [["TEST"], "y", "this.begin('INITIAL'); return 'TY';" ],
            ["x", "return 'X';" ],
            ["y", "return 'Y';" ],
            ["$", "return 'EOF';" ]
        ]
    };

    var lexer = new RegExpLexer(dict);
    lexer.setInput("xenter-testxyy");

    ["X", "T", "TY", "Y", "EOF"].forEach(function(expected) {
        assert.equal(lexer.lex(), expected);
    });
};
+
exports["test exclusive start conditions"] = function() {
    // An exclusive start condition (value 1) disables unconditioned rules,
    // so everything after "//" is eaten until the newline leaves EAT.
    var dict = {
        startConditions: {
            "EAT": 1
        },
        rules: [
            ["\\/\\/", "this.begin('EAT');" ],
            [["EAT"], ".", "" ],
            [["EAT"], "\\n", "this.begin('INITIAL');" ],
            ["x", "return 'X';" ],
            ["y", "return 'Y';" ],
            ["$", "return 'EOF';" ]
        ]
    };

    var lexer = new RegExpLexer(dict);
    lexer.setInput("xy//yxteadh//ste\ny");

    ["X", "Y", "Y", "EOF"].forEach(function(expected) {
        assert.equal(lexer.lex(), expected);
    });
};
+
exports["test pop start condition stack"] = function() {
    // Same scenario as the exclusive-condition test, but the EAT state is
    // left via popState() rather than begin('INITIAL').
    var dict = {
        startConditions: {
            "EAT": 1
        },
        rules: [
            ["\\/\\/", "this.begin('EAT');" ],
            [["EAT"], ".", "" ],
            [["EAT"], "\\n", "this.popState();" ],
            ["x", "return 'X';" ],
            ["y", "return 'Y';" ],
            ["$", "return 'EOF';" ]
        ]
    };

    var lexer = new RegExpLexer(dict);
    lexer.setInput("xy//yxteadh//ste\ny");

    ["X", "Y", "Y", "EOF"].forEach(function(expected) {
        assert.equal(lexer.lex(), expected);
    });
};
+
+
exports["test star start condition"] = function() {
    // A rule conditioned on "*" applies in every start condition, so the
    // EOF rule fires even while the lexer is still inside EAT.
    var dict = {
        startConditions: {
            "EAT": 1
        },
        rules: [
            ["\\/\\/", "this.begin('EAT');" ],
            [["EAT"], ".", "" ],
            ["x", "return 'X';" ],
            ["y", "return 'Y';" ],
            [["*"],"$", "return 'EOF';" ]
        ]
    };

    var lexer = new RegExpLexer(dict);
    lexer.setInput("xy//yxteadh//stey");

    ["X", "Y", "EOF"].forEach(function(expected) {
        assert.equal(lexer.lex(), expected);
    });
};
+
exports["test start condition constants"] = function() {
    // Actions can read the current start condition via the YYSTATE /
    // YY_START constants provided by the generated lexer.
    var dict = {
        startConditions: {
            "EAT": 1
        },
        rules: [
            ["\\/\\/", "this.begin('EAT');" ],
            [["EAT"], ".", "if (YYSTATE==='EAT') return 'E';" ],
            ["x", "if (YY_START==='INITIAL') return 'X';" ],
            ["y", "return 'Y';" ],
            [["*"],"$", "return 'EOF';" ]
        ]
    };

    var lexer = new RegExpLexer(dict);
    lexer.setInput("xy//y");

    ["X", "Y", "E", "EOF"].forEach(function(expected) {
        assert.equal(lexer.lex(), expected);
    });
};
+
exports["test unicode encoding"] = function() {
    // \uXXXX escapes in rule patterns must match the corresponding
    // characters in the input.
    var dict = {
        rules: [
            ["\\u2713", "return 'CHECK';" ],
            ["\\u03c0", "return 'PI';" ],
            ["y", "return 'Y';" ]
        ]
    };

    var lexer = new RegExpLexer(dict);
    lexer.setInput("\u2713\u03c0y");

    ["CHECK", "PI", "Y"].forEach(function(expected) {
        assert.equal(lexer.lex(), expected);
    });
};
+
exports["test unicode"] = function() {
    // Literal (unescaped) non-ASCII characters in patterns must work too.
    var dict = {
        rules: [
            ["π", "return 'PI';" ],
            ["y", "return 'Y';" ]
        ]
    };

    var lexer = new RegExpLexer(dict);
    lexer.setInput("πy");

    ["PI", "Y"].forEach(function(expected) {
        assert.equal(lexer.lex(), expected);
    });
};
+
exports["test longest match returns"] = function() {
    // With the flex option the longest match wins regardless of rule
    // order, so "cat" beats "." even though "." is listed first.
    var dict = {
        rules: [
            [".", "return 'DOT';" ],
            ["cat", "return 'CAT';" ]
        ],
        options: {flex: true}
    };

    var lexer = new RegExpLexer(dict);
    lexer.setInput("cat!");

    ["CAT", "DOT"].forEach(function(expected) {
        assert.equal(lexer.lex(), expected);
    });
};
+
exports["test case insensitivity"] = function() {
    // The 'case-insensitive' option makes the pattern match "Cat".
    var dict = {
        rules: [
            ["cat", "return 'CAT';" ]
        ],
        options: {'case-insensitive': true}
    };

    var lexer = new RegExpLexer(dict);
    lexer.setInput("Cat");

    assert.equal(lexer.lex(), "CAT");
};
+
exports["test less"] = function() {
    // less(2) keeps only the first two characters of the match and pushes
    // the rest ("t") back onto the input for the next lex() call.
    var dict = {
        rules: [
            ["cat", "this.less(2); return 'CAT';" ],
            ["t", "return 'T';" ]
        ]
    };

    var lexer = new RegExpLexer(dict);
    lexer.setInput("cat");

    ["CAT", "T"].forEach(function(expected) {
        assert.equal(lexer.lex(), expected);
    });
};
+
exports["test EOF unput"] = function() {
    // An action may unput() text while at end of input; the pushed-back
    // "X" is then lexed by the UN-state rule before the final EOF.
    var dict = {
        startConditions: {
            "UN": 1
        },
        rules: [
            ["U", "this.begin('UN');return 'U';" ],
            [["UN"],"$", "this.unput('X')" ],
            [["UN"],"X", "this.popState();return 'X';" ],
            ["$", "return 'EOF'" ]
        ]
    };

    var lexer = new RegExpLexer(dict);
    lexer.setInput("U");

    ["U", "X", "EOF"].forEach(function(expected) {
        assert.equal(lexer.lex(), expected);
    });
};
+
exports["test flex mode default rule"] = function() {
    // In flex mode, unmatched characters (the "y") are consumed by the
    // implicit default rule instead of raising an error.
    var dict = {
        rules: [
            ["x", "return 'X';" ]
        ],
        options: {flex: true}
    };

    var lexer = new RegExpLexer(dict);
    lexer.setInput("xyx");

    ["X", "X"].forEach(function(expected) {
        assert.equal(lexer.lex(), expected);
    });
};
+
exports["test pipe precedence"] = function() {
    // The alternation in "x|y" must not swallow the surrounding rule
    // structure: both "x" and "y" yield X_Y, anything else yields N.
    var dict = {
        rules: [
            ["x|y", "return 'X_Y';" ],
            [".", "return 'N';"]
        ]
    };

    var lexer = new RegExpLexer(dict);
    lexer.setInput("xny");

    ["X_Y", "N", "X_Y"].forEach(function(expected) {
        assert.equal(lexer.lex(), expected);
    });
};
+
exports["test ranges"] = function() {
    // With options.ranges, yylloc additionally carries a [start, end)
    // character-offset pair for the match.
    var dict = {
        rules: [
            ["x+", "return 'X';" ],
            [".", "return 'N';"]
        ],
        options: {ranges: true}
    };

    var lexer = new RegExpLexer(dict);
    lexer.setInput("xxxyy");

    assert.equal(lexer.lex(), "X");
    assert.deepEqual(lexer.yylloc.range, [0, 3]);
};
+
// Verifies that unput() rewinds yylloc (lines, columns and ranges)
// correctly, including when the pushed-back text spans a newline.
exports["test unput location"] = function() {
    var dict = {
        rules: [
            ["x+", "return 'X';" ],
            ["y\\n", "this.unput('\\n'); return 'Y';" ],
            ["\\ny", "this.unput('y'); return 'BR';" ],
            ["y", "return 'Y';" ],
            [".", "return 'N';"]
        ],
        options: {ranges: true}
    };
    var input = "xxxy\ny";

    var lexer = new RegExpLexer(dict);
    lexer.setInput(input);
    // Fixed: removed stray `console.log(lexer.rules)` debug output left
    // over from development; it polluted the test runner's output.

    assert.equal(lexer.next(), "X");
    assert.deepEqual(lexer.yylloc, {first_line: 1,
                                    first_column: 0,
                                    last_line: 1,
                                    last_column: 3,
                                    range: [0, 3]});
    assert.equal(lexer.next(), "Y");
    assert.deepEqual(lexer.yylloc, {first_line: 1,
                                    first_column: 3,
                                    last_line: 1,
                                    last_column: 4,
                                    range: [3, 4]});
    // The "\ny" match unputs "y", so BR covers only the newline.
    assert.equal(lexer.next(), "BR");
    assert.deepEqual(lexer.yylloc, {first_line: 1,
                                    first_column: 4,
                                    last_line: 2,
                                    last_column: 0,
                                    range: [4, 5]});
    assert.equal(lexer.next(), "Y");
    assert.deepEqual(lexer.yylloc, {first_line: 2,
                                    first_column: 0,
                                    last_line: 2,
                                    last_column: 1,
                                    range: [5, 6]});
};
+
// Same as "test unput location" but the unput text is carved out of a
// multi-line match ("y\ny\n"), exercising multi-line location rewind.
exports["test unput location again"] = function() {
    var dict = {
        rules: [
            ["x+", "return 'X';" ],
            ["y\\ny\\n", "this.unput('\\n'); return 'YY';" ],
            ["\\ny", "this.unput('y'); return 'BR';" ],
            ["y", "return 'Y';" ],
            [".", "return 'N';"]
        ],
        options: {ranges: true}
    };
    var input = "xxxy\ny\ny";

    var lexer = new RegExpLexer(dict);
    lexer.setInput(input);
    // Fixed: removed stray `console.log(lexer.rules)` debug output left
    // over from development; it polluted the test runner's output.

    assert.equal(lexer.next(), "X");
    assert.deepEqual(lexer.yylloc, {first_line: 1,
                                    first_column: 0,
                                    last_line: 1,
                                    last_column: 3,
                                    range: [0, 3]});
    // "y\ny\n" minus the unput trailing "\n" leaves YY spanning two lines.
    assert.equal(lexer.next(), "YY");
    assert.deepEqual(lexer.yylloc, {first_line: 1,
                                    first_column: 3,
                                    last_line: 2,
                                    last_column: 1,
                                    range: [3, 6]});
    assert.equal(lexer.next(), "BR");
    assert.deepEqual(lexer.yylloc, {first_line: 2,
                                    first_column: 1,
                                    last_line: 3,
                                    last_column: 0,
                                    range: [6, 7]});
    assert.equal(lexer.next(), "Y");
    assert.deepEqual(lexer.yylloc, {first_line: 3,
                                    first_column: 0,
                                    last_line: 3,
                                    last_column: 1,
                                    range: [7, 8]});
};
+
exports["test backtracking lexer reject() method"] = function() {
    // With backtrack_lexer enabled, reject() skips the current rule and
    // retries the match with the remaining rules ("A5" -> WORD, NUM).
    var dict = {
        rules: [
            ["[A-Z]+([0-9]+)", "if (this.matches[1].length) this.reject(); else return 'ID';" ],
            ["[A-Z]+", "return 'WORD';" ],
            ["[0-9]+", "return 'NUM';" ]
        ],
        options: {backtrack_lexer: true}
    };

    var lexer = new RegExpLexer(dict);
    lexer.setInput("A5");

    ["WORD", "NUM"].forEach(function(expected) {
        assert.equal(lexer.lex(), expected);
    });
};
+
exports["test lexer reject() exception when not in backtracking mode"] = function() {
    // Calling reject() without backtrack_lexer must raise a descriptive
    // Error rather than silently misbehaving.
    var dict = {
        rules: [
            ["[A-Z]+([0-9]+)", "if (this.matches[1].length) this.reject(); else return 'ID';" ],
            ["[A-Z]+", "return 'WORD';" ],
            ["[0-9]+", "return 'NUM';" ]
        ],
        options: {backtrack_lexer: false}
    };

    var lexer = new RegExpLexer(dict);
    lexer.setInput("A5");

    var lexOnce = function() {
        lexer.lex();
    };
    var isRejectError = function(err) {
        return (err instanceof Error) && /You can only invoke reject/.test(err);
    };
    assert.throws(lexOnce, isRejectError);
};
+
exports["test yytext state after unput"] = function() {
    // After unput('4') from the "cat4" match, yytext must shrink to "cat"
    // and the pushed-back "4" must lex as its own token.
    var dict = {
        rules: [
            ["cat4", "this.unput('4'); return 'CAT';" ],
            ["4", "return 'NUMBER';" ],
            ["$", "return 'EOF';"]
        ]
    };

    var lexer = new RegExpLexer(dict);
    lexer.setInput("cat4");

    assert.equal(lexer.lex(), "CAT");
    assert.equal(lexer.yytext, "cat");
    assert.equal(lexer.lex(), "NUMBER");
    assert.equal(lexer.lex(), "EOF");
};
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/node-jison-lex.git
More information about the Pkg-javascript-commits
mailing list