[Pkg-javascript-commits] [node-jison-lex] 01/07: Import Upstream version 0.3.4

Sruthi Chandran srud-guest at moszumanska.debian.org
Sat Oct 15 05:31:01 UTC 2016


This is an automated email from the git hooks/post-receive script.

srud-guest pushed a commit to branch master
in repository node-jison-lex.

commit 6dbaf52160a14f19b061d7fb7be30ab6971c9497
Author: Sruthi <srud at disroot.org>
Date:   Sat Oct 15 10:37:56 2016 +0530

    Import Upstream version 0.3.4
---
 .gitignore           |    6 +
 README.md            |   47 +++
 cli.js               |   86 +++++
 examples/lex.l       |   90 +++++
 package.json         |   42 +++
 regexp-lexer.js      |  603 +++++++++++++++++++++++++++++
 tests/all-tests.js   |    4 +
 tests/regexplexer.js | 1021 ++++++++++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 1899 insertions(+)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b1a6b49
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+node_modules/
+
+# Editor bak files
+*~
+*.bak
+*.orig
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3d93789
--- /dev/null
+++ b/README.md
@@ -0,0 +1,47 @@
+# jison-lex
+A lexical analyzer generator used by [jison](http://jison.org). It takes a lexical grammar definition (either in JSON or Bison's lexical grammar format) and outputs a JavaScript lexer.
+
+## install
+npm install jison-lex -g
+
+## usage
+```
+Usage: jison-lex [file] [options]
+
+file     file containing a lexical grammar
+
+Options:
+   -o FILE, --outfile FILE       Filename and base module name of the generated parser
+   -t TYPE, --module-type TYPE   The type of module to generate (commonjs, js)
+   --version                     print version and exit
+```
+
+## programmatic usage
+
+```
+var JisonLex = require('jison-lex');
+
+var grammar = {
+  rules: [
+    ["x", "return 'X';" ],
+    ["y", "return 'Y';" ],
+    ["$", "return 'EOF';" ]
+  ]
+};
+
+// or load from a file
+// var grammar = fs.readFileSync('mylexer.l', 'utf8');
+
+// generate source
+var lexerSource = JisonLex.generate(grammar);
+
+// or create a parser in memory
+var lexer = new JisonLex(grammar);
+lexer.setInput('xyxxy');
+lexer.lex();
+// => 'X'
+lexer.lex();
+// => 'Y'
+```
+## license
+MIT
diff --git a/cli.js b/cli.js
new file mode 100755
index 0000000..8ca2894
--- /dev/null
+++ b/cli.js
@@ -0,0 +1,86 @@
+#!/usr/bin/env node
+
+var version = require('./package.json').version;
+
+var path = require('path');
+var fs = require('fs');
+var lexParser = require('lex-parser');
+var RegExpLexer = require('./regexp-lexer.js');
+
+
+var opts = require("nomnom")
+  .script('jison-lex')
+  .option('file', {
+    flag: true,
+    position: 0,
+    help: 'file containing a lexical grammar'
+  })
+  .option('outfile', {
+    abbr: 'o',
+    metavar: 'FILE',
+    help: 'Filename and base module name of the generated parser'
+  })
+  .option('module-type', {
+    abbr: 't',
+    default: 'commonjs',
+    metavar: 'TYPE',
+    help: 'The type of module to generate (commonjs, js)'
+  })
+  .option('version', {
+    abbr: 'V',
+    flag: true,
+    help: 'print version and exit',
+    callback: function() {
+       return version;
+    }
+  });
+
+exports.main = function (opts) {
+    if (opts.file) {
+        var raw = fs.readFileSync(path.normalize(opts.file), 'utf8'),
+            name = path.basename((opts.outfile||opts.file)).replace(/\..*$/g,'');
+
+        fs.writeFileSync(opts.outfile||(name + '.js'), processGrammar(raw, name));
+    } else {
+        readin(function (raw) {
+            console.log(processGrammar(raw));
+        });
+    }
+};
+
+function processGrammar (file, name) {
+    var grammar;
+    try {
+        grammar = lexParser.parse(file);
+    } catch (e) {
+        try {
+            grammar = JSON.parse(file);
+        } catch (e2) {
+            throw e;
+        }
+    }
+
+    var settings = grammar.options || {};
+    if (!settings.moduleType) settings.moduleType = opts['module-type'];
+    if (!settings.moduleName && name) settings.moduleName = name.replace(/-\w/g, function (match){ return match.charAt(1).toUpperCase(); });
+
+    grammar.options = settings;
+
+    return RegExpLexer.generate(grammar);
+}
+
+function readin (cb) {
+    var stdin = process.openStdin(),
+        data = '';
+
+    stdin.setEncoding('utf8');
+    stdin.addListener('data', function (chunk) {
+        data += chunk;
+    });
+    stdin.addListener('end', function () {
+        cb(data);
+    });
+}
+
+if (require.main === module)
+    exports.main(opts.parse());
diff --git a/examples/lex.l b/examples/lex.l
new file mode 100644
index 0000000..515984d
--- /dev/null
+++ b/examples/lex.l
@@ -0,0 +1,90 @@
+
+NAME              [a-zA-Z_][a-zA-Z0-9_-]*
+BR                \r\n|\n|\r
+
+%s indented trail rules
+%x code start_condition options conditions action
+
+%%
+
+<action>"/*"(.|\n|\r)*?"*/"           return 'ACTION_BODY';
+<action>"//".*                        return 'ACTION_BODY';
+<action>"/"[^ /]*?['"{}'][^ ]*?"/"    return 'ACTION_BODY'; // regexp with braces or quotes (and no spaces)
+<action>\"("\\\\"|'\"'|[^"])*\"       return 'ACTION_BODY';
+<action>"'"("\\\\"|"\'"|[^'])*"'"     return 'ACTION_BODY';
+<action>[/"'][^{}/"']+                return 'ACTION_BODY';
+<action>[^{}/"']+                     return 'ACTION_BODY';
+<action>"{"                           yy.depth++; return '{'
+<action>"}"                           yy.depth == 0 ? this.begin('trail') : yy.depth--; return '}'
+
+<conditions>{NAME}                    return 'NAME';
+<conditions>">"                       this.popState(); return '>';
+<conditions>","                       return ',';
+<conditions>"*"                       return '*';
+
+<rules>{BR}+                          /* */
+<rules>\s+{BR}+                       /* */
+<rules>\s+                            this.begin('indented')
+<rules>"%%"                           this.begin('code'); return '%%'
+<rules>[a-zA-Z0-9_]+                  return 'CHARACTER_LIT'
+
+<options>{NAME}                       yy.options[yytext] = true
+<options>{BR}+                        this.begin('INITIAL')
+<options>\s+{BR}+                     this.begin('INITIAL')
+<options>\s+                          /* empty */
+
+<start_condition>{NAME}               return 'START_COND'
+<start_condition>{BR}+                this.begin('INITIAL')
+<start_condition>\s+{BR}+             this.begin('INITIAL')
+<start_condition>\s+                  /* empty */
+
+<trail>.*{BR}+                        this.begin('rules')
+
+<indented>"{"                         yy.depth = 0; this.begin('action'); return '{'
+<indented>"%{"(.|{BR})*?"%}"          this.begin('trail'); yytext = yytext.substr(2, yytext.length-4);return 'ACTION'
+"%{"(.|{BR})*?"%}"                    yytext = yytext.substr(2, yytext.length-4); return 'ACTION'
+<indented>.+                          this.begin('rules'); return 'ACTION'
+
+"/*"(.|\n|\r)*?"*/"             /* ignore */
+"//".*                          /* ignore */
+
+{BR}+                           /* */
+\s+                             /* */
+{NAME}                          return 'NAME';
+\"("\\\\"|'\"'|[^"])*\"         yytext = yytext.replace(/\\"/g,'"'); return 'STRING_LIT';
+"'"("\\\\"|"\'"|[^'])*"'"       yytext = yytext.replace(/\\'/g,"'"); return 'STRING_LIT';
+"|"                             return '|';
+"["("\\\\"|"\]"|[^\]])*"]"      return 'ANY_GROUP_REGEX';
+"(?:"                           return 'SPECIAL_GROUP';
+"(?="                           return 'SPECIAL_GROUP';
+"(?!"                           return 'SPECIAL_GROUP';
+"("                             return '(';
+")"                             return ')';
+"+"                             return '+';
+"*"                             return '*';
+"?"                             return '?';
+"^"                             return '^';
+","                             return ',';
+"<<EOF>>"                       return '$';
+"<"                             this.begin('conditions'); return '<';
+"/!"                            return '/!';
+"/"                             return '/';
+"\\"([0-7]{1,3}|[rfntvsSbBwWdD\\*+()${}|[\]\/.^?]|"c"[A-Z]|"x"[0-9A-F]{2}|"u"[a-fA-F0-9]{4})      return 'ESCAPE_CHAR';
+"\\".                           yytext = yytext.replace(/^\\/g,''); return 'ESCAPE_CHAR';
+"$"                             return '$';
+"."                             return '.';
+"%options"                      yy.options = {}; this.begin('options');
+"%s"                            this.begin('start_condition'); return 'START_INC';
+"%x"                            this.begin('start_condition'); return 'START_EXC';
+"%%"                            this.begin('rules'); return '%%';
+"{"\d+(","\s?\d+|",")?"}"       return 'RANGE_REGEX';
+"{"{NAME}"}"                    return 'NAME_BRACE';
+"{"                             return '{';
+"}"                             return '}';
+.                               /* ignore bad characters */
+<*><<EOF>>                      return 'EOF';
+
+<code>(.|{BR})+                 return 'CODE';
+
+%%
+
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..dea1800
--- /dev/null
+++ b/package.json
@@ -0,0 +1,42 @@
+{
+  "author": "Zach Carter <zach at carter.name> (http://zaa.ch)",
+  "name": "jison-lex",
+  "description": "lexical analyzer generator used by jison",
+  "version": "0.3.4",
+  "keywords": [
+    "jison",
+    "parser",
+    "generator",
+    "lexer",
+    "flex",
+    "tokenizer"
+  ],
+  "repository": {
+    "type": "git",
+    "url": "git://github.com/zaach/jison-lex.git"
+  },
+  "bugs": {
+    "email": "jison at librelist.com",
+    "url": "http://github.com/zaach/jison-lex/issues"
+  },
+  "main": "regexp-lexer",
+  "bin": "cli.js",
+  "engines": {
+    "node": ">=0.4"
+  },
+  "dependencies": {
+    "lex-parser": "0.1.x",
+    "nomnom": "1.5.2"
+  },
+  "devDependencies": {
+    "test": "0.4.4"
+  },
+  "scripts": {
+    "test": "node tests/all-tests.js"
+  },
+  "directories": {
+    "lib": "lib",
+    "tests": "tests"
+  },
+  "homepage": "http://jison.org"
+}
diff --git a/regexp-lexer.js b/regexp-lexer.js
new file mode 100644
index 0000000..cc68c65
--- /dev/null
+++ b/regexp-lexer.js
@@ -0,0 +1,603 @@
+// Basic Lexer implemented using JavaScript regular expressions
+// MIT Licensed
+
+"use strict";
+
+var lexParser = require('lex-parser');
+var version = require('./package.json').version;
+
+// expand macros and convert matchers to RegExp's
+function prepareRules(rules, macros, actions, tokens, startConditions, caseless) {
+    var m,i,k,action,conditions,
+        newRules = [];
+
+    if (macros) {
+        macros = prepareMacros(macros);
+    }
+
+    function tokenNumberReplacement (str, token) {
+        return "return " + (tokens[token] || "'" + token + "'");
+    }
+
+    actions.push('switch($avoiding_name_collisions) {');
+
+    for (i=0;i < rules.length; i++) {
+        if (Object.prototype.toString.apply(rules[i][0]) !== '[object Array]') {
+            // implicit add to all inclusive start conditions
+            for (k in startConditions) {
+                if (startConditions[k].inclusive) {
+                    startConditions[k].rules.push(i);
+                }
+            }
+        } else if (rules[i][0][0] === '*') {
+            // Add to ALL start conditions
+            for (k in startConditions) {
+                startConditions[k].rules.push(i);
+            }
+            rules[i].shift();
+        } else {
+            // Add to explicit start conditions
+            conditions = rules[i].shift();
+            for (k=0;k<conditions.length;k++) {
+                startConditions[conditions[k]].rules.push(i);
+            }
+        }
+
+        m = rules[i][0];
+        if (typeof m === 'string') {
+            for (k in macros) {
+                if (macros.hasOwnProperty(k)) {
+                    m = m.split("{" + k + "}").join('(' + macros[k] + ')');
+                }
+            }
+            m = new RegExp("^(?:" + m + ")", caseless ? 'i':'');
+        }
+        newRules.push(m);
+        if (typeof rules[i][1] === 'function') {
+            rules[i][1] = String(rules[i][1]).replace(/^\s*function \(\)\s?\{/, '').replace(/\}\s*$/, '');
+        }
+        action = rules[i][1];
+        if (tokens && action.match(/return '[^']+'/)) {
+            action = action.replace(/return '([^']+)'/g, tokenNumberReplacement);
+        }
+        actions.push('case ' + i + ':' + action + '\nbreak;');
+    }
+    actions.push("}");
+
+    return newRules;
+}
+
+// expand macros within macros
+function prepareMacros (macros) {
+    var cont = true,
+        m,i,k,mnew;
+    while (cont) {
+        cont = false;
+        for (i in macros) if (macros.hasOwnProperty(i)) {
+            m = macros[i];
+            for (k in macros) if (macros.hasOwnProperty(k) && i !== k) {
+                mnew = m.split("{" + k + "}").join('(' + macros[k] + ')');
+                if (mnew !== m) {
+                    cont = true;
+                    macros[i] = mnew;
+                }
+            }
+        }
+    }
+    return macros;
+}
+
+function prepareStartConditions (conditions) {
+    var sc,
+        hash = {};
+    for (sc in conditions) if (conditions.hasOwnProperty(sc)) {
+        hash[sc] = {rules:[],inclusive:!!!conditions[sc]};
+    }
+    return hash;
+}
+
+function buildActions (dict, tokens) {
+    var actions = [dict.actionInclude || '', "var YYSTATE=YY_START;"];
+    var tok;
+    var toks = {};
+
+    for (tok in tokens) {
+        toks[tokens[tok]] = tok;
+    }
+
+    if (dict.options && dict.options.flex) {
+        dict.rules.push([".", "console.log(yytext);"]);
+    }
+
+    this.rules = prepareRules(dict.rules, dict.macros, actions, tokens && toks, this.conditions, this.options["case-insensitive"]);
+    var fun = actions.join("\n");
+    "yytext yyleng yylineno yylloc".split(' ').forEach(function (yy) {
+        fun = fun.replace(new RegExp("\\b(" + yy + ")\\b", "g"), "yy_.$1");
+    });
+
+    return "function anonymous(yy,yy_,$avoiding_name_collisions,YY_START) {" + fun + "\n}";
+}
+
+function RegExpLexer (dict, input, tokens) {
+    var opts = processGrammar(dict, tokens);
+    var source = generateModuleBody(opts);
+    var lexer = eval(source);
+
+    lexer.yy = {};
+    if (input) {
+        lexer.setInput(input);
+    }
+
+    lexer.generate = function () { return generateFromOpts(opts); };
+    lexer.generateModule = function () { return generateModule(opts); };
+    lexer.generateCommonJSModule = function () { return generateCommonJSModule(opts); };
+    lexer.generateAMDModule = function () { return generateAMDModule(opts); };
+
+    return lexer;
+}
+
+RegExpLexer.prototype = {
+    EOF: 1,
+    parseError: function parseError(str, hash) {
+        if (this.yy.parser) {
+            this.yy.parser.parseError(str, hash);
+        } else {
+            throw new Error(str);
+        }
+    },
+
+    // resets the lexer, sets new input
+    setInput: function (input, yy) {
+        this.yy = yy || this.yy || {};
+        this._input = input;
+        this._more = this._backtrack = this.done = false;
+        this.yylineno = this.yyleng = 0;
+        this.yytext = this.matched = this.match = '';
+        this.conditionStack = ['INITIAL'];
+        this.yylloc = {
+            first_line: 1,
+            first_column: 0,
+            last_line: 1,
+            last_column: 0
+        };
+        if (this.options.ranges) {
+            this.yylloc.range = [0,0];
+        }
+        this.offset = 0;
+        return this;
+    },
+
+    // consumes and returns one char from the input
+    input: function () {
+        var ch = this._input[0];
+        this.yytext += ch;
+        this.yyleng++;
+        this.offset++;
+        this.match += ch;
+        this.matched += ch;
+        var lines = ch.match(/(?:\r\n?|\n).*/g);
+        if (lines) {
+            this.yylineno++;
+            this.yylloc.last_line++;
+        } else {
+            this.yylloc.last_column++;
+        }
+        if (this.options.ranges) {
+            this.yylloc.range[1]++;
+        }
+
+        this._input = this._input.slice(1);
+        return ch;
+    },
+
+    // unshifts one char (or a string) into the input, trimming yytext/match/matched by the same length; returns this
+    unput: function (ch) {
+        var len = ch.length;
+        var lines = ch.split(/(?:\r\n?|\n)/g);
+
+        this._input = ch + this._input;
+        this.yytext = this.yytext.substr(0, this.yytext.length - len);
+        // yyleng is refreshed from yytext at the end; the range update below still needs the old value
+        this.offset -= len;
+        var oldLines = this.match.split(/(?:\r\n?|\n)/g);
+        this.match = this.match.substr(0, this.match.length - len);
+        this.matched = this.matched.substr(0, this.matched.length - len);
+
+        if (lines.length - 1) {
+            this.yylineno -= lines.length - 1;
+        }
+        var r = this.yylloc.range;
+
+        this.yylloc = {
+            first_line: this.yylloc.first_line,
+            last_line: this.yylineno + 1,
+            first_column: this.yylloc.first_column,
+            last_column: lines ?
+                (lines.length === oldLines.length ? this.yylloc.first_column : 0)
+                 + oldLines[oldLines.length - lines.length].length - lines[0].length :
+              this.yylloc.first_column - len
+        };
+
+        if (this.options.ranges) {
+            this.yylloc.range = [r[0], r[0] + this.yyleng - len];
+        }
+        this.yyleng = this.yytext.length;
+        return this;
+    },
+
+    // When called from action, caches matched text and appends it on next action
+    more: function () {
+        this._more = true;
+        return this;
+    },
+
+    // When called from action, signals the lexer that this rule fails to match the input, so the next matching rule (regex) should be tested instead.
+    reject: function () {
+        if (this.options.backtrack_lexer) {
+            this._backtrack = true;
+        } else {
+            return this.parseError('Lexical error on line ' + (this.yylineno + 1) + '. You can only invoke reject() in the lexer when the lexer is of the backtracking persuasion (options.backtrack_lexer = true).\n' + this.showPosition(), {
+                text: "",
+                token: null,
+                line: this.yylineno
+            });
+
+        }
+        return this;
+    },
+
+    // retain first n characters of the match
+    less: function (n) {
+        this.unput(this.match.slice(n));
+    },
+
+    // displays already matched input, i.e. for error messages
+    pastInput: function () {
+        var past = this.matched.substr(0, this.matched.length - this.match.length);
+        return (past.length > 20 ? '...':'') + past.substr(-20).replace(/\n/g, "");
+    },
+
+    // displays upcoming input, i.e. for error messages
+    upcomingInput: function () {
+        var next = this.match;
+        if (next.length < 20) {
+            next += this._input.substr(0, 20-next.length);
+        }
+        return (next.substr(0,20) + (next.length > 20 ? '...' : '')).replace(/\n/g, "");
+    },
+
+    // displays the character position where the lexing error occurred, i.e. for error messages
+    showPosition: function () {
+        var pre = this.pastInput();
+        var c = new Array(pre.length + 1).join("-");
+        return pre + this.upcomingInput() + "\n" + c + "^";
+    },
+
+    // test the lexed token: return FALSE when not a match, otherwise return token
+    test_match: function(match, indexed_rule) {
+        var token,
+            lines,
+            backup;
+
+        if (this.options.backtrack_lexer) {
+            // save context so it can be restored below when the rule action calls reject()
+            backup = {
+                yylineno: this.yylineno,
+                yylloc: {
+                    first_line: this.yylloc.first_line,
+                    last_line: this.yylloc.last_line,
+                    first_column: this.yylloc.first_column,
+                    last_column: this.yylloc.last_column
+                },
+                yytext: this.yytext,
+                match: this.match,
+                matches: this.matches,
+                matched: this.matched,
+                yyleng: this.yyleng,
+                offset: this.offset,
+                _more: this._more,
+                _input: this._input,
+                yy: this.yy,
+                conditionStack: this.conditionStack.slice(0),
+                done: this.done
+            };
+            if (this.options.ranges) {
+                backup.yylloc.range = this.yylloc.range.slice(0);
+            }
+        }
+
+        lines = match[0].match(/(?:\r\n?|\n).*/g);
+        if (lines) {
+            this.yylineno += lines.length;
+        }
+        this.yylloc = {
+            first_line: this.yylloc.last_line,
+            last_line: this.yylineno + 1,
+            first_column: this.yylloc.last_column,
+            last_column: lines ?
+                         lines[lines.length - 1].length - lines[lines.length - 1].match(/\r?\n?/)[0].length :
+                         this.yylloc.last_column + match[0].length
+        };
+        this.yytext += match[0];
+        this.match += match[0];
+        this.matches = match;
+        this.yyleng = this.yytext.length;
+        if (this.options.ranges) {
+            this.yylloc.range = [this.offset, this.offset += this.yyleng];
+        }
+        this._more = false;
+        this._backtrack = false;
+        this._input = this._input.slice(match[0].length);
+        this.matched += match[0];
+        token = this.performAction.call(this, this.yy, this, indexed_rule, this.conditionStack[this.conditionStack.length - 1]);
+        if (this.done && this._input) {
+            this.done = false;
+        }
+        if (token) {
+            return token;
+        } else if (this._backtrack) {
+            // recover context
+            for (var k in backup) {
+                this[k] = backup[k];
+            }
+            return false; // rule action called reject() implying the next rule should be tested instead.
+        }
+        return false;
+    },
+
+    // return next match in input
+    next: function () {
+        if (this.done) {
+            return this.EOF;
+        }
+        if (!this._input) {
+            this.done = true;
+        }
+
+        var token,
+            match,
+            tempMatch,
+            index;
+        if (!this._more) {
+            this.yytext = '';
+            this.match = '';
+        }
+        var rules = this._currentRules();
+        for (var i = 0; i < rules.length; i++) {
+            tempMatch = this._input.match(this.rules[rules[i]]);
+            if (tempMatch && (!match || tempMatch[0].length > match[0].length)) {
+                match = tempMatch;
+                index = i;
+                if (this.options.backtrack_lexer) {
+                    token = this.test_match(tempMatch, rules[i]);
+                    if (token !== false) {
+                        return token;
+                    } else if (this._backtrack) {
+                        match = false;
+                        continue; // rule action called reject() implying a rule MISmatch.
+                    } else {
+                        // else: this is a lexer rule which consumes input without producing a token (e.g. whitespace)
+                        return false;
+                    }
+                } else if (!this.options.flex) {
+                    break;
+                }
+            }
+        }
+        if (match) {
+            token = this.test_match(match, rules[index]);
+            if (token !== false) {
+                return token;
+            }
+            // else: this is a lexer rule which consumes input without producing a token (e.g. whitespace)
+            return false;
+        }
+        if (this._input === "") {
+            return this.EOF;
+        } else {
+            return this.parseError('Lexical error on line ' + (this.yylineno + 1) + '. Unrecognized text.\n' + this.showPosition(), {
+                text: "",
+                token: null,
+                line: this.yylineno
+            });
+        }
+    },
+
+    // return next match that has a token
+    lex: function lex () {
+        var r = this.next();
+        if (r) {
+            return r;
+        } else {
+            return this.lex();
+        }
+    },
+
+    // activates a new lexer condition state (pushes the new lexer condition state onto the condition stack)
+    begin: function begin (condition) {
+        this.conditionStack.push(condition);
+    },
+
+    // pop the previously active lexer condition state off the condition stack
+    popState: function popState () {
+        var n = this.conditionStack.length - 1;
+        if (n > 0) {
+            return this.conditionStack.pop();
+        } else {
+            return this.conditionStack[0];
+        }
+    },
+
+    // produce the lexer rule set which is active for the currently active lexer condition state
+    _currentRules: function _currentRules () {
+        if (this.conditionStack.length && this.conditionStack[this.conditionStack.length - 1]) {
+            return this.conditions[this.conditionStack[this.conditionStack.length - 1]].rules;
+        } else {
+            return this.conditions["INITIAL"].rules;
+        }
+    },
+
+    // return the currently active lexer condition state; when an index argument is provided it produces the N-th previous condition state, if available
+    topState: function topState (n) {
+        n = this.conditionStack.length - 1 - Math.abs(n || 0);
+        if (n >= 0) {
+            return this.conditionStack[n];
+        } else {
+            return "INITIAL";
+        }
+    },
+
+    // alias for begin(condition)
+    pushState: function pushState (condition) {
+        this.begin(condition);
+    },
+
+    // return the number of states pushed
+    stateStackSize: function stateStackSize() {
+        return this.conditionStack.length;
+    }
+};
+
+
+// generate lexer source from a grammar
+function generate (dict, tokens) {
+    var opt = processGrammar(dict, tokens);
+
+    return generateFromOpts(opt);
+}
+
+// process the grammar and build final data structures and functions
+function processGrammar(dict, tokens) {
+    var opts = {};
+    if (typeof dict === 'string') {
+        dict = lexParser.parse(dict);
+    }
+    dict = dict || {};
+
+    opts.options = dict.options || {};
+    opts.moduleType = opts.options.moduleType;
+    opts.moduleName = opts.options.moduleName;
+
+    opts.conditions = prepareStartConditions(dict.startConditions);
+    opts.conditions.INITIAL = {rules:[],inclusive:true};
+
+    opts.performAction = buildActions.call(opts, dict, tokens);
+    opts.conditionStack = ['INITIAL'];
+
+    opts.moduleInclude = (dict.moduleInclude || '').trim();
+    return opts;
+}
+
+// Assemble the final source from the processed grammar
+function generateFromOpts (opt) {
+    var code = "";
+
+    if (opt.moduleType === 'commonjs') {
+        code = generateCommonJSModule(opt);
+    } else if (opt.moduleType === 'amd') {
+        code = generateAMDModule(opt);
+    } else {
+        code = generateModule(opt);
+    }
+
+    return code;
+}
+
+function generateModuleBody (opt) {
+    var functionDescriptions = {
+        setInput: "resets the lexer, sets new input",
+        input: "consumes and returns one char from the input",
+        unput: "unshifts one char (or a string) into the input",
+        more: "When called from action, caches matched text and appends it on next action",
+        reject: "When called from action, signals the lexer that this rule fails to match the input, so the next matching rule (regex) should be tested instead.",
+        less: "retain first n characters of the match",
+        pastInput: "displays already matched input, i.e. for error messages",
+        upcomingInput: "displays upcoming input, i.e. for error messages",
+        showPosition: "displays the character position where the lexing error occurred, i.e. for error messages",
+        test_match: "test the lexed token: return FALSE when not a match, otherwise return token",
+        next: "return next match in input",
+        lex: "return next match that has a token",
+        begin: "activates a new lexer condition state (pushes the new lexer condition state onto the condition stack)",
+        popState: "pop the previously active lexer condition state off the condition stack",
+        _currentRules: "produce the lexer rule set which is active for the currently active lexer condition state",
+        topState: "return the currently active lexer condition state; when an index argument is provided it produces the N-th previous condition state, if available",
+        pushState: "alias for begin(condition)",
+        stateStackSize: "return the number of states currently on the stack"
+    };
+    var out = "({\n";
+    var p = [];
+    var descr;
+    for (var k in RegExpLexer.prototype) {
+        if (RegExpLexer.prototype.hasOwnProperty(k) && k.indexOf("generate") === -1) {
+            // copy the function description as a comment before the implementation; supports multi-line descriptions
+            descr = "\n";
+            if (functionDescriptions[k]) {
+                descr += "// " + functionDescriptions[k].replace(/\n/g, "\n\/\/ ") + "\n";
+            }
+            p.push(descr + k + ":" + (RegExpLexer.prototype[k].toString() || '""'));
+        }
+    }
+    out += p.join(",\n");
+
+    if (opt.options) {
+        out += ",\noptions: " + JSON.stringify(opt.options);
+    }
+
+    out += ",\nperformAction: " + String(opt.performAction);
+    out += ",\nrules: [" + opt.rules + "]";
+    out += ",\nconditions: " + JSON.stringify(opt.conditions);
+    out += "\n})";
+
+    return out;
+}
+
+function generateModule(opt) {
+    opt = opt || {};
+
+    var out = "/* generated by jison-lex " + version + " */";
+    var moduleName = opt.moduleName || "lexer";
+
+    out += "\nvar " + moduleName + " = (function(){\nvar lexer = "
+          + generateModuleBody(opt);
+
+    if (opt.moduleInclude) {
+        out += ";\n" + opt.moduleInclude;
+    }
+
+    out += ";\nreturn lexer;\n})();";
+
+    return out;
+}
+
+function generateAMDModule(opt) {
+    var out = "/* generated by jison-lex " + version + " */";
+
+    out += "define([], function(){\nvar lexer = "
+          + generateModuleBody(opt);
+
+    if (opt.moduleInclude) {
+        out += ";\n" + opt.moduleInclude;
+    }
+
+    out += ";\nreturn lexer;"
+         + "\n});";
+
+    return out;
+}
+
+// Wrap the generateModule(opt) output with CommonJS exports (`exports.lexer` and `exports.lex`).
+function generateCommonJSModule(opt) {
+    opt = opt || {};
+    var moduleName = opt.moduleName || "lexer";
+    var out = generateModule(opt);
+
+    out += "\nexports.lexer = " + moduleName;
+    // apply on the module variable: the `lexer` local lives inside the generated IIFE and is not in scope here
+    out += ";\nexports.lex = function () { return " + moduleName + ".lex.apply(" + moduleName + ", arguments); };";
+    return out;
+}
+
+RegExpLexer.generate = generate;
+
+module.exports = RegExpLexer;
+
diff --git a/tests/all-tests.js b/tests/all-tests.js
new file mode 100755
index 0000000..8a0a4dd
--- /dev/null
+++ b/tests/all-tests.js
@@ -0,0 +1,4 @@
+exports.testRegExpLexer = require("./regexplexer");
+
+if (require.main === module)
+    process.exit(require("test").run(exports));
diff --git a/tests/regexplexer.js b/tests/regexplexer.js
new file mode 100644
index 0000000..6128c47
--- /dev/null
+++ b/tests/regexplexer.js
@@ -0,0 +1,1021 @@
+var RegExpLexer = require("../regexp-lexer"),
+    assert = require("assert");
+
+exports["test basic matchers"] = function() {
+    var dict = {
+        rules: [
+           ["x", "return 'X';" ],
+           ["y", "return 'Y';" ],
+           ["$", "return 'EOF';" ]
+       ]
+    };
+
+    var input = "xxyx";
+
+    var lexer = new RegExpLexer(dict, input);
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "Y");
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test set yy"] = function() {
+    var dict = {
+        rules: [
+           ["x", "return yy.x;" ],
+           ["y", "return 'Y';" ],
+           ["$", "return 'EOF';" ]
+       ]
+    };
+
+    var input = "xxyx";
+
+    var lexer = new RegExpLexer(dict);
+    lexer.setInput(input, { x: 'EX' });
+    assert.equal(lexer.lex(), "EX");
+};
+
+exports["test set input after"] = function() {
+    var dict = {
+        rules: [
+           ["x", "return 'X';" ],
+           ["y", "return 'Y';" ],
+           ["$", "return 'EOF';" ]
+       ]
+    };
+
+    var input = "xxyx";
+
+    var lexer = new RegExpLexer(dict);
+    lexer.setInput(input);
+
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "Y");
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test unrecognized char"] = function() {
+    var dict = {
+        rules: [
+           ["x", "return 'X';" ],
+           ["y", "return 'Y';" ],
+           ["$", "return 'EOF';" ]
+       ]
+    };
+
+    var input = "xa";
+
+    var lexer = new RegExpLexer(dict, input);
+    assert.equal(lexer.lex(), "X");
+    assert.throws(function(){lexer.lex()}, "bad char");
+};
+
+exports["test macro"] = function() {
+    var dict = {
+        macros: {
+            "digit": "[0-9]"
+        },
+        rules: [
+           ["x", "return 'X';" ],
+           ["y", "return 'Y';" ],
+           ["{digit}+", "return 'NAT';" ],
+           ["$", "return 'EOF';" ]
+       ]
+    };
+
+    var input = "x12234y42";
+
+    var lexer = new RegExpLexer(dict, input);
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "NAT");
+    assert.equal(lexer.lex(), "Y");
+    assert.equal(lexer.lex(), "NAT");
+    assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test macro precedence"] = function() {
+    var dict = {
+        macros: {
+            "hex": "[0-9]|[a-f]"
+        },
+        rules: [
+           ["-", "return '-';" ],
+           ["{hex}+", "return 'HEX';" ],
+           ["$", "return 'EOF';" ]
+       ]
+    };
+
+    var input = "129-abfe-42dc-ea12";
+
+    var lexer = new RegExpLexer(dict, input);
+    assert.equal(lexer.lex(), "HEX");
+    assert.equal(lexer.lex(), "-");
+    assert.equal(lexer.lex(), "HEX");
+    assert.equal(lexer.lex(), "-");
+    assert.equal(lexer.lex(), "HEX");
+    assert.equal(lexer.lex(), "-");
+    assert.equal(lexer.lex(), "HEX");
+    assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test nested macros"] = function () {
+    var dict = {
+        macros: {
+            "digit": "[0-9]",
+            "2digit": "{digit}{digit}",
+            "3digit": "{2digit}{digit}"
+        },
+        rules: [
+           ["x", "return 'X';" ],
+           ["y", "return 'Y';" ],
+           ["{3digit}", "return 'NNN';" ],
+           ["{2digit}", "return 'NN';" ],
+           ["{digit}", "return 'N';" ],
+           ["$", "return 'EOF';" ]
+       ]
+    };
+
+    var input = "x1y42y123";
+
+    var lexer = new RegExpLexer(dict, input);
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "N");
+    assert.equal(lexer.lex(), "Y");
+    assert.equal(lexer.lex(), "NN");
+    assert.equal(lexer.lex(), "Y");
+    assert.equal(lexer.lex(), "NNN");
+    assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test nested macro precedence"] = function() {
+    var dict = {
+        macros: {
+            "hex": "[0-9]|[a-f]",
+            "col": "#{hex}+"
+        },
+        rules: [
+           ["-", "return '-';" ],
+           ["{col}", "return 'HEX';" ],
+           ["$", "return 'EOF';" ]
+       ]
+    };
+
+    var input = "#129-#abfe-#42dc-#ea12";
+
+    var lexer = new RegExpLexer(dict, input);
+    assert.equal(lexer.lex(), "HEX");
+    assert.equal(lexer.lex(), "-");
+    assert.equal(lexer.lex(), "HEX");
+    assert.equal(lexer.lex(), "-");
+    assert.equal(lexer.lex(), "HEX");
+    assert.equal(lexer.lex(), "-");
+    assert.equal(lexer.lex(), "HEX");
+    assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test action include"] = function() {
+    var dict = {
+        rules: [
+           ["x", "return included ? 'Y' : 'N';" ],
+           ["$", "return 'EOF';" ]
+       ],
+       actionInclude: "var included = true;"
+    };
+
+    var input = "x";
+
+    var lexer = new RegExpLexer(dict, input);
+    assert.equal(lexer.lex(), "Y");
+    assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test ignored"] = function() {
+    var dict = {
+        rules: [
+           ["x", "return 'X';" ],
+           ["y", "return 'Y';" ],
+           ["\\s+", "/* skip whitespace */" ],
+           ["$", "return 'EOF';" ]
+       ]
+    };
+
+    var input = "x x   y x";
+
+    var lexer = new RegExpLexer(dict, input);
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "Y");
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test disambiguate"] = function() {
+    var dict = {
+        rules: [
+           ["for\\b", "return 'FOR';" ],
+           ["if\\b", "return 'IF';" ],
+           ["[a-z]+", "return 'IDENTIFIER';" ],
+           ["\\s+", "/* skip whitespace */" ],
+           ["$", "return 'EOF';" ]
+       ]
+    };
+
+    var input = "if forever for for";
+
+    var lexer = new RegExpLexer(dict, input);
+    assert.equal(lexer.lex(), "IF");
+    assert.equal(lexer.lex(), "IDENTIFIER");
+    assert.equal(lexer.lex(), "FOR");
+    assert.equal(lexer.lex(), "FOR");
+    assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test yytext overwrite"] = function() {
+    var dict = {
+        rules: [
+           ["x", "yytext = 'hi der'; return 'X';" ]
+       ]
+    };
+
+    var input = "x";
+
+    var lexer = new RegExpLexer(dict, input);
+    lexer.lex();
+    assert.equal(lexer.yytext, "hi der");
+};
+
+exports["test yylineno"] = function() {
+    var dict = {
+        rules: [
+           ["\\s+", "/* skip whitespace */" ],
+           ["x", "return 'x';" ],
+           ["y", "return 'y';" ]
+       ]
+    };
+
+    var input = "x\nxy\n\n\nx";
+
+    var lexer = new RegExpLexer(dict, input);
+    assert.equal(lexer.yylineno, 0);
+    assert.equal(lexer.lex(), "x");
+    assert.equal(lexer.lex(), "x");
+    assert.equal(lexer.yylineno, 1);
+    assert.equal(lexer.lex(), "y");
+    assert.equal(lexer.yylineno, 1);
+    assert.equal(lexer.lex(), "x");
+    assert.equal(lexer.yylineno, 4);
+};
+
+exports["test yylloc"] = function() {
+    var dict = {
+        rules: [
+           ["\\s+", "/* skip whitespace */" ],
+           ["x", "return 'x';" ],
+           ["y", "return 'y';" ]
+       ]
+    };
+
+    var input = "x\nxy\n\n\nx";
+
+    var lexer = new RegExpLexer(dict, input);
+    assert.equal(lexer.lex(), "x");
+    assert.equal(lexer.yylloc.first_column, 0);
+    assert.equal(lexer.yylloc.last_column, 1);
+    assert.equal(lexer.lex(), "x");
+    assert.equal(lexer.yylloc.first_line, 2);
+    assert.equal(lexer.yylloc.last_line, 2);
+    assert.equal(lexer.yylloc.first_column, 0);
+    assert.equal(lexer.yylloc.last_column, 1);
+    assert.equal(lexer.lex(), "y");
+    assert.equal(lexer.yylloc.first_line, 2);
+    assert.equal(lexer.yylloc.last_line, 2);
+    assert.equal(lexer.yylloc.first_column, 1);
+    assert.equal(lexer.yylloc.last_column, 2);
+    assert.equal(lexer.lex(), "x");
+    assert.equal(lexer.yylloc.first_line, 5);
+    assert.equal(lexer.yylloc.last_line, 5);
+    assert.equal(lexer.yylloc.first_column, 0);
+    assert.equal(lexer.yylloc.last_column, 1);
+};
+
+exports["test more()"] = function() {
+    var dict = {
+        rules: [
+           ["x", "return 'X';" ],
+           ['"[^"]*', function(){
+               if(yytext.charAt(yyleng-1) == '\\') {
+                   this.more();
+               } else {
+                   yytext += this.input(); // swallow end quote
+                   return "STRING";
+               }
+            } ],
+           ["$", "return 'EOF';" ]
+       ]
+    };
+
+    var input = 'x"fgjdrtj\\"sdfsdf"x';
+
+    var lexer = new RegExpLexer(dict, input);
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "STRING");
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test defined token returns"] = function() {
+    var tokens = {"2":"X", "3":"Y", "4":"EOF"};
+    var dict = {
+        rules: [
+           ["x", "return 'X';" ],
+           ["y", "return 'Y';" ],
+           ["$", "return 'EOF';" ]
+       ]
+    };
+
+    var input = "xxyx";
+
+    var lexer = new RegExpLexer(dict, input, tokens);
+
+    assert.equal(lexer.lex(), 2);
+    assert.equal(lexer.lex(), 2);
+    assert.equal(lexer.lex(), 3);
+    assert.equal(lexer.lex(), 2);
+    assert.equal(lexer.lex(), 4);
+};
+
+exports["test module generator from constructor"] = function() { // exercises the static RegExpLexer.generate entry point
+    var dict = {
+        rules: [
+           ["x", "return 'X';" ],
+           ["y", "return 'Y';" ],
+           ["$", "return 'EOF';" ]
+       ]
+    };
+
+    var input = "xxyx";
+
+    var lexerSource = RegExpLexer.generate(dict); // source text produced straight from the grammar
+    eval(lexerSource); // the following lines rely on this defining `lexer` in the current scope
+    lexer.setInput(input);
+
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "Y");
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test module generator"] = function() { // same check as above, via the instance method generateModule()
+    var dict = {
+        rules: [
+           ["x", "return 'X';" ],
+           ["y", "return 'Y';" ],
+           ["$", "return 'EOF';" ]
+       ]
+    };
+
+    var input = "xxyx";
+
+    var lexer_ = new RegExpLexer(dict); // presumably suffixed so the eval'd code can define `lexer`
+    var lexerSource = lexer_.generateModule();
+    eval(lexerSource); // the following lines rely on this defining `lexer` in the current scope
+    lexer.setInput(input);
+
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "Y");
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test generator with more complex lexer"] = function() { // generated module with a function-valued rule action
+    var dict = {
+        rules: [
+           ["x", "return 'X';" ],
+           ['"[^"]*', function(){
+               if(yytext.charAt(yyleng-1) == '\\') {
+                   this.more();
+               } else {
+                   yytext += this.input(); // swallow end quote
+                   return "STRING";
+               }
+            } ],
+           ["$", "return 'EOF';" ]
+       ]
+    };
+
+    var input = 'x"fgjdrtj\\"sdfsdf"x'; // contains an escaped quote inside the string literal
+
+    var lexer_ = new RegExpLexer(dict);
+    var lexerSource = lexer_.generateModule();
+    eval(lexerSource); // the following lines rely on this defining `lexer` in the current scope
+    lexer.setInput(input);
+
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "STRING");
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test commonjs module generator"] = function() { // checks the `exports.lexer` / `exports.lex` pair of the CommonJS output
+    var dict = {
+        rules: [
+           ["x", "return 'X';" ],
+           ["y", "return 'Y';" ],
+           ["$", "return 'EOF';" ]
+       ]
+    };
+
+    var input = "xxyx";
+
+    var lexer_ = new RegExpLexer(dict);
+    var lexerSource = lexer_.generateCommonJSModule();
+    var exports = {}; // local stand-in; shadows the module-level `exports` inside this function
+    eval(lexerSource); // expected to populate the local `exports` with `lexer` and `lex`
+    exports.lexer.setInput(input);
+
+    assert.equal(exports.lex(), "X");
+    assert.equal(exports.lex(), "X");
+    assert.equal(exports.lex(), "Y");
+    assert.equal(exports.lex(), "X");
+    assert.equal(exports.lex(), "EOF");
+};
+
+exports["test amd module generator"] = function() { // evaluates the AMD output against a stubbed define()
+    var dict = {
+        rules: [
+           ["x", "return 'X';" ],
+           ["y", "return 'Y';" ],
+           ["$", "return 'EOF';" ]
+       ]
+    };
+
+    var input = "xxyx";
+
+    var lexer_ = new RegExpLexer(dict);
+    var lexerSource = lexer_.generateAMDModule();
+
+    var lexer;
+    var define = function (_, fn) { // minimal stand-in for an AMD loader's define(); the first argument is ignored
+      lexer = fn(); // keep whatever the module factory returns
+    };
+
+    eval(lexerSource); // the generated code calls the local `define` above
+    lexer.setInput(input);
+
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "Y");
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test DJ lexer"] = function() { // lexes a larger sample program and checks that every token is a string
+    var dict = {
+    "lex": {
+        "macros": {
+            "digit": "[0-9]",
+            "id": "[a-zA-Z][a-zA-Z0-9]*"
+        },
+
+        "rules": [
+            ["\\/\\/.*",       "/* ignore comment */"],
+            ["main\\b",     "return 'MAIN';"],
+            ["class\\b",    "return 'CLASS';"],
+            ["extends\\b",  "return 'EXTENDS';"],
+            ["nat\\b",      "return 'NATTYPE';"],
+            ["if\\b",       "return 'IF';"],
+            ["else\\b",     "return 'ELSE';"],
+            ["for\\b",      "return 'FOR';"],
+            ["printNat\\b", "return 'PRINTNAT';"],
+            ["readNat\\b",  "return 'READNAT';"],
+            ["this\\b",     "return 'THIS';"],
+            ["new\\b",      "return 'NEW';"],
+            ["var\\b",      "return 'VAR';"],
+            ["null\\b",     "return 'NUL';"],
+            ["{digit}+",   "return 'NATLITERAL';"],
+            ["{id}",       "return 'ID';"],
+            ["==",         "return 'EQUALITY';"],
+            ["=",          "return 'ASSIGN';"],
+            ["\\+",        "return 'PLUS';"],
+            ["-",          "return 'MINUS';"],
+            ["\\*",        "return 'TIMES';"],
+            [">",          "return 'GREATER';"],
+            ["\\|\\|",     "return 'OR';"],
+            ["!",          "return 'NOT';"],
+            ["\\.",        "return 'DOT';"],
+            ["\\{",        "return 'LBRACE';"],
+            ["\\}",        "return 'RBRACE';"],
+            ["\\(",        "return 'LPAREN';"],
+            ["\\)",        "return 'RPAREN';"],
+            [";",          "return 'SEMICOLON';"],
+            ["\\s+",       "/* skip whitespace */"],
+            [".",          "print('Illegal character');throw 'Illegal character';"], // NOTE(review): `print` is not defined in this file; presumably a host-provided global, verify
+            ["$",          "return 'ENDOFFILE';"]
+        ]
+    }
+};
+
+    var input = "class Node extends Object { \
+                      var nat value    var nat value;\
+                      var Node next;\
+                      var nat index;\
+                    }\
+\
+                    class List extends Object {\
+                      var Node start;\
+\
+                      Node prepend(Node startNode) {\
+                        startNode.next = start;\
+                        start = startNode;\
+                      }\
+\
+                      nat find(nat index) {\
+                        var nat value;\
+                        var Node node;\
+\
+                        for(node = start;!(node == null);node = node.next){\
+                          if(node.index == index){\
+                            value = node.value;\
+                          } else { 0; };\
+                        };\
+\
+                        value;\
+                      }\
+                    }\
+\
+                    main {\
+                      var nat index;\
+                      var nat value;\
+                      var List list;\
+                      var Node startNode;\
+\
+                      index = readNat();\
+                      list = new List;\
+\
+                      for(0;!(index==0);0){\
+                        value = readNat();\
+                        startNode = new Node;\
+                        startNode.index = index;\
+                        startNode.value = value;\
+                        list.prepend(startNode);\
+                        index = readNat();\
+                      };\
+\
+                      index = readNat();\
+\
+                      for(0;!(index==0);0){\
+                        printNat(list.find(index));\
+                        index = readNat();\
+                      };\
+                    }";
+
+    var lexer = new RegExpLexer(dict.lex); // only the nested "lex" section is the lexer grammar
+    lexer.setInput(input);
+    var tok;
+    while (tok = lexer.lex(), tok!==1) { // NOTE(review): 1 is presumably the lexer's end-of-input code; confirm against regexp-lexer.js
+        assert.equal(typeof tok, "string");
+    }
+};
+
+exports["test instantiation from string"] = function() {
+    var dict = "%%\n'x' {return 'X';}\n'y' {return 'Y';}\n<<EOF>> {return 'EOF';}";
+
+    var input = "x";
+
+    var lexer = new RegExpLexer(dict);
+    lexer.setInput(input);
+
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test inclusive start conditions"] = function() {
+    var dict = {
+        startConditions: {
+            "TEST": 0,
+        },
+        rules: [
+            ["enter-test", "this.begin('TEST');" ],
+            [["TEST"], "x", "return 'T';" ],
+            [["TEST"], "y", "this.begin('INITIAL'); return 'TY';" ],
+            ["x", "return 'X';" ],
+            ["y", "return 'Y';" ],
+            ["$", "return 'EOF';" ]
+        ]
+    };
+    var input = "xenter-testxyy";
+
+    var lexer = new RegExpLexer(dict);
+    lexer.setInput(input);
+
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "T");
+    assert.equal(lexer.lex(), "TY");
+    assert.equal(lexer.lex(), "Y");
+    assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test exclusive start conditions"] = function() {
+    var dict = {
+        startConditions: {
+            "EAT": 1,
+        },
+        rules: [
+            ["\\/\\/", "this.begin('EAT');" ],
+            [["EAT"], ".", "" ],
+            [["EAT"], "\\n", "this.begin('INITIAL');" ],
+            ["x", "return 'X';" ],
+            ["y", "return 'Y';" ],
+            ["$", "return 'EOF';" ]
+        ]
+    };
+    var input = "xy//yxteadh//ste\ny";
+
+    var lexer = new RegExpLexer(dict);
+    lexer.setInput(input);
+
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "Y");
+    assert.equal(lexer.lex(), "Y");
+    assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test pop start condition stack"] = function() {
+    var dict = {
+        startConditions: {
+            "EAT": 1,
+        },
+        rules: [
+            ["\\/\\/", "this.begin('EAT');" ],
+            [["EAT"], ".", "" ],
+            [["EAT"], "\\n", "this.popState();" ],
+            ["x", "return 'X';" ],
+            ["y", "return 'Y';" ],
+            ["$", "return 'EOF';" ]
+        ]
+    };
+    var input = "xy//yxteadh//ste\ny";
+
+    var lexer = new RegExpLexer(dict);
+    lexer.setInput(input);
+
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "Y");
+    assert.equal(lexer.lex(), "Y");
+    assert.equal(lexer.lex(), "EOF");
+};
+
+
+exports["test star start condition"] = function() {
+    var dict = {
+        startConditions: {
+            "EAT": 1,
+        },
+        rules: [
+            ["\\/\\/", "this.begin('EAT');" ],
+            [["EAT"], ".", "" ],
+            ["x", "return 'X';" ],
+            ["y", "return 'Y';" ],
+            [["*"],"$", "return 'EOF';" ]
+        ]
+    };
+    var input = "xy//yxteadh//stey";
+
+    var lexer = new RegExpLexer(dict);
+    lexer.setInput(input);
+
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "Y");
+    assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test start condition constants"] = function() {
+    var dict = {
+        startConditions: {
+            "EAT": 1,
+        },
+        rules: [
+            ["\\/\\/", "this.begin('EAT');" ],
+            [["EAT"], ".", "if (YYSTATE==='EAT') return 'E';" ],
+            ["x", "if (YY_START==='INITIAL') return 'X';" ],
+            ["y", "return 'Y';" ],
+            [["*"],"$", "return 'EOF';" ]
+        ]
+    };
+    var input = "xy//y";
+
+    var lexer = new RegExpLexer(dict);
+    lexer.setInput(input);
+
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "Y");
+    assert.equal(lexer.lex(), "E");
+    assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test unicode encoding"] = function() {
+    var dict = {
+        rules: [
+            ["\\u2713", "return 'CHECK';" ],
+            ["\\u03c0", "return 'PI';" ],
+            ["y", "return 'Y';" ]
+        ]
+    };
+    var input = "\u2713\u03c0y";
+
+    var lexer = new RegExpLexer(dict);
+    lexer.setInput(input);
+
+    assert.equal(lexer.lex(), "CHECK");
+    assert.equal(lexer.lex(), "PI");
+    assert.equal(lexer.lex(), "Y");
+};
+
+exports["test unicode"] = function() {
+    var dict = {
+        rules: [
+            ["π", "return 'PI';" ],
+            ["y", "return 'Y';" ]
+        ]
+    };
+    var input = "πy";
+
+    var lexer = new RegExpLexer(dict);
+    lexer.setInput(input);
+
+    assert.equal(lexer.lex(), "PI");
+    assert.equal(lexer.lex(), "Y");
+};
+
+exports["test longest match returns"] = function() {
+    var dict = {
+        rules: [
+            [".", "return 'DOT';" ],
+            ["cat", "return 'CAT';" ]
+        ],
+        options: {flex: true}
+    };
+    var input = "cat!";
+
+    var lexer = new RegExpLexer(dict);
+    lexer.setInput(input);
+
+    assert.equal(lexer.lex(), "CAT");
+    assert.equal(lexer.lex(), "DOT");
+};
+
+exports["test case insensitivity"] = function() {
+    var dict = {
+        rules: [
+            ["cat", "return 'CAT';" ]
+        ],
+        options: {'case-insensitive': true}
+    };
+    var input = "Cat";
+
+    var lexer = new RegExpLexer(dict);
+    lexer.setInput(input);
+
+    assert.equal(lexer.lex(), "CAT");
+};
+
+exports["test less"] = function() {
+    var dict = {
+        rules: [
+            ["cat", "this.less(2); return 'CAT';" ],
+            ["t", "return 'T';" ]
+        ],
+    };
+    var input = "cat";
+
+    var lexer = new RegExpLexer(dict);
+    lexer.setInput(input);
+
+    assert.equal(lexer.lex(), "CAT");
+    assert.equal(lexer.lex(), "T");
+};
+
+exports["test EOF unput"] = function() {
+    var dict = {
+        startConditions: {
+            "UN": 1,
+        },
+        rules: [
+            ["U", "this.begin('UN');return 'U';" ],
+            [["UN"],"$", "this.unput('X')" ],
+            [["UN"],"X", "this.popState();return 'X';" ],
+            ["$", "return 'EOF'" ]
+        ]
+    };
+    var input = "U";
+
+    var lexer = new RegExpLexer(dict);
+    lexer.setInput(input);
+
+    assert.equal(lexer.lex(), "U");
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "EOF");
+};
+
+exports["test flex mode default rule"] = function() {
+    var dict = {
+        rules: [
+            ["x", "return 'X';" ]
+        ],
+        options: {flex: true}
+    };
+    var input = "xyx";
+
+    var lexer = new RegExpLexer(dict);
+    lexer.setInput(input);
+
+    assert.equal(lexer.lex(), "X");
+    assert.equal(lexer.lex(), "X");
+};
+
+exports["test pipe precedence"] = function() {
+    var dict = {
+        rules: [
+            ["x|y", "return 'X_Y';" ],
+            [".",   "return 'N';"]
+        ]
+    };
+    var input = "xny";
+
+    var lexer = new RegExpLexer(dict);
+    lexer.setInput(input);
+
+    assert.equal(lexer.lex(), "X_Y");
+    assert.equal(lexer.lex(), "N");
+    assert.equal(lexer.lex(), "X_Y");
+};
+
+exports["test ranges"] = function() {
+    var dict = {
+        rules: [
+            ["x+", "return 'X';" ],
+            [".",   "return 'N';"]
+        ],
+        options: {ranges: true}
+    };
+    var input = "xxxyy";
+
+    var lexer = new RegExpLexer(dict);
+    lexer.setInput(input);
+
+    assert.equal(lexer.lex(), "X");
+    assert.deepEqual(lexer.yylloc.range, [0, 3]);
+};
+
+exports["test unput location"] = function() {
+    var dict = {
+        rules: [
+            ["x+", "return 'X';" ],
+            ["y\\n", "this.unput('\\n'); return 'Y';" ],
+            ["\\ny", "this.unput('y'); return 'BR';" ],
+            ["y", "return 'Y';" ],
+            [".",   "return 'N';"]
+        ],
+        options: {ranges: true}
+    };
+    var input = "xxxy\ny";
+
+    var lexer = new RegExpLexer(dict);
+    lexer.setInput(input);
+    console.log(lexer.rules);
+
+    assert.equal(lexer.next(), "X");
+    assert.deepEqual(lexer.yylloc, {first_line: 1,
+                                    first_column: 0,
+                                    last_line: 1,
+                                    last_column: 3,
+                                    range: [0, 3]});
+    assert.equal(lexer.next(), "Y");
+    assert.deepEqual(lexer.yylloc, {first_line: 1,
+                                    first_column: 3,
+                                    last_line: 1,
+                                    last_column: 4,
+                                    range: [3, 4]});
+    assert.equal(lexer.next(), "BR");
+    assert.deepEqual(lexer.yylloc, {first_line: 1,
+                                    first_column: 4,
+                                    last_line: 2,
+                                    last_column: 0,
+                                    range: [4, 5]});
+    assert.equal(lexer.next(), "Y");
+    assert.deepEqual(lexer.yylloc, {first_line: 2,
+                                    first_column: 0,
+                                    last_line: 2,
+                                    last_column: 1,
+                                    range: [5, 6]});
+
+};
+
+exports["test unput location again"] = function() {
+    var dict = {
+        rules: [
+            ["x+", "return 'X';" ],
+            ["y\\ny\\n", "this.unput('\\n'); return 'YY';" ],
+            ["\\ny", "this.unput('y'); return 'BR';" ],
+            ["y", "return 'Y';" ],
+            [".",   "return 'N';"]
+        ],
+        options: {ranges: true}
+    };
+    var input = "xxxy\ny\ny";
+
+    var lexer = new RegExpLexer(dict);
+    lexer.setInput(input);
+    console.log(lexer.rules);
+
+    assert.equal(lexer.next(), "X");
+    assert.deepEqual(lexer.yylloc, {first_line: 1,
+                                    first_column: 0,
+                                    last_line: 1,
+                                    last_column: 3,
+                                    range: [0, 3]});
+    assert.equal(lexer.next(), "YY");
+    assert.deepEqual(lexer.yylloc, {first_line: 1,
+                                    first_column: 3,
+                                    last_line: 2,
+                                    last_column: 1,
+                                    range: [3, 6]});
+    assert.equal(lexer.next(), "BR");
+    assert.deepEqual(lexer.yylloc, {first_line: 2,
+                                    first_column: 1,
+                                    last_line: 3,
+                                    last_column: 0,
+                                    range: [6, 7]});
+    assert.equal(lexer.next(), "Y");
+    assert.deepEqual(lexer.yylloc, {first_line: 3,
+                                    first_column: 0,
+                                    last_line: 3,
+                                    last_column: 1,
+                                    range: [7, 8]});
+
+};
+
+exports["test backtracking lexer reject() method"] = function() {
+    var dict = {
+        rules: [
+            ["[A-Z]+([0-9]+)", "if (this.matches[1].length) this.reject(); else return 'ID';" ],
+            ["[A-Z]+", "return 'WORD';" ],
+            ["[0-9]+", "return 'NUM';" ]
+        ],
+        options: {backtrack_lexer: true}
+    };
+    var input = "A5";
+
+    var lexer = new RegExpLexer(dict);
+    lexer.setInput(input);
+
+    assert.equal(lexer.lex(), "WORD");
+    assert.equal(lexer.lex(), "NUM");
+};
+
+exports["test lexer reject() exception when not in backtracking mode"] = function() {
+    var dict = {
+        rules: [
+            ["[A-Z]+([0-9]+)", "if (this.matches[1].length) this.reject(); else return 'ID';" ],
+            ["[A-Z]+", "return 'WORD';" ],
+            ["[0-9]+", "return 'NUM';" ]
+        ],
+        options: {backtrack_lexer: false}
+    };
+    var input = "A5";
+
+    var lexer = new RegExpLexer(dict);
+    lexer.setInput(input);
+
+    assert.throws(function() {
+      lexer.lex();
+    },
+    function(err) {
+      return (err instanceof Error) && /You can only invoke reject/.test(err);
+    });
+};
+
+exports["test yytext state after unput"] = function() {
+    var dict = {
+        rules: [
+            ["cat4", "this.unput('4'); return 'CAT';" ],
+            ["4", "return 'NUMBER';" ],
+            ["$", "return 'EOF';"]
+        ]
+    };
+
+    var input = "cat4";
+
+    var lexer = new RegExpLexer(dict);
+    lexer.setInput(input);
+    assert.equal(lexer.lex(), "CAT");
+    /*the yytext should be 'cat' since we unput '4' from 'cat4' */
+    assert.equal(lexer.yytext, "cat");
+    assert.equal(lexer.lex(), "NUMBER");
+    assert.equal(lexer.lex(), "EOF");
+};

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/node-jison-lex.git



More information about the Pkg-javascript-commits mailing list