[Pkg-javascript-commits] [node-lex-parser] 01/06: Import Upstream version 0.1.4

Praveen Arimbrathodiyil praveen at moszumanska.debian.org
Fri Oct 14 06:35:48 UTC 2016


This is an automated email from the git hooks/post-receive script.

praveen pushed a commit to branch master
in repository node-lex-parser.

commit 862f8a5fdc45a267f4089c67aaca882123c7f205
Author: Praveen Arimbrathodiyil <praveen at debian.org>
Date:   Fri Oct 14 11:23:32 2016 +0530

    Import Upstream version 0.1.4
---
 .gitignore                     |   7 +
 .npmignore                     |   2 +
 Makefile                       |  22 +++
 README.md                      | 111 ++++++++++++
 lex.l                          |  90 ++++++++++
 lex.y                          | 223 +++++++++++++++++++++++
 package.json                   |  25 +++
 tests/all-tests.js             | 396 +++++++++++++++++++++++++++++++++++++++++
 tests/lex/ansic.jisonlex       | 115 ++++++++++++
 tests/lex/bnf.jisonlex         |  23 +++
 tests/lex/bnf.lex.json         |  24 +++
 tests/lex/lex_grammar.jisonlex |  29 +++
 tests/lex/lex_grammar.lex.json |  30 ++++
 13 files changed, 1097 insertions(+)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8c32b70
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+lex-parser.js
+node_modules/
+
+# Editor bak files
+*~
+*.bak
+*.orig
diff --git a/.npmignore b/.npmignore
new file mode 100644
index 0000000..7a48940
--- /dev/null
+++ b/.npmignore
@@ -0,0 +1,2 @@
+lex.y
+lex.l
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..8e1add5
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,22 @@
+
+all: install build test
+
+install:
+	npm install
+
+build:
+	node ./node_modules/.bin/jison lex.y lex.l
+	mv lex.js lex-parser.js
+
+test:
+	node tests/all-tests.js
+
+
+
+
+clean:
+
+superclean: clean
+	-find . -type d -name 'node_modules' -exec rm -rf "{}" \;
+
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..6913c62
--- /dev/null
+++ b/README.md
@@ -0,0 +1,111 @@
+# lex-parser
+
+A parser for lexical grammars used by [jison](http://jison.org) and jison-lex.
+
+## install
+
+    npm install lex-parser
+
+## build
+
+To build the parser yourself, clone the git repo then run:
+
+    make
+
+This will generate `lex-parser.js`.
+
+## usage
+
+    var lexParser = require("lex-parser");
+
+    // parse a lexical grammar and return JSON
+    lexParser.parse("%% ... ");
+
+## example
+
+The parser can parse its own lexical grammar, shown below:
+
+    NAME              [a-zA-Z_][a-zA-Z0-9_-]*
+
+    %s indented trail rules
+    %x code start_condition options conditions action
+
+    %%
+
+    <action>[^{}]+          return 'ACTION_BODY'
+    <action>"{"             yy.depth++; return '{'
+    <action>"}"             yy.depth == 0 ? this.begin('trail') : yy.depth--; return '}'
+
+    <conditions>{NAME}      return 'NAME'
+    <conditions>">"         this.popState(); return '>'
+    <conditions>","         return ','
+    <conditions>"*"         return '*'
+
+    <rules>\n+              /* */
+    <rules>\s+              this.begin('indented')
+    <rules>"%%"             this.begin('code'); return '%%'
+    <rules>[a-zA-Z0-9_]+    return 'CHARACTER_LIT'
+
+    <options>{NAME}         yy.options[yytext] = true
+    <options>\n+            this.begin('INITIAL')
+    <options>\s+\n+         this.begin('INITIAL')
+    <options>\s+            /* empty */
+
+    <start_condition>{NAME}         return 'START_COND'
+    <start_condition>\n+            this.begin('INITIAL')
+    <start_condition>\s+\n+         this.begin('INITIAL')
+    <start_condition>\s+            /* empty */
+
+    <trail>.*\n+                    this.begin('rules')
+
+    <indented>"{"                   yy.depth = 0; this.begin('action'); return '{'
+    <indented>"%{"(.|\n)*?"%}"      this.begin('trail'); yytext = yytext.substr(2, yytext.length-4);return 'ACTION'
+    "%{"(.|\n)*?"%}"                yytext = yytext.substr(2, yytext.length-4); return 'ACTION'
+    <indented>.+                    this.begin('rules'); return 'ACTION'
+
+    "/*"(.|\n|\r)*?"*/"             /* ignore */
+    "//".*                          /* ignore */
+
+    \n+                             /* */
+    \s+                             /* */
+    {NAME}                          return 'NAME'
+    \"("\\\\"|'\"'|[^"])*\"         yytext = yytext.replace(/\\"/g,'"');return 'STRING_LIT'
+    "'"("\\\\"|"\'"|[^'])*"'"       yytext = yytext.replace(/\\'/g,"'");return 'STRING_LIT'
+    "|"                             return '|'
+    "["("\\\\"|"\]"|[^\]])*"]"      return 'ANY_GROUP_REGEX'
+    "(?:"                           return 'SPECIAL_GROUP'
+    "(?="                           return 'SPECIAL_GROUP'
+    "(?!"                           return 'SPECIAL_GROUP'
+    "("                             return '('
+    ")"                             return ')'
+    "+"                             return '+'
+    "*"                             return '*'
+    "?"                             return '?'
+    "^"                             return '^'
+    ","                             return ','
+    "<<EOF>>"                       return '$'
+    "<"                             this.begin('conditions'); return '<'
+    "/!"                            return '/!'
+    "/"                             return '/'
+    "\\"([0-7]{1,3}|[rfntvsSbBwWdD\\*+()${}|[\]\/.^?]|"c"[A-Z]|"x"[0-9A-F]{2}|"u"[a-fA-F0-9]{4}) return 'ESCAPE_CHAR'
+    "\\".                           yytext = yytext.replace(/^\\/g,''); return 'ESCAPE_CHAR'
+    "$"                             return '$'
+    "."                             return '.'
+    "%options"                      yy.options = {}; this.begin('options')
+    "%s"                            this.begin('start_condition');return 'START_INC'
+    "%x"                            this.begin('start_condition');return 'START_EXC'
+    "%%"                            this.begin('rules'); return '%%'
+    "{"\d+(","\s?\d+|",")?"}"       return 'RANGE_REGEX'
+    "{"{NAME}"}"                    return 'NAME_BRACE'
+    "{"                             return '{'
+    "}"                             return '}'
+    .                               /* ignore bad characters */
+    <*><<EOF>>                      return 'EOF'
+
+    <code>(.|\n)+                   return 'CODE'
+
+    %%
+
+## license
+
+MIT
diff --git a/lex.l b/lex.l
new file mode 100644
index 0000000..515984d
--- /dev/null
+++ b/lex.l
@@ -0,0 +1,90 @@
+
+NAME              [a-zA-Z_][a-zA-Z0-9_-]*
+BR                \r\n|\n|\r
+
+%s indented trail rules
+%x code start_condition options conditions action
+
+%%
+
+<action>"/*"(.|\n|\r)*?"*/"           return 'ACTION_BODY';
+<action>"//".*                        return 'ACTION_BODY';
+<action>"/"[^ /]*?['"{}'][^ ]*?"/"    return 'ACTION_BODY'; // regexp with braces or quotes (and no spaces)
+<action>\"("\\\\"|'\"'|[^"])*\"       return 'ACTION_BODY';
+<action>"'"("\\\\"|"\'"|[^'])*"'"     return 'ACTION_BODY';
+<action>[/"'][^{}/"']+                return 'ACTION_BODY';
+<action>[^{}/"']+                     return 'ACTION_BODY';
+<action>"{"                           yy.depth++; return '{'
+<action>"}"                           yy.depth == 0 ? this.begin('trail') : yy.depth--; return '}'
+
+<conditions>{NAME}                    return 'NAME';
+<conditions>">"                       this.popState(); return '>';
+<conditions>","                       return ',';
+<conditions>"*"                       return '*';
+
+<rules>{BR}+                          /* */
+<rules>\s+{BR}+                       /* */
+<rules>\s+                            this.begin('indented')
+<rules>"%%"                           this.begin('code'); return '%%'
+<rules>[a-zA-Z0-9_]+                  return 'CHARACTER_LIT'
+
+<options>{NAME}                       yy.options[yytext] = true
+<options>{BR}+                        this.begin('INITIAL')
+<options>\s+{BR}+                     this.begin('INITIAL')
+<options>\s+                          /* empty */
+
+<start_condition>{NAME}               return 'START_COND'
+<start_condition>{BR}+                this.begin('INITIAL')
+<start_condition>\s+{BR}+             this.begin('INITIAL')
+<start_condition>\s+                  /* empty */
+
+<trail>.*{BR}+                        this.begin('rules')
+
+<indented>"{"                         yy.depth = 0; this.begin('action'); return '{'
+<indented>"%{"(.|{BR})*?"%}"          this.begin('trail'); yytext = yytext.substr(2, yytext.length-4);return 'ACTION'
+"%{"(.|{BR})*?"%}"                    yytext = yytext.substr(2, yytext.length-4); return 'ACTION'
+<indented>.+                          this.begin('rules'); return 'ACTION'
+
+"/*"(.|\n|\r)*?"*/"             /* ignore */
+"//".*                          /* ignore */
+
+{BR}+                           /* */
+\s+                             /* */
+{NAME}                          return 'NAME';
+\"("\\\\"|'\"'|[^"])*\"         yytext = yytext.replace(/\\"/g,'"'); return 'STRING_LIT';
+"'"("\\\\"|"\'"|[^'])*"'"       yytext = yytext.replace(/\\'/g,"'"); return 'STRING_LIT';
+"|"                             return '|';
+"["("\\\\"|"\]"|[^\]])*"]"      return 'ANY_GROUP_REGEX';
+"(?:"                           return 'SPECIAL_GROUP';
+"(?="                           return 'SPECIAL_GROUP';
+"(?!"                           return 'SPECIAL_GROUP';
+"("                             return '(';
+")"                             return ')';
+"+"                             return '+';
+"*"                             return '*';
+"?"                             return '?';
+"^"                             return '^';
+","                             return ',';
+"<<EOF>>"                       return '$';
+"<"                             this.begin('conditions'); return '<';
+"/!"                            return '/!';
+"/"                             return '/';
+"\\"([0-7]{1,3}|[rfntvsSbBwWdD\\*+()${}|[\]\/.^?]|"c"[A-Z]|"x"[0-9A-F]{2}|"u"[a-fA-F0-9]{4})      return 'ESCAPE_CHAR';
+"\\".                           yytext = yytext.replace(/^\\/g,''); return 'ESCAPE_CHAR';
+"$"                             return '$';
+"."                             return '.';
+"%options"                      yy.options = {}; this.begin('options');
+"%s"                            this.begin('start_condition'); return 'START_INC';
+"%x"                            this.begin('start_condition'); return 'START_EXC';
+"%%"                            this.begin('rules'); return '%%';
+"{"\d+(","\s?\d+|",")?"}"       return 'RANGE_REGEX';
+"{"{NAME}"}"                    return 'NAME_BRACE';
+"{"                             return '{';
+"}"                             return '}';
+.                               /* ignore bad characters */
+<*><<EOF>>                      return 'EOF';
+
+<code>(.|{BR})+                 return 'CODE';
+
+%%
+
diff --git a/lex.y b/lex.y
new file mode 100644
index 0000000..599c382
--- /dev/null
+++ b/lex.y
@@ -0,0 +1,223 @@
+%start lex
+
+/* Jison lexer file format grammar */
+
+%nonassoc '/' '/!'
+
+%left '*' '+' '?' RANGE_REGEX
+
+%%
+
+lex
+    : definitions '%%' rules epilogue
+        { 
+          $$ = { rules: $rules };
+          if ($definitions[0]) $$.macros = $definitions[0];
+          if ($definitions[1]) $$.startConditions = $definitions[1];
+          if ($epilogue) $$.moduleInclude = $epilogue;
+          if (yy.options) $$.options = yy.options;
+          if (yy.actionInclude) $$.actionInclude = yy.actionInclude;
+          delete yy.options;
+          delete yy.actionInclude;
+          return $$; 
+        }
+    ;
+
+epilogue
+    : EOF
+      { $$ = null; }
+    | '%%' EOF
+      { $$ = null; }
+    | '%%' CODE EOF
+      { $$ = $2; }
+    ;
+
+definitions
+    : definition definitions
+        {
+          $$ = $definitions;
+          if (Array.isArray($definition)) { /* array result: a NAME regex macro pair */
+            $$[0] = $$[0] || {};
+            $$[0][$definition[0]] = $definition[1];
+          } else { /* object result: start condition names mapped to 0 or 1 */
+            $$[1] = $$[1] || {};
+            for (var name in $definition) {
+              $$[1][name] = $definition[name];
+            }
+          }
+        }
+    | ACTION definitions
+        { /* right-recursive rule: later blocks reduce first, so prepend to keep source order */ yy.actionInclude = $1 + yy.actionInclude; $$ = $definitions; }
+    |
+        { yy.actionInclude = ''; $$ = [null,null]; }
+    ;
+
+definition
+    : NAME regex
+        { $$ = [$1, $2]; }
+    | START_INC names_inclusive
+        { $$ = $2; }
+    | START_EXC names_exclusive
+        { $$ = $2; }
+    ;
+
+names_inclusive
+    : START_COND
+        { $$ = {}; $$[$1] = 0; }
+    | names_inclusive START_COND
+        { $$ = $1; $$[$2] = 0; }
+    ;
+
+names_exclusive
+    : START_COND
+        { $$ = {}; $$[$1] = 1; }
+    | names_exclusive START_COND
+        { $$ = $1; $$[$2] = 1; }
+    ;
+
+rules
+    : rules rule
+        { $$ = $1; $$.push($2); }
+    | rule
+        { $$ = [$1]; }
+    ;
+
+rule
+    : start_conditions regex action
+        { $$ = $1 ? [$1, $2, $3] : [$2,$3]; }
+    ;
+
+action
+    : '{' action_body '}'
+        {$$ = $2;}
+    | ACTION
+        {$$ = $1;}
+    ;
+
+action_body
+    :
+        {$$ = '';}
+    | action_comments_body
+        {$$ = $1;}
+    | action_body '{' action_body '}' action_comments_body
+        {$$ = $1+$2+$3+$4+$5;}
+    | action_body '{' action_body '}'
+        {$$ = $1 + $2 + $3 + $4;}
+    ;
+
+action_comments_body
+    : ACTION_BODY
+        { $$ = yytext; }
+    | action_comments_body ACTION_BODY
+        { $$ = $1+$2; }
+    ;
+
+
+start_conditions
+    : '<' name_list '>'
+        { $$ = $2; }
+    | '<' '*' '>'
+        { $$ = ['*']; }
+    |
+    ;
+
+name_list
+    : NAME
+        { $$ = [$1]; }
+    | name_list ',' NAME
+        { $$ = $1; $$.push($3); }
+    ;
+
+regex
+    : regex_list
+        {
+          $$ = $1;
+          if (!(yy.options && yy.options.flex) && $$.match(/[\w\d]$/) && !$$.match(/\\(r|f|n|t|v|s|b|c[A-Z]|x[0-9A-F]{2}|u[a-fA-F0-9]{4}|[0-7]{1,3})$/)) {
+              $$ += "\\b";
+          }
+        }
+    ;
+
+regex_list
+    : regex_list '|' regex_concat
+        { $$ = $1 + '|' + $3; }
+    | regex_list '|'
+        { $$ = $1 + '|'; }
+    | regex_concat
+    |
+        { $$ = '' }
+    ;
+
+regex_concat
+    : regex_concat regex_base
+        { $$ = $1 + $2; }
+    | regex_base
+    ;
+
+regex_base
+    : '(' regex_list ')'
+        { $$ = '(' + $2 + ')'; }
+    | SPECIAL_GROUP regex_list ')'
+        { $$ = $1 + $2 + ')'; }
+    | regex_base '+'
+        { $$ = $1 + '+'; }
+    | regex_base '*'
+        { $$ = $1 + '*'; }
+    | regex_base '?'
+        { $$ = $1 + '?'; }
+    | '/' regex_base
+        { $$ = '(?=' + $2 + ')'; }
+    | '/!' regex_base
+        { $$ = '(?!' + $2 + ')'; }
+    | name_expansion
+    | regex_base range_regex
+        { $$ = $1 + $2; }
+    | any_group_regex
+    | '.'
+        { $$ = '.'; }
+    | '^'
+        { $$ = '^'; }
+    | '$'
+        { $$ = '$'; }
+    | string
+    | escape_char
+    ;
+
+name_expansion
+    : NAME_BRACE
+    ;
+
+any_group_regex
+    : ANY_GROUP_REGEX
+        { $$ = yytext; }
+    ;
+
+escape_char
+    : ESCAPE_CHAR
+        { $$ = yytext; }
+    ;
+
+range_regex
+    : RANGE_REGEX
+        { $$ = yytext; }
+    ;
+
+string
+    : STRING_LIT
+        { $$ = prepareString(yytext.substr(1, yytext.length - 2)); }
+    | CHARACTER_LIT
+    ;
+
+%%
+
+function encodeRE (s) {
+    return s.replace(/([.*+?^${}()|[\]\/\\])/g, '\\$1').replace(/\\\\u([a-fA-F0-9]{4})/g,'\\u$1');
+}
+
+function prepareString (s) {
+    // unescape slashes
+    s = s.replace(/\\\\/g, "\\");
+    s = encodeRE(s);
+    return s;
+};
+
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..a95535a
--- /dev/null
+++ b/package.json
@@ -0,0 +1,25 @@
+{
+  "name": "lex-parser",
+  "version": "0.1.4",
+  "description": "A parser for lexical grammars used by jison",
+  "main": "lex-parser.js",
+  "scripts": {
+    "test": "make test"
+  },
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/zaach/lex-parser.git"
+  },
+  "keywords": [
+    "lexical",
+    "grammar",
+    "parser",
+    "jison"
+  ],
+  "author": "Zach Carter",
+  "license": "MIT",
+  "devDependencies": {
+    "jison": "0.4.x",
+    "test": "*"
+  }
+}
diff --git a/tests/all-tests.js b/tests/all-tests.js
new file mode 100644
index 0000000..8a09501
--- /dev/null
+++ b/tests/all-tests.js
@@ -0,0 +1,396 @@
+var assert = require("assert"),
+    lex    = require("../lex-parser"),
+    fs     = require('fs'),
+    path   = require('path');
+
+function read (p, file) {
+    return fs.readFileSync(path.join(__dirname, p, file), "utf8");
+}
+
+exports["test lex grammar with macros"] = function () {
+    var lexgrammar = 'D [0-9]\nID [a-zA-Z][a-zA-Z0-9]+\n%%\n\n{D}"ohhai" {print(9);}\n"{" return \'{\';';
+    var expected = {
+        macros: {"D": "[0-9]", "ID": "[a-zA-Z][a-zA-Z0-9]+"},
+        rules: [
+            ["{D}ohhai\\b", "print(9);"],
+            ["\\{", "return '{';"]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test escaped chars"] = function () {
+    var lexgrammar = '%%\n"\\n"+ {return \'NL\';}\n\\n+ {return \'NL2\';}\n\\s+ {/* skip */}';
+    var expected = {
+        rules: [
+            ["\\\\n+", "return 'NL';"],
+            ["\\n+", "return 'NL2';"],
+            ["\\s+", "/* skip */"]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test advanced"] = function () {
+    var lexgrammar = '%%\n$ {return \'EOF\';}\n. {/* skip */}\n"stuff"*/("{"|";") {/* ok */}\n(.+)[a-z]{1,2}"hi"*? {/* skip */}\n';
+    var expected = {
+        rules: [
+            ["$", "return 'EOF';"],
+            [".", "/* skip */"],
+            ["stuff*(?=(\\{|;))", "/* ok */"],
+            ["(.+)[a-z]{1,2}hi*?", "/* skip */"]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test [^\\]]"] = function () {
+    var lexgrammar = '%%\n"["[^\\]]"]" {return true;}\n\'f"oo\\\'bar\'  {return \'baz2\';}\n"fo\\"obar"  {return \'baz\';}\n';
+    var expected = {
+        rules: [
+            ["\\[[^\\]]\\]", "return true;"],
+            ["f\"oo'bar\\b", "return 'baz2';"],
+            ['fo"obar\\b', "return 'baz';"]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test multiline action"] = function () {
+    var lexgrammar = '%%\n"["[^\\]]"]" %{\nreturn true;\n%}\n';
+    var expected = {
+        rules: [
+            ["\\[[^\\]]\\]", "\nreturn true;\n"]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test multiline action with single braces"] = function () {
+    var lexgrammar = '%%\n"["[^\\]]"]" {\nvar b={};return true;\n}\n';
+    var expected = {
+        rules: [
+            ["\\[[^\\]]\\]", "\nvar b={};return true;\n"]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test multiline action with brace in a multi-line-comment"] = function () {
+    var lexgrammar = '%%\n"["[^\\]]"]" {\nvar b={}; /* { */ return true;\n}\n';
+    var expected = {
+        rules: [
+            ["\\[[^\\]]\\]", "\nvar b={}; /* { */ return true;\n"]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test multiline action with brace in a single-line-comment"] = function () {
+    var lexgrammar = '%%\n"["[^\\]]"]" {\nvar b={}; // { \nreturn 2 / 3;\n}\n';
+    var expected = {
+        rules: [
+            ["\\[[^\\]]\\]", "\nvar b={}; // { \nreturn 2 / 3;\n"]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test multiline action with braces in strings"] = function () {
+    var lexgrammar = '%%\n"["[^\\]]"]" {\nvar b=\'{\' + "{"; // { \nreturn 2 / 3;\n}\n';
+    var expected = {
+        rules: [
+            ["\\[[^\\]]\\]", "\nvar b='{' + \"{\"; // { \nreturn 2 / 3;\n"]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test multiline action with braces in regexp"] = function () {
+    var lexgrammar = '%%\n"["[^\\]]"]" {\nvar b=/{/; // { \nreturn 2 / 3;\n}\n';
+    var expected = {
+        rules: [
+            ["\\[[^\\]]\\]", "\nvar b=/{/; // { \nreturn 2 / 3;\n"]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test include"] = function () {
+    var lexgrammar = '\nRULE [0-9]\n\n%{\n hi <stuff> \n%}\n%%\n"["[^\\]]"]" %{\nreturn true;\n%}\n';
+    var expected = {
+        macros: {"RULE": "[0-9]"},
+        actionInclude: "\n hi <stuff> \n",
+        rules: [
+            ["\\[[^\\]]\\]", "\nreturn true;\n"]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test bnf lex grammar"] = function () {
+    var lexgrammar = lex.parse(read('lex', 'bnf.jisonlex'));
+    var expected = JSON.parse(read('lex', 'bnf.lex.json'));
+
+    assert.deepEqual(lexgrammar, expected, "grammar should be parsed correctly");
+};
+
+exports["test lex grammar bootstrap"] = function () {
+    var lexgrammar = lex.parse(read('lex', 'lex_grammar.jisonlex'));
+    var expected = JSON.parse(read('lex', 'lex_grammar.lex.json'));
+
+    assert.deepEqual(lexgrammar, expected, "grammar should be parsed correctly");
+};
+
+exports["test ANSI C lexical grammar"] = function () {
+    var lexgrammar = lex.parse(read('lex','ansic.jisonlex'));
+
+    assert.ok(lexgrammar, "grammar should be parsed correctly");
+};
+
+exports["test advanced"] = function () {
+    var lexgrammar = '%%\n"stuff"*/!("{"|";") {/* ok */}\n';
+    var expected = {
+        rules: [
+            ["stuff*(?!(\\{|;))", "/* ok */"],
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test start conditions"] = function () {
+    var lexgrammar = '%s TEST TEST2\n%x EAT\n%%\n'+
+                     '"enter-test" {this.begin(\'TEST\');}\n'+
+                     '<TEST,EAT>"x" {return \'T\';}\n'+
+                     '<*>"z" {return \'Z\';}\n'+
+                     '<TEST>"y" {this.begin(\'INITIAL\'); return \'TY\';}';
+    var expected = {
+        startConditions: {
+            "TEST": 0,
+            "TEST2": 0,
+            "EAT": 1,
+        },
+        rules: [
+            ["enter-test\\b", "this.begin('TEST');" ],
+            [["TEST","EAT"], "x\\b", "return 'T';" ],
+            [["*"], "z\\b", "return 'Z';" ],
+            [["TEST"], "y\\b", "this.begin('INITIAL'); return 'TY';" ]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test no brace action"] = function () {
+    var lexgrammar = '%%\n"["[^\\]]"]" return true;\n"x" return 1;';
+    var expected = {
+        rules: [
+            ["\\[[^\\]]\\]", "return true;"],
+            ["x\\b", "return 1;"]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test quote escape"] = function () {
+    var lexgrammar = '%%\n\\"\\\'"x" return 1;';
+    var expected = {
+        rules: [
+            ["\"'x\\b", "return 1;"]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test escape things"] = function () {
+    var lexgrammar = '%%\n\\"\\\'\\\\\\*\\i return 1;\n"a"\\b return 2;\n\\cA {}\n\\012 {}\n\\xFF {}';
+    var expected = {
+        rules: [
+            ["\"'\\\\\\*i\\b", "return 1;"],
+            ["a\\b", "return 2;"],
+            ["\\cA", ""],
+            ["\\012", ""],
+            ["\\xFF", ""]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test unicode encoding"] = function () {
+    var lexgrammar = '%%\n"\\u03c0" return 1;';
+    var expected = {
+        rules: [
+            ["\\u03c0", "return 1;"]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test unicode"] = function () {
+    var lexgrammar = '%%\n"π" return 1;';
+    var expected = {
+        rules: [
+            ["π", "return 1;"]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test bugs"] = function () {
+    var lexgrammar = '%%\n\\\'([^\\\\\']+|\\\\(\\n|.))*?\\\' return 1;';
+    var expected = {
+        rules: [
+            ["'([^\\\\']+|\\\\(\\n|.))*?'", "return 1;"]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test special groupings"] = function () {
+    var lexgrammar = '%%\n(?:"foo"|"bar")\\(\\) return 1;';
+    var expected = {
+        rules: [
+            ["(?:foo|bar)\\(\\)", "return 1;"]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test trailing code include"] = function () {
+    var lexgrammar = '%%"foo"  {return bar;}\n%% var bar = 1;';
+    var expected = {
+        rules: [
+            ['foo\\b', "return bar;"]
+        ],
+        moduleInclude: " var bar = 1;"
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test empty or regex"] = function () {
+    var lexgrammar = '%%\n(|"bar")("foo"|)(|) return 1;';
+    var expected = {
+        rules: [
+            ["(|bar)(foo|)(|)", "return 1;"]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test options"] = function () {
+    var lexgrammar = '%options flex\n%%\n"foo" return 1;';
+    var expected = {
+        rules: [
+            ["foo", "return 1;"]
+        ],
+        options: {flex: true}
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test unquoted string rules"] = function () {
+    var lexgrammar = "%%\nfoo* return 1";
+    var expected = {
+        rules: [
+            ["foo*", "return 1"]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test [^\\\\]"] = function () {
+    var lexgrammar = '%%\n"["[^\\\\]"]" {return true;}\n\'f"oo\\\'bar\'  {return \'baz2\';}\n"fo\\"obar"  {return \'baz\';}\n';
+    var expected = {
+        rules: [
+            ["\\[[^\\\\]\\]", "return true;"],
+            ["f\"oo'bar\\b", "return 'baz2';"],
+            ['fo"obar\\b', "return 'baz';"]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test comments"] = function () {
+    var lexgrammar = "/* */ // foo\n%%\nfoo* return 1";
+    var expected = {
+        rules: [
+            ["foo*", "return 1"]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test rules with trailing escapes"] = function () {
+    var lexgrammar = '%%\n\\#[^\\n]*\\n {/* ok */}\n';
+    var expected = {
+        rules: [
+            ["#[^\\n]*\\n", "/* ok */"],
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test no brace action with surplus whitespace between rules"] = function () {
+    var lexgrammar = '%%\n"a" return true;\n  \n"b" return 1;\n   \n';
+    var expected = {
+        rules: [
+            ["a\\b", "return true;"],
+            ["b\\b", "return 1;"]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test windows line endings"] = function () {
+    var lexgrammar = '%%\r\n"["[^\\]]"]" %{\r\nreturn true;\r\n%}\r\n';
+    var expected = {
+        rules: [
+            ["\\[[^\\]]\\]", "\r\nreturn true;\r\n"]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+exports["test braced action with surplus whitespace between rules"] = function () {
+    var lexgrammar = '%%\n"a" %{  \nreturn true;\n%}  \n  \n"b" %{    return 1;\n%}  \n   \n';
+    var expected = {
+        rules: [
+            ["a\\b", "  \nreturn true;\n"],
+            ["b\\b", "    return 1;\n"]
+        ]
+    };
+
+    assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
+};
+
+
+if (require.main === module)
+    require("test").run(exports);
diff --git a/tests/lex/ansic.jisonlex b/tests/lex/ansic.jisonlex
new file mode 100644
index 0000000..1b8e783
--- /dev/null
+++ b/tests/lex/ansic.jisonlex
@@ -0,0 +1,115 @@
+D			[0-9]
+L			[a-zA-Z_]
+H			[a-fA-F0-9]
+E			[Ee][+-]?{D}+
+FS			[fFlL]
+IS			[uUlL]*
+
+%{
+#include <stdio.h>
+#include "y.tab.h"
+
+void count();
+%}
+
+%%
+"/*"			{ comment(); }
+
+"auto"			{ count(); return(AUTO); }
+"break"			{ count(); return(BREAK); }
+"case"			{ count(); return(CASE); }
+"char"			{ count(); return(CHAR); }
+"const"			{ count(); return(CONST); }
+"continue"		{ count(); return(CONTINUE); }
+"default"		{ count(); return(DEFAULT); }
+"do"			{ count(); return(DO); }
+"double"		{ count(); return(DOUBLE); }
+"else"			{ count(); return(ELSE); }
+"enum"			{ count(); return(ENUM); }
+"extern"		{ count(); return(EXTERN); }
+"float"			{ count(); return(FLOAT); }
+"for"			{ count(); return(FOR); }
+"goto"			{ count(); return(GOTO); }
+"if"			{ count(); return(IF); }
+"int"			{ count(); return(INT); }
+"long"			{ count(); return(LONG); }
+"register"		{ count(); return(REGISTER); }
+"return"		{ count(); return(RETURN); }
+"short"			{ count(); return(SHORT); }
+"signed"		{ count(); return(SIGNED); }
+"sizeof"		{ count(); return(SIZEOF); }
+"static"		{ count(); return(STATIC); }
+"struct"		{ count(); return(STRUCT); }
+"switch"		{ count(); return(SWITCH); }
+"typedef"		{ count(); return(TYPEDEF); }
+"union"			{ count(); return(UNION); }
+"unsigned"		{ count(); return(UNSIGNED); }
+"void"			{ count(); return(VOID); }
+"volatile"		{ count(); return(VOLATILE); }
+"while"			{ count(); return(WHILE); }
+
+{L}({L}|{D})*		{ count(); return(check_type()); }
+
+"0"[xX]{H}+{IS}?		{ count(); return(CONSTANT); }
+"0"{D}+{IS}?		{ count(); return(CONSTANT); }
+{D}+{IS}?		{ count(); return(CONSTANT); }
+"L"?"'"("\'"|[^'])+"'"	{ count(); return(CONSTANT); }
+
+{D}+{E}{FS}?		{ count(); return(CONSTANT); }
+{D}*"."{D}+({E})?{FS}?	{ count(); return(CONSTANT); }
+{D}+"."{D}*({E})?{FS}?	{ count(); return(CONSTANT); }
+
+"L"?'"'('\"'|[^"])*'"'	{ count(); return(STRING_LITERAL); }
+
+"..."			{ count(); return(ELLIPSIS); }
+">>="			{ count(); return(RIGHT_ASSIGN); }
+"<<="			{ count(); return(LEFT_ASSIGN); }
+"+="			{ count(); return(ADD_ASSIGN); }
+"-="			{ count(); return(SUB_ASSIGN); }
+"*="			{ count(); return(MUL_ASSIGN); }
+"/="			{ count(); return(DIV_ASSIGN); }
+"%="			{ count(); return(MOD_ASSIGN); }
+"&="			{ count(); return(AND_ASSIGN); }
+"^="			{ count(); return(XOR_ASSIGN); }
+"|="			{ count(); return(OR_ASSIGN); }
+">>"			{ count(); return(RIGHT_OP); }
+"<<"			{ count(); return(LEFT_OP); }
+"++"			{ count(); return(INC_OP); }
+"--"			{ count(); return(DEC_OP); }
+"->"			{ count(); return(PTR_OP); }
+"&&"			{ count(); return(AND_OP); }
+"||"			{ count(); return(OR_OP); }
+"<="			{ count(); return(LE_OP); }
+">="			{ count(); return(GE_OP); }
+"=="			{ count(); return(EQ_OP); }
+"!="			{ count(); return(NE_OP); }
+";"			{ count(); return(';'); }
+("{"|"<%")		{ count(); return('{'); }
+("}"|"%>")		%{ count(); return('}'); %}
+","			{ count(); return(','); }
+":"			{ count(); return(':'); }
+"="			{ count(); return('='); }
+"("			{ count(); return('('); }
+")"			{ count(); return(')'); }
+("["|"<:")		{ count(); return('['); }
+("]"|":>")		{ count(); return(']'); }
+"."			{ count(); return('.'); }
+"&"			{ count(); return('&'); }
+"!"			{ count(); return('!'); }
+"~"			{ count(); return('~'); }
+"-"			{ count(); return('-'); }
+"+"			{ count(); return('+'); }
+"*"			{ count(); return('*'); }
+"/"			{ count(); return('/'); }
+"%"			{ count(); return('%'); }
+"<"			{ count(); return('<'); }
+">"			{ count(); return('>'); }
+"^"			{ count(); return('^'); }
+"|"			{ count(); return('|'); }
+"?"			{ count(); return('?'); }
+
+[ \t\v\n\f]		{ count(); }
+.			{ /* ignore bad characters */ }
+
+%%
+
diff --git a/tests/lex/bnf.jisonlex b/tests/lex/bnf.jisonlex
new file mode 100644
index 0000000..763298b
--- /dev/null
+++ b/tests/lex/bnf.jisonlex
@@ -0,0 +1,23 @@
+
+%%
+\s+		{/* skip whitespace */}
+"/*"[^*]*"*"		{return yy.lexComment(this);}
+[a-zA-Z][a-zA-Z0-9_-]*		{return 'ID';}
+'"'[^"]+'"'		{yytext = yytext.substr(1, yyleng-2); return 'STRING';}
+"'"[^']+"'"		{yytext = yytext.substr(1, yyleng-2); return 'STRING';}
+":"		{return ':';}
+";"		{return ';';}
+"|"		{return '|';}
+"%%"		{return '%%';}
+"%prec"		{return 'PREC';}
+"%start"		{return 'START';}
+"%left"		{return 'LEFT';}
+"%right"		{return 'RIGHT';}
+"%nonassoc"		{return 'NONASSOC';}
+"%"[a-zA-Z]+[^\n]*		{/* ignore unrecognized decl */}
+"{{"[^}]*"}"		{return yy.lexAction(this);}
+"{"[^}]*"}"		{yytext = yytext.substr(1, yyleng-2); return 'ACTION';}
+"<"[^>]*">"		{yytext = yytext.substr(1, yyleng-2); return 'ACTION';}
+.		{/* ignore bad characters */}
+<<EOF>>		{return 'EOF';}
+
diff --git a/tests/lex/bnf.lex.json b/tests/lex/bnf.lex.json
new file mode 100644
index 0000000..0dc1f6c
--- /dev/null
+++ b/tests/lex/bnf.lex.json
@@ -0,0 +1,24 @@
+{
+    "rules": [
+      ["\\s+", "/* skip whitespace */"],
+      ["\\/\\*[^*]*\\*", "return yy.lexComment(this);"],
+      ["[a-zA-Z][a-zA-Z0-9_-]*", "return 'ID';"],
+      ["\"[^\"]+\"", "yytext = yytext.substr(1, yyleng-2); return 'STRING';"],
+      ["'[^']+'", "yytext = yytext.substr(1, yyleng-2); return 'STRING';"],
+      [":", "return ':';"],
+      [";", "return ';';"],
+      ["\\|", "return '|';"],
+      ["%%", "return '%%';"],
+      ["%prec\\b", "return 'PREC';"],
+      ["%start\\b", "return 'START';"],
+      ["%left\\b", "return 'LEFT';"],
+      ["%right\\b", "return 'RIGHT';"],
+      ["%nonassoc\\b", "return 'NONASSOC';"],
+      ["%[a-zA-Z]+[^\\n]*", "/* ignore unrecognized decl */"],
+      ["\\{\\{[^}]*\\}", "return yy.lexAction(this);"],
+      ["\\{[^}]*\\}", "yytext = yytext.substr(1, yyleng-2); return 'ACTION';"],
+      ["<[^>]*>", "yytext = yytext.substr(1, yyleng-2); return 'ACTION';"],
+      [".", "/* ignore bad characters */"],
+      ["$", "return 'EOF';"]
+    ]
+}
diff --git a/tests/lex/lex_grammar.jisonlex b/tests/lex/lex_grammar.jisonlex
new file mode 100644
index 0000000..ae7e896
--- /dev/null
+++ b/tests/lex/lex_grammar.jisonlex
@@ -0,0 +1,29 @@
+
+%%
+\n+    	{yy.freshLine = true;}
+\s+    	{yy.freshLine = false;}
+"y{"[^}]*"}"    	{yytext = yytext.substr(2, yytext.length-3);return 'ACTION';}
+[a-zA-Z_][a-zA-Z0-9_-]*    	{return 'NAME';}
+'"'([^"]|'\"')*'"'    	{return 'STRING_LIT';}
+"'"([^']|"\'")*"'"    	{return 'STRING_LIT';}
+"|"    	{return '|';}
+"["("\]"|[^\]])*"]"    	{return 'ANY_GROUP_REGEX';}
+"("    	{return '(';}
+")"    	{return ')';}
+"+"    	{return '+';}
+"*"    	{return '*';}
+"?"    	{return '?';}
+"^"    	{return '^';}
+"/"    	{return '/';}
+"\\"[a-zA-Z0]    	{return 'ESCAPE_CHAR';}
+"$"    	{return '$';}
+"<<EOF>>"    	{return '$';}
+"."    	{return '.';}
+"%%"    	{return '%%';}
+"{"\d+(","\s?\d+|",")?"}"    	{return 'RANGE_REGEX';}
+/"{"    	%{if(yy.freshLine){this.input('{');return '{';} else this.unput('y');%}
+"}"    	%{return '}';%}
+"%{"(.|\n)*?"}%"    	{yytext = yytext.substr(2, yytext.length-4);return 'ACTION';}
+.    	{/* ignore bad characters */}
+<<EOF>>    	{return 'EOF';}
+
diff --git a/tests/lex/lex_grammar.lex.json b/tests/lex/lex_grammar.lex.json
new file mode 100644
index 0000000..44d7aac
--- /dev/null
+++ b/tests/lex/lex_grammar.lex.json
@@ -0,0 +1,30 @@
+{
+    "rules": [
+        ["\\n+", "yy.freshLine = true;"],
+        ["\\s+", "yy.freshLine = false;"],
+        ["y\\{[^}]*\\}", "yytext = yytext.substr(2, yytext.length-3);return 'ACTION';"],
+        ["[a-zA-Z_][a-zA-Z0-9_-]*", "return 'NAME';"],
+        ["\"([^\"]|\\\\\")*\"", "return 'STRING_LIT';"],
+        ["'([^']|\\\\')*'", "return 'STRING_LIT';"],
+        ["\\|", "return '|';"],
+        ["\\[(\\\\\\]|[^\\]])*\\]", "return 'ANY_GROUP_REGEX';"],
+        ["\\(", "return '(';"],
+        ["\\)", "return ')';"],
+        ["\\+", "return '+';"],
+        ["\\*", "return '*';"],
+        ["\\?", "return '?';"],
+        ["\\^", "return '^';"],
+        ["\\/", "return '/';"],
+        ["\\\\[a-zA-Z0]", "return 'ESCAPE_CHAR';"],
+        ["\\$", "return '$';"],
+        ["<<EOF>>", "return '$';"],
+        ["\\.", "return '.';"],
+        ["%%", "return '%%';"],
+        ["\\{\\d+(,\\s?\\d+|,)?\\}", "return 'RANGE_REGEX';"],
+        ["(?=\\{)", "if(yy.freshLine){this.input('{');return '{';} else this.unput('y');"],
+        ["\\}", "return '}';"],
+        ["%\\{(.|\\n)*?\\}%", "yytext = yytext.substr(2, yytext.length-4);return 'ACTION';"],
+        [".", "/* ignore bad characters */"],
+        ["$", "return 'EOF';"]
+    ]
+}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/node-lex-parser.git



More information about the Pkg-javascript-commits mailing list