[Pkg-javascript-commits] [node-domhandler] 01/05: Import Upstream version 2.3.0
Paolo Greppi
paolog-guest at moszumanska.debian.org
Fri Dec 16 11:27:52 UTC 2016
This is an automated email from the git hooks/post-receive script.
paolog-guest pushed a commit to branch master
in repository node-domhandler.
commit 554298c65391036036dab5cf3d694005867882c8
Author: Paolo Greppi <paolo.greppi at libpf.com>
Date: Fri Dec 16 10:29:28 2016 +0000
Import Upstream version 2.3.0
---
.travis.yml | 7 ++
LICENSE | 11 ++
index.js | 182 +++++++++++++++++++++++++++
lib/element.js | 20 +++
lib/node.js | 44 +++++++
package.json | 40 ++++++
readme.md | 105 ++++++++++++++++
test/cases/01-basic.json | 57 +++++++++
test/cases/02-single_tag_1.json | 21 ++++
test/cases/03-single_tag_2.json | 21 ++++
test/cases/04-unescaped_in_script.json | 27 ++++
test/cases/05-tags_in_comment.json | 18 +++
test/cases/06-comment_in_script.json | 18 +++
test/cases/07-unescaped_in_style.json | 20 +++
test/cases/08-extra_spaces_in_tag.json | 20 +++
test/cases/09-unquoted_attrib.json | 20 +++
test/cases/10-singular_attribute.json | 15 +++
test/cases/11-text_outside_tags.json | 40 ++++++
test/cases/12-text_only.json | 11 ++
test/cases/13-comment_in_text.json | 19 +++
test/cases/14-comment_in_text_in_script.json | 18 +++
test/cases/15-non-verbose.json | 22 ++++
test/cases/16-normalize_whitespace.json | 47 +++++++
test/cases/17-xml_namespace.json | 18 +++
test/cases/18-enforce_empty_tags.json | 16 +++
test/cases/19-ignore_empty_tags.json | 20 +++
test/cases/20-template_script_tags.json | 20 +++
test/cases/21-conditional_comments.json | 15 +++
test/cases/22-lowercase_tags.json | 41 ++++++
test/cases/23-dom-lvl1.json | 131 +++++++++++++++++++
test/cases/24-with-start-indices.json | 85 +++++++++++++
test/tests.js | 60 +++++++++
32 files changed, 1209 insertions(+)
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..7f1b0a1
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,7 @@
+before_install:
+ - '[ "${TRAVIS_NODE_VERSION}" != "0.8" ] || npm install -g npm@1.4.28'
+ - npm install -g npm@latest
+language: node_js
+node_js:
+ - 0.8
+ - 0.10
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..c464f86
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,11 @@
+Copyright (c) Felix Böhm
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+
+THIS IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR B [...]
+EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/index.js b/index.js
new file mode 100644
index 0000000..41fd0bb
--- /dev/null
+++ b/index.js
@@ -0,0 +1,182 @@
+var ElementType = require("domelementtype");
+
+var re_whitespace = /\s+/g;
+var NodePrototype = require("./lib/node");
+var ElementPrototype = require("./lib/element");
+
+function DomHandler(callback, options, elementCB){
+ if(typeof callback === "object"){
+ elementCB = options;
+ options = callback;
+ callback = null;
+ } else if(typeof options === "function"){
+ elementCB = options;
+ options = defaultOpts;
+ }
+ this._callback = callback;
+ this._options = options || defaultOpts;
+ this._elementCB = elementCB;
+ this.dom = [];
+ this._done = false;
+ this._tagStack = [];
+ this._parser = this._parser || null;
+}
+
+//default options
+var defaultOpts = {
+ normalizeWhitespace: false, //Replace all whitespace with single spaces
+ withStartIndices: false, //Add startIndex properties to nodes
+};
+
+DomHandler.prototype.onparserinit = function(parser){
+ this._parser = parser;
+};
+
+//Resets the handler back to starting state
+DomHandler.prototype.onreset = function(){
+ DomHandler.call(this, this._callback, this._options, this._elementCB);
+};
+
+//Signals the handler that parsing is done
+DomHandler.prototype.onend = function(){
+ if(this._done) return;
+ this._done = true;
+ this._parser = null;
+ this._handleCallback(null);
+};
+
+DomHandler.prototype._handleCallback =
+DomHandler.prototype.onerror = function(error){
+ if(typeof this._callback === "function"){
+ this._callback(error, this.dom);
+ } else {
+ if(error) throw error;
+ }
+};
+
+DomHandler.prototype.onclosetag = function(){
+ //if(this._tagStack.pop().name !== name) this._handleCallback(Error("Tagname didn't match!"));
+ var elem = this._tagStack.pop();
+ if(this._elementCB) this._elementCB(elem);
+};
+
+DomHandler.prototype._addDomElement = function(element){
+ var parent = this._tagStack[this._tagStack.length - 1];
+ var siblings = parent ? parent.children : this.dom;
+ var previousSibling = siblings[siblings.length - 1];
+
+ element.next = null;
+
+ if(this._options.withStartIndices){
+ element.startIndex = this._parser.startIndex;
+ }
+
+ if (this._options.withDomLvl1) {
+ element.__proto__ = element.type === "tag" ? ElementPrototype : NodePrototype;
+ }
+
+ if(previousSibling){
+ element.prev = previousSibling;
+ previousSibling.next = element;
+ } else {
+ element.prev = null;
+ }
+
+ siblings.push(element);
+ element.parent = parent || null;
+};
+
+DomHandler.prototype.onopentag = function(name, attribs){
+ var element = {
+ type: name === "script" ? ElementType.Script : name === "style" ? ElementType.Style : ElementType.Tag,
+ name: name,
+ attribs: attribs,
+ children: []
+ };
+
+ this._addDomElement(element);
+
+ this._tagStack.push(element);
+};
+
+DomHandler.prototype.ontext = function(data){
+ //the ignoreWhitespace is officially dropped, but for now,
+ //it's an alias for normalizeWhitespace
+ var normalize = this._options.normalizeWhitespace || this._options.ignoreWhitespace;
+
+ var lastTag;
+
+ if(!this._tagStack.length && this.dom.length && (lastTag = this.dom[this.dom.length-1]).type === ElementType.Text){
+ if(normalize){
+ lastTag.data = (lastTag.data + data).replace(re_whitespace, " ");
+ } else {
+ lastTag.data += data;
+ }
+ } else {
+ if(
+ this._tagStack.length &&
+ (lastTag = this._tagStack[this._tagStack.length - 1]) &&
+ (lastTag = lastTag.children[lastTag.children.length - 1]) &&
+ lastTag.type === ElementType.Text
+ ){
+ if(normalize){
+ lastTag.data = (lastTag.data + data).replace(re_whitespace, " ");
+ } else {
+ lastTag.data += data;
+ }
+ } else {
+ if(normalize){
+ data = data.replace(re_whitespace, " ");
+ }
+
+ this._addDomElement({
+ data: data,
+ type: ElementType.Text
+ });
+ }
+ }
+};
+
+DomHandler.prototype.oncomment = function(data){
+ var lastTag = this._tagStack[this._tagStack.length - 1];
+
+ if(lastTag && lastTag.type === ElementType.Comment){
+ lastTag.data += data;
+ return;
+ }
+
+ var element = {
+ data: data,
+ type: ElementType.Comment
+ };
+
+ this._addDomElement(element);
+ this._tagStack.push(element);
+};
+
+DomHandler.prototype.oncdatastart = function(){
+ var element = {
+ children: [{
+ data: "",
+ type: ElementType.Text
+ }],
+ type: ElementType.CDATA
+ };
+
+ this._addDomElement(element);
+ this._tagStack.push(element);
+};
+
+DomHandler.prototype.oncommentend = DomHandler.prototype.oncdataend = function(){
+ this._tagStack.pop();
+};
+
+DomHandler.prototype.onprocessinginstruction = function(name, data){
+ this._addDomElement({
+ name: name,
+ data: data,
+ type: ElementType.Directive
+ });
+};
+
+module.exports = DomHandler;
diff --git a/lib/element.js b/lib/element.js
new file mode 100644
index 0000000..e147215
--- /dev/null
+++ b/lib/element.js
@@ -0,0 +1,20 @@
+// Prototype used for element ("tag") nodes when creating a DOM-Level-1-compliant structure
+var NodePrototype = require('./node');
+var ElementPrototype = module.exports = Object.create(NodePrototype);
+
+var domLvl1 = {
+ tagName: "name"
+};
+
+Object.keys(domLvl1).forEach(function(key) {
+ var shorthand = domLvl1[key];
+ Object.defineProperty(ElementPrototype, key, {
+ get: function() {
+ return this[shorthand] || null;
+ },
+ set: function(val) {
+ this[shorthand] = val;
+ return val;
+ }
+ });
+});
diff --git a/lib/node.js b/lib/node.js
new file mode 100644
index 0000000..7a36a9a
--- /dev/null
+++ b/lib/node.js
@@ -0,0 +1,44 @@
+// This object will be used as the prototype for Nodes when creating a
+// DOM-Level-1-compliant structure.
+var NodePrototype = module.exports = {
+ get firstChild() {
+ var children = this.children;
+ return children && children[0] || null;
+ },
+ get lastChild() {
+ var children = this.children;
+ return children && children[children.length - 1] || null;
+ },
+ get nodeType() {
+ return nodeTypes[this.type] || nodeTypes.element;
+ }
+};
+
+var domLvl1 = {
+ tagName: "name",
+ childNodes: "children",
+ parentNode: "parent",
+ previousSibling: "prev",
+ nextSibling: "next",
+ nodeValue: "data"
+};
+
+var nodeTypes = {
+ element: 1,
+ text: 3,
+ cdata: 4,
+ comment: 8
+};
+
+Object.keys(domLvl1).forEach(function(key) {
+ var shorthand = domLvl1[key];
+ Object.defineProperty(NodePrototype, key, {
+ get: function() {
+ return this[shorthand] || null;
+ },
+ set: function(val) {
+ this[shorthand] = val;
+ return val;
+ }
+ });
+});
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..4a63e33
--- /dev/null
+++ b/package.json
@@ -0,0 +1,40 @@
+{
+ "name": "domhandler",
+ "version": "2.3.0",
+ "description": "handler for htmlparser2 that turns pages into a dom",
+ "main": "index.js",
+ "directories": {
+ "test": "tests"
+ },
+ "scripts": {
+ "test": "mocha -R list && jshint index.js test/"
+ },
+ "repository": {
+ "type": "git",
+ "url": "git://github.com/fb55/DomHandler.git"
+ },
+ "keywords": [
+ "dom",
+ "htmlparser2"
+ ],
+ "dependencies": {
+ "domelementtype": "1"
+ },
+ "devDependencies": {
+ "htmlparser2": "3.8",
+ "mocha": "1",
+ "jshint": "~2.3.0"
+ },
+ "author": "Felix Boehm <me at feedic.com>",
+ "jshintConfig": {
+ "quotmark": "double",
+ "trailing": true,
+ "unused": true,
+ "undef": true,
+ "node": true,
+ "proto": true,
+ "globals": {
+ "it": true
+ }
+ }
+}
diff --git a/readme.md b/readme.md
new file mode 100644
index 0000000..4301f60
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,105 @@
+#DOMHandler [![Build Status](https://secure.travis-ci.org/fb55/DomHandler.png)](http://travis-ci.org/fb55/DomHandler)
+
+The DOM handler (formerly known as DefaultHandler) creates a tree containing all nodes of a page. The tree may be manipulated using the DOMUtils library.
+
+##Usage
+```javascript
+var handler = new DomHandler([ <func> callback(err, dom), ] [ <obj> options ]);
+// var parser = new Parser(handler[, options]);
+```
+
+##Example
+```javascript
+var htmlparser = require("htmlparser2");
+var rawHtml = "Xyz <script language= javascript>var foo = '<<bar>>';< / script><!--<!-- Waah! -- -->";
+var handler = new htmlparser.DomHandler(function (error, dom) {
+ if (error)
+ [...do something for errors...]
+ else
+ [...parsing done, do something...]
+ console.log(dom);
+});
+var parser = new htmlparser.Parser(handler);
+parser.write(rawHtml);
+parser.done();
+```
+
+Output:
+
+```javascript
+[{
+ data: 'Xyz ',
+ type: 'text'
+}, {
+ type: 'script',
+ name: 'script',
+ attribs: {
+ language: 'javascript'
+ },
+ children: [{
+ data: 'var foo = \'<bar>\';<',
+ type: 'text'
+ }]
+}, {
+ data: '<!-- Waah! -- ',
+ type: 'comment'
+}]
+```
+
+##Option: normalizeWhitespace
+Indicates whether the whitespace in text nodes should be normalized (= all whitespace should be replaced with single spaces). The default value is "false".
+
+The following HTML will be used:
+
+```html
+<font>
+ <br>this is the text
+<font>
+```
+
+###Example: true
+
+```javascript
+[{
+ type: 'tag',
+ name: 'font',
+ children: [{
+ data: ' ',
+ type: 'text'
+ }, {
+ type: 'tag',
+ name: 'br'
+ }, {
+ data: 'this is the text ',
+ type: 'text'
+ }, {
+ type: 'tag',
+ name: 'font'
+ }]
+}]
+```
+
+###Example: false
+
+```javascript
+[{
+ type: 'tag',
+ name: 'font',
+ children: [{
+ data: '\n\t',
+ type: 'text'
+ }, {
+ type: 'tag',
+ name: 'br'
+ }, {
+ data: 'this is the text\n',
+ type: 'text'
+ }, {
+ type: 'tag',
+ name: 'font'
+ }]
+}]
+```
+
+##Option: withStartIndices
+Indicates whether a `startIndex` property will be added to nodes. When the parser is used in a non-streaming fashion, `startIndex` is an integer indicating the position of the start of the node in the document. The default value is "false".
diff --git a/test/cases/01-basic.json b/test/cases/01-basic.json
new file mode 100644
index 0000000..61759fd
--- /dev/null
+++ b/test/cases/01-basic.json
@@ -0,0 +1,57 @@
+{
+ "name": "Basic test",
+ "options": {},
+ "html": "<!DOCTYPE html><html><title>The Title</title><body>Hello world</body></html>",
+ "expected": [
+ {
+ "name": "!doctype",
+ "data": "!DOCTYPE html",
+ "type": "directive"
+ },
+ {
+ "type": "tag",
+ "name": "html",
+ "attribs": {},
+ "parent": null,
+ "children": [
+ {
+ "type": "tag",
+ "name": "title",
+ "attribs": {},
+ "parent": {
+ "type": "tag",
+ "name": "html",
+ "attribs": {}
+ },
+ "children": [
+ {
+ "data": "The Title",
+ "type": "text",
+ "parent": {
+ "type": "tag",
+ "name": "title",
+ "attribs": {}
+ }
+ }
+ ]
+ },
+ {
+ "type": "tag",
+ "name": "body",
+ "attribs": {},
+ "children": [
+ {
+ "data": "Hello world",
+ "type": "text"
+ }
+ ],
+ "prev": {
+ "type": "tag",
+ "name": "title",
+ "attribs": {}
+ }
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/cases/02-single_tag_1.json b/test/cases/02-single_tag_1.json
new file mode 100644
index 0000000..51ff845
--- /dev/null
+++ b/test/cases/02-single_tag_1.json
@@ -0,0 +1,21 @@
+{
+ "name": "Single Tag 1",
+ "options": {},
+ "html": "<br>text</br>",
+ "expected": [
+ {
+ "type": "tag",
+ "name": "br",
+ "attribs": {}
+ },
+ {
+ "data": "text",
+ "type": "text"
+ },
+ {
+ "type": "tag",
+ "name": "br",
+ "attribs": {}
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/cases/03-single_tag_2.json b/test/cases/03-single_tag_2.json
new file mode 100644
index 0000000..1c56dc9
--- /dev/null
+++ b/test/cases/03-single_tag_2.json
@@ -0,0 +1,21 @@
+{
+ "name": "Single Tag 2",
+ "options": {},
+ "html": "<br>text<br>",
+ "expected": [
+ {
+ "type": "tag",
+ "name": "br",
+ "attribs": {}
+ },
+ {
+ "data": "text",
+ "type": "text"
+ },
+ {
+ "type": "tag",
+ "name": "br",
+ "attribs": {}
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/cases/04-unescaped_in_script.json b/test/cases/04-unescaped_in_script.json
new file mode 100644
index 0000000..f31f5fa
--- /dev/null
+++ b/test/cases/04-unescaped_in_script.json
@@ -0,0 +1,27 @@
+{
+ "name": "Unescaped chars in script",
+ "options": {},
+ "html": "<head><script language=\"Javascript\">var foo = \"<bar>\"; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \"<<>>>><<\";</script></head>",
+ "expected": [
+ {
+ "type": "tag",
+ "name": "head",
+ "attribs": {},
+ "children": [
+ {
+ "type": "script",
+ "name": "script",
+ "attribs": {
+ "language": "Javascript"
+ },
+ "children": [
+ {
+ "data": "var foo = \"<bar>\"; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \"<<>>>><<\";",
+ "type": "text"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/cases/05-tags_in_comment.json b/test/cases/05-tags_in_comment.json
new file mode 100644
index 0000000..2d22d9e
--- /dev/null
+++ b/test/cases/05-tags_in_comment.json
@@ -0,0 +1,18 @@
+{
+ "name": "Special char in comment",
+ "options": {},
+ "html": "<head><!-- commented out tags <title>Test</title>--></head>",
+ "expected": [
+ {
+ "type": "tag",
+ "name": "head",
+ "attribs": {},
+ "children": [
+ {
+ "data": " commented out tags <title>Test</title>",
+ "type": "comment"
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/cases/06-comment_in_script.json b/test/cases/06-comment_in_script.json
new file mode 100644
index 0000000..9a21cda
--- /dev/null
+++ b/test/cases/06-comment_in_script.json
@@ -0,0 +1,18 @@
+{
+ "name": "Script source in comment",
+ "options": {},
+ "html": "<script><!--var foo = 1;--></script>",
+ "expected": [
+ {
+ "type": "script",
+ "name": "script",
+ "attribs": {},
+ "children": [
+ {
+ "data": "<!--var foo = 1;-->",
+ "type": "text"
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/cases/07-unescaped_in_style.json b/test/cases/07-unescaped_in_style.json
new file mode 100644
index 0000000..77438fd
--- /dev/null
+++ b/test/cases/07-unescaped_in_style.json
@@ -0,0 +1,20 @@
+{
+ "name": "Unescaped chars in style",
+ "options": {},
+ "html": "<style type=\"text/css\">\n body > p\n\t{ font-weight: bold; }</style>",
+ "expected": [
+ {
+ "type": "style",
+ "name": "style",
+ "attribs": {
+ "type": "text/css"
+ },
+ "children": [
+ {
+ "data": "\n body > p\n\t{ font-weight: bold; }",
+ "type": "text"
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/cases/08-extra_spaces_in_tag.json b/test/cases/08-extra_spaces_in_tag.json
new file mode 100644
index 0000000..5c2492e
--- /dev/null
+++ b/test/cases/08-extra_spaces_in_tag.json
@@ -0,0 +1,20 @@
+{
+ "name": "Extra spaces in tag",
+ "options": {},
+ "html": "<font\t\n size='14' \n>the text</\t\nfont\t \n>",
+ "expected": [
+ {
+ "type": "tag",
+ "name": "font",
+ "attribs": {
+ "size": "14"
+ },
+ "children": [
+ {
+ "data": "the text",
+ "type": "text"
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/cases/09-unquoted_attrib.json b/test/cases/09-unquoted_attrib.json
new file mode 100644
index 0000000..543ccee
--- /dev/null
+++ b/test/cases/09-unquoted_attrib.json
@@ -0,0 +1,20 @@
+{
+ "name": "Unquoted attributes",
+ "options": {},
+ "html": "<font size= 14>the text</font>",
+ "expected": [
+ {
+ "type": "tag",
+ "name": "font",
+ "attribs": {
+ "size": "14"
+ },
+ "children": [
+ {
+ "data": "the text",
+ "type": "text"
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/cases/10-singular_attribute.json b/test/cases/10-singular_attribute.json
new file mode 100644
index 0000000..544636e
--- /dev/null
+++ b/test/cases/10-singular_attribute.json
@@ -0,0 +1,15 @@
+{
+ "name": "Singular attribute",
+ "options": {},
+ "html": "<option value='foo' selected>",
+ "expected": [
+ {
+ "type": "tag",
+ "name": "option",
+ "attribs": {
+ "value": "foo",
+ "selected": ""
+ }
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/cases/11-text_outside_tags.json b/test/cases/11-text_outside_tags.json
new file mode 100644
index 0000000..d328572
--- /dev/null
+++ b/test/cases/11-text_outside_tags.json
@@ -0,0 +1,40 @@
+{
+ "name": "Text outside tags",
+ "options": {},
+ "html": "Line one\n<br>\nline two",
+ "expected": [
+ {
+ "data": "Line one\n",
+ "type": "text",
+ "prev": null,
+ "next": {
+ "type": "tag",
+ "name": "br",
+ "attribs": {}
+ }
+ },
+ {
+ "type": "tag",
+ "name": "br",
+ "attribs": {},
+ "prev": {
+ "data": "Line one\n",
+ "type": "text"
+ },
+ "next": {
+ "data": "\nline two",
+ "type": "text"
+ }
+ },
+ {
+ "data": "\nline two",
+ "type": "text",
+ "prev": {
+ "type": "tag",
+ "name": "br",
+ "attribs": {}
+ },
+ "next": null
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/cases/12-text_only.json b/test/cases/12-text_only.json
new file mode 100644
index 0000000..342dc45
--- /dev/null
+++ b/test/cases/12-text_only.json
@@ -0,0 +1,11 @@
+{
+ "name": "Only text",
+ "options": {},
+ "html": "this is the text",
+ "expected": [
+ {
+ "data": "this is the text",
+ "type": "text"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/cases/13-comment_in_text.json b/test/cases/13-comment_in_text.json
new file mode 100644
index 0000000..672dbbe
--- /dev/null
+++ b/test/cases/13-comment_in_text.json
@@ -0,0 +1,19 @@
+{
+ "name": "Comment within text",
+ "options": {},
+ "html": "this is <!-- the comment --> the text",
+ "expected": [
+ {
+ "data": "this is ",
+ "type": "text"
+ },
+ {
+ "data": " the comment ",
+ "type": "comment"
+ },
+ {
+ "data": " the text",
+ "type": "text"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/cases/14-comment_in_text_in_script.json b/test/cases/14-comment_in_text_in_script.json
new file mode 100644
index 0000000..b69d04b
--- /dev/null
+++ b/test/cases/14-comment_in_text_in_script.json
@@ -0,0 +1,18 @@
+{
+ "name": "Comment within text within script",
+ "options": {},
+ "html": "<script>this is <!-- the comment --> the text</script>",
+ "expected": [
+ {
+ "type": "script",
+ "name": "script",
+ "attribs": {},
+ "children": [
+ {
+ "data": "this is <!-- the comment --> the text",
+ "type": "text"
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/cases/15-non-verbose.json b/test/cases/15-non-verbose.json
new file mode 100644
index 0000000..069db84
--- /dev/null
+++ b/test/cases/15-non-verbose.json
@@ -0,0 +1,22 @@
+{
+ "name": "Option 'verbose' set to 'false'",
+ "options": {
+ "verbose": false
+ },
+ "html": "<font\t\n size='14' \n>the text</\t\nfont\t \n>",
+ "expected": [
+ {
+ "type": "tag",
+ "name": "font",
+ "attribs": {
+ "size": "14"
+ },
+ "children": [
+ {
+ "data": "the text",
+ "type": "text"
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/cases/16-normalize_whitespace.json b/test/cases/16-normalize_whitespace.json
new file mode 100644
index 0000000..d4490af
--- /dev/null
+++ b/test/cases/16-normalize_whitespace.json
@@ -0,0 +1,47 @@
+{
+ "name": "Normalize whitespace",
+ "options": {
+ "normalizeWhitespace": true
+ },
+ "html": "Line one\n<br>\t \r\n\f <br>\nline two<font><br> x </font>",
+ "expected": [
+ {
+ "data": "Line one ",
+ "type": "text"
+ },
+ {
+ "type": "tag",
+ "name": "br",
+ "attribs": {}
+ },
+ {
+ "data": " ",
+ "type": "text"
+ },
+ {
+ "type": "tag",
+ "name": "br",
+ "attribs": {}
+ },
+ {
+ "data": " line two",
+ "type": "text"
+ },
+ {
+ "type": "tag",
+ "name": "font",
+ "attribs": {},
+ "children": [
+ {
+ "type": "tag",
+ "name": "br",
+ "attribs": {}
+ },
+ {
+ "data": " x ",
+ "type": "text"
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/cases/17-xml_namespace.json b/test/cases/17-xml_namespace.json
new file mode 100644
index 0000000..2171c49
--- /dev/null
+++ b/test/cases/17-xml_namespace.json
@@ -0,0 +1,18 @@
+{
+ "name": "XML Namespace",
+ "options": {},
+ "html": "<ns:tag>text</ns:tag>",
+ "expected": [
+ {
+ "type": "tag",
+ "name": "ns:tag",
+ "attribs": {},
+ "children": [
+ {
+ "data": "text",
+ "type": "text"
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/cases/18-enforce_empty_tags.json b/test/cases/18-enforce_empty_tags.json
new file mode 100644
index 0000000..ce6c455
--- /dev/null
+++ b/test/cases/18-enforce_empty_tags.json
@@ -0,0 +1,16 @@
+{
+ "name": "Enforce empty tags",
+ "options": {},
+ "html": "<link>text</link>",
+ "expected": [
+ {
+ "type": "tag",
+ "name": "link",
+ "attribs": {}
+ },
+ {
+ "data": "text",
+ "type": "text"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/cases/19-ignore_empty_tags.json b/test/cases/19-ignore_empty_tags.json
new file mode 100644
index 0000000..fe59cf9
--- /dev/null
+++ b/test/cases/19-ignore_empty_tags.json
@@ -0,0 +1,20 @@
+{
+ "name": "Ignore empty tags (xml mode)",
+ "options": {
+ "xmlMode": true
+ },
+ "html": "<link>text</link>",
+ "expected": [
+ {
+ "type": "tag",
+ "name": "link",
+ "attribs": {},
+ "children": [
+ {
+ "data": "text",
+ "type": "text"
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/cases/20-template_script_tags.json b/test/cases/20-template_script_tags.json
new file mode 100644
index 0000000..dae5f1f
--- /dev/null
+++ b/test/cases/20-template_script_tags.json
@@ -0,0 +1,20 @@
+{
+ "name": "Template script tags",
+ "options": {},
+ "html": "<script type=\"text/template\"><h1>Heading1</h1></script>",
+ "expected": [
+ {
+ "type": "script",
+ "name": "script",
+ "attribs": {
+ "type": "text/template"
+ },
+ "children": [
+ {
+ "data": "<h1>Heading1</h1>",
+ "type": "text"
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/cases/21-conditional_comments.json b/test/cases/21-conditional_comments.json
new file mode 100644
index 0000000..c034acd
--- /dev/null
+++ b/test/cases/21-conditional_comments.json
@@ -0,0 +1,15 @@
+{
+ "name": "Conditional comments",
+ "options": {},
+ "html": "<!--[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]--><!--[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]-->",
+ "expected": [
+ {
+ "data": "[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]",
+ "type": "comment"
+ },
+ {
+ "data": "[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]",
+ "type": "comment"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/cases/22-lowercase_tags.json b/test/cases/22-lowercase_tags.json
new file mode 100644
index 0000000..b3d70e4
--- /dev/null
+++ b/test/cases/22-lowercase_tags.json
@@ -0,0 +1,41 @@
+{
+ "name": "lowercase tags",
+ "options": {},
+ "html": "<!DOCTYPE html><HTML><TITLE>The Title</title><BODY>Hello world</body></html>",
+ "expected": [
+ {
+ "name": "!doctype",
+ "data": "!DOCTYPE html",
+ "type": "directive"
+ },
+ {
+ "type": "tag",
+ "name": "html",
+ "attribs": {},
+ "children": [
+ {
+ "type": "tag",
+ "name": "title",
+ "attribs": {},
+ "children": [
+ {
+ "data": "The Title",
+ "type": "text"
+ }
+ ]
+ },
+ {
+ "type": "tag",
+ "name": "body",
+ "attribs": {},
+ "children": [
+ {
+ "data": "Hello world",
+ "type": "text"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/cases/23-dom-lvl1.json b/test/cases/23-dom-lvl1.json
new file mode 100644
index 0000000..ab25d88
--- /dev/null
+++ b/test/cases/23-dom-lvl1.json
@@ -0,0 +1,131 @@
+{
+ "name": "DOM level 1",
+ "options": { "withDomLvl1": true },
+ "html": "<div>some stray text<h1>Hello, world.</h1><!-- comment node -->more stray text</div>",
+ "expected": [
+ {
+ "type": "tag",
+ "nodeType": 1,
+ "name": "div",
+ "tagName": "div",
+ "attribs": {},
+ "nodeValue": null,
+ "children": [
+ {
+ "type": "text",
+ "nodeType": 3,
+ "tagName": null,
+ "data": "some stray text",
+ "nodeValue": "some stray text",
+ "childNodes": null,
+ "firstChild": null,
+ "lastChild": null
+ },
+ {
+ "type": "tag",
+ "nodeType": 1,
+ "name": "h1",
+ "tagName": "h1",
+ "nodeValue": null,
+ "attribs": {},
+ "children": [
+ {
+ "type": "text",
+ "nodeType": 3,
+ "tagName": null,
+ "data": "Hello, world.",
+ "nodeValue": "Hello, world.",
+ "childNodes": null,
+ "firstChild": null,
+ "lastChild": null
+ }
+ ],
+ "firstChild": {
+ "type": "text",
+ "nodeType": 3,
+ "tagName": null,
+ "data": "Hello, world.",
+ "nodeValue": "Hello, world.",
+ "childNodes": null,
+ "firstChild": null,
+ "lastChild": null
+ },
+ "lastChild": {
+ "type": "text",
+ "nodeType": 3,
+ "tagName": null,
+ "data": "Hello, world.",
+ "nodeValue": "Hello, world.",
+ "childNodes": null,
+ "firstChild": null,
+ "lastChild": null
+ }
+ },
+ {
+ "type": "comment",
+ "nodeType": 8,
+ "tagName": null,
+ "data": " comment node ",
+ "nodeValue": " comment node ",
+ "childNodes": null,
+ "firstChild": null,
+ "lastChild": null,
+ "prev": {
+ "type": "tag",
+ "name": "h1",
+ "nodeValue": null,
+ "attribs": {}
+ },
+ "previousSibling": {
+ "type": "tag",
+ "name": "h1",
+ "nodeValue": null,
+ "attribs": {}
+ },
+ "next": {
+ "type": "text",
+ "tagName": null,
+ "data": "more stray text"
+ },
+ "nextSibling": {
+ "type": "text",
+ "tagName": null,
+ "data": "more stray text"
+ }
+ },
+ {
+ "type": "text",
+ "nodeType": 3,
+ "tagName": null,
+ "data": "more stray text",
+ "nodeValue": "more stray text",
+ "childNodes": null,
+ "firstChild": null,
+ "lastChild": null,
+ "next": null,
+ "nextSibling": null
+ }
+ ],
+ "firstChild": {
+ "type": "text",
+ "nodeType": 3,
+ "tagName": null,
+ "data": "some stray text",
+ "nodeValue": "some stray text",
+ "childNodes": null,
+ "firstChild": null,
+ "lastChild": null
+ },
+ "lastChild": {
+ "type": "text",
+ "nodeType": 3,
+ "tagName": null,
+ "data": "more stray text",
+ "nodeValue": "more stray text",
+ "childNodes": null,
+ "firstChild": null,
+ "lastChild": null
+ }
+ }
+ ]
+}
diff --git a/test/cases/24-with-start-indices.json b/test/cases/24-with-start-indices.json
new file mode 100644
index 0000000..02228f1
--- /dev/null
+++ b/test/cases/24-with-start-indices.json
@@ -0,0 +1,85 @@
+{
+ "name": "withStartIndices adds correct startIndex properties",
+ "options": {"withStartIndices": true},
+ "streaming": false,
+ "html": "<!DOCTYPE html> <html> <title>The Title</title> <body class='foo'>Hello world <p></p></body> <!-- the comment --> </html> ",
+ "expected": [
+ {
+ "startIndex": 0,
+ "name": "!doctype",
+ "data": "!DOCTYPE html",
+ "type": "directive"
+ },
+ {
+ "type": "text",
+ "data": " "
+ },
+ {
+ "startIndex": 16,
+ "type": "tag",
+ "name": "html",
+ "attribs": {},
+ "parent": null,
+ "children": [
+ {
+ "startIndex": 22,
+ "type": "text",
+ "data": " "
+ },
+ {
+ "startIndex": 23,
+ "type": "tag",
+ "name": "title",
+ "attribs": {},
+ "children": [
+ {
+ "startIndex": 30,
+ "data": "The Title",
+ "type": "text"
+ }
+ ]
+ },
+ {
+ "startIndex": 47,
+ "type": "text",
+ "data": " "
+ },
+ {
+ "startIndex": 48,
+ "type": "tag",
+ "name": "body",
+ "attribs": {"class": "foo"},
+ "children": [
+ {
+ "startIndex": 66,
+ "data": "Hello world ",
+ "type": "text"
+ },
+ {
+ "startIndex": 78,
+ "type": "tag",
+ "name": "p",
+ "attribs": {},
+ "children": []
+ }
+ ]
+ },
+ {
+ "startIndex": 92,
+ "type": "text",
+ "data": " "
+ },
+ {
+ "startIndex": 93,
+ "type": "comment",
+ "data": " the comment "
+ },
+ {
+ "startIndex": 113,
+ "type": "text",
+ "data": " "
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/tests.js b/test/tests.js
new file mode 100644
index 0000000..6dc96a2
--- /dev/null
+++ b/test/tests.js
@@ -0,0 +1,60 @@
+var fs = require("fs"),
+ path = require("path"),
+ assert = require("assert"),
+ util = require("util"),
+ Parser = require("htmlparser2").Parser,
+ Handler = require("../");
+
+var basePath = path.resolve(__dirname, "cases"),
+ inspectOpts = { showHidden: true, depth: null };
+
+fs
+.readdirSync(basePath)
+.filter(RegExp.prototype.test, /\.json$/) //only allow .json files
+.map(function(name){
+ return path.resolve(basePath, name);
+})
+.map(require)
+.forEach(function(test){
+ it(test.name, function(){
+ var expected = test.expected;
+
+ var handler = new Handler(function(err, actual){
+ assert.ifError(err);
+ try {
+ compare(expected, actual);
+ } catch(e){
+ e.expected = util.inspect(expected, inspectOpts);
+ e.actual = util.inspect(actual, inspectOpts);
+ throw e;
+ }
+ }, test.options);
+
+ var data = test.html;
+
+ var parser = new Parser(handler, test.options);
+
+ //first, try to run the test via chunks
+ if (test.streaming || test.streaming === undefined){
+ for(var i = 0; i < data.length; i++){
+ parser.write(data.charAt(i));
+ }
+ parser.done();
+ }
+
+ //then parse everything
+ parser.parseComplete(data);
+ });
+});
+
+function compare(expected, result){
+ assert.equal(typeof expected, typeof result, "types didn't match");
+ if(typeof expected !== "object" || expected === null){
+ assert.strictEqual(expected, result, "result doesn't equal expected");
+ } else {
+ for(var prop in expected){
+ assert.ok(prop in result, "result didn't contain property " + prop);
+ compare(expected[prop], result[prop]);
+ }
+ }
+}
\ No newline at end of file
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/node-domhandler.git
More information about the Pkg-javascript-commits
mailing list