[Pkg-javascript-commits] [node-domhandler] 01/05: Import Upstream version 2.3.0
Paolo Greppi
paolog-guest at moszumanska.debian.org
Fri Dec 16 11:27:52 UTC 2016
This is an automated email from the git hooks/post-receive script.
paolog-guest pushed a commit to branch master
in repository node-domhandler.
commit 554298c65391036036dab5cf3d694005867882c8
Author: Paolo Greppi <paolo.greppi at libpf.com>
Date: Fri Dec 16 10:29:28 2016 +0000
Import Upstream version 2.3.0
.travis.yml | 7 ++
LICENSE | 11 ++
index.js | 182 +++++++++++++++++++++++++++
lib/element.js | 20 +++
lib/node.js | 44 +++++++
package.json | 40 ++++++
readme.md | 105 ++++++++++++++++
test/cases/01-basic.json | 57 +++++++++
test/cases/02-single_tag_1.json | 21 ++++
test/cases/03-single_tag_2.json | 21 ++++
test/cases/04-unescaped_in_script.json | 27 ++++
test/cases/05-tags_in_comment.json | 18 +++
test/cases/06-comment_in_script.json | 18 +++
test/cases/07-unescaped_in_style.json | 20 +++
test/cases/08-extra_spaces_in_tag.json | 20 +++
test/cases/09-unquoted_attrib.json | 20 +++
test/cases/10-singular_attribute.json | 15 +++
test/cases/11-text_outside_tags.json | 40 ++++++
test/cases/12-text_only.json | 11 ++
test/cases/13-comment_in_text.json | 19 +++
test/cases/14-comment_in_text_in_script.json | 18 +++
test/cases/15-non-verbose.json | 22 ++++
test/cases/16-normalize_whitespace.json | 47 +++++++
test/cases/17-xml_namespace.json | 18 +++
test/cases/18-enforce_empty_tags.json | 16 +++
test/cases/19-ignore_empty_tags.json | 20 +++
test/cases/20-template_script_tags.json | 20 +++
test/cases/21-conditional_comments.json | 15 +++
test/cases/22-lowercase_tags.json | 41 ++++++
test/cases/23-dom-lvl1.json | 131 +++++++++++++++++++
test/cases/24-with-start-indices.json | 85 +++++++++++++
test/tests.js | 60 +++++++++
32 files changed, 1209 insertions(+)
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..7f1b0a1
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,7 @@
+ - '[ "${TRAVIS_NODE_VERSION}" != "0.8" ] || npm install -g npm@1.4.28'
+ - npm install -g npm@latest
+language: node_js
+ - 0.8
+ - 0.10
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..c464f86
--- /dev/null
@@ -0,0 +1,11 @@
+Copyright (c) Felix Böhm
+All rights reserved.
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
diff --git a/index.js b/index.js
new file mode 100644
index 0000000..41fd0bb
--- /dev/null
+++ b/index.js
@@ -0,0 +1,182 @@
+var ElementType = require("domelementtype");
+var re_whitespace = /\s+/g;
+var NodePrototype = require("./lib/node");
+var ElementPrototype = require("./lib/element");
+function DomHandler(callback, options, elementCB){
+ if(typeof callback === "object"){
+ elementCB = options;
+ options = callback;
+ callback = null;
+ } else if(typeof options === "function"){
+ elementCB = options;
+ options = defaultOpts;
+ }
+ this._callback = callback;
+ this._options = options || defaultOpts;
+ this._elementCB = elementCB;
+ this.dom = [];
+ this._done = false;
+ this._tagStack = [];
+ this._parser = this._parser || null;
+//default options
+var defaultOpts = {
+ normalizeWhitespace: false, //Replace all whitespace with single spaces
+ withStartIndices: false, //Add startIndex properties to nodes
+DomHandler.prototype.onparserinit = function(parser){
+ this._parser = parser;
+//Resets the handler back to starting state
+DomHandler.prototype.onreset = function(){
+ DomHandler.call(this, this._callback, this._options, this._elementCB);
+//Signals the handler that parsing is done
+DomHandler.prototype.onend = function(){
+ if(this._done) return;
+ this._done = true;
+ this._parser = null;
+ this._handleCallback(null);
+DomHandler.prototype._handleCallback =
+DomHandler.prototype.onerror = function(error){
+ if(typeof this._callback === "function"){
+ this._callback(error, this.dom);
+ } else {
+ if(error) throw error;
+ }
+DomHandler.prototype.onclosetag = function(){
+ //if(this._tagStack.pop().name !== name) this._handleCallback(Error("Tagname didn't match!"));
+ var elem = this._tagStack.pop();
+ if(this._elementCB) this._elementCB(elem);
+DomHandler.prototype._addDomElement = function(element){
+ var parent = this._tagStack[this._tagStack.length - 1];
+ var siblings = parent ? parent.children : this.dom;
+ var previousSibling = siblings[siblings.length - 1];
+ element.next = null;
+ if(this._options.withStartIndices){
+ element.startIndex = this._parser.startIndex;
+ }
+ if (this._options.withDomLvl1) {
+ element.__proto__ = element.type === "tag" ? ElementPrototype : NodePrototype;
+ }
+ if(previousSibling){
+ element.prev = previousSibling;
+ previousSibling.next = element;
+ } else {
+ element.prev = null;
+ }
+ siblings.push(element);
+ element.parent = parent || null;
+DomHandler.prototype.onopentag = function(name, attribs){
+ var element = {
+ type: name === "script" ? ElementType.Script : name === "style" ? ElementType.Style : ElementType.Tag,
+ name: name,
+ attribs: attribs,
+ children: []
+ };
+ this._addDomElement(element);
+ this._tagStack.push(element);
+DomHandler.prototype.ontext = function(data){
+ //the ignoreWhitespace is officially dropped, but for now,
+ //it's an alias for normalizeWhitespace
+ var normalize = this._options.normalizeWhitespace || this._options.ignoreWhitespace;
+ var lastTag;
+ if(!this._tagStack.length && this.dom.length && (lastTag = this.dom[this.dom.length-1]).type === ElementType.Text){
+ if(normalize){
+ lastTag.data = (lastTag.data + data).replace(re_whitespace, " ");
+ } else {
+ lastTag.data += data;
+ }
+ } else {
+ if(
+ this._tagStack.length &&
+ (lastTag = this._tagStack[this._tagStack.length - 1]) &&
+ (lastTag = lastTag.children[lastTag.children.length - 1]) &&
+ lastTag.type === ElementType.Text
+ ){
+ if(normalize){
+ lastTag.data = (lastTag.data + data).replace(re_whitespace, " ");
+ } else {
+ lastTag.data += data;
+ }
+ } else {
+ if(normalize){
+ data = data.replace(re_whitespace, " ");
+ }
+ this._addDomElement({
+ data: data,
+ type: ElementType.Text
+ });
+ }
+ }
+DomHandler.prototype.oncomment = function(data){
+ var lastTag = this._tagStack[this._tagStack.length - 1];
+ if(lastTag && lastTag.type === ElementType.Comment){
+ lastTag.data += data;
+ return;
+ }
+ var element = {
+ data: data,
+ type: ElementType.Comment
+ };
+ this._addDomElement(element);
+ this._tagStack.push(element);
+DomHandler.prototype.oncdatastart = function(){
+ var element = {
+ children: [{
+ data: "",
+ type: ElementType.Text
+ }],
+ type: ElementType.CDATA
+ };
+ this._addDomElement(element);
+ this._tagStack.push(element);
+DomHandler.prototype.oncommentend = DomHandler.prototype.oncdataend = function(){
+ this._tagStack.pop();
+DomHandler.prototype.onprocessinginstruction = function(name, data){
+ this._addDomElement({
+ name: name,
+ data: data,
+ type: ElementType.Directive
+ });
+module.exports = DomHandler;
diff --git a/lib/element.js b/lib/element.js
new file mode 100644
index 0000000..e147215
--- /dev/null
+++ b/lib/element.js
@@ -0,0 +1,20 @@
+// Prototype for tag elements when creating a DOM-Level-1-compliant structure; inherits from the Node prototype
+var NodePrototype = require('./node');
+var ElementPrototype = module.exports = Object.create(NodePrototype);
+var domLvl1 = {
+ tagName: "name"
+Object.keys(domLvl1).forEach(function(key) {
+ var shorthand = domLvl1[key];
+ Object.defineProperty(ElementPrototype, key, {
+ get: function() {
+ return this[shorthand] || null;
+ },
+ set: function(val) {
+ this[shorthand] = val;
+ return val;
+ }
+ });
diff --git a/lib/node.js b/lib/node.js
new file mode 100644
index 0000000..7a36a9a
--- /dev/null
+++ b/lib/node.js
@@ -0,0 +1,44 @@
+// This object will be used as the prototype for Nodes when creating a
+// DOM-Level-1-compliant structure.
+var NodePrototype = module.exports = {
+ get firstChild() {
+ var children = this.children;
+ return children && children[0] || null;
+ },
+ get lastChild() {
+ var children = this.children;
+ return children && children[children.length - 1] || null;
+ },
+ get nodeType() {
+ return nodeTypes[this.type] || nodeTypes.element;
+ }
+var domLvl1 = {
+ tagName: "name",
+ childNodes: "children",
+ parentNode: "parent",
+ previousSibling: "prev",
+ nextSibling: "next",
+ nodeValue: "data"
+var nodeTypes = {
+ element: 1,
+ text: 3,
+ cdata: 4,
+ comment: 8
+Object.keys(domLvl1).forEach(function(key) {
+ var shorthand = domLvl1[key];
+ Object.defineProperty(NodePrototype, key, {
+ get: function() {
+ return this[shorthand] || null;
+ },
+ set: function(val) {
+ this[shorthand] = val;
+ return val;
+ }
+ });
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..4a63e33
--- /dev/null
+++ b/package.json
@@ -0,0 +1,40 @@
+ "name": "domhandler",
+ "version": "2.3.0",
+ "description": "handler for htmlparser2 that turns pages into a dom",
+ "main": "index.js",
+ "directories": {
+ "test": "tests"
+ },
+ "scripts": {
+ "test": "mocha -R list && jshint index.js test/"
+ },
+ "repository": {
+ "type": "git",
+ "url": "git://github.com/fb55/DomHandler.git"
+ },
+ "keywords": [
+ "dom",
+ "htmlparser2"
+ ],
+ "dependencies": {
+ "domelementtype": "1"
+ },
+ "devDependencies": {
+ "htmlparser2": "3.8",
+ "mocha": "1",
+ "jshint": "~2.3.0"
+ },
+ "author": "Felix Boehm <me at feedic.com>",
+ "jshintConfig": {
+ "quotmark": "double",
+ "trailing": true,
+ "unused": true,
+ "undef": true,
+ "node": true,
+ "proto": true,
+ "globals": {
+ "it": true
+ }
+ }
diff --git a/readme.md b/readme.md
new file mode 100644
index 0000000..4301f60
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,105 @@
+#DOMHandler [Build Status](http://travis-ci.org/fb55/DomHandler)
+The DOM handler (formerly known as DefaultHandler) creates a tree containing all nodes of a page. The tree may be manipulated using the DOMUtils library.
+var handler = new DomHandler([ <func> callback(err, dom), ] [ <obj> options ]);
+// var parser = new Parser(handler[, options]);
+var htmlparser = require("htmlparser2");
+var rawHtml = "Xyz <script language= javascript>var foo = '<<bar>>';< / script><!--<!-- Waah! -- -->";
+var handler = new htmlparser.DomHandler(function (error, dom) {
+ if (error)
+ [...do something for errors...]
+ else
+ [...parsing done, do something...]
+ console.log(dom);
+var parser = new htmlparser.Parser(handler);
+ data: 'Xyz ',
+ type: 'text'
+}, {
+ type: 'script',
+ name: 'script',
+ attribs: {
+ language: 'javascript'
+ },
+ children: [{
+ data: 'var foo = \'<bar>\';<',
+ type: 'text'
+ }]
+}, {
+ data: '<!-- Waah! -- ',
+ type: 'comment'
+##Option: normalizeWhitespace
+Indicates whether the whitespace in text nodes should be normalized (= all whitespace should be replaced with single spaces). The default value is "false".
+The following HTML will be used:
+ <br>this is the text
+###Example: true
+ type: 'tag',
+ name: 'font',
+ children: [{
+ data: ' ',
+ type: 'text'
+ }, {
+ type: 'tag',
+ name: 'br'
+ }, {
+ data: 'this is the text ',
+ type: 'text'
+ }, {
+ type: 'tag',
+ name: 'font'
+ }]
+###Example: false
+ type: 'tag',
+ name: 'font',
+ children: [{
+ data: '\n\t',
+ type: 'text'
+ }, {
+ type: 'tag',
+ name: 'br'
+ }, {
+ data: 'this is the text\n',
+ type: 'text'
+ }, {
+ type: 'tag',
+ name: 'font'
+ }]
+##Option: withStartIndices
+Indicates whether a `startIndex` property will be added to nodes. When the parser is used in a non-streaming fashion, `startIndex` is an integer indicating the position of the start of the node in the document. The default value is "false".
diff --git a/test/cases/01-basic.json b/test/cases/01-basic.json
new file mode 100644
index 0000000..61759fd
--- /dev/null
+++ b/test/cases/01-basic.json
@@ -0,0 +1,57 @@
+ "name": "Basic test",
+ "options": {},
+ "html": "<!DOCTYPE html><html><title>The Title</title><body>Hello world</body></html>",
+ "expected": [
+ {
+ "name": "!doctype",
+ "data": "!DOCTYPE html",
+ "type": "directive"
+ },
+ {
+ "type": "tag",
+ "name": "html",
+ "attribs": {},
+ "parent": null,
+ "children": [
+ {
+ "type": "tag",
+ "name": "title",
+ "attribs": {},
+ "parent": {
+ "type": "tag",
+ "name": "html",
+ "attribs": {}
+ },
+ "children": [
+ {
+ "data": "The Title",
+ "type": "text",
+ "parent": {
+ "type": "tag",
+ "name": "title",
+ "attribs": {}
+ }
+ }
+ ]
+ },
+ {
+ "type": "tag",
+ "name": "body",
+ "attribs": {},
+ "children": [
+ {
+ "data": "Hello world",
+ "type": "text"
+ }
+ ],
+ "prev": {
+ "type": "tag",
+ "name": "title",
+ "attribs": {}
+ }
+ }
+ ]
+ }
+ ]
\ No newline at end of file
diff --git a/test/cases/02-single_tag_1.json b/test/cases/02-single_tag_1.json
new file mode 100644
index 0000000..51ff845
--- /dev/null
+++ b/test/cases/02-single_tag_1.json
@@ -0,0 +1,21 @@
+ "name": "Single Tag 1",
+ "options": {},
+ "html": "<br>text</br>",
+ "expected": [
+ {
+ "type": "tag",
+ "name": "br",
+ "attribs": {}
+ },
+ {
+ "data": "text",
+ "type": "text"
+ },
+ {
+ "type": "tag",
+ "name": "br",
+ "attribs": {}
+ }
+ ]
\ No newline at end of file
diff --git a/test/cases/03-single_tag_2.json b/test/cases/03-single_tag_2.json
new file mode 100644
index 0000000..1c56dc9
--- /dev/null
+++ b/test/cases/03-single_tag_2.json
@@ -0,0 +1,21 @@
+ "name": "Single Tag 2",
+ "options": {},
+ "html": "<br>text<br>",
+ "expected": [
+ {
+ "type": "tag",
+ "name": "br",
+ "attribs": {}
+ },
+ {
+ "data": "text",
+ "type": "text"
+ },
+ {
+ "type": "tag",
+ "name": "br",
+ "attribs": {}
+ }
+ ]
\ No newline at end of file
diff --git a/test/cases/04-unescaped_in_script.json b/test/cases/04-unescaped_in_script.json
new file mode 100644
index 0000000..f31f5fa
--- /dev/null
+++ b/test/cases/04-unescaped_in_script.json
@@ -0,0 +1,27 @@
+ "name": "Unescaped chars in script",
+ "options": {},
+ "html": "<head><script language=\"Javascript\">var foo = \"<bar>\"; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \"<<>>>><<\";</script></head>",
+ "expected": [
+ {
+ "type": "tag",
+ "name": "head",
+ "attribs": {},
+ "children": [
+ {
+ "type": "script",
+ "name": "script",
+ "attribs": {
+ "language": "Javascript"
+ },
+ "children": [
+ {
+ "data": "var foo = \"<bar>\"; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \"<<>>>><<\";",
+ "type": "text"
+ }
+ ]
+ }
+ ]
+ }
+ ]
\ No newline at end of file
diff --git a/test/cases/05-tags_in_comment.json b/test/cases/05-tags_in_comment.json
new file mode 100644
index 0000000..2d22d9e
--- /dev/null
+++ b/test/cases/05-tags_in_comment.json
@@ -0,0 +1,18 @@
+ "name": "Special char in comment",
+ "options": {},
+ "html": "<head><!-- commented out tags <title>Test</title>--></head>",
+ "expected": [
+ {
+ "type": "tag",
+ "name": "head",
+ "attribs": {},
+ "children": [
+ {
+ "data": " commented out tags <title>Test</title>",
+ "type": "comment"
+ }
+ ]
+ }
+ ]
\ No newline at end of file
diff --git a/test/cases/06-comment_in_script.json b/test/cases/06-comment_in_script.json
new file mode 100644
index 0000000..9a21cda
--- /dev/null
+++ b/test/cases/06-comment_in_script.json
@@ -0,0 +1,18 @@
+ "name": "Script source in comment",
+ "options": {},
+ "html": "<script><!--var foo = 1;--></script>",
+ "expected": [
+ {
+ "type": "script",
+ "name": "script",
+ "attribs": {},
+ "children": [
+ {
+ "data": "<!--var foo = 1;-->",
+ "type": "text"
+ }
+ ]
+ }
+ ]
\ No newline at end of file
diff --git a/test/cases/07-unescaped_in_style.json b/test/cases/07-unescaped_in_style.json
new file mode 100644
index 0000000..77438fd
--- /dev/null
+++ b/test/cases/07-unescaped_in_style.json
@@ -0,0 +1,20 @@
+ "name": "Unescaped chars in style",
+ "options": {},
+ "html": "<style type=\"text/css\">\n body > p\n\t{ font-weight: bold; }</style>",
+ "expected": [
+ {
+ "type": "style",
+ "name": "style",
+ "attribs": {
+ "type": "text/css"
+ },
+ "children": [
+ {
+ "data": "\n body > p\n\t{ font-weight: bold; }",
+ "type": "text"
+ }
+ ]
+ }
+ ]
\ No newline at end of file
diff --git a/test/cases/08-extra_spaces_in_tag.json b/test/cases/08-extra_spaces_in_tag.json
new file mode 100644
index 0000000..5c2492e
--- /dev/null
+++ b/test/cases/08-extra_spaces_in_tag.json
@@ -0,0 +1,20 @@
+ "name": "Extra spaces in tag",
+ "options": {},
+ "html": "<font\t\n size='14' \n>the text</\t\nfont\t \n>",
+ "expected": [
+ {
+ "type": "tag",
+ "name": "font",
+ "attribs": {
+ "size": "14"
+ },
+ "children": [
+ {
+ "data": "the text",
+ "type": "text"
+ }
+ ]
+ }
+ ]
\ No newline at end of file
diff --git a/test/cases/09-unquoted_attrib.json b/test/cases/09-unquoted_attrib.json
new file mode 100644
index 0000000..543ccee
--- /dev/null
+++ b/test/cases/09-unquoted_attrib.json
@@ -0,0 +1,20 @@
+ "name": "Unquoted attributes",
+ "options": {},
+ "html": "<font size= 14>the text</font>",
+ "expected": [
+ {
+ "type": "tag",
+ "name": "font",
+ "attribs": {
+ "size": "14"
+ },
+ "children": [
+ {
+ "data": "the text",
+ "type": "text"
+ }
+ ]
+ }
+ ]
\ No newline at end of file
diff --git a/test/cases/10-singular_attribute.json b/test/cases/10-singular_attribute.json
new file mode 100644
index 0000000..544636e
--- /dev/null
+++ b/test/cases/10-singular_attribute.json
@@ -0,0 +1,15 @@
+ "name": "Singular attribute",
+ "options": {},
+ "html": "<option value='foo' selected>",
+ "expected": [
+ {
+ "type": "tag",
+ "name": "option",
+ "attribs": {
+ "value": "foo",
+ "selected": ""
+ }
+ }
+ ]
\ No newline at end of file
diff --git a/test/cases/11-text_outside_tags.json b/test/cases/11-text_outside_tags.json
new file mode 100644
index 0000000..d328572
--- /dev/null
+++ b/test/cases/11-text_outside_tags.json
@@ -0,0 +1,40 @@
+ "name": "Text outside tags",
+ "options": {},
+ "html": "Line one\n<br>\nline two",
+ "expected": [
+ {
+ "data": "Line one\n",
+ "type": "text",
+ "prev": null,
+ "next": {
+ "type": "tag",
+ "name": "br",
+ "attribs": {}
+ }
+ },
+ {
+ "type": "tag",
+ "name": "br",
+ "attribs": {},
+ "prev": {
+ "data": "Line one\n",
+ "type": "text"
+ },
+ "next": {
+ "data": "\nline two",
+ "type": "text"
+ }
+ },
+ {
+ "data": "\nline two",
+ "type": "text",
+ "prev": {
+ "type": "tag",
+ "name": "br",
+ "attribs": {}
+ },
+ "next": null
+ }
+ ]
\ No newline at end of file
diff --git a/test/cases/12-text_only.json b/test/cases/12-text_only.json
new file mode 100644
index 0000000..342dc45
--- /dev/null
+++ b/test/cases/12-text_only.json
@@ -0,0 +1,11 @@
+ "name": "Only text",
+ "options": {},
+ "html": "this is the text",
+ "expected": [
+ {
+ "data": "this is the text",
+ "type": "text"
+ }
+ ]
\ No newline at end of file
diff --git a/test/cases/13-comment_in_text.json b/test/cases/13-comment_in_text.json
new file mode 100644
index 0000000..672dbbe
--- /dev/null
+++ b/test/cases/13-comment_in_text.json
@@ -0,0 +1,19 @@
+ "name": "Comment within text",
+ "options": {},
+ "html": "this is <!-- the comment --> the text",
+ "expected": [
+ {
+ "data": "this is ",
+ "type": "text"
+ },
+ {
+ "data": " the comment ",
+ "type": "comment"
+ },
+ {
+ "data": " the text",
+ "type": "text"
+ }
+ ]
\ No newline at end of file
diff --git a/test/cases/14-comment_in_text_in_script.json b/test/cases/14-comment_in_text_in_script.json
new file mode 100644
index 0000000..b69d04b
--- /dev/null
+++ b/test/cases/14-comment_in_text_in_script.json
@@ -0,0 +1,18 @@
+ "name": "Comment within text within script",
+ "options": {},
+ "html": "<script>this is <!-- the comment --> the text</script>",
+ "expected": [
+ {
+ "type": "script",
+ "name": "script",
+ "attribs": {},
+ "children": [
+ {
+ "data": "this is <!-- the comment --> the text",
+ "type": "text"
+ }
+ ]
+ }
+ ]
\ No newline at end of file
diff --git a/test/cases/15-non-verbose.json b/test/cases/15-non-verbose.json
new file mode 100644
index 0000000..069db84
--- /dev/null
+++ b/test/cases/15-non-verbose.json
@@ -0,0 +1,22 @@
+ "name": "Option 'verbose' set to 'false'",
+ "options": {
+ "verbose": false
+ },
+ "html": "<font\t\n size='14' \n>the text</\t\nfont\t \n>",
+ "expected": [
+ {
+ "type": "tag",
+ "name": "font",
+ "attribs": {
+ "size": "14"
+ },
+ "children": [
+ {
+ "data": "the text",
+ "type": "text"
+ }
+ ]
+ }
+ ]
\ No newline at end of file
diff --git a/test/cases/16-normalize_whitespace.json b/test/cases/16-normalize_whitespace.json
new file mode 100644
index 0000000..d4490af
--- /dev/null
+++ b/test/cases/16-normalize_whitespace.json
@@ -0,0 +1,47 @@
+ "name": "Normalize whitespace",
+ "options": {
+ "normalizeWhitespace": true
+ },
+ "html": "Line one\n<br>\t \r\n\f <br>\nline two<font><br> x </font>",
+ "expected": [
+ {
+ "data": "Line one ",
+ "type": "text"
+ },
+ {
+ "type": "tag",
+ "name": "br",
+ "attribs": {}
+ },
+ {
+ "data": " ",
+ "type": "text"
+ },
+ {
+ "type": "tag",
+ "name": "br",
+ "attribs": {}
+ },
+ {
+ "data": " line two",
+ "type": "text"
+ },
+ {
+ "type": "tag",
+ "name": "font",
+ "attribs": {},
+ "children": [
+ {
+ "type": "tag",
+ "name": "br",
+ "attribs": {}
+ },
+ {
+ "data": " x ",
+ "type": "text"
+ }
+ ]
+ }
+ ]
\ No newline at end of file
diff --git a/test/cases/17-xml_namespace.json b/test/cases/17-xml_namespace.json
new file mode 100644
index 0000000..2171c49
--- /dev/null
+++ b/test/cases/17-xml_namespace.json
@@ -0,0 +1,18 @@
+ "name": "XML Namespace",
+ "options": {},
+ "html": "<ns:tag>text</ns:tag>",
+ "expected": [
+ {
+ "type": "tag",
+ "name": "ns:tag",
+ "attribs": {},
+ "children": [
+ {
+ "data": "text",
+ "type": "text"
+ }
+ ]
+ }
+ ]
\ No newline at end of file
diff --git a/test/cases/18-enforce_empty_tags.json b/test/cases/18-enforce_empty_tags.json
new file mode 100644
index 0000000..ce6c455
--- /dev/null
+++ b/test/cases/18-enforce_empty_tags.json
@@ -0,0 +1,16 @@
+ "name": "Enforce empty tags",
+ "options": {},
+ "html": "<link>text</link>",
+ "expected": [
+ {
+ "type": "tag",
+ "name": "link",
+ "attribs": {}
+ },
+ {
+ "data": "text",
+ "type": "text"
+ }
+ ]
\ No newline at end of file
diff --git a/test/cases/19-ignore_empty_tags.json b/test/cases/19-ignore_empty_tags.json
new file mode 100644
index 0000000..fe59cf9
--- /dev/null
+++ b/test/cases/19-ignore_empty_tags.json
@@ -0,0 +1,20 @@
+ "name": "Ignore empty tags (xml mode)",
+ "options": {
+ "xmlMode": true
+ },
+ "html": "<link>text</link>",
+ "expected": [
+ {
+ "type": "tag",
+ "name": "link",
+ "attribs": {},
+ "children": [
+ {
+ "data": "text",
+ "type": "text"
+ }
+ ]
+ }
+ ]
\ No newline at end of file
diff --git a/test/cases/20-template_script_tags.json b/test/cases/20-template_script_tags.json
new file mode 100644
index 0000000..dae5f1f
--- /dev/null
+++ b/test/cases/20-template_script_tags.json
@@ -0,0 +1,20 @@
+ "name": "Template script tags",
+ "options": {},
+ "html": "<script type=\"text/template\"><h1>Heading1</h1></script>",
+ "expected": [
+ {
+ "type": "script",
+ "name": "script",
+ "attribs": {
+ "type": "text/template"
+ },
+ "children": [
+ {
+ "data": "<h1>Heading1</h1>",
+ "type": "text"
+ }
+ ]
+ }
+ ]
\ No newline at end of file
diff --git a/test/cases/21-conditional_comments.json b/test/cases/21-conditional_comments.json
new file mode 100644
index 0000000..c034acd
--- /dev/null
+++ b/test/cases/21-conditional_comments.json
@@ -0,0 +1,15 @@
+ "name": "Conditional comments",
+ "options": {},
+ "html": "<!--[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]--><!--[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]-->",
+ "expected": [
+ {
+ "data": "[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]",
+ "type": "comment"
+ },
+ {
+ "data": "[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]",
+ "type": "comment"
+ }
+ ]
\ No newline at end of file
diff --git a/test/cases/22-lowercase_tags.json b/test/cases/22-lowercase_tags.json
new file mode 100644
index 0000000..b3d70e4
--- /dev/null
+++ b/test/cases/22-lowercase_tags.json
@@ -0,0 +1,41 @@
+ "name": "lowercase tags",
+ "options": {},
+ "html": "<!DOCTYPE html><HTML><TITLE>The Title</title><BODY>Hello world</body></html>",
+ "expected": [
+ {
+ "name": "!doctype",
+ "data": "!DOCTYPE html",
+ "type": "directive"
+ },
+ {
+ "type": "tag",
+ "name": "html",
+ "attribs": {},
+ "children": [
+ {
+ "type": "tag",
+ "name": "title",
+ "attribs": {},
+ "children": [
+ {
+ "data": "The Title",
+ "type": "text"
+ }
+ ]
+ },
+ {
+ "type": "tag",
+ "name": "body",
+ "attribs": {},
+ "children": [
+ {
+ "data": "Hello world",
+ "type": "text"
+ }
+ ]
+ }
+ ]
+ }
+ ]
\ No newline at end of file
diff --git a/test/cases/23-dom-lvl1.json b/test/cases/23-dom-lvl1.json
new file mode 100644
index 0000000..ab25d88
--- /dev/null
+++ b/test/cases/23-dom-lvl1.json
@@ -0,0 +1,131 @@
+ "name": "DOM level 1",
+ "options": { "withDomLvl1": true },
+ "html": "<div>some stray text<h1>Hello, world.</h1><!-- comment node -->more stray text</div>",
+ "expected": [
+ {
+ "type": "tag",
+ "nodeType": 1,
+ "name": "div",
+ "tagName": "div",
+ "attribs": {},
+ "nodeValue": null,
+ "children": [
+ {
+ "type": "text",
+ "nodeType": 3,
+ "tagName": null,
+ "data": "some stray text",
+ "nodeValue": "some stray text",
+ "childNodes": null,
+ "firstChild": null,
+ "lastChild": null
+ },
+ {
+ "type": "tag",
+ "nodeType": 1,
+ "name": "h1",
+ "tagName": "h1",
+ "nodeValue": null,
+ "attribs": {},
+ "children": [
+ {
+ "type": "text",
+ "nodeType": 3,
+ "tagName": null,
+ "data": "Hello, world.",
+ "nodeValue": "Hello, world.",
+ "childNodes": null,
+ "firstChild": null,
+ "lastChild": null
+ }
+ ],
+ "firstChild": {
+ "type": "text",
+ "nodeType": 3,
+ "tagName": null,
+ "data": "Hello, world.",
+ "nodeValue": "Hello, world.",
+ "childNodes": null,
+ "firstChild": null,
+ "lastChild": null
+ },
+ "lastChild": {
+ "type": "text",
+ "nodeType": 3,
+ "tagName": null,
+ "data": "Hello, world.",
+ "nodeValue": "Hello, world.",
+ "childNodes": null,
+ "firstChild": null,
+ "lastChild": null
+ }
+ },
+ {
+ "type": "comment",
+ "nodeType": 8,
+ "tagName": null,
+ "data": " comment node ",
+ "nodeValue": " comment node ",
+ "childNodes": null,
+ "firstChild": null,
+ "lastChild": null,
+ "prev": {
+ "type": "tag",
+ "name": "h1",
+ "nodeValue": null,
+ "attribs": {}
+ },
+ "previousSibling": {
+ "type": "tag",
+ "name": "h1",
+ "nodeValue": null,
+ "attribs": {}
+ },
+ "next": {
+ "type": "text",
+ "tagName": null,
+ "data": "more stray text"
+ },
+ "nextSibling": {
+ "type": "text",
+ "tagName": null,
+ "data": "more stray text"
+ }
+ },
+ {
+ "type": "text",
+ "nodeType": 3,
+ "tagName": null,
+ "data": "more stray text",
+ "nodeValue": "more stray text",
+ "childNodes": null,
+ "firstChild": null,
+ "lastChild": null,
+ "next": null,
+ "nextSibling": null
+ }
+ ],
+ "firstChild": {
+ "type": "text",
+ "nodeType": 3,
+ "tagName": null,
+ "data": "some stray text",
+ "nodeValue": "some stray text",
+ "childNodes": null,
+ "firstChild": null,
+ "lastChild": null
+ },
+ "lastChild": {
+ "type": "text",
+ "nodeType": 3,
+ "tagName": null,
+ "data": "more stray text",
+ "nodeValue": "more stray text",
+ "childNodes": null,
+ "firstChild": null,
+ "lastChild": null
+ }
+ }
+ ]
diff --git a/test/cases/24-with-start-indices.json b/test/cases/24-with-start-indices.json
new file mode 100644
index 0000000..02228f1
--- /dev/null
+++ b/test/cases/24-with-start-indices.json
@@ -0,0 +1,85 @@
+ "name": "withStartIndices adds correct startIndex properties",
+ "options": {"withStartIndices": true},
+ "streaming": false,
+ "html": "<!DOCTYPE html> <html> <title>The Title</title> <body class='foo'>Hello world <p></p></body> <!-- the comment --> </html> ",
+ "expected": [
+ {
+ "startIndex": 0,
+ "name": "!doctype",
+ "data": "!DOCTYPE html",
+ "type": "directive"
+ },
+ {
+ "type": "text",
+ "data": " "
+ },
+ {
+ "startIndex": 16,
+ "type": "tag",
+ "name": "html",
+ "attribs": {},
+ "parent": null,
+ "children": [
+ {
+ "startIndex": 22,
+ "type": "text",
+ "data": " "
+ },
+ {
+ "startIndex": 23,
+ "type": "tag",
+ "name": "title",
+ "attribs": {},
+ "children": [
+ {
+ "startIndex": 30,
+ "data": "The Title",
+ "type": "text"
+ }
+ ]
+ },
+ {
+ "startIndex": 47,
+ "type": "text",
+ "data": " "
+ },
+ {
+ "startIndex": 48,
+ "type": "tag",
+ "name": "body",
+ "attribs": {"class": "foo"},
+ "children": [
+ {
+ "startIndex": 66,
+ "data": "Hello world ",
+ "type": "text"
+ },
+ {
+ "startIndex": 78,
+ "type": "tag",
+ "name": "p",
+ "attribs": {},
+ "children": []
+ }
+ ]
+ },
+ {
+ "startIndex": 92,
+ "type": "text",
+ "data": " "
+ },
+ {
+ "startIndex": 93,
+ "type": "comment",
+ "data": " the comment "
+ },
+ {
+ "startIndex": 113,
+ "type": "text",
+ "data": " "
+ }
+ ]
+ }
+ ]
\ No newline at end of file
diff --git a/test/tests.js b/test/tests.js
new file mode 100644
index 0000000..6dc96a2
--- /dev/null
+++ b/test/tests.js
@@ -0,0 +1,60 @@
+var fs = require("fs"),
+ path = require("path"),
+ assert = require("assert"),
+ util = require("util"),
+ Parser = require("htmlparser2").Parser,
+ Handler = require("../");
+var basePath = path.resolve(__dirname, "cases"),
+ inspectOpts = { showHidden: true, depth: null };
+.filter(RegExp.prototype.test, /\.json$/) //only allow .json files
+ return path.resolve(basePath, name);
+ it(test.name, function(){
+ var expected = test.expected;
+ var handler = new Handler(function(err, actual){
+ assert.ifError(err);
+ try {
+ compare(expected, actual);
+ } catch(e){
+ e.expected = util.inspect(expected, inspectOpts);
+ e.actual = util.inspect(actual, inspectOpts);
+ throw e;
+ }
+ }, test.options);
+ var data = test.html;
+ var parser = new Parser(handler, test.options);
+ //first, try to run the test via chunks
+ if (test.streaming || test.streaming === undefined){
+ for(var i = 0; i < data.length; i++){
+ parser.write(data.charAt(i));
+ }
+ parser.done();
+ }
+ //then parse everything
+ parser.parseComplete(data);
+ });
+function compare(expected, result){
+ assert.equal(typeof expected, typeof result, "types didn't match");
+ if(typeof expected !== "object" || expected === null){
+ assert.strictEqual(expected, result, "result doesn't equal expected");
+ } else {
+ for(var prop in expected){
+ assert.ok(prop in result, "result didn't contain property " + prop);
+ compare(expected[prop], result[prop]);
+ }
+ }
\ No newline at end of file
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/node-domhandler.git
More information about the Pkg-javascript-commits
mailing list