[Pkg-javascript-commits] [node-htmlparser2_new] 01/06: Import Upstream version 3.9.2

Paolo Greppi paolog-guest at moszumanska.debian.org
Sat Dec 24 01:12:20 UTC 2016


This is an automated email from the git hooks/post-receive script.

paolog-guest pushed a commit to branch master
in repository node-htmlparser2_new.

commit 3d60433fc7f9bf55e368e88b1bdab4dd2ac2f205
Author: Paolo Greppi <paolo.greppi at libpf.com>
Date:   Sat Dec 24 00:57:07 2016 +0000

    Import Upstream version 3.9.2
---
 .eslintrc                                       |   94 ++
 .gitattributes                                  |    2 +
 .gitignore                                      |    0
 .travis.yml                                     |   16 +
 LICENSE                                         |   18 +
 README.md                                       |   91 ++
 lib/CollectingHandler.js                        |   55 +
 lib/FeedHandler.js                              |   95 ++
 lib/Parser.js                                   |  353 ++++++
 lib/ProxyHandler.js                             |   27 +
 lib/Stream.js                                   |   35 +
 lib/Tokenizer.js                                |  906 +++++++++++++++
 lib/WritableStream.js                           |   25 +
 lib/index.js                                    |   68 ++
 package.json                                    |   56 +
 test/01-events.js                               |    9 +
 test/02-stream.js                               |   23 +
 test/03-feed.js                                 |   19 +
 test/Documents/Atom_Example.xml                 |   25 +
 test/Documents/Attributes.html                  |   16 +
 test/Documents/Basic.html                       |    1 +
 test/Documents/RDF_Example.xml                  |   63 +
 test/Documents/RSS_Example.xml                  |   48 +
 test/Events/01-simple.json                      |   44 +
 test/Events/02-template.json                    |   63 +
 test/Events/03-lowercase_tags.json              |   46 +
 test/Events/04-cdata.json                       |   50 +
 test/Events/05-cdata-special.json               |   35 +
 test/Events/06-leading-lt.json                  |   16 +
 test/Events/07-self-closing.json                |   67 ++
 test/Events/08-implicit-close-tags.json         |   71 ++
 test/Events/09-attributes.json                  |   68 ++
 test/Events/10-crazy-attrib.json                |   52 +
 test/Events/11-script_in_script.json            |   54 +
 test/Events/12-long-comment-end.json            |   20 +
 test/Events/13-long-cdata-end.json              |   22 +
 test/Events/14-implicit-open-tags.json          |   27 +
 test/Events/15-lt-whitespace.json               |   16 +
 test/Events/16-double_attribs.json              |   45 +
 test/Events/17-numeric_entities.json            |   16 +
 test/Events/18-legacy_entities.json             |   16 +
 test/Events/19-named_entities.json              |   16 +
 test/Events/20-xml_entities.json                |   16 +
 test/Events/21-entity_in_attribute.json         |   38 +
 test/Events/22-double_brackets.json             |   41 +
 test/Events/23-legacy_entity_fail.json          |   16 +
 test/Events/24-special_special.json             |  133 +++
 test/Events/25-empty_tag_name.json              |   13 +
 test/Events/26-not-quite-closed.json            |   35 +
 test/Events/27-entities_in_attributes.json      |   62 +
 test/Events/28-cdata_in_html.json               |    9 +
 test/Events/29-comment_edge-cases.json          |   18 +
 test/Events/30-cdata_edge-cases.json            |   22 +
 test/Events/31-comment_false-ending.json        |    9 +
 test/Events/32-script-ending-with-lessthan.json |   35 +
 test/Feeds/01-rss.js                            |   34 +
 test/Feeds/02-atom.js                           |   18 +
 test/Feeds/03-rdf.js                            |   20 +
 test/Stream/01-basic.json                       |   83 ++
 test/Stream/02-RSS.json                         | 1093 ++++++++++++++++++
 test/Stream/03-Atom.json                        |  678 +++++++++++
 test/Stream/04-RDF.json                         | 1399 +++++++++++++++++++++++
 test/Stream/05-Attributes.json                  |  354 ++++++
 test/api.js                                     |  103 ++
 test/test-helper.js                             |   83 ++
 test/unicode.js                                 |   21 +
 66 files changed, 7122 insertions(+)

diff --git a/.eslintrc b/.eslintrc
new file mode 100644
index 0000000..24d1a9e
--- /dev/null
+++ b/.eslintrc
@@ -0,0 +1,94 @@
+{
+  "extends": "eslint:recommended",
+  "env": {
+    "node": true
+  },
+  "globals": {
+    "describe": true,
+    "it": true
+  },
+  "rules": {
+    "eqeqeq": 2,
+    "no-extend-native": 2,
+    "no-use-before-define": [
+      2,
+      {
+        "functions": false,
+        "classes": false
+      }
+    ],
+    "no-caller": 2,
+    "no-irregular-whitespace": 2,
+    "quotes": [
+      2,
+      "double"
+    ],
+    "no-undef": 2,
+    "no-unused-vars": 2,
+    "no-eq-null": 2,
+    "no-proto": 2,
+    "curly": [
+      2,
+      "multi-line"
+    ],
+    "no-mixed-spaces-and-tabs": [
+      2,
+      "smart-tabs"
+    ],
+    "space-infix-ops": 2,
+    "keyword-spacing": [
+      2,
+      {
+        "overrides": {
+          "if": {
+            "after": false
+          },
+          "catch": {
+            "after": false
+          },
+          "for": {
+            "after": false
+          },
+          "while": {
+            "after": false
+          }
+        }
+      }
+    ],
+    "new-cap": 2,
+    "comma-style": [
+      2,
+      "last"
+    ],
+    "dot-notation": 2,
+    "wrap-iife": 2,
+    "no-empty": 2,
+    "space-unary-ops": [
+      2,
+      {
+        "words": false,
+        "nonwords": false
+      }
+    ],
+    "no-with": 2,
+    "no-multi-str": 2,
+    "no-trailing-spaces": 2,
+    "indent": [
+      2,
+      "tab",
+      {
+        "SwitchCase": 1,
+        "VariableDeclarator": 0
+      }
+    ],
+    "linebreak-style": [
+      2,
+      "unix"
+    ],
+    "consistent-this": [
+      2,
+      "_this"
+    ],
+    "no-extra-semi": 0 // https://github.com/eslint/eslint/issues/6386
+  }
+}
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..4bb50dc
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+# Auto detect text files and perform LF normalization
+* text eol=lf
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..15628ee
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,16 @@
+language: node_js
+node_js:
+  - stable
+  - unstable
+  - 5.1
+  - 4.2
+  - 0.12
+
+sudo: false
+
+matrix:
+  fast_finish: true
+  allow_failures:
+    - node_js: unstable
+
+script: npm run coveralls
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..0a35e02
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,18 @@
+Copyright 2010, 2011, Chris Winberry <chris at winberry.net>. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to
+deal in the Software without restriction, including without limitation the
+rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+ 
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+ 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+IN THE SOFTWARE.
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..7590a6a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,91 @@
+# htmlparser2
+
+[![NPM version](http://img.shields.io/npm/v/htmlparser2.svg?style=flat)](https://npmjs.org/package/htmlparser2)
+[![Downloads](https://img.shields.io/npm/dm/htmlparser2.svg?style=flat)](https://npmjs.org/package/htmlparser2)
+[![Build Status](http://img.shields.io/travis/fb55/htmlparser2/master.svg?style=flat)](http://travis-ci.org/fb55/htmlparser2)
+[![Coverage](http://img.shields.io/coveralls/fb55/htmlparser2.svg?style=flat)](https://coveralls.io/r/fb55/htmlparser2)
+
+A forgiving HTML/XML/RSS parser. The parser can handle streams and provides a callback interface.
+
+## Installation
+	npm install htmlparser2
+
+A live demo of htmlparser2 is available [here](http://demos.forbeslindesay.co.uk/htmlparser2/).
+
+## Usage
+
+```javascript
+var htmlparser = require("htmlparser2");
+var parser = new htmlparser.Parser({
+	onopentag: function(name, attribs){
+		if(name === "script" && attribs.type === "text/javascript"){
+			console.log("JS! Hooray!");
+		}
+	},
+	ontext: function(text){
+		console.log("-->", text);
+	},
+	onclosetag: function(tagname){
+		if(tagname === "script"){
+			console.log("That's it?!");
+		}
+	}
+}, {decodeEntities: true});
+parser.write("Xyz <script type='text/javascript'>var foo = '<<bar>>';</ script>");
+parser.end();
+```
+
+Output (simplified):
+
+```
+--> Xyz
+JS! Hooray!
+--> var foo = '<<bar>>';
+That's it?!
+```
+
+## Documentation
+
+Read more about the parser and its options in the [wiki](https://github.com/fb55/htmlparser2/wiki/Parser-options).
+
+## Get a DOM
+The `DomHandler` (known as `DefaultHandler` in the original `htmlparser` module) produces a DOM (document object model) that can be manipulated using the [`DomUtils`](https://github.com/fb55/DomUtils) helper.
+
+The `DomHandler`, while still bundled with this module, was moved to its [own module](https://github.com/fb55/domhandler). Have a look at it for further information.
+
+## Parsing RSS/RDF/Atom Feeds
+
+```javascript
+new htmlparser.FeedHandler(function(<error> error, <object> feed){
+    ...
+});
+```
+
+Note: While the provided feed handler works for most feeds, you might want to use [danmactough/node-feedparser](https://github.com/danmactough/node-feedparser), which is much better tested and actively maintained.
+
+## Performance
+
+After having some artificial benchmarks for some time, __@AndreasMadsen__ published his [`htmlparser-benchmark`](https://github.com/AndreasMadsen/htmlparser-benchmark), which benchmarks HTML parsers based on real-world websites.
+
+At the time of writing, the latest versions of all supported parsers show the following performance characteristics on [Travis CI](https://travis-ci.org/AndreasMadsen/htmlparser-benchmark/builds/10805007) (please note that Travis doesn't guarantee equal conditions for all tests):
+
+```
+gumbo-parser   : 34.9208 ms/file ± 21.4238
+html-parser    : 24.8224 ms/file ± 15.8703
+html5          : 419.597 ms/file ± 264.265
+htmlparser     : 60.0722 ms/file ± 384.844
+htmlparser2-dom: 12.0749 ms/file ± 6.49474
+htmlparser2    : 7.49130 ms/file ± 5.74368
+hubbub         : 30.4980 ms/file ± 16.4682
+libxmljs       : 14.1338 ms/file ± 18.6541
+parse5         : 22.0439 ms/file ± 15.3743
+sax            : 49.6513 ms/file ± 26.6032
+```
+
+## How does this module differ from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)?
+
+This is a fork of the `htmlparser` module. The main difference is that this is intended to be used only with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)). `htmlparser2` was rewritten multiple times and, while it maintains an API that's compatible with `htmlparser` in most cases, the projects don't share any code anymore.
+
+The parser now provides a callback interface close to [sax.js](https://github.com/isaacs/sax-js) (originally targeted at [readabilitySAX](https://github.com/fb55/readabilitysax)). As a result, old handlers won't work anymore.
+
+The `DefaultHandler` and the `RssHandler` were renamed to clarify their purpose (to `DomHandler` and `FeedHandler`). The old names are still available when requiring `htmlparser2`, so your code should work as expected.
diff --git a/lib/CollectingHandler.js b/lib/CollectingHandler.js
new file mode 100644
index 0000000..93d97a7
--- /dev/null
+++ b/lib/CollectingHandler.js
@@ -0,0 +1,55 @@
+module.exports = CollectingHandler;
+
+function CollectingHandler(cbs){
+	this._cbs = cbs || {};
+	this.events = [];
+}
+
+var EVENTS = require("./").EVENTS;
+Object.keys(EVENTS).forEach(function(name){
+	if(EVENTS[name] === 0){
+		name = "on" + name;
+		CollectingHandler.prototype[name] = function(){
+			this.events.push([name]);
+			if(this._cbs[name]) this._cbs[name]();
+		};
+	} else if(EVENTS[name] === 1){
+		name = "on" + name;
+		CollectingHandler.prototype[name] = function(a){
+			this.events.push([name, a]);
+			if(this._cbs[name]) this._cbs[name](a);
+		};
+	} else if(EVENTS[name] === 2){
+		name = "on" + name;
+		CollectingHandler.prototype[name] = function(a, b){
+			this.events.push([name, a, b]);
+			if(this._cbs[name]) this._cbs[name](a, b);
+		};
+	} else {
+		throw Error("wrong number of arguments");
+	}
+});
+
+CollectingHandler.prototype.onreset = function(){
+	this.events = [];
+	if(this._cbs.onreset) this._cbs.onreset();
+};
+
+CollectingHandler.prototype.restart = function(){
+	if(this._cbs.onreset) this._cbs.onreset();
+
+	for(var i = 0, len = this.events.length; i < len; i++){
+		if(this._cbs[this.events[i][0]]){
+
+			var num = this.events[i].length;
+
+			if(num === 1){
+				this._cbs[this.events[i][0]]();
+			} else if(num === 2){
+				this._cbs[this.events[i][0]](this.events[i][1]);
+			} else {
+				this._cbs[this.events[i][0]](this.events[i][1], this.events[i][2]);
+			}
+		}
+	}
+};
diff --git a/lib/FeedHandler.js b/lib/FeedHandler.js
new file mode 100644
index 0000000..329c94a
--- /dev/null
+++ b/lib/FeedHandler.js
@@ -0,0 +1,95 @@
+var index = require("./index.js"),
+    DomHandler = index.DomHandler,
+    DomUtils = index.DomUtils;
+
+//TODO: make this a streamable handler
+function FeedHandler(callback, options){
+	this.init(callback, options);
+}
+
+require("inherits")(FeedHandler, DomHandler);
+
+FeedHandler.prototype.init = DomHandler;
+
+function getElements(what, where){
+	return DomUtils.getElementsByTagName(what, where, true);
+}
+function getOneElement(what, where){
+	return DomUtils.getElementsByTagName(what, where, true, 1)[0];
+}
+function fetch(what, where, recurse){
+	return DomUtils.getText(
+		DomUtils.getElementsByTagName(what, where, recurse, 1)
+	).trim();
+}
+
+function addConditionally(obj, prop, what, where, recurse){
+	var tmp = fetch(what, where, recurse);
+	if(tmp) obj[prop] = tmp;
+}
+
+var isValidFeed = function(value){
+	return value === "rss" || value === "feed" || value === "rdf:RDF";
+};
+
+FeedHandler.prototype.onend = function(){
+	var feed = {},
+	    feedRoot = getOneElement(isValidFeed, this.dom),
+	    tmp, childs;
+
+	if(feedRoot){
+		if(feedRoot.name === "feed"){
+			childs = feedRoot.children;
+
+			feed.type = "atom";
+			addConditionally(feed, "id", "id", childs);
+			addConditionally(feed, "title", "title", childs);
+			if((tmp = getOneElement("link", childs)) && (tmp = tmp.attribs) && (tmp = tmp.href)) feed.link = tmp;
+			addConditionally(feed, "description", "subtitle", childs);
+			if((tmp = fetch("updated", childs))) feed.updated = new Date(tmp);
+			addConditionally(feed, "author", "email", childs, true);
+
+			feed.items = getElements("entry", childs).map(function(item){
+				var entry = {}, tmp;
+
+				item = item.children;
+
+				addConditionally(entry, "id", "id", item);
+				addConditionally(entry, "title", "title", item);
+				if((tmp = getOneElement("link", item)) && (tmp = tmp.attribs) && (tmp = tmp.href)) entry.link = tmp;
+				if((tmp = fetch("summary", item) || fetch("content", item))) entry.description = tmp;
+				if((tmp = fetch("updated", item))) entry.pubDate = new Date(tmp);
+				return entry;
+			});
+		} else {
+			childs = getOneElement("channel", feedRoot.children).children;
+
+			feed.type = feedRoot.name.substr(0, 3);
+			feed.id = "";
+			addConditionally(feed, "title", "title", childs);
+			addConditionally(feed, "link", "link", childs);
+			addConditionally(feed, "description", "description", childs);
+			if((tmp = fetch("lastBuildDate", childs))) feed.updated = new Date(tmp);
+			addConditionally(feed, "author", "managingEditor", childs, true);
+
+			feed.items = getElements("item", feedRoot.children).map(function(item){
+				var entry = {}, tmp;
+
+				item = item.children;
+
+				addConditionally(entry, "id", "guid", item);
+				addConditionally(entry, "title", "title", item);
+				addConditionally(entry, "link", "link", item);
+				addConditionally(entry, "description", "description", item);
+				if((tmp = fetch("pubDate", item))) entry.pubDate = new Date(tmp);
+				return entry;
+			});
+		}
+	}
+	this.dom = feed;
+	DomHandler.prototype._handleCallback.call(
+		this, feedRoot ? null : Error("couldn't find root of feed")
+	);
+};
+
+module.exports = FeedHandler;
diff --git a/lib/Parser.js b/lib/Parser.js
new file mode 100644
index 0000000..12db240
--- /dev/null
+++ b/lib/Parser.js
@@ -0,0 +1,353 @@
+var Tokenizer = require("./Tokenizer.js");
+
+/*
+	Options:
+
+	xmlMode: Disables the special behavior for script/style tags (false by default)
+	lowerCaseAttributeNames: call .toLowerCase for each attribute name (true if xmlMode is `false`)
+	lowerCaseTags: call .toLowerCase for each tag name (true if xmlMode is `false`)
+*/
+
+/*
+	Callbacks:
+
+	oncdataend,
+	oncdatastart,
+	onclosetag,
+	oncomment,
+	oncommentend,
+	onerror,
+	onopentag,
+	onprocessinginstruction,
+	onreset,
+	ontext
+*/
+
+var formTags = {
+	input: true,
+	option: true,
+	optgroup: true,
+	select: true,
+	button: true,
+	datalist: true,
+	textarea: true
+};
+
+var openImpliesClose = {
+	tr      : { tr:true, th:true, td:true },
+	th      : { th:true },
+	td      : { thead:true, th:true, td:true },
+	body    : { head:true, link:true, script:true },
+	li      : { li:true },
+	p       : { p:true },
+	h1      : { p:true },
+	h2      : { p:true },
+	h3      : { p:true },
+	h4      : { p:true },
+	h5      : { p:true },
+	h6      : { p:true },
+	select  : formTags,
+	input   : formTags,
+	output  : formTags,
+	button  : formTags,
+	datalist: formTags,
+	textarea: formTags,
+	option  : { option:true },
+	optgroup: { optgroup:true }
+};
+
+var voidElements = {
+	__proto__: null,
+	area: true,
+	base: true,
+	basefont: true,
+	br: true,
+	col: true,
+	command: true,
+	embed: true,
+	frame: true,
+	hr: true,
+	img: true,
+	input: true,
+	isindex: true,
+	keygen: true,
+	link: true,
+	meta: true,
+	param: true,
+	source: true,
+	track: true,
+	wbr: true,
+
+	//common self closing svg elements
+	path: true,
+	circle: true,
+	ellipse: true,
+	line: true,
+	rect: true,
+	use: true,
+	stop: true,
+	polyline: true,
+	polygon: true
+};
+
+var re_nameEnd = /\s|\//;
+
+function Parser(cbs, options){
+	this._options = options || {};
+	this._cbs = cbs || {};
+	//state of the tag and attribute currently being read, plus the open elements
+	this._tagname = "";
+	this._attribname = "";
+	this._attribvalue = "";
+	this._attribs = null;
+	this._stack = [];
+	//endIndex stays null until _updatePosition has recorded a first position
+	this.startIndex = 0;
+	this.endIndex = null;
+	//names are lowercased unless xmlMode is set or the option says otherwise
+	this._lowerCaseTagNames = "lowerCaseTags" in this._options ?
+									!!this._options.lowerCaseTags :
+									!this._options.xmlMode;
+	this._lowerCaseAttributeNames = "lowerCaseAttributeNames" in this._options ?
+									!!this._options.lowerCaseAttributeNames :
+									!this._options.xmlMode;
+
+	//A Tokenizer passed in the options applies to this instance only; assigning
+	//it to the module-level variable would change the default for every parser
+	//constructed afterwards.
+	var TokenizerImpl = this._options.Tokenizer || Tokenizer;
+	this._tokenizer = new TokenizerImpl(this._options, this);
+
+	if(this._cbs.onparserinit) this._cbs.onparserinit(this);
+}
+
+require("inherits")(Parser, require("events").EventEmitter);
+
+Parser.prototype._updatePosition = function(initialOffset){ //sets startIndex/endIndex for the token being reported
+	if(this.endIndex === null){ //still the constructor's initial value: no position recorded yet
+		if(this._tokenizer._sectionStart <= initialOffset){
+			this.startIndex = 0; //clamp so the start never drops below zero
+		} else {
+			this.startIndex = this._tokenizer._sectionStart - initialOffset;
+		}
+	}
+	else this.startIndex = this.endIndex + 1; //continue right after the previously recorded end
+	this.endIndex = this._tokenizer.getAbsoluteIndex();
+};
+
+//Tokenizer event handlers
+Parser.prototype.ontext = function(data){
+	this._updatePosition(1);
+	this.endIndex--;
+
+	if(this._cbs.ontext) this._cbs.ontext(data);
+};
+
+Parser.prototype.onopentagname = function(name){ //tokenizer callback: the name of an opening tag has been read
+	if(this._lowerCaseTagNames){
+		name = name.toLowerCase();
+	}
+	//remember the name; onopentagend and _closeCurrentTag read it back
+	this._tagname = name;
+	//outside xmlMode, first close every open element listed for this tag in openImpliesClose
+	if(!this._options.xmlMode && name in openImpliesClose) {
+		for(
+			var el;
+			(el = this._stack[this._stack.length - 1]) in openImpliesClose[name];
+			this.onclosetag(el)
+		);
+	}
+	//outside xmlMode, void elements are never pushed onto the stack of open elements
+	if(this._options.xmlMode || !(name in voidElements)){
+		this._stack.push(name);
+	}
+
+	if(this._cbs.onopentagname) this._cbs.onopentagname(name);
+	if(this._cbs.onopentag) this._attribs = {}; //attributes are only collected when an onopentag callback exists
+};
+
+Parser.prototype.onopentagend = function(){
+	this._updatePosition(1);
+
+	if(this._attribs){
+		if(this._cbs.onopentag) this._cbs.onopentag(this._tagname, this._attribs);
+		this._attribs = null;
+	}
+
+	if(!this._options.xmlMode && this._cbs.onclosetag && this._tagname in voidElements){
+		this._cbs.onclosetag(this._tagname);
+	}
+
+	this._tagname = "";
+};
+
+Parser.prototype.onclosetag = function(name){ //tokenizer callback: a closing tag with this name has been read
+	this._updatePosition(1);
+
+	if(this._lowerCaseTagNames){
+		name = name.toLowerCase();
+	}
+	//void element names are only matched against the stack in xmlMode
+	if(this._stack.length && (!(name in voidElements) || this._options.xmlMode)){
+		var pos = this._stack.lastIndexOf(name);
+		if(pos !== -1){
+			if(this._cbs.onclosetag){
+				pos = this._stack.length - pos; //number of elements to close, innermost first
+				while(pos--) this._cbs.onclosetag(this._stack.pop());
+			}
+			else this._stack.length = pos; //no callback: just drop the element and everything above it
+		} else if(name === "p" && !this._options.xmlMode){ //unmatched </p>: treat it as an opening tag that is closed at once
+			this.onopentagname(name);
+			this._closeCurrentTag();
+		}
+	} else if(!this._options.xmlMode && (name === "br" || name === "p")){ //</br>, or </p> with no open elements: same treatment
+		this.onopentagname(name);
+		this._closeCurrentTag();
+	}
+};
+
+Parser.prototype.onselfclosingtag = function(){
+	if(this._options.xmlMode || this._options.recognizeSelfClosing){
+		this._closeCurrentTag();
+	} else {
+		this.onopentagend();
+	}
+};
+
+Parser.prototype._closeCurrentTag = function(){
+	var name = this._tagname;
+
+	this.onopentagend();
+
+	//self-closing tags will be on the top of the stack
+	//(cheaper check than in onclosetag)
+	if(this._stack[this._stack.length - 1] === name){
+		if(this._cbs.onclosetag){
+			this._cbs.onclosetag(name);
+		}
+		this._stack.pop();
+	}
+};
+
+Parser.prototype.onattribname = function(name){
+	if(this._lowerCaseAttributeNames){
+		name = name.toLowerCase();
+	}
+	this._attribname = name;
+};
+
+Parser.prototype.onattribdata = function(value){
+	this._attribvalue += value;
+};
+
+Parser.prototype.onattribend = function(){
+	if(this._cbs.onattribute) this._cbs.onattribute(this._attribname, this._attribvalue);
+	if(
+		this._attribs &&
+		!Object.prototype.hasOwnProperty.call(this._attribs, this._attribname)
+	){
+		this._attribs[this._attribname] = this._attribvalue;
+	}
+	this._attribname = "";
+	this._attribvalue = "";
+};
+
+Parser.prototype._getInstructionName = function(value){
+	var idx = value.search(re_nameEnd),
+	    name = idx < 0 ? value : value.substr(0, idx);
+
+	if(this._lowerCaseTagNames){
+		name = name.toLowerCase();
+	}
+
+	return name;
+};
+
+Parser.prototype.ondeclaration = function(value){
+	if(this._cbs.onprocessinginstruction){
+		var name = this._getInstructionName(value);
+		this._cbs.onprocessinginstruction("!" + name, "!" + value);
+	}
+};
+
+Parser.prototype.onprocessinginstruction = function(value){
+	if(this._cbs.onprocessinginstruction){
+		var name = this._getInstructionName(value);
+		this._cbs.onprocessinginstruction("?" + name, "?" + value);
+	}
+};
+
+Parser.prototype.oncomment = function(value){
+	this._updatePosition(4);
+
+	if(this._cbs.oncomment) this._cbs.oncomment(value);
+	if(this._cbs.oncommentend) this._cbs.oncommentend();
+};
+
+Parser.prototype.oncdata = function(value){
+	this._updatePosition(1);
+
+	if(this._options.xmlMode || this._options.recognizeCDATA){
+		if(this._cbs.oncdatastart) this._cbs.oncdatastart();
+		if(this._cbs.ontext) this._cbs.ontext(value);
+		if(this._cbs.oncdataend) this._cbs.oncdataend();
+	} else {
+		this.oncomment("[CDATA[" + value + "]]");
+	}
+};
+
+Parser.prototype.onerror = function(err){
+	if(this._cbs.onerror) this._cbs.onerror(err);
+};
+
+Parser.prototype.onend = function(){
+	if(this._cbs.onclosetag){
+		for(
+			var i = this._stack.length;
+			i > 0;
+			this._cbs.onclosetag(this._stack[--i])
+		);
+	}
+	if(this._cbs.onend) this._cbs.onend();
+};
+
+
+//Resets the parser to a blank state, ready to parse a new HTML document
+Parser.prototype.reset = function(){
+	if(this._cbs.onreset) this._cbs.onreset();
+	this._tokenizer.reset();
+	//restore all constructor state: a stale _attribvalue or endIndex would leak into the next document
+	this._tagname = this._attribname = this._attribvalue = "";
+	this._attribs = null;
+	this._stack = [];
+	this.startIndex = 0;
+	this.endIndex = null;
+	if(this._cbs.onparserinit) this._cbs.onparserinit(this);
+};
+
+//Parses a complete HTML document and pushes it to the handler
+Parser.prototype.parseComplete = function(data){
+	this.reset();
+	this.end(data);
+};
+
+Parser.prototype.write = function(chunk){
+	this._tokenizer.write(chunk);
+};
+
+Parser.prototype.end = function(chunk){
+	this._tokenizer.end(chunk);
+};
+
+Parser.prototype.pause = function(){
+	this._tokenizer.pause();
+};
+
+Parser.prototype.resume = function(){
+	this._tokenizer.resume();
+};
+
+//alias for backwards compat
+Parser.prototype.parseChunk = Parser.prototype.write;
+Parser.prototype.done = Parser.prototype.end;
+
+module.exports = Parser;
diff --git a/lib/ProxyHandler.js b/lib/ProxyHandler.js
new file mode 100644
index 0000000..858e975
--- /dev/null
+++ b/lib/ProxyHandler.js
@@ -0,0 +1,27 @@
+module.exports = ProxyHandler;
+
+function ProxyHandler(cbs){
+	this._cbs = cbs || {};
+}
+
+var EVENTS = require("./").EVENTS;
+Object.keys(EVENTS).forEach(function(name){
+	if(EVENTS[name] === 0){
+		name = "on" + name;
+		ProxyHandler.prototype[name] = function(){
+			if(this._cbs[name]) this._cbs[name]();
+		};
+	} else if(EVENTS[name] === 1){
+		name = "on" + name;
+		ProxyHandler.prototype[name] = function(a){
+			if(this._cbs[name]) this._cbs[name](a);
+		};
+	} else if(EVENTS[name] === 2){
+		name = "on" + name;
+		ProxyHandler.prototype[name] = function(a, b){
+			if(this._cbs[name]) this._cbs[name](a, b);
+		};
+	} else {
+		throw Error("wrong number of arguments");
+	}
+});
\ No newline at end of file
diff --git a/lib/Stream.js b/lib/Stream.js
new file mode 100644
index 0000000..0ac49a6
--- /dev/null
+++ b/lib/Stream.js
@@ -0,0 +1,35 @@
+module.exports = Stream;
+
+var Parser = require("./WritableStream.js");
+
+function Stream(options){
+	Parser.call(this, new Cbs(this), options);
+}
+
+require("inherits")(Stream, Parser);
+
+Stream.prototype.readable = true;
+
+function Cbs(scope){
+	this.scope = scope;
+}
+
+var EVENTS = require("../").EVENTS;
+
+Object.keys(EVENTS).forEach(function(name){
+	if(EVENTS[name] === 0){
+		Cbs.prototype["on" + name] = function(){
+			this.scope.emit(name);
+		};
+	} else if(EVENTS[name] === 1){
+		Cbs.prototype["on" + name] = function(a){
+			this.scope.emit(name, a);
+		};
+	} else if(EVENTS[name] === 2){
+		Cbs.prototype["on" + name] = function(a, b){
+			this.scope.emit(name, a, b);
+		};
+	} else {
+		throw Error("wrong number of arguments!");
+	}
+});
\ No newline at end of file
diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
new file mode 100644
index 0000000..d67427c
--- /dev/null
+++ b/lib/Tokenizer.js
@@ -0,0 +1,906 @@
+module.exports = Tokenizer;
+
+var decodeCodePoint = require("entities/lib/decode_codepoint.js"),
+    entityMap = require("entities/maps/entities.json"),
+    legacyMap = require("entities/maps/legacy.json"),
+    xmlMap    = require("entities/maps/xml.json"),
+
+    i = 0, //counter used to number the tokenizer states below
+
+    TEXT                      = i++,
+    BEFORE_TAG_NAME           = i++, //after <
+    IN_TAG_NAME               = i++,
+    IN_SELF_CLOSING_TAG       = i++,
+    BEFORE_CLOSING_TAG_NAME   = i++,
+    IN_CLOSING_TAG_NAME       = i++,
+    AFTER_CLOSING_TAG_NAME    = i++,
+
+    //attributes
+    BEFORE_ATTRIBUTE_NAME     = i++,
+    IN_ATTRIBUTE_NAME         = i++,
+    AFTER_ATTRIBUTE_NAME      = i++,
+    BEFORE_ATTRIBUTE_VALUE    = i++,
+    IN_ATTRIBUTE_VALUE_DQ     = i++, // "
+    IN_ATTRIBUTE_VALUE_SQ     = i++, // '
+    IN_ATTRIBUTE_VALUE_NQ     = i++,
+
+    //declarations
+    BEFORE_DECLARATION        = i++, // !
+    IN_DECLARATION            = i++,
+
+    //processing instructions
+    IN_PROCESSING_INSTRUCTION = i++, // ?
+
+    //comments
+    BEFORE_COMMENT            = i++,
+    IN_COMMENT                = i++,
+    AFTER_COMMENT_1           = i++,
+    AFTER_COMMENT_2           = i++,
+
+    //cdata
+    BEFORE_CDATA_1            = i++, // [
+    BEFORE_CDATA_2            = i++, // C
+    BEFORE_CDATA_3            = i++, // D
+    BEFORE_CDATA_4            = i++, // A
+    BEFORE_CDATA_5            = i++, // T
+    BEFORE_CDATA_6            = i++, // A
+    IN_CDATA                  = i++, // [
+    AFTER_CDATA_1             = i++, // ]
+    AFTER_CDATA_2             = i++, // ]
+
+    //special tags
+    BEFORE_SPECIAL            = i++, //S
+    BEFORE_SPECIAL_END        = i++,   //S
+
+    BEFORE_SCRIPT_1           = i++, //C
+    BEFORE_SCRIPT_2           = i++, //R
+    BEFORE_SCRIPT_3           = i++, //I
+    BEFORE_SCRIPT_4           = i++, //P
+    BEFORE_SCRIPT_5           = i++, //T
+    AFTER_SCRIPT_1            = i++, //C
+    AFTER_SCRIPT_2            = i++, //R
+    AFTER_SCRIPT_3            = i++, //I
+    AFTER_SCRIPT_4            = i++, //P
+    AFTER_SCRIPT_5            = i++, //T
+
+    BEFORE_STYLE_1            = i++, //T
+    BEFORE_STYLE_2            = i++, //Y
+    BEFORE_STYLE_3            = i++, //L
+    BEFORE_STYLE_4            = i++, //E
+    AFTER_STYLE_1             = i++, //T
+    AFTER_STYLE_2             = i++, //Y
+    AFTER_STYLE_3             = i++, //L
+    AFTER_STYLE_4             = i++, //E
+
+    BEFORE_ENTITY             = i++, //&
+    BEFORE_NUMERIC_ENTITY     = i++, //#
+    IN_NAMED_ENTITY           = i++,
+    IN_NUMERIC_ENTITY         = i++,
+    IN_HEX_ENTITY             = i++, //X
+
+    j = 0, //counter for the values of Tokenizer#_special
+
+    SPECIAL_NONE              = j++,
+    SPECIAL_SCRIPT            = j++, //set once a <script> opening tag name was read
+    SPECIAL_STYLE             = j++; //set once a <style> opening tag name was read
+
+function whitespace(c){ //true if c is a space, line feed, tab, form feed or carriage return
+	return c === " " || c === "\n" || c === "\t" || c === "\f" || c === "\r";
+}
+
+function characterState(char, SUCCESS){ //handler factory: switch to SUCCESS on `char`, otherwise leave the state unchanged
+	return function(c){
+		if(c === char) this._state = SUCCESS;
+	};
+}
+
+//Builds a state handler for one expected character, matched
+//case-insensitively: `upper` itself or its lower-case form.
+//
+//  upper   - the expected character
+//  SUCCESS - state entered when the character matches
+//  FAILURE - state entered otherwise; _index is decremented
+//            as well, so the character is processed again
+function ifElseState(upper, SUCCESS, FAILURE){
+	var lower = upper.toLowerCase(),
+	    caseless = upper === lower; //e.g. "#": a single comparison is enough
+
+	return function(c){
+		var matched = c === lower || (!caseless && c === upper);
+
+		if(matched){
+			this._state = SUCCESS;
+			return;
+		}
+
+		this._state = FAILURE;
+		this._index--; //reconsume the character in the FAILURE state
+	};
+}
+
+//Handler factory for matching one letter of "script"/"style" in an opening tag.
+//`upper` is the expected letter (either case); NEXT_STATE is entered on a match.
+function consumeSpecialNameChar(upper, NEXT_STATE){
+	var lower = upper.toLowerCase();
+
+	return function(c){
+		var isExpected = c === upper || c === lower;
+
+		this._state = isExpected ? NEXT_STATE : IN_TAG_NAME; //mismatch: an ordinary tag name
+		if(!isExpected) this._index--; //consume the token again
+	};
+}
+
+function Tokenizer(options, cbs){
+	this._state = TEXT;
+	this._buffer = ""; //input that has not been discarded by _cleanup yet
+	this._sectionStart = 0; //buffer index where the pending token starts (-1 after a token was emitted)
+	this._index = 0; //buffer index of the character being processed
+	this._bufferOffset = 0; //chars removed from _buffer
+	this._baseState = TEXT; //state to return to after an entity
+	this._special = SPECIAL_NONE; //SPECIAL_SCRIPT/SPECIAL_STYLE while inside such an element
+	this._cbs = cbs; //receiver of the on* token callbacks
+	this._running = true; //false while paused
+	this._ended = false; //true once end() was called
+	this._xmlMode = !!(options && options.xmlMode);
+	this._decodeEntities = !!(options && options.decodeEntities);
+}
+
+Tokenizer.prototype._stateText = function(c){
+	if(c === "<"){
+		if(this._index > this._sectionStart){
+			this._cbs.ontext(this._getSection()); //flush the text collected before "<"
+		}
+		this._state = BEFORE_TAG_NAME;
+		this._sectionStart = this._index; //"<" stays in the section in case it turns out to be plain text
+	} else if(this._decodeEntities && this._special === SPECIAL_NONE && c === "&"){ //entities are not decoded inside script/style
+		if(this._index > this._sectionStart){
+			this._cbs.ontext(this._getSection()); //flush the text collected before "&"
+		}
+		this._baseState = TEXT; //state to return to once the entity has been handled
+		this._state = BEFORE_ENTITY;
+		this._sectionStart = this._index;
+	}
+};
+
+Tokenizer.prototype._stateBeforeTagName = function(c){
+	if(c === "/"){
+		this._state = BEFORE_CLOSING_TAG_NAME;
+	} else if(c === "<"){ //the previous "<" was literal text: emit it and start over at this one
+		this._cbs.ontext(this._getSection());
+		this._sectionStart = this._index;
+	} else if(c === ">" || this._special !== SPECIAL_NONE || whitespace(c)) { //not a tag (inside script/style no opening tags are recognised)
+		this._state = TEXT;
+	} else if(c === "!"){
+		this._state = BEFORE_DECLARATION;
+		this._sectionStart = this._index + 1; //section starts after "!"
+	} else if(c === "?"){
+		this._state = IN_PROCESSING_INSTRUCTION;
+		this._sectionStart = this._index + 1; //section starts after "?"
+	} else {
+		this._state = (!this._xmlMode && (c === "s" || c === "S")) ? //"s" may start <script> or <style>
+						BEFORE_SPECIAL : IN_TAG_NAME;
+		this._sectionStart = this._index; //c is the first char of the tag name
+	}
+};
+
+Tokenizer.prototype._stateInTagName = function(c){
+	if(c === "/" || c === ">" || whitespace(c)){ //end of the tag name
+		this._emitToken("onopentagname");
+		this._state = BEFORE_ATTRIBUTE_NAME;
+		this._index--; //reconsume c in BEFORE_ATTRIBUTE_NAME
+	}
+};
+
+Tokenizer.prototype._stateBeforeCloseingTagName = function(c){
+	if(whitespace(c)); //skip whitespace after "</"
+	else if(c === ">"){ //"</>" is not a tag; _sectionStart is left alone, so it stays in the current section
+		this._state = TEXT;
+	} else if(this._special !== SPECIAL_NONE){ //inside script/style only a name starting with "s" can close the element
+		if(c === "s" || c === "S"){
+			this._state = BEFORE_SPECIAL_END;
+		} else {
+			this._state = TEXT;
+			this._index--; //reconsume c as text
+		}
+	} else {
+		this._state = IN_CLOSING_TAG_NAME;
+		this._sectionStart = this._index; //c is the first char of the tag name
+	}
+};
+
+Tokenizer.prototype._stateInCloseingTagName = function(c){
+	if(c === ">" || whitespace(c)){ //end of the tag name
+		this._emitToken("onclosetag");
+		this._state = AFTER_CLOSING_TAG_NAME;
+		this._index--; //reconsume c in AFTER_CLOSING_TAG_NAME
+	}
+};
+
+Tokenizer.prototype._stateAfterCloseingTagName = function(c){
+	//skip everything until ">"
+	if(c === ">"){
+		this._state = TEXT;
+		this._sectionStart = this._index + 1; //text starts after ">"
+	}
+};
+
+Tokenizer.prototype._stateBeforeAttributeName = function(c){
+	if(c === ">"){
+		this._cbs.onopentagend(); //the opening tag is complete
+		this._state = TEXT;
+		this._sectionStart = this._index + 1; //text starts after ">"
+	} else if(c === "/"){
+		this._state = IN_SELF_CLOSING_TAG;
+	} else if(!whitespace(c)){
+		this._state = IN_ATTRIBUTE_NAME;
+		this._sectionStart = this._index; //c is the first char of the attribute name
+	}
+};
+
+Tokenizer.prototype._stateInSelfClosingTag = function(c){ //entered after "/" inside an opening tag
+	if(c === ">"){
+		this._cbs.onselfclosingtag();
+		this._state = TEXT;
+		this._sectionStart = this._index + 1; //text starts after ">"
+	} else if(!whitespace(c)){ //the "/" did not end the tag
+		this._state = BEFORE_ATTRIBUTE_NAME;
+		this._index--; //reconsume c in BEFORE_ATTRIBUTE_NAME
+	}
+};
+
+Tokenizer.prototype._stateInAttributeName = function(c){
+	if(c === "=" || c === "/" || c === ">" || whitespace(c)){ //end of the attribute name
+		this._cbs.onattribname(this._getSection());
+		this._sectionStart = -1; //name emitted, nothing pending
+		this._state = AFTER_ATTRIBUTE_NAME;
+		this._index--; //reconsume c in AFTER_ATTRIBUTE_NAME
+	}
+};
+
+Tokenizer.prototype._stateAfterAttributeName = function(c){
+	if(c === "="){
+		this._state = BEFORE_ATTRIBUTE_VALUE;
+	} else if(c === "/" || c === ">"){ //attribute without a value at the end of the tag
+		this._cbs.onattribend();
+		this._state = BEFORE_ATTRIBUTE_NAME;
+		this._index--; //reconsume c in BEFORE_ATTRIBUTE_NAME
+	} else if(!whitespace(c)){ //attribute without a value; c starts the next attribute name
+		this._cbs.onattribend();
+		this._state = IN_ATTRIBUTE_NAME;
+		this._sectionStart = this._index;
+	}
+};
+
+Tokenizer.prototype._stateBeforeAttributeValue = function(c){
+	if(c === "\""){
+		this._state = IN_ATTRIBUTE_VALUE_DQ;
+		this._sectionStart = this._index + 1; //value starts after the quote
+	} else if(c === "'"){
+		this._state = IN_ATTRIBUTE_VALUE_SQ;
+		this._sectionStart = this._index + 1; //value starts after the quote
+	} else if(!whitespace(c)){ //unquoted value; whitespace in front of it is skipped
+		this._state = IN_ATTRIBUTE_VALUE_NQ;
+		this._sectionStart = this._index;
+		this._index--; //reconsume token
+	}
+};
+
+Tokenizer.prototype._stateInAttributeValueDoubleQuotes = function(c){
+	if(c === "\""){ //closing quote
+		this._emitToken("onattribdata");
+		this._cbs.onattribend();
+		this._state = BEFORE_ATTRIBUTE_NAME;
+	} else if(this._decodeEntities && c === "&"){
+		this._emitToken("onattribdata"); //emit the part of the value in front of the entity
+		this._baseState = this._state; //come back to this state after the entity
+		this._state = BEFORE_ENTITY;
+		this._sectionStart = this._index;
+	}
+};
+
+Tokenizer.prototype._stateInAttributeValueSingleQuotes = function(c){
+	if(c === "'"){ //closing quote
+		this._emitToken("onattribdata");
+		this._cbs.onattribend();
+		this._state = BEFORE_ATTRIBUTE_NAME;
+	} else if(this._decodeEntities && c === "&"){
+		this._emitToken("onattribdata"); //emit the part of the value in front of the entity
+		this._baseState = this._state; //come back to this state after the entity
+		this._state = BEFORE_ENTITY;
+		this._sectionStart = this._index;
+	}
+};
+
+Tokenizer.prototype._stateInAttributeValueNoQuotes = function(c){
+	if(whitespace(c) || c === ">"){ //an unquoted value ends at whitespace or at the end of the tag
+		this._emitToken("onattribdata");
+		this._cbs.onattribend();
+		this._state = BEFORE_ATTRIBUTE_NAME;
+		this._index--; //reconsume c in BEFORE_ATTRIBUTE_NAME
+	} else if(this._decodeEntities && c === "&"){
+		this._emitToken("onattribdata"); //emit the part of the value in front of the entity
+		this._baseState = this._state; //come back to this state after the entity
+		this._state = BEFORE_ENTITY;
+		this._sectionStart = this._index;
+	}
+};
+
+Tokenizer.prototype._stateBeforeDeclaration = function(c){ //c is the character after "<!"
+	if(c === "[") this._state = BEFORE_CDATA_1; //possibly "<![CDATA["
+	else if(c === "-") this._state = BEFORE_COMMENT; //possibly "<!--"
+	else this._state = IN_DECLARATION; //anything else is read as a declaration
+};
+
+Tokenizer.prototype._stateInDeclaration = function(c){
+	if(c === ">"){ //a declaration ends at the first ">"
+		this._cbs.ondeclaration(this._getSection());
+		this._state = TEXT;
+		this._sectionStart = this._index + 1; //text starts after ">"
+	}
+};
+
+Tokenizer.prototype._stateInProcessingInstruction = function(c){
+	if(c === ">"){ //a processing instruction ends at the first ">"
+		this._cbs.onprocessinginstruction(this._getSection());
+		this._state = TEXT;
+		this._sectionStart = this._index + 1; //text starts after ">"
+	}
+};
+
+Tokenizer.prototype._stateBeforeComment = function(c){ //entered after "<!-"
+	if(c === "-"){
+		this._state = IN_COMMENT;
+		this._sectionStart = this._index + 1; //comment data starts after "<!--"
+	} else {
+		this._state = IN_DECLARATION; //only one dash: handled as a declaration
+	}
+};
+
+Tokenizer.prototype._stateInComment = function(c){
+	if(c === "-") this._state = AFTER_COMMENT_1; //possible start of "-->"
+};
+
+//Entered after a single "-" inside a comment.
+Tokenizer.prototype._stateAfterComment1 = function(c){
+	var secondDash = c === "-";
+
+	//"--" may start the terminator, anything else is ordinary comment data
+	this._state = secondDash ? AFTER_COMMENT_2 : IN_COMMENT;
+};
+
+Tokenizer.prototype._stateAfterComment2 = function(c){ //entered after "--" inside a comment
+	if(c === ">"){
+		//remove 2 trailing chars
+		this._cbs.oncomment(this._buffer.substring(this._sectionStart, this._index - 2));
+		this._state = TEXT;
+		this._sectionStart = this._index + 1; //text starts after ">"
+	} else if(c !== "-"){
+		this._state = IN_COMMENT; //"--" was part of the comment data
+	}
+	// else: stay in AFTER_COMMENT_2 (`--->`)
+};
+
+Tokenizer.prototype._stateBeforeCdata1 = ifElseState("C", BEFORE_CDATA_2, IN_DECLARATION); //after "<![": match "CDATA" letter by letter (either case);
+Tokenizer.prototype._stateBeforeCdata2 = ifElseState("D", BEFORE_CDATA_3, IN_DECLARATION); //on a mismatch the input is handled as a declaration
+Tokenizer.prototype._stateBeforeCdata3 = ifElseState("A", BEFORE_CDATA_4, IN_DECLARATION);
+Tokenizer.prototype._stateBeforeCdata4 = ifElseState("T", BEFORE_CDATA_5, IN_DECLARATION);
+Tokenizer.prototype._stateBeforeCdata5 = ifElseState("A", BEFORE_CDATA_6, IN_DECLARATION);
+
+Tokenizer.prototype._stateBeforeCdata6 = function(c){ //entered after "<![CDATA"
+	if(c === "["){
+		this._state = IN_CDATA;
+		this._sectionStart = this._index + 1; //cdata content starts after "<![CDATA["
+	} else {
+		this._state = IN_DECLARATION;
+		this._index--; //reconsume c in IN_DECLARATION
+	}
+};
+
+Tokenizer.prototype._stateInCdata = function(c){
+	if(c === "]") this._state = AFTER_CDATA_1; //possible start of "]]>"
+};
+
+Tokenizer.prototype._stateAfterCdata1 = ifElseState("]", AFTER_CDATA_2, IN_CDATA); //a lone "]" is data: go back to IN_CDATA instead of staying here, otherwise "]x]>" would end the section
+
+Tokenizer.prototype._stateAfterCdata2 = function(c){ //entered after "]]" inside a cdata section
+	if(c === ">"){
+		//remove 2 trailing chars
+		this._cbs.oncdata(this._buffer.substring(this._sectionStart, this._index - 2));
+		this._state = TEXT;
+		this._sectionStart = this._index + 1; //text starts after ">"
+	} else if(c !== "]") {
+		this._state = IN_CDATA; //"]]" was part of the data
+	}
+	//else: stay in AFTER_CDATA_2 (`]]]>`)
+};
+
+Tokenizer.prototype._stateBeforeSpecial = function(c){ //entered after "<s" (not in xmlMode)
+	if(c === "c" || c === "C"){
+		this._state = BEFORE_SCRIPT_1; //"<sc": maybe <script
+	} else if(c === "t" || c === "T"){
+		this._state = BEFORE_STYLE_1; //"<st": maybe <style
+	} else {
+		this._state = IN_TAG_NAME;
+		this._index--; //consume the token again
+	}
+};
+
+Tokenizer.prototype._stateBeforeSpecialEnd = function(c){ //entered after "</s" while inside script/style
+	if(this._special === SPECIAL_SCRIPT && (c === "c" || c === "C")){
+		this._state = AFTER_SCRIPT_1;
+	} else if(this._special === SPECIAL_STYLE && (c === "t" || c === "T")){
+		this._state = AFTER_STYLE_1;
+	}
+	else this._state = TEXT; //NOTE(review): c is not reconsumed here, unlike in the other fallbacks - confirm that a "<" right after "</s" may be skipped
+};
+
+Tokenizer.prototype._stateBeforeScript1 = consumeSpecialNameChar("R", BEFORE_SCRIPT_2); //after "<sc": match the remaining letters of "script";
+Tokenizer.prototype._stateBeforeScript2 = consumeSpecialNameChar("I", BEFORE_SCRIPT_3); //a mismatch continues as an ordinary tag name
+Tokenizer.prototype._stateBeforeScript3 = consumeSpecialNameChar("P", BEFORE_SCRIPT_4);
+Tokenizer.prototype._stateBeforeScript4 = consumeSpecialNameChar("T", BEFORE_SCRIPT_5);
+
+Tokenizer.prototype._stateBeforeScript5 = function(c){ //entered after "<script"
+	if(c === "/" || c === ">" || whitespace(c)){ //the tag name is exactly "script"
+		this._special = SPECIAL_SCRIPT;
+	}
+	this._state = IN_TAG_NAME; //the name itself is emitted by the regular tag name handler
+	this._index--; //consume the token again
+};
+
+Tokenizer.prototype._stateAfterScript1 = ifElseState("R", AFTER_SCRIPT_2, TEXT); //after "</sc": match the remaining letters of "script";
+Tokenizer.prototype._stateAfterScript2 = ifElseState("I", AFTER_SCRIPT_3, TEXT); //a mismatch goes back to TEXT and reconsumes the character
+Tokenizer.prototype._stateAfterScript3 = ifElseState("P", AFTER_SCRIPT_4, TEXT);
+Tokenizer.prototype._stateAfterScript4 = ifElseState("T", AFTER_SCRIPT_5, TEXT);
+
+Tokenizer.prototype._stateAfterScript5 = function(c){ //entered after "</script"
+	if(c === ">" || whitespace(c)){
+		this._special = SPECIAL_NONE; //the script element ends here
+		this._state = IN_CLOSING_TAG_NAME;
+		this._sectionStart = this._index - 6; //start of the 6 chars of "script"
+		this._index--; //reconsume the token
+	}
+	else this._state = TEXT;
+};
+
+Tokenizer.prototype._stateBeforeStyle1 = consumeSpecialNameChar("Y", BEFORE_STYLE_2); //after "<st": match the remaining letters of "style";
+Tokenizer.prototype._stateBeforeStyle2 = consumeSpecialNameChar("L", BEFORE_STYLE_3); //a mismatch continues as an ordinary tag name
+Tokenizer.prototype._stateBeforeStyle3 = consumeSpecialNameChar("E", BEFORE_STYLE_4);
+
+Tokenizer.prototype._stateBeforeStyle4 = function(c){ //entered after "<style"
+	if(c === "/" || c === ">" || whitespace(c)){ //the tag name is exactly "style"
+		this._special = SPECIAL_STYLE;
+	}
+	this._state = IN_TAG_NAME; //the name itself is emitted by the regular tag name handler
+	this._index--; //consume the token again
+};
+
+Tokenizer.prototype._stateAfterStyle1 = ifElseState("Y", AFTER_STYLE_2, TEXT); //after "</st": match the remaining letters of "style";
+Tokenizer.prototype._stateAfterStyle2 = ifElseState("L", AFTER_STYLE_3, TEXT); //a mismatch goes back to TEXT and reconsumes the character
+Tokenizer.prototype._stateAfterStyle3 = ifElseState("E", AFTER_STYLE_4, TEXT);
+
+Tokenizer.prototype._stateAfterStyle4 = function(c){ //entered after "</style"
+	if(c === ">" || whitespace(c)){
+		this._special = SPECIAL_NONE; //the style element ends here
+		this._state = IN_CLOSING_TAG_NAME;
+		this._sectionStart = this._index - 5; //start of the 5 chars of "style"
+		this._index--; //reconsume the token
+	}
+	else this._state = TEXT;
+};
+
+Tokenizer.prototype._stateBeforeEntity = ifElseState("#", BEFORE_NUMERIC_ENTITY, IN_NAMED_ENTITY); //after "&": "#" starts a numeric entity, anything else is read as a name
+Tokenizer.prototype._stateBeforeNumericEntity = ifElseState("X", IN_HEX_ENTITY, IN_NUMERIC_ENTITY); //after "&#": "x"/"X" selects hexadecimal digits
+
+//for entities terminated with a semicolon
+Tokenizer.prototype._parseNamedEntityStrict = function(){
+	//offset = 1
+	if(this._sectionStart + 1 < this._index){ //at least one char between "&" and the current position
+		var entity = this._buffer.substring(this._sectionStart + 1, this._index),
+		    map = this._xmlMode ? xmlMap : entityMap;
+
+		if(map.hasOwnProperty(entity)){ //only exact matches of the whole name are decoded
+			this._emitPartial(map[entity]);
+			this._sectionStart = this._index + 1; //the next section starts after the character at _index
+		}
+	}
+};
+
+
+//parses legacy entities (without trailing semicolon)
+Tokenizer.prototype._parseLegacyEntity = function(){
+	var start = this._sectionStart + 1, //first char after "&"
+	    limit = this._index - start; //number of chars read so far
+
+	if(limit > 6) limit = 6; //the max length of legacy entities is 6
+
+	while(limit >= 2){ //the min length of legacy entities is 2
+		var entity = this._buffer.substr(start, limit); //try the longest prefix first
+
+		if(legacyMap.hasOwnProperty(entity)){
+			this._emitPartial(legacyMap[entity]);
+			this._sectionStart += limit + 1; //skip "&" and the matched name; the rest stays in the section
+			return;
+		} else {
+			limit--;
+		}
+	}
+};
+
+Tokenizer.prototype._stateInNamedEntity = function(c){
+	if(c === ";"){
+		this._parseNamedEntityStrict();
+		if(this._sectionStart + 1 < this._index && !this._xmlMode){ //no exact match: try a legacy entity as prefix
+			this._parseLegacyEntity();
+		}
+		this._state = this._baseState;
+	} else if((c < "a" || c > "z") && (c < "A" || c > "Z") && (c < "0" || c > "9")){ //c is neither a letter nor a digit, so the name ends here
+		if(this._xmlMode); //in xmlMode nothing is decoded without a semicolon
+		else if(this._sectionStart + 1 === this._index); //a lone "&"
+		else if(this._baseState !== TEXT){ //inside an attribute value
+			if(c !== "="){
+				this._parseNamedEntityStrict();
+			}
+		} else {
+			this._parseLegacyEntity();
+		}
+
+		this._state = this._baseState;
+		this._index--; //reconsume c in the base state
+	}
+};
+
+Tokenizer.prototype._decodeNumericEntity = function(offset, base){ //offset: length of the "&#"/"&#x" prefix; base: radix passed to parseInt
+	var sectionStart = this._sectionStart + offset; //index of the first digit
+
+	if(sectionStart !== this._index){
+		//parse entity
+		var entity = this._buffer.substring(sectionStart, this._index);
+		var parsed = parseInt(entity, base);
+
+		this._emitPartial(decodeCodePoint(parsed));
+		this._sectionStart = this._index;
+	} else {
+		this._sectionStart--; //no digits, nothing is decoded (on ";" the caller's _sectionStart++ cancels this out)
+	}
+
+	this._state = this._baseState;
+};
+
+Tokenizer.prototype._stateInNumericEntity = function(c){
+	if(c === ";"){
+		this._decodeNumericEntity(2, 10); //2 = length of "&#"
+		this._sectionStart++; //step over the ";"
+	} else if(c < "0" || c > "9"){ //not a decimal digit: the entity ends without a semicolon
+		if(!this._xmlMode){
+			this._decodeNumericEntity(2, 10);
+		} else {
+			this._state = this._baseState; //in xmlMode nothing is decoded without a semicolon
+		}
+		this._index--; //reconsume c in the base state
+	}
+};
+
+Tokenizer.prototype._stateInHexEntity = function(c){
+	if(c === ";"){
+		this._decodeNumericEntity(3, 16); //3 = length of "&#x"
+		this._sectionStart++; //step over the ";"
+	} else if((c < "a" || c > "f") && (c < "A" || c > "F") && (c < "0" || c > "9")){ //not a hex digit: the entity ends without a semicolon
+		if(!this._xmlMode){
+			this._decodeNumericEntity(3, 16);
+		} else {
+			this._state = this._baseState; //in xmlMode nothing is decoded without a semicolon
+		}
+		this._index--; //reconsume c in the base state
+	}
+};
+
+Tokenizer.prototype._cleanup = function (){ //called at the end of _parse to drop input that is no longer needed
+	if(this._sectionStart < 0){ //nothing pending: the whole buffer can go
+		this._buffer = "";
+		this._bufferOffset += this._index;
+		this._index = 0;
+	} else if(this._running){ //while paused the buffer is left untouched
+		if(this._state === TEXT){
+			if(this._sectionStart !== this._index){
+				this._cbs.ontext(this._buffer.substr(this._sectionStart)); //emit pending text now instead of keeping it buffered
+			}
+			this._buffer = "";
+			this._bufferOffset += this._index;
+			this._index = 0;
+		} else if(this._sectionStart === this._index){
+			//the section just started
+			this._buffer = "";
+			this._bufferOffset += this._index;
+			this._index = 0;
+		} else {
+			//remove everything unnecessary
+			this._buffer = this._buffer.substr(this._sectionStart); //keep only the unfinished section
+			this._index -= this._sectionStart;
+			this._bufferOffset += this._sectionStart;
+		}
+
+		this._sectionStart = 0; //in all three cases the section now starts at the beginning of the buffer
+	}
+};
+
+//TODO make events conditional
+Tokenizer.prototype.write = function(chunk){
+	if(this._ended) this._cbs.onerror(Error(".write() after done!")); //reported through onerror; the chunk is still processed below
+
+	this._buffer += chunk;
+	this._parse();
+};
+
+Tokenizer.prototype._parse = function(){ //feeds the buffered characters, one at a time, to the handler of the current state
+	while(this._index < this._buffer.length && this._running){ //stops early when paused
+		var c = this._buffer.charAt(this._index);
+		if(this._state === TEXT) {
+			this._stateText(c);
+		} else if(this._state === BEFORE_TAG_NAME){
+			this._stateBeforeTagName(c);
+		} else if(this._state === IN_TAG_NAME) {
+			this._stateInTagName(c);
+		} else if(this._state === BEFORE_CLOSING_TAG_NAME){
+			this._stateBeforeCloseingTagName(c);
+		} else if(this._state === IN_CLOSING_TAG_NAME){
+			this._stateInCloseingTagName(c);
+		} else if(this._state === AFTER_CLOSING_TAG_NAME){
+			this._stateAfterCloseingTagName(c);
+		} else if(this._state === IN_SELF_CLOSING_TAG){
+			this._stateInSelfClosingTag(c);
+		}
+
+		/*
+		*	attributes
+		*/
+		else if(this._state === BEFORE_ATTRIBUTE_NAME){
+			this._stateBeforeAttributeName(c);
+		} else if(this._state === IN_ATTRIBUTE_NAME){
+			this._stateInAttributeName(c);
+		} else if(this._state === AFTER_ATTRIBUTE_NAME){
+			this._stateAfterAttributeName(c);
+		} else if(this._state === BEFORE_ATTRIBUTE_VALUE){
+			this._stateBeforeAttributeValue(c);
+		} else if(this._state === IN_ATTRIBUTE_VALUE_DQ){
+			this._stateInAttributeValueDoubleQuotes(c);
+		} else if(this._state === IN_ATTRIBUTE_VALUE_SQ){
+			this._stateInAttributeValueSingleQuotes(c);
+		} else if(this._state === IN_ATTRIBUTE_VALUE_NQ){
+			this._stateInAttributeValueNoQuotes(c);
+		}
+
+		/*
+		*	declarations
+		*/
+		else if(this._state === BEFORE_DECLARATION){
+			this._stateBeforeDeclaration(c);
+		} else if(this._state === IN_DECLARATION){
+			this._stateInDeclaration(c);
+		}
+
+		/*
+		*	processing instructions
+		*/
+		else if(this._state === IN_PROCESSING_INSTRUCTION){
+			this._stateInProcessingInstruction(c);
+		}
+
+		/*
+		*	comments
+		*/
+		else if(this._state === BEFORE_COMMENT){
+			this._stateBeforeComment(c);
+		} else if(this._state === IN_COMMENT){
+			this._stateInComment(c);
+		} else if(this._state === AFTER_COMMENT_1){
+			this._stateAfterComment1(c);
+		} else if(this._state === AFTER_COMMENT_2){
+			this._stateAfterComment2(c);
+		}
+
+		/*
+		*	cdata
+		*/
+		else if(this._state === BEFORE_CDATA_1){
+			this._stateBeforeCdata1(c);
+		} else if(this._state === BEFORE_CDATA_2){
+			this._stateBeforeCdata2(c);
+		} else if(this._state === BEFORE_CDATA_3){
+			this._stateBeforeCdata3(c);
+		} else if(this._state === BEFORE_CDATA_4){
+			this._stateBeforeCdata4(c);
+		} else if(this._state === BEFORE_CDATA_5){
+			this._stateBeforeCdata5(c);
+		} else if(this._state === BEFORE_CDATA_6){
+			this._stateBeforeCdata6(c);
+		} else if(this._state === IN_CDATA){
+			this._stateInCdata(c);
+		} else if(this._state === AFTER_CDATA_1){
+			this._stateAfterCdata1(c);
+		} else if(this._state === AFTER_CDATA_2){
+			this._stateAfterCdata2(c);
+		}
+
+		/*
+		* special tags
+		*/
+		else if(this._state === BEFORE_SPECIAL){
+			this._stateBeforeSpecial(c);
+		} else if(this._state === BEFORE_SPECIAL_END){
+			this._stateBeforeSpecialEnd(c);
+		}
+
+		/*
+		* script
+		*/
+		else if(this._state === BEFORE_SCRIPT_1){
+			this._stateBeforeScript1(c);
+		} else if(this._state === BEFORE_SCRIPT_2){
+			this._stateBeforeScript2(c);
+		} else if(this._state === BEFORE_SCRIPT_3){
+			this._stateBeforeScript3(c);
+		} else if(this._state === BEFORE_SCRIPT_4){
+			this._stateBeforeScript4(c);
+		} else if(this._state === BEFORE_SCRIPT_5){
+			this._stateBeforeScript5(c);
+		}
+
+		else if(this._state === AFTER_SCRIPT_1){
+			this._stateAfterScript1(c);
+		} else if(this._state === AFTER_SCRIPT_2){
+			this._stateAfterScript2(c);
+		} else if(this._state === AFTER_SCRIPT_3){
+			this._stateAfterScript3(c);
+		} else if(this._state === AFTER_SCRIPT_4){
+			this._stateAfterScript4(c);
+		} else if(this._state === AFTER_SCRIPT_5){
+			this._stateAfterScript5(c);
+		}
+
+		/*
+		* style
+		*/
+		else if(this._state === BEFORE_STYLE_1){
+			this._stateBeforeStyle1(c);
+		} else if(this._state === BEFORE_STYLE_2){
+			this._stateBeforeStyle2(c);
+		} else if(this._state === BEFORE_STYLE_3){
+			this._stateBeforeStyle3(c);
+		} else if(this._state === BEFORE_STYLE_4){
+			this._stateBeforeStyle4(c);
+		}
+
+		else if(this._state === AFTER_STYLE_1){
+			this._stateAfterStyle1(c);
+		} else if(this._state === AFTER_STYLE_2){
+			this._stateAfterStyle2(c);
+		} else if(this._state === AFTER_STYLE_3){
+			this._stateAfterStyle3(c);
+		} else if(this._state === AFTER_STYLE_4){
+			this._stateAfterStyle4(c);
+		}
+
+		/*
+		* entities
+		*/
+		else if(this._state === BEFORE_ENTITY){
+			this._stateBeforeEntity(c);
+		} else if(this._state === BEFORE_NUMERIC_ENTITY){
+			this._stateBeforeNumericEntity(c);
+		} else if(this._state === IN_NAMED_ENTITY){
+			this._stateInNamedEntity(c);
+		} else if(this._state === IN_NUMERIC_ENTITY){
+			this._stateInNumericEntity(c);
+		} else if(this._state === IN_HEX_ENTITY){
+			this._stateInHexEntity(c);
+		}
+
+		else {
+			this._cbs.onerror(Error("unknown _state"), this._state);
+		}
+
+		this._index++; //handlers that want c to be processed again decrement _index beforehand
+	}
+
+	this._cleanup();
+};
+
+Tokenizer.prototype.pause = function(){
+	this._running = false; //makes the loop in _parse stop; buffered input is kept
+};
+Tokenizer.prototype.resume = function(){
+	this._running = true;
+
+	if(this._index < this._buffer.length){ //input arrived or was left over while paused
+		this._parse();
+	}
+	if(this._ended){ //end() was called while paused: finish now
+		this._finish();
+	}
+};
+
+Tokenizer.prototype.end = function(chunk){
+	if(this._ended) this._cbs.onerror(Error(".end() after done!")); //reported through onerror; execution continues
+	if(chunk) this.write(chunk); //optional last chunk
+
+	this._ended = true;
+
+	if(this._running) this._finish(); //when paused, resume() calls _finish instead
+};
+
+Tokenizer.prototype._finish = function(){
+	//if there is remaining data, emit it in a reasonable way
+	if(this._sectionStart < this._index){
+		this._handleTrailingData();
+	}
+
+	this._cbs.onend(); //always the last callback of a run
+};
+
+Tokenizer.prototype._handleTrailingData = function(){ //emits the unfinished section when the input ends
+	var data = this._buffer.substr(this._sectionStart);
+
+	if(this._state === IN_CDATA || this._state === AFTER_CDATA_1 || this._state === AFTER_CDATA_2){ //unterminated cdata section
+		this._cbs.oncdata(data);
+	} else if(this._state === IN_COMMENT || this._state === AFTER_COMMENT_1 || this._state === AFTER_COMMENT_2){ //unterminated comment
+		this._cbs.oncomment(data);
+	} else if(this._state === IN_NAMED_ENTITY && !this._xmlMode){
+		this._parseLegacyEntity();
+		if(this._sectionStart < this._index){ //something is left: handle it as part of the base state
+			this._state = this._baseState;
+			this._handleTrailingData();
+		}
+	} else if(this._state === IN_NUMERIC_ENTITY && !this._xmlMode){
+		this._decodeNumericEntity(2, 10);
+		if(this._sectionStart < this._index){ //something is left: handle it as part of the base state
+			this._state = this._baseState;
+			this._handleTrailingData();
+		}
+	} else if(this._state === IN_HEX_ENTITY && !this._xmlMode){
+		this._decodeNumericEntity(3, 16);
+		if(this._sectionStart < this._index){ //something is left: handle it as part of the base state
+			this._state = this._baseState;
+			this._handleTrailingData();
+		}
+	} else if( //everything except an unfinished tag is emitted as text
+		this._state !== IN_TAG_NAME &&
+		this._state !== BEFORE_ATTRIBUTE_NAME &&
+		this._state !== BEFORE_ATTRIBUTE_VALUE &&
+		this._state !== AFTER_ATTRIBUTE_NAME &&
+		this._state !== IN_ATTRIBUTE_NAME &&
+		this._state !== IN_ATTRIBUTE_VALUE_SQ &&
+		this._state !== IN_ATTRIBUTE_VALUE_DQ &&
+		this._state !== IN_ATTRIBUTE_VALUE_NQ &&
+		this._state !== IN_CLOSING_TAG_NAME
+	){
+		this._cbs.ontext(data);
+	}
+	//else, ignore remaining data
+	//TODO add a way to remove current tag
+};
+
+Tokenizer.prototype.reset = function(){
+	Tokenizer.call(this, {xmlMode: this._xmlMode, decodeEntities: this._decodeEntities}, this._cbs); //re-run the constructor with the current options and callbacks
+};
+
+Tokenizer.prototype.getAbsoluteIndex = function(){
+	return this._bufferOffset + this._index; //chars already removed from the buffer plus the position inside it
+};
+
+Tokenizer.prototype._getSection = function(){
+	return this._buffer.substring(this._sectionStart, this._index); //the pending section, without the character at _index
+};
+
+Tokenizer.prototype._emitToken = function(name){ //name: the callback to invoke on _cbs
+	this._cbs[name](this._getSection());
+	this._sectionStart = -1; //the section was handed over, nothing is pending
+};
+
+Tokenizer.prototype._emitPartial = function(value){
+	//a decoded entity is reported as text or as attribute data, depending on where it was found
+	var inText = this._baseState === TEXT;
+
+	if(inText) this._cbs.ontext(value);
+	else this._cbs.onattribdata(value); //TODO implement the new event
+};
diff --git a/lib/WritableStream.js b/lib/WritableStream.js
new file mode 100644
index 0000000..9868f8a
--- /dev/null
+++ b/lib/WritableStream.js
@@ -0,0 +1,25 @@
+module.exports = Stream;
+
+var Parser = require("./Parser.js"),
+    WritableStream = require("stream").Writable || require("readable-stream").Writable,
+    StringDecoder = require("string_decoder").StringDecoder,
+    Buffer = require("buffer").Buffer;
+
+function Stream(cbs, options){
+	var parser = new Parser(cbs, options),
+	    decoder = new StringDecoder();
+
+	this._parser = parser;
+	this._decoder = decoder;
+	WritableStream.call(this, {decodeStrings: false}); //string chunks are passed on as strings
+
+	this.once("finish", function(){ parser.end(decoder.end()); }); //flush what the decoder still holds, then end the parser
+}
+
+require("inherits")(Stream, WritableStream);
+
+Stream.prototype._write = function(chunk, encoding, cb){ //defined on Stream itself, so the Writable prototype shared by all streams is not patched
+	if(chunk instanceof Buffer) chunk = this._decoder.write(chunk); //Buffers go through the StringDecoder first
+	this._parser.write(chunk);
+	cb(); //the chunk was handed to the parser synchronously, so completion is signalled right away
+};
\ No newline at end of file
diff --git a/lib/index.js b/lib/index.js
new file mode 100644
index 0000000..880f57e
--- /dev/null
+++ b/lib/index.js
@@ -0,0 +1,68 @@
+var Parser = require("./Parser.js"),
+    DomHandler = require("domhandler");
+
+function defineProp(name, value){ //swaps a lazy getter on module.exports for its loaded value
+	var target = module.exports;
+	delete target[name]; //remove the accessor first: it has no setter, so assigning to it would not store the value
+	return (target[name] = value);
+}
+
+module.exports = {
+	Parser: Parser,
+	Tokenizer: require("./Tokenizer.js"),
+	ElementType: require("domelementtype"),
+	DomHandler: DomHandler,
+	get FeedHandler(){ //this and the getters below require their module on first access; defineProp then stores the result
+		return defineProp("FeedHandler", require("./FeedHandler.js"));
+	},
+	get Stream(){
+		return defineProp("Stream", require("./Stream.js"));
+	},
+	get WritableStream(){
+		return defineProp("WritableStream", require("./WritableStream.js"));
+	},
+	get ProxyHandler(){
+		return defineProp("ProxyHandler", require("./ProxyHandler.js"));
+	},
+	get DomUtils(){
+		return defineProp("DomUtils", require("domutils"));
+	},
+	get CollectingHandler(){
+		return defineProp("CollectingHandler", require("./CollectingHandler.js"));
+	},
+	// For legacy support
+	DefaultHandler: DomHandler,
+	get RssHandler(){
+		return defineProp("RssHandler", this.FeedHandler); //alias of FeedHandler
+	},
+	//helper methods
+	parseDOM: function(data, options){ //parses `data` in a single end() call and returns handler.dom
+		var handler = new DomHandler(options);
+		new Parser(handler, options).end(data);
+		return handler.dom;
+	},
+	parseFeed: function(feed, options){ //same as parseDOM, but with a FeedHandler
+		var handler = new module.exports.FeedHandler(options);
+		new Parser(handler, options).end(feed);
+		return handler.dom;
+	},
+	createDomStream: function(cb, options, elementCb){ //returns a Parser bound to a new DomHandler; the caller writes the input
+		var handler = new DomHandler(cb, options, elementCb);
+		return new Parser(handler, options);
+	},
+	// List of all events that the parser emits
+	EVENTS: { /* Format: eventname: number of arguments */
+		attribute: 2,
+		cdatastart: 0,
+		cdataend: 0,
+		text: 1,
+		processinginstruction: 2,
+		comment: 1,
+		commentend: 0,
+		closetag: 1,
+		opentag: 2,
+		opentagname: 1,
+		error: 1,
+		end: 0
+	}
+};
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..95f2d54
--- /dev/null
+++ b/package.json
@@ -0,0 +1,56 @@
+{
+  "name": "htmlparser2",
+  "description": "Fast & forgiving HTML/XML/RSS parser",
+  "version": "3.9.2",
+  "author": "Felix Boehm <me at feedic.com>",
+  "keywords": [
+    "html",
+    "parser",
+    "streams",
+    "xml",
+    "dom",
+    "rss",
+    "feed",
+    "atom"
+  ],
+  "repository": {
+    "type": "git",
+    "url": "git://github.com/fb55/htmlparser2.git"
+  },
+  "bugs": {
+    "mail": "me at feedic.com",
+    "url": "http://github.com/fb55/htmlparser2/issues"
+  },
+  "directories": {
+    "lib": "lib/"
+  },
+  "main": "lib/index.js",
+  "files": [
+    "lib"
+  ],
+  "scripts": {
+    "lcov": "istanbul cover _mocha --report lcovonly -- -R spec",
+    "coveralls": "npm run lint && npm run lcov && (cat coverage/lcov.info | coveralls || exit 0)",
+    "test": "mocha && npm run lint",
+    "lint": "eslint lib test"
+  },
+  "dependencies": {
+    "domelementtype": "^1.3.0",
+    "domhandler": "^2.3.0",
+    "domutils": "^1.5.1",
+    "entities": "^1.1.1",
+    "inherits": "^2.0.1",
+    "readable-stream": "^2.0.2"
+  },
+  "devDependencies": {
+    "coveralls": "^2.11.4",
+    "istanbul": "^0.4.3",
+    "mocha": "^2.2.5",
+    "eslint": "^2.12.0",
+    "mocha-lcov-reporter": "^1.2.0"
+  },
+  "browser": {
+    "readable-stream": false
+  },
+  "license": "MIT"
+}
diff --git a/test/01-events.js b/test/01-events.js
new file mode 100644
index 0000000..a3c7cf3
--- /dev/null
+++ b/test/01-events.js
@@ -0,0 +1,9 @@
+var helper = require("./test-helper.js");
+
+helper.mochaTest("Events", __dirname, function(test, cb){ //test-helper.js is not shown here; presumably this runs once per fixture - TODO confirm
+	helper.writeToParser(
+		helper.getEventCollector(cb), //handler built around cb
+		test.options.parser,
+		test.html
+	);
+});
\ No newline at end of file
diff --git a/test/02-stream.js b/test/02-stream.js
new file mode 100644
index 0000000..fe6044d
--- /dev/null
+++ b/test/02-stream.js
@@ -0,0 +1,23 @@
+var helper = require("./test-helper.js"),
+    Stream = require("..").WritableStream,
+    fs = require("fs"),
+    path = require("path");
+
+helper.mochaTest("Stream", __dirname, function(test, cb){
+	var filePath = path.join(__dirname, "Documents", test.file);
+	fs.createReadStream(filePath).pipe( //first pass: the document is piped into the stream
+		new Stream(
+			helper.getEventCollector(function(err, events){
+				cb(err, events);
+
+				var handler = helper.getEventCollector(cb), //second pass: the whole file is written with a single end() call
+				    stream = new Stream(handler, test.options);
+
+				fs.readFile(filePath, function(err, data){
+					if(err) throw err;
+					else stream.end(data);
+				});
+			}
+		), test.options)
+	).on("error", cb);
+});
\ No newline at end of file
diff --git a/test/03-feed.js b/test/03-feed.js
new file mode 100644
index 0000000..7849953
--- /dev/null
+++ b/test/03-feed.js
@@ -0,0 +1,19 @@
+//Runs tests for feeds
+
+var helper = require("./test-helper.js"),
+    FeedHandler = require("..").RssHandler,
+    fs = require("fs"),
+    path = require("path");
+
+helper.mochaTest("Feeds", __dirname, function(test, cb){
+	fs.readFile(
+		path.join(__dirname, "Documents", test.file),
+		function(err, file){
+			helper.writeToParser(
+				new FeedHandler(cb),
+				{ xmlMode: true },
+				file.toString()
+			);
+		}
+	);
+});
\ No newline at end of file
diff --git a/test/Documents/Atom_Example.xml b/test/Documents/Atom_Example.xml
new file mode 100644
index 0000000..f836380
--- /dev/null
+++ b/test/Documents/Atom_Example.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- http://en.wikipedia.org/wiki/Atom_%28standard%29 -->
+<feed xmlns="http://www.w3.org/2005/Atom">
+	<title>Example Feed</title>
+	<subtitle>A subtitle.</subtitle>
+	<link href="http://example.org/feed/" rel="self" />
+	<link href="http://example.org/" />
+	<id>urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6</id>
+	<updated>2003-12-13T18:30:02Z</updated>
+	<author>
+		<name>John Doe</name>
+		<email>johndoe@example.com</email>
+	</author>
+
+	<entry>
+		<title>Atom-Powered Robots Run Amok</title>
+		<link href="http://example.org/2003/12/13/atom03" />
+		<link rel="alternate" type="text/html" href="http://example.org/2003/12/13/atom03.html"/>
+		<link rel="edit" href="http://example.org/2003/12/13/atom03/edit"/>
+		<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+		<updated>2003-12-13T18:30:02Z</updated>
+		<content type="html"><p>Some content.</p></content>
+	</entry>
+
+</feed>
diff --git a/test/Documents/Attributes.html b/test/Documents/Attributes.html
new file mode 100644
index 0000000..f3bfa09
--- /dev/null
+++ b/test/Documents/Attributes.html
@@ -0,0 +1,16 @@
+<!doctype html>
+<html>
+<head>
+	<title>Attributes test</title>
+</head>
+<body>
+	<!-- Normal attributes -->
+	<button id="test0" class="value0" title="value1">class="value0" title="value1"</button>
+
+	<!-- Attributes with no quotes or value -->
+	<button id="test1" class=value2 disabled>class=value2 disabled</button>
+
+	<!-- Attributes with no space between them. No valid, but accepted by the browser -->
+	<button id="test2" class="value4"title="value5">class="value4"title="value5"</button>
+</body>
+</html>
\ No newline at end of file
diff --git a/test/Documents/Basic.html b/test/Documents/Basic.html
new file mode 100644
index 0000000..65957a2
--- /dev/null
+++ b/test/Documents/Basic.html
@@ -0,0 +1 @@
+<!DOCTYPE html><html><title>The Title</title><body>Hello world</body></html>
\ No newline at end of file
diff --git a/test/Documents/RDF_Example.xml b/test/Documents/RDF_Example.xml
new file mode 100644
index 0000000..b76dc37
--- /dev/null
+++ b/test/Documents/RDF_Example.xml
@@ -0,0 +1,63 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:ev="http://purl.org/rss/1.0/modules/event/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:admin="http://webns.net/mvcb/">
+	<channel rdf:about="https://github.com/fb55/htmlparser2/">
+		<title>A title to parse and remember</title>
+		<link>https://github.com/fb55/htmlparser2/</link>
+		<description/>
+		<dc:language>en-us</dc:language>
+		<dc:rights>Copyright 2015 the authors</dc:rights>
+		<dc:publisher>webmaster@thisisafakedoma.in</dc:publisher>
+		<dc:creator>webmaster@thisisafakedoma.in</dc:creator>
+		<dc:source>https://github.com/fb55/htmlparser2/</dc:source>
+		<dc:title>A title to parse and remember</dc:title>
+		<dc:type>Collection</dc:type>
+		<syn:updateBase>2011-11-04T09:39:10-07:00</syn:updateBase>
+		<syn:updateFrequency>4</syn:updateFrequency>
+		<syn:updatePeriod>hourly</syn:updatePeriod>
+		<items>
+			<rdf:Seq>
+				<rdf:li rdf:resource="http://somefakesite/path/to/something.html"/>
+			</rdf:Seq>
+		</items>
+	</channel>
+	<item rdf:about="http://somefakesite/path/to/something.html">
+		<title><![CDATA[ Fast HTML Parsing ]]></title>
+		<link>
+http://somefakesite/path/to/something.html
+</link>
+		<description><![CDATA[
+Great test content<br>A link: <a href="http://github.com">Github</a>
+]]></description>
+		<dc:date>2011-11-04T09:35:17-07:00</dc:date>
+		<dc:language>en-us</dc:language>
+		<dc:rights>Copyright 2015 the authors</dc:rights>
+		<dc:source>
+http://somefakesite/path/to/something.html
+</dc:source>
+		<dc:title><![CDATA[ Fast HTML Parsing ]]></dc:title>
+		<dc:type>text</dc:type>
+		<dcterms:issued>2011-11-04T09:35:17-07:00</dcterms:issued>
+	</item>
+	<item rdf:about="http://somefakesite/path/to/something-else.html">
+		<title><![CDATA[
+This space intentionally left blank
+]]></title>
+		<link>
+http://somefakesite/path/to/something-else.html
+</link>
+		<description><![CDATA[
+The early bird gets the worm
+]]></description>
+		<dc:date>2011-11-04T09:34:54-07:00</dc:date>
+		<dc:language>en-us</dc:language>
+		<dc:rights>Copyright 2015 the authors</dc:rights>
+		<dc:source>
+http://somefakesite/path/to/something-else.html
+</dc:source>
+		<dc:title><![CDATA[
+This space intentionally left blank
+]]></dc:title>
+		<dc:type>text</dc:type>
+		<dcterms:issued>2011-11-04T09:34:54-07:00</dcterms:issued>
+	</item>
+</rdf:RDF>
\ No newline at end of file
diff --git a/test/Documents/RSS_Example.xml b/test/Documents/RSS_Example.xml
new file mode 100644
index 0000000..0d1fde8
--- /dev/null
+++ b/test/Documents/RSS_Example.xml
@@ -0,0 +1,48 @@
+<?xml version="1.0"?>
+<!-- http://cyber.law.harvard.edu/rss/examples/rss2sample.xml -->
+<rss version="2.0">
+   <channel>
+      <title>Liftoff News</title>
+      <link>http://liftoff.msfc.nasa.gov/</link>
+      <description>Liftoff to Space Exploration.</description>
+      <language>en-us</language>
+      <pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
+
+      <lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>
+      <docs>http://blogs.law.harvard.edu/tech/rss</docs>
+      <generator>Weblog Editor 2.0</generator>
+      <managingEditor>editor@example.com</managingEditor>
+      <webMaster>webmaster@example.com</webMaster>
+      <item>
+
+         <title>Star City</title>
+         <link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
+         <description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>.</description>
+         <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
+         <guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
+
+      </item>
+      <item>
+         <description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm">partial eclipse of the Sun</a> on Saturday, May 31st.</description>
+         <pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>
+         <guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>
+
+      </item>
+      <item>
+         <title>The Engine That Does More</title>
+         <link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>
+         <description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly.  The proposed VASIMR engine would do that.</description>
+         <pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>
+         <guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>
+
+      </item>
+      <item>
+         <title>Astronauts' Dirty Laundry</title>
+         <link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link>
+         <description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them.  Instead, astronauts have other options.</description>
+         <pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate>
+         <guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid>
+
+      </item>
+   </channel>
+</rss>
\ No newline at end of file
diff --git a/test/Events/01-simple.json b/test/Events/01-simple.json
new file mode 100644
index 0000000..ab3076a
--- /dev/null
+++ b/test/Events/01-simple.json
@@ -0,0 +1,44 @@
+{
+  "name": "simple",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<h1 class=test>adsf</h1>",
+  "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "h1"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "class",
+        "test"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "h1",
+        {
+          "class": "test"
+        }
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "adsf"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "h1"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/test/Events/02-template.json b/test/Events/02-template.json
new file mode 100644
index 0000000..df344b6
--- /dev/null
+++ b/test/Events/02-template.json
@@ -0,0 +1,63 @@
+{
+  "name": "Template script tags",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<p><script type=\"text/template\"><h1>Heading1</h1></script></p>",
+  "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "p"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "p",
+        {}
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "script"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "type",
+        "text/template"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "script",
+        {
+          "type": "text/template"
+        }
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "<h1>Heading1</h1>"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "script"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "p"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/test/Events/03-lowercase_tags.json b/test/Events/03-lowercase_tags.json
new file mode 100644
index 0000000..9b58c59
--- /dev/null
+++ b/test/Events/03-lowercase_tags.json
@@ -0,0 +1,46 @@
+{
+  "name": "Lowercase tags",
+  "options": {
+    "handler": {},
+    "parser": {
+      "lowerCaseTags": true
+    }
+  },
+  "html": "<H1 class=test>adsf</H1>",
+  "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "h1"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "class",
+        "test"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "h1",
+        {
+          "class": "test"
+        }
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "adsf"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "h1"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/test/Events/04-cdata.json b/test/Events/04-cdata.json
new file mode 100644
index 0000000..6032b68
--- /dev/null
+++ b/test/Events/04-cdata.json
@@ -0,0 +1,50 @@
+{
+  "name": "CDATA",
+  "options": {
+    "handler": {},
+    "parser": {"xmlMode": true}
+  },
+  "html": "<tag><![CDATA[ asdf ><asdf></adsf><> fo]]></tag><![CD>",
+  "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "tag"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "tag",
+        {}
+      ]
+    },
+    {
+      "event": "cdatastart",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        " asdf ><asdf></adsf><> fo"
+      ]
+    },
+    {
+      "event": "cdataend",
+      "data": []
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "tag"
+      ]
+    },
+    {
+      "event": "processinginstruction",
+      "data": [
+        "![CD",
+        "![CD"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/test/Events/05-cdata-special.json b/test/Events/05-cdata-special.json
new file mode 100644
index 0000000..686cb1a
--- /dev/null
+++ b/test/Events/05-cdata-special.json
@@ -0,0 +1,35 @@
+{
+  "name": "CDATA (inside special)",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<script>/*<![CDATA[*/ asdf ><asdf></adsf><> fo/*]]>*/</script>",
+  "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "script"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "script",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "/*<![CDATA[*/ asdf ><asdf></adsf><> fo/*]]>*/"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "script"
+      ]
+    }
+  ]
+}
diff --git a/test/Events/06-leading-lt.json b/test/Events/06-leading-lt.json
new file mode 100644
index 0000000..fcec852
--- /dev/null
+++ b/test/Events/06-leading-lt.json
@@ -0,0 +1,16 @@
+{
+  "name": "leading lt",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": ">a>",
+  "expected": [
+    {
+      "event": "text",
+      "data": [
+        ">a>"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/test/Events/07-self-closing.json b/test/Events/07-self-closing.json
new file mode 100644
index 0000000..49ed93b
--- /dev/null
+++ b/test/Events/07-self-closing.json
@@ -0,0 +1,67 @@
+{
+    "name": "Self-closing tags",
+    "options": {
+        "handler": {
+
+            },
+        "parser": {
+
+            }
+    },
+    "html": "<a href=http://test.com/>Foo</a><hr / >",
+    "expected": [
+		{
+			"event": "opentagname",
+			"data": [
+				"a"
+			]
+		},
+		{
+			"event": "attribute",
+			"data": [
+				"href",
+				"http://test.com/"
+			]
+		},
+		{
+			"event": "opentag",
+			"data": [
+				"a",
+				{
+					"href": "http://test.com/"
+				}
+			]
+		},
+		{
+			"event": "text",
+			"data": [
+				"Foo"
+			]
+		},
+		{
+			"event": "closetag",
+			"data": [
+				"a"
+			]
+		},
+		{
+			"event": "opentagname",
+			"data": [
+				"hr"
+			]
+		},
+		{
+			"event": "opentag",
+			"data": [
+				"hr",
+				{}
+			]
+		},
+		{
+			"event": "closetag",
+			"data": [
+				"hr"
+			]
+		}
+	]
+}
\ No newline at end of file
diff --git a/test/Events/08-implicit-close-tags.json b/test/Events/08-implicit-close-tags.json
new file mode 100644
index 0000000..331e785
--- /dev/null
+++ b/test/Events/08-implicit-close-tags.json
@@ -0,0 +1,71 @@
+{
+  "name": "Implicit close tags",
+  "options": {},
+  "html": "<ol><li class=test><div><table style=width:100%><tr><th>TH<td colspan=2><h3>Heading</h3><tr><td><div>Div</div><td><div>Div2</div></table></div><li><div><h3>Heading 2</h3></div></li></ol><p>Para<h4>Heading 4</h4>",
+  "expected": [
+    { "event": "opentagname", "data": [ "ol" ] },
+    { "event": "opentag", "data": [ "ol", {} ] },
+    { "event": "opentagname", "data": [ "li" ] },
+    { "event": "attribute", "data": [ "class", "test" ] },
+    { "event": "opentag", "data": [ "li", { "class": "test" } ] },
+    { "event": "opentagname", "data": [ "div" ] },
+    { "event": "opentag", "data": [ "div", {} ] },
+    { "event": "opentagname", "data": [ "table" ] },
+    { "event": "attribute", "data": [ "style", "width:100%" ] },
+    { "event": "opentag", "data": [ "table", { "style": "width:100%" } ] },
+    { "event": "opentagname", "data": [ "tr" ] },
+    { "event": "opentag", "data": [ "tr", {} ] },
+    { "event": "opentagname", "data": [ "th" ] },
+    { "event": "opentag", "data": [ "th", {} ] },
+    { "event": "text", "data": [ "TH" ] },
+    { "event": "closetag", "data": [ "th" ] },
+    { "event": "opentagname", "data": [ "td" ] },
+    { "event": "attribute", "data": [ "colspan", "2" ] },
+    { "event": "opentag", "data": [ "td", { "colspan": "2" } ] },
+    { "event": "opentagname", "data": [ "h3" ] },
+    { "event": "opentag", "data": [ "h3", {} ] },
+    { "event": "text", "data": [ "Heading" ] },
+    { "event": "closetag", "data": [ "h3" ] },
+    { "event": "closetag", "data": [ "td" ] },
+    { "event": "closetag", "data": [ "tr" ] },
+    { "event": "opentagname", "data": [ "tr" ] },
+    { "event": "opentag", "data": [ "tr", {} ] },
+    { "event": "opentagname", "data": [ "td" ] },
+    { "event": "opentag", "data": [ "td", {} ] },
+    { "event": "opentagname", "data": [ "div" ] },
+    { "event": "opentag", "data": [ "div", {} ] },
+    { "event": "text", "data": [ "Div" ] },
+    { "event": "closetag", "data": [ "div" ] },
+    { "event": "closetag", "data": [ "td" ] },
+    { "event": "opentagname", "data": [ "td" ] },
+    { "event": "opentag", "data": [ "td", {} ] },
+    { "event": "opentagname", "data": [ "div" ] },
+    { "event": "opentag", "data": [ "div", {} ] },
+    { "event": "text", "data": [ "Div2" ] },
+    { "event": "closetag", "data": [ "div" ] },
+    { "event": "closetag", "data": [ "td" ] },
+    { "event": "closetag", "data": [ "tr" ] },
+    { "event": "closetag", "data": [ "table" ] },
+    { "event": "closetag", "data": [ "div" ] },
+    { "event": "closetag", "data": [ "li" ] },
+    { "event": "opentagname", "data": [ "li" ] },
+    { "event": "opentag", "data": [ "li", {} ] },
+    { "event": "opentagname", "data": [ "div" ] },
+    { "event": "opentag", "data": [ "div", {} ] },
+    { "event": "opentagname", "data": [ "h3" ] },
+    { "event": "opentag", "data": [ "h3", {} ] },
+    { "event": "text", "data": [ "Heading 2" ] },
+    { "event": "closetag", "data": [ "h3" ] },
+    { "event": "closetag", "data": [ "div" ] },
+    { "event": "closetag", "data": [ "li" ] },
+    { "event": "closetag", "data": [ "ol" ] },
+    { "event": "opentagname", "data": [ "p" ] },
+    { "event": "opentag", "data": [ "p", {} ] },
+    { "event": "text", "data": [ "Para" ] },
+    { "event": "closetag", "data": [ "p" ] },
+    { "event": "opentagname", "data": [ "h4" ] },
+    { "event": "opentag", "data": [ "h4", {} ] },
+    { "event": "text", "data": [ "Heading 4" ] },
+    { "event": "closetag", "data": [ "h4" ] }
+  ]
+}
\ No newline at end of file
diff --git a/test/Events/09-attributes.json b/test/Events/09-attributes.json
new file mode 100644
index 0000000..afa6e4a
--- /dev/null
+++ b/test/Events/09-attributes.json
@@ -0,0 +1,68 @@
+{
+  "name": "attributes (no white space, no value, no quotes)",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<button class=\"test0\"title=\"test1\" disabled value=test2>adsf</button>",
+  "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "button"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "class",
+        "test0"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "title",
+        "test1"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "disabled",
+        ""
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "value",
+        "test2"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "button",
+        {
+          "class": "test0",
+          "title": "test1",
+          "disabled": "",
+          "value": "test2"
+        }
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "adsf"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "button"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/test/Events/10-crazy-attrib.json b/test/Events/10-crazy-attrib.json
new file mode 100644
index 0000000..00bad5f
--- /dev/null
+++ b/test/Events/10-crazy-attrib.json
@@ -0,0 +1,52 @@
+{
+  "name": "crazy attribute",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<p < = '' FAIL>stuff</p><a",
+  "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "p"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "<",
+        ""
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "fail",
+        ""
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "p",
+        {
+          "<": "",
+          "fail": ""
+        }
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "stuff"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "p"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/test/Events/11-script_in_script.json b/test/Events/11-script_in_script.json
new file mode 100644
index 0000000..ddbb87c
--- /dev/null
+++ b/test/Events/11-script_in_script.json
@@ -0,0 +1,54 @@
+{
+  "name": "Scripts creating other scripts",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<p><script>var str = '<script></'+'script>';</script></p>",
+  "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "p"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "p",
+        {}
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "script"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "script",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "var str = '<script></'+'script>';"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "script"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "p"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/test/Events/12-long-comment-end.json b/test/Events/12-long-comment-end.json
new file mode 100644
index 0000000..e81f307
--- /dev/null
+++ b/test/Events/12-long-comment-end.json
@@ -0,0 +1,20 @@
+{
+  "name": "Long comment ending",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<meta id='before'><!-- text ---><meta id='after'>",
+  "expected": [
+  { "event": "opentagname", "data": [ "meta" ] },
+  { "event": "attribute",   "data": [ "id", "before" ] },
+  { "event": "opentag",     "data": [ "meta", {"id": "before"} ] },
+  { "event": "closetag",    "data": [ "meta" ] },
+  { "event": "comment",     "data": [ " text -" ] },
+  { "event": "commentend",  "data": [] },
+  { "event": "opentagname", "data": [ "meta" ] },
+  { "event": "attribute",   "data": [ "id", "after" ] },
+  { "event": "opentag",     "data": [ "meta", {"id": "after"} ] },
+  { "event": "closetag",    "data": [ "meta" ] }
+  ]
+}
\ No newline at end of file
diff --git a/test/Events/13-long-cdata-end.json b/test/Events/13-long-cdata-end.json
new file mode 100644
index 0000000..34b7b41
--- /dev/null
+++ b/test/Events/13-long-cdata-end.json
@@ -0,0 +1,22 @@
+{
+  "name": "Long CDATA ending",
+  "options": {
+    "handler": {},
+    "parser": {"xmlMode": true}
+  },
+  "html": "<before /><tag><![CDATA[ text ]]]></tag><after />",
+  "expected": [
+  { "event": "opentagname", "data": [ "before" ] },
+  { "event": "opentag",     "data": [ "before", {} ] },
+  { "event": "closetag",    "data": [ "before" ] },
+  { "event": "opentagname", "data": [ "tag" ] },
+  { "event": "opentag",     "data": [ "tag", {} ] },
+  { "event": "cdatastart",  "data": [] },
+  { "event": "text",        "data": [ " text ]" ] },
+  { "event": "cdataend",    "data": [] },
+  { "event": "closetag",    "data": [ "tag" ] },
+  { "event": "opentagname", "data": [ "after" ] },
+  { "event": "opentag",     "data": [ "after", {} ] },
+  { "event": "closetag",    "data": [ "after" ] }
+  ]
+}
\ No newline at end of file
diff --git a/test/Events/14-implicit-open-tags.json b/test/Events/14-implicit-open-tags.json
new file mode 100644
index 0000000..f02b840
--- /dev/null
+++ b/test/Events/14-implicit-open-tags.json
@@ -0,0 +1,27 @@
+{
+  "name": "Implicit open p and br tags",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<div>Hallo</p>World</br></ignore></div></p></br>",
+  "expected": [
+  	{ "event": "opentagname", "data": [ "div" ] },
+  	{ "event": "opentag",     "data": [ "div", {} ] },
+  	{ "event": "text",        "data": [ "Hallo" ] },
+  	{ "event": "opentagname", "data": [ "p" ] },
+  	{ "event": "opentag",     "data": [ "p", {} ] },
+  	{ "event": "closetag",    "data": [ "p" ] },
+  	{ "event": "text",        "data": [ "World" ] },
+  	{ "event": "opentagname", "data": [ "br" ] },
+  	{ "event": "opentag",     "data": [ "br", {} ] },
+  	{ "event": "closetag",    "data": [ "br" ] },
+  	{ "event": "closetag",    "data": [ "div" ] },
+  	{ "event": "opentagname", "data": [ "p" ] },
+  	{ "event": "opentag",     "data": [ "p", {} ] },
+  	{ "event": "closetag",    "data": [ "p" ] },
+    { "event": "opentagname", "data": [ "br" ] },
+    { "event": "opentag",     "data": [ "br", {} ] },
+    { "event": "closetag",    "data": [ "br" ] }
+  ]
+}
\ No newline at end of file
diff --git a/test/Events/15-lt-whitespace.json b/test/Events/15-lt-whitespace.json
new file mode 100644
index 0000000..aae6eb0
--- /dev/null
+++ b/test/Events/15-lt-whitespace.json
@@ -0,0 +1,16 @@
+{
+  "name": "lt followed by whitespace",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "a < b",
+  "expected": [
+    {
+      "event": "text",
+      "data": [
+        "a < b"
+      ]
+    }
+  ]
+}
diff --git a/test/Events/16-double_attribs.json b/test/Events/16-double_attribs.json
new file mode 100644
index 0000000..bed1d8f
--- /dev/null
+++ b/test/Events/16-double_attribs.json
@@ -0,0 +1,45 @@
+{
+  "name": "double attribute",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<h1 class=test class=boo></h1>",
+  "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "h1"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "class",
+        "test"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "class",
+        "boo"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "h1",
+        {
+          "class": "test"
+        }
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "h1"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/test/Events/17-numeric_entities.json b/test/Events/17-numeric_entities.json
new file mode 100644
index 0000000..23e0b26
--- /dev/null
+++ b/test/Events/17-numeric_entities.json
@@ -0,0 +1,16 @@
+{
+  "name": "numeric entities",
+  "options": {
+    "handler": {},
+    "parser": {"decodeEntities": true}
+  },
+  "html": "&#x61;&#x62c&#100&#x66g&#x;&#x68",
+  "expected": [
+    {
+      "event": "text",
+      "data": [
+        "abcdfg&#x;h"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/test/Events/18-legacy_entities.json b/test/Events/18-legacy_entities.json
new file mode 100644
index 0000000..5f34e5b
--- /dev/null
+++ b/test/Events/18-legacy_entities.json
@@ -0,0 +1,16 @@
+{
+  "name": "legacy entities",
+  "options": {
+    "handler": {},
+    "parser": {"decodeEntities": true}
+  },
+  "html": "&AMPel&iacutee&eer;s&lter",
+  "expected": [
+    {
+      "event": "text",
+      "data": [
+        "&el\u00EDe&eer;s<er"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/test/Events/19-named_entities.json b/test/Events/19-named_entities.json
new file mode 100644
index 0000000..d9068d5
--- /dev/null
+++ b/test/Events/19-named_entities.json
@@ -0,0 +1,16 @@
+{
+  "name": "named entities",
+  "options": {
+    "handler": {},
+    "parser": {"decodeEntities": true}
+  },
+  "html": "&amp;el&lt;er&CounterClockwiseContourIntegral;foo&bar",
+  "expected": [
+    {
+      "event": "text",
+      "data": [
+        "&el<er\u2233foo&bar"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/test/Events/20-xml_entities.json b/test/Events/20-xml_entities.json
new file mode 100644
index 0000000..ce82300
--- /dev/null
+++ b/test/Events/20-xml_entities.json
@@ -0,0 +1,16 @@
+{
+  "name": "xml entities",
+  "options": {
+    "handler": {},
+    "parser": {"decodeEntities": true, "xmlMode": true}
+  },
+  "html": "&amp;&gt;&amp&lt;&uuml;&#x61;&#x62c&#100&#101",
+  "expected": [
+    {
+      "event": "text",
+      "data": [
+        "&>&amp<&uuml;a&#x62c&#100&#101"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/test/Events/21-entity_in_attribute.json b/test/Events/21-entity_in_attribute.json
new file mode 100644
index 0000000..e0a3195
--- /dev/null
+++ b/test/Events/21-entity_in_attribute.json
@@ -0,0 +1,38 @@
+{
+  "name": "entity in attribute",
+  "options": {
+    "handler": {},
+    "parser": {"decodeEntities": true}
+  },
+  "html": "<a href='http://example.com/p&#x61;ge?param=value&param2&param3=<val&; & &'>",
+  "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "a"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "href",
+        "http://example.com/page?param=value&param2&param3=<val&; & &"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "a",
+        {
+          "href": "http://example.com/page?param=value&param2&param3=<val&; & &"
+        }
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "a"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/test/Events/22-double_brackets.json b/test/Events/22-double_brackets.json
new file mode 100644
index 0000000..38a513b
--- /dev/null
+++ b/test/Events/22-double_brackets.json
@@ -0,0 +1,41 @@
+{
+  "name": "double brackets",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<<princess-purpose>>testing</princess-purpose>",
+  "expected": [
+    {
+      "event": "text",
+      "data": [
+        "<"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "princess-purpose"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "princess-purpose",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        ">testing"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "princess-purpose"
+      ]
+    }
+  ]
+}
diff --git a/test/Events/23-legacy_entity_fail.json b/test/Events/23-legacy_entity_fail.json
new file mode 100644
index 0000000..4b4320b
--- /dev/null
+++ b/test/Events/23-legacy_entity_fail.json
@@ -0,0 +1,16 @@
+{
+  "name": "legacy entities",
+  "options": {
+    "handler": {},
+    "parser": {"decodeEntities": true}
+  },
+  "html": "M&M",
+  "expected": [
+    {
+      "event": "text",
+      "data": [
+        "M&M"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/test/Events/24-special_special.json b/test/Events/24-special_special.json
new file mode 100644
index 0000000..e80731f
--- /dev/null
+++ b/test/Events/24-special_special.json
@@ -0,0 +1,133 @@
+{
+  "name": "Special special tags",
+  "options": {},
+  "html": "<sCriPT></scripter</soo</sCript><STyLE></styler</STylE><sCiPt><stylee><scriptee><soo>",
+  "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "script"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "script",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "</scripter</soo"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "script"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "style"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "style",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "</styler"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "style"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "scipt"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "scipt",
+        {}
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "stylee"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "stylee",
+        {}
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "scriptee"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "scriptee",
+        {}
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "soo"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "soo",
+        {}
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "soo"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "scriptee"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "stylee"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "scipt"
+      ]
+    }
+  ]
+}
diff --git a/test/Events/25-empty_tag_name.json b/test/Events/25-empty_tag_name.json
new file mode 100644
index 0000000..b3b340c
--- /dev/null
+++ b/test/Events/25-empty_tag_name.json
@@ -0,0 +1,13 @@
+{
+  "name": "Empty tag name",
+  "options": {},
+  "html": "< ></ >",
+  "expected": [
+    {
+      "event": "text",
+      "data": [
+        "< ></ >"
+      ]
+    }
+  ]
+}
diff --git a/test/Events/26-not-quite-closed.json b/test/Events/26-not-quite-closed.json
new file mode 100644
index 0000000..8504440
--- /dev/null
+++ b/test/Events/26-not-quite-closed.json
@@ -0,0 +1,35 @@
+{
+  "name": "Not quite closed",
+  "options": {},
+  "html": "<foo /bar></foo bar>",
+  "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "foo"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "bar",
+        ""
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "foo",
+        {
+          "bar": ""
+        }
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "foo"
+      ]
+    }
+  ]
+}
diff --git a/test/Events/27-entities_in_attributes.json b/test/Events/27-entities_in_attributes.json
new file mode 100644
index 0000000..b03cbdf
--- /dev/null
+++ b/test/Events/27-entities_in_attributes.json
@@ -0,0 +1,62 @@
+{
+  "name": "Entities in attributes",
+  "options": {
+    "handler": {},
+    "parser": {"decodeEntities": true}
+  },
+  "html": "<foo bar=& baz=\"&\" boo='&' noo=>",
+  "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "foo"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "bar",
+        "&"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "baz",
+        "&"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "boo",
+        "&"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "noo",
+        ""
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "foo",
+        {
+          "bar": "&",
+          "baz": "&",
+          "boo": "&",
+          "noo": ""
+        }
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "foo"
+      ]
+    }
+  ]
+}
diff --git a/test/Events/28-cdata_in_html.json b/test/Events/28-cdata_in_html.json
new file mode 100644
index 0000000..80c033b
--- /dev/null
+++ b/test/Events/28-cdata_in_html.json
@@ -0,0 +1,9 @@
+{
+  "name": "CDATA in HTML",
+  "options": {},
+  "html": "<![CDATA[ foo ]]>",
+  "expected": [
+    { "event": "comment",     "data": [ "[CDATA[ foo ]]" ] },
+    { "event": "commentend",  "data": [] }
+  ]
+}
\ No newline at end of file
diff --git a/test/Events/29-comment_edge-cases.json b/test/Events/29-comment_edge-cases.json
new file mode 100644
index 0000000..9d9709a
--- /dev/null
+++ b/test/Events/29-comment_edge-cases.json
@@ -0,0 +1,18 @@
+{
+  "name": "Comment edge-cases",
+  "options": {},
+  "html": "<!-foo><!-- --- --><!--foo",
+  "expected": [
+    {
+      "event": "processinginstruction",
+      "data": [
+        "!-foo",
+        "!-foo"
+      ]
+    },
+    { "event": "comment",     "data": [ " --- " ] },
+    { "event": "commentend",  "data": [] },
+    { "event": "comment",     "data": [ "foo" ] },
+    { "event": "commentend",  "data": [] }
+  ]
+}
\ No newline at end of file
diff --git a/test/Events/30-cdata_edge-cases.json b/test/Events/30-cdata_edge-cases.json
new file mode 100644
index 0000000..d226f09
--- /dev/null
+++ b/test/Events/30-cdata_edge-cases.json
@@ -0,0 +1,22 @@
+{
+  "name": "CDATA edge-cases",
+  "options": {
+    "parser": {"recognizeCDATA": true}
+  },
+  "html": "<![CDATA><![CDATA[[]]sdaf]]><![CDATA[foo",
+  "expected": [
+    {
+      "event": "processinginstruction",
+      "data": [
+        "![cdata",
+        "![CDATA"
+      ]
+    },
+    { "event": "cdatastart", "data": [] },
+    { "event": "text",     "data": [ "[]]sdaf" ] },
+    { "event": "cdataend",  "data": [] },
+    { "event": "cdatastart", "data": [] },
+    { "event": "text",     "data": [ "foo" ] },
+    { "event": "cdataend",  "data": [] }
+  ]
+}
\ No newline at end of file
diff --git a/test/Events/31-comment_false-ending.json b/test/Events/31-comment_false-ending.json
new file mode 100644
index 0000000..6658428
--- /dev/null
+++ b/test/Events/31-comment_false-ending.json
@@ -0,0 +1,9 @@
+{
+  "name": "Comment false ending",
+  "options": {},
+  "html": "<!-- a-b-> -->",
+  "expected": [
+    { "event": "comment",     "data": [ " a-b-> " ] },
+    { "event": "commentend",  "data": [] }
+  ]
+}
diff --git a/test/Events/32-script-ending-with-lessthan.json b/test/Events/32-script-ending-with-lessthan.json
new file mode 100644
index 0000000..dcf7690
--- /dev/null
+++ b/test/Events/32-script-ending-with-lessthan.json
@@ -0,0 +1,35 @@
+{
+  "name": "Scripts ending with <",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<script><</script>",
+  "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "script"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "script",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "<"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "script"
+      ]
+    }
+  ]
+}
diff --git a/test/Feeds/01-rss.js b/test/Feeds/01-rss.js
new file mode 100644
index 0000000..a3aae47
--- /dev/null
+++ b/test/Feeds/01-rss.js
@@ -0,0 +1,34 @@
+exports.name = "RSS (2.0)";
+exports.file = "/RSS_Example.xml";
+exports.expected = {
+	type: "rss",
+	id: "",
+	title: "Liftoff News",
+	link: "http://liftoff.msfc.nasa.gov/",
+	description: "Liftoff to Space Exploration.",
+	updated: new Date("Tue, 10 Jun 2003 09:41:01 GMT"),
+	author: "editor at example.com",
+	items: [{
+		id: "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573",
+		title: "Star City",
+		link: "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp",
+		description: "How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href=\"http://howe.iki.rssi.ru/GCTC/gctc_e.htm\">Star City</a>.",
+		pubDate: new Date("Tue, 03 Jun 2003 09:39:21 GMT")
+	}, {
+		id: "http://liftoff.msfc.nasa.gov/2003/05/30.html#item572",
+		description: "Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href=\"http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm\">partial eclipse of the Sun</a> on Saturday, May 31st.",
+		pubDate: new Date("Fri, 30 May 2003 11:06:42 GMT")
+	}, {
+		id: "http://liftoff.msfc.nasa.gov/2003/05/27.html#item571",
+		title: "The Engine That Does More",
+		link: "http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp",
+		description: "Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly.  The proposed VASIMR engine would do that.",
+		pubDate: new Date("Tue, 27 May 2003 08:37:32 GMT")
+	}, {
+		id: "http://liftoff.msfc.nasa.gov/2003/05/20.html#item570",
+		title: "Astronauts' Dirty Laundry",
+		link: "http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp",
+		description: "Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them.  Instead, astronauts have other options.",
+		pubDate: new Date("Tue, 20 May 2003 08:56:02 GMT")
+	}]
+};
\ No newline at end of file
diff --git a/test/Feeds/02-atom.js b/test/Feeds/02-atom.js
new file mode 100644
index 0000000..5b5d88e
--- /dev/null
+++ b/test/Feeds/02-atom.js
@@ -0,0 +1,18 @@
+exports.name = "Atom (1.0)";
+exports.file = "/Atom_Example.xml";
+exports.expected = {
+	type: "atom",
+	id: "urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6",
+	title: "Example Feed",
+	link: "http://example.org/feed/",
+	description: "A subtitle.",
+	updated: new Date("2003-12-13T18:30:02Z"),
+	author: "johndoe at example.com",
+	items: [{
+		id: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a",
+		title: "Atom-Powered Robots Run Amok",
+		link: "http://example.org/2003/12/13/atom03",
+		description: "Some content.",
+		pubDate: new Date("2003-12-13T18:30:02Z")
+	}]
+};
diff --git a/test/Feeds/03-rdf.js b/test/Feeds/03-rdf.js
new file mode 100644
index 0000000..d8f92f5
--- /dev/null
+++ b/test/Feeds/03-rdf.js
@@ -0,0 +1,20 @@
+exports.name = "RDF test";
+exports.file = "/RDF_Example.xml";
+exports.expected = {
+	"type": "rdf",
+	"id": "",
+	"title": "A title to parse and remember",
+	"link": "https://github.com/fb55/htmlparser2/",
+	"items": [
+		{
+			"title": "Fast HTML Parsing",
+			"link": "http://somefakesite/path/to/something.html",
+			"description": "Great test content<br>A link: <a href=\"http://github.com\">Github</a>"
+		},
+		{
+			"title": "This space intentionally left blank",
+			"link": "http://somefakesite/path/to/something-else.html",
+			"description": "The early bird gets the worm"
+		}
+	]
+};
diff --git a/test/Stream/01-basic.json b/test/Stream/01-basic.json
new file mode 100644
index 0000000..e0766e7
--- /dev/null
+++ b/test/Stream/01-basic.json
@@ -0,0 +1,83 @@
+{
+  "name": "Basic html",
+  "options": {},
+  "file": "Basic.html",
+  "expected": [
+    {
+      "event": "processinginstruction",
+      "data": [
+        "!doctype",
+        "!DOCTYPE html"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "html"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "html",
+        {}
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "title",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "The Title"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "body"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "body",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Hello world"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "body"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "html"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/test/Stream/02-RSS.json b/test/Stream/02-RSS.json
new file mode 100644
index 0000000..0d5921c
--- /dev/null
+++ b/test/Stream/02-RSS.json
@@ -0,0 +1,1093 @@
+{
+  "name": "RSS feed",
+  "options": {"xmlMode": true},
+  "file": "RSS_Example.xml",
+  "expected": [
+    {
+      "event": "processinginstruction",
+      "data": [
+        "?xml",
+        "?xml version=\"1.0\"?"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "comment",
+      "data": [
+        " http://cyber.law.harvard.edu/rss/examples/rss2sample.xml "
+      ]
+    },
+    {
+      "event": "commentend",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "rss"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "version",
+        "2.0"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "rss",
+        {
+          "version": "2.0"
+        }
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n   "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "channel"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "channel",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "title",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Liftoff News"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "link",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "http://liftoff.msfc.nasa.gov/"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "description",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Liftoff to Space Exploration."
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "language"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "language",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "en-us"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "language"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "pubDate"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "pubDate",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Tue, 10 Jun 2003 04:00:00 GMT"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "pubDate"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "lastBuildDate"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "lastBuildDate",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Tue, 10 Jun 2003 09:41:01 GMT"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "lastBuildDate"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "docs"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "docs",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "http://blogs.law.harvard.edu/tech/rss"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "docs"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "generator"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "generator",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Weblog Editor 2.0"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "generator"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "managingEditor"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "managingEditor",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "editor at example.com"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "managingEditor"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "webMaster"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "webMaster",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "webmaster at example.com"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "webMaster"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "item"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "item",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "title",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Star City"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "link",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "description",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href=\"http://howe.iki.rssi.ru/GCTC/gctc_e.htm\">Star City</a>."
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "pubDate"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "pubDate",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Tue, 03 Jun 2003 09:39:21 GMT"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "pubDate"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "guid"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "guid",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "guid"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\n      "
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "item"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "item"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "item",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "description",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href=\"http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm\">partial eclipse of the Sun</a> on Saturday, May 31st."
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "pubDate"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "pubDate",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Fri, 30 May 2003 11:06:42 GMT"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "pubDate"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "guid"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "guid",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "http://liftoff.msfc.nasa.gov/2003/05/30.html#item572"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "guid"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\n      "
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "item"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "item"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "item",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "title",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "The Engine That Does More"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "link",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "description",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly.  The proposed VASIMR engine would do that."
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "pubDate"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "pubDate",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Tue, 27 May 2003 08:37:32 GMT"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "pubDate"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "guid"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "guid",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "http://liftoff.msfc.nasa.gov/2003/05/27.html#item571"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "guid"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\n      "
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "item"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "item"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "item",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "title",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Astronauts' Dirty Laundry"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "link",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "description",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them.  Instead, astronauts have other options."
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "pubDate"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "pubDate",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Tue, 20 May 2003 08:56:02 GMT"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "pubDate"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "guid"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "guid",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "http://liftoff.msfc.nasa.gov/2003/05/20.html#item570"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "guid"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\n      "
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "item"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n   "
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "channel"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "rss"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/test/Stream/03-Atom.json b/test/Stream/03-Atom.json
new file mode 100644
index 0000000..0cbf24e
--- /dev/null
+++ b/test/Stream/03-Atom.json
@@ -0,0 +1,678 @@
+{
+  "name": "Atom feed",
+  "options": {"xmlMode": true},
+  "file": "Atom_Example.xml",
+  "expected": [
+    {
+      "event": "processinginstruction",
+      "data": [
+        "?xml",
+        "?xml version=\"1.0\" encoding=\"utf-8\"?"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "comment",
+      "data": [
+        " http://en.wikipedia.org/wiki/Atom_%28standard%29 "
+      ]
+    },
+    {
+      "event": "commentend",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "feed"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "xmlns",
+        "http://www.w3.org/2005/Atom"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "feed",
+        {
+          "xmlns": "http://www.w3.org/2005/Atom"
+        }
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "title",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Example Feed"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "subtitle"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "subtitle",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "A subtitle."
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "subtitle"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "href",
+        "http://example.org/feed/"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "rel",
+        "self"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "link",
+        {
+          "href": "http://example.org/feed/",
+          "rel": "self"
+        }
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "href",
+        "http://example.org/"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "link",
+        {
+          "href": "http://example.org/"
+        }
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "id"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "id",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "id"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "updated"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "updated",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "2003-12-13T18:30:02Z"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "updated"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "author"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "author",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "name"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "name",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "John Doe"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "name"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "email"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "email",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "johndoe@example.com"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "email"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "author"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "entry"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "entry",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "title",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Atom-Powered Robots Run Amok"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "href",
+        "http://example.org/2003/12/13/atom03"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "link",
+        {
+          "href": "http://example.org/2003/12/13/atom03"
+        }
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "rel",
+        "alternate"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "type",
+        "text/html"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "href",
+        "http://example.org/2003/12/13/atom03.html"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "link",
+        {
+          "rel": "alternate",
+          "type": "text/html",
+          "href": "http://example.org/2003/12/13/atom03.html"
+        }
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "rel",
+        "edit"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "href",
+        "http://example.org/2003/12/13/atom03/edit"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "link",
+        {
+          "rel": "edit",
+          "href": "http://example.org/2003/12/13/atom03/edit"
+        }
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "id"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "id",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "id"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "updated"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "updated",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "2003-12-13T18:30:02Z"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "updated"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+    "event": "opentagname",
+      "data": [
+        "content"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "type",
+        "html"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "content",
+        {
+          "type": "html"
+        }
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "p"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "p",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Some content."
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "p"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "content"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "entry"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\n"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "feed"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    }
+  ]
+}
diff --git a/test/Stream/04-RDF.json b/test/Stream/04-RDF.json
new file mode 100644
index 0000000..0150eb8
--- /dev/null
+++ b/test/Stream/04-RDF.json
@@ -0,0 +1,1399 @@
+{
+  "name": "RDF feed",
+  "options": {"xmlMode": true},
+  "file": "RDF_Example.xml",
+  "expected": [
+    {
+      "event": "processinginstruction",
+      "data": [
+        "?xml",
+        "?xml version=\"1.0\" encoding=\"UTF-8\"?"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "rdf:RDF"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "xmlns:rdf",
+        "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "xmlns",
+        "http://purl.org/rss/1.0/"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "xmlns:ev",
+        "http://purl.org/rss/1.0/modules/event/"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "xmlns:content",
+        "http://purl.org/rss/1.0/modules/content/"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "xmlns:taxo",
+        "http://purl.org/rss/1.0/modules/taxonomy/"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "xmlns:dc",
+        "http://purl.org/dc/elements/1.1/"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "xmlns:syn",
+        "http://purl.org/rss/1.0/modules/syndication/"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "xmlns:dcterms",
+        "http://purl.org/dc/terms/"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "xmlns:admin",
+        "http://webns.net/mvcb/"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "rdf:RDF",
+        {
+          "xmlns:rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
+          "xmlns": "http://purl.org/rss/1.0/",
+          "xmlns:ev": "http://purl.org/rss/1.0/modules/event/",
+          "xmlns:content": "http://purl.org/rss/1.0/modules/content/",
+          "xmlns:taxo": "http://purl.org/rss/1.0/modules/taxonomy/",
+          "xmlns:dc": "http://purl.org/dc/elements/1.1/",
+          "xmlns:syn": "http://purl.org/rss/1.0/modules/syndication/",
+          "xmlns:dcterms": "http://purl.org/dc/terms/",
+          "xmlns:admin": "http://webns.net/mvcb/"
+        }
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "channel"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "rdf:about",
+        "https://github.com/fb55/htmlparser2/"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "channel",
+        {
+          "rdf:about": "https://github.com/fb55/htmlparser2/"
+        }
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "title",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "A title to parse and remember"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "link",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "https://github.com/fb55/htmlparser2/"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "description",
+        {}
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:language"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:language",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "en-us"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:language"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:rights"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:rights",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Copyright 2015 the authors"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:rights"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:publisher"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:publisher",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "webmaster@thisisafakedoma.in"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:publisher"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:creator"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:creator",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "webmaster@thisisafakedoma.in"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:creator"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:source"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:source",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "https://github.com/fb55/htmlparser2/"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:source"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:title",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "A title to parse and remember"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:type"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:type",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Collection"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:type"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "syn:updateBase"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "syn:updateBase",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "2011-11-04T09:39:10-07:00"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "syn:updateBase"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "syn:updateFrequency"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "syn:updateFrequency",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "4"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "syn:updateFrequency"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "syn:updatePeriod"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "syn:updatePeriod",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "hourly"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "syn:updatePeriod"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "items"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "items",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "rdf:Seq"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "rdf:Seq",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "rdf:li"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "rdf:resource",
+        "http://somefakesite/path/to/something.html"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "rdf:li",
+        {
+          "rdf:resource": "http://somefakesite/path/to/something.html"
+        }
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "rdf:li"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t\t"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "rdf:Seq"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "items"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "channel"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "item"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "rdf:about",
+        "http://somefakesite/path/to/something.html"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "item",
+        {
+          "rdf:about": "http://somefakesite/path/to/something.html"
+        }
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "title",
+        {}
+      ]
+    },
+    {
+      "event": "cdatastart",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        " Fast HTML Parsing "
+      ]
+    },
+    {
+      "event": "cdataend",
+      "data": []
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "link",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\nhttp://somefakesite/path/to/something.html\n"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "description",
+        {}
+      ]
+    },
+    {
+      "event": "cdatastart",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        "\nGreat test content<br>A link: <a href=\"http://github.com\">Github</a>\n"
+      ]
+    },
+    {
+      "event": "cdataend",
+      "data": []
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:date"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:date",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "2011-11-04T09:35:17-07:00"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:date"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:language"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:language",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "en-us"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:language"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:rights"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:rights",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Copyright 2015 the authors"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:rights"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:source"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:source",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\nhttp://somefakesite/path/to/something.html\n"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:source"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:title",
+        {}
+      ]
+    },
+    {
+      "event": "cdatastart",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        " Fast HTML Parsing "
+      ]
+    },
+    {
+      "event": "cdataend",
+      "data": []
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:type"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:type",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "text"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:type"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dcterms:issued"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dcterms:issued",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "2011-11-04T09:35:17-07:00"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dcterms:issued"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "item"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "item"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "rdf:about",
+        "http://somefakesite/path/to/something-else.html"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "item",
+        {
+          "rdf:about": "http://somefakesite/path/to/something-else.html"
+        }
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "title",
+        {}
+      ]
+    },
+    {
+      "event": "cdatastart",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        "\nThis space intentionally left blank\n"
+      ]
+    },
+    {
+      "event": "cdataend",
+      "data": []
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "link",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\nhttp://somefakesite/path/to/something-else.html\n"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "description",
+        {}
+      ]
+    },
+    {
+      "event": "cdatastart",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        "\nThe early bird gets the worm\n"
+      ]
+    },
+    {
+      "event": "cdataend",
+      "data": []
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:date"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:date",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "2011-11-04T09:34:54-07:00"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:date"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:language"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:language",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "en-us"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:language"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:rights"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:rights",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Copyright 2015 the authors"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:rights"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:source"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:source",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\nhttp://somefakesite/path/to/something-else.html\n"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:source"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:title",
+        {}
+      ]
+    },
+    {
+      "event": "cdatastart",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        "\nThis space intentionally left blank\n"
+      ]
+    },
+    {
+      "event": "cdataend",
+      "data": []
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:type"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:type",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "text"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:type"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dcterms:issued"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dcterms:issued",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "2011-11-04T09:34:54-07:00"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dcterms:issued"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "item"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "rdf:RDF"
+      ]
+    }
+  ]
+}
diff --git a/test/Stream/05-Attributes.json b/test/Stream/05-Attributes.json
new file mode 100644
index 0000000..ad364c0
--- /dev/null
+++ b/test/Stream/05-Attributes.json
@@ -0,0 +1,354 @@
+{
+  "name": "Attributes",
+  "options": {},
+  "file": "Attributes.html",
+  "expected": [
+    {
+      "event": "processinginstruction",
+      "data": [
+        "!doctype",
+        "!doctype html"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "html"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "html",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "head"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "head",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "title",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Attributes test"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "head"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "body"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "body",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "comment",
+      "data": [
+        " Normal attributes "
+      ]
+    },
+    {
+      "event": "commentend",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "button"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "id",
+        "test0"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "class",
+        "value0"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "title",
+        "value1"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "button",
+        {
+          "id": "test0",
+          "class": "value0",
+          "title": "value1"
+        }
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "class=\"value0\" title=\"value1\""
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "button"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\n\t"
+      ]
+    },
+    {
+      "event": "comment",
+      "data": [
+        " Attributes with no quotes or value "
+      ]
+    },
+    {
+      "event": "commentend",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "button"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "id",
+        "test1"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "class",
+        "value2"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "disabled",
+        ""
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "button",
+        {
+          "id": "test1",
+          "class": "value2",
+          "disabled": ""
+        }
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "class=value2 disabled"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "button"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\n\t"
+      ]
+    },
+    {
+      "event": "comment",
+      "data": [
+        " Attributes with no space between them. No valid, but accepted by the browser "
+      ]
+    },
+    {
+      "event": "commentend",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "button"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "id",
+        "test2"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "class",
+        "value4"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "title",
+        "value5"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "button",
+        {
+          "id": "test2",
+          "class": "value4",
+          "title": "value5"
+        }
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "class=\"value4\"title=\"value5\""
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "button"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "body"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "html"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/test/api.js b/test/api.js
new file mode 100644
index 0000000..a6a76ef
--- /dev/null
+++ b/test/api.js
@@ -0,0 +1,103 @@
+var htmlparser2 = require(".."),
+    assert = require("assert");
+
+describe("API", function(){
+
+	it("should load all modules", function(){
+		var Stream = require("../lib/Stream.js");
+		assert.strictEqual(htmlparser2.Stream, Stream, "should load module");
+		assert.strictEqual(htmlparser2.Stream, Stream, "should load it again (cache)");
+
+		var ProxyHandler = require("../lib/ProxyHandler.js");
+		assert.strictEqual(htmlparser2.ProxyHandler, ProxyHandler, "should load module");
+		assert.strictEqual(htmlparser2.ProxyHandler, ProxyHandler, "should load it again (cache)");
+	});
+
+	it("should work without callbacks", function(){
+		var p = new htmlparser2.Parser(null, {xmlMode: true, lowerCaseAttributeNames: true});
+
+		p.end("<a foo><bar></a><!-- --><![CDATA[]]]><?foo?><!bar><boo/>boohay");
+		p.write("foo");
+
+		//check for an error
+		p.end();
+		var err = false;
+		p._cbs.onerror = function(){ err = true; };
+		p.write("foo");
+		assert(err);
+		err = false;
+		p.end();
+		assert(err);
+
+		p.reset();
+
+		//remove method
+		p._cbs.onopentag = function(){};
+		p.write("<a foo");
+		p._cbs.onopentag = null;
+		p.write(">");
+
+		//pause/resume
+		var processed = false;
+		p._cbs.ontext = function(t){
+			assert.equal(t, "foo");
+			processed = true;
+		};
+		p.pause();
+		p.write("foo");
+		assert(!processed);
+		p.resume();
+		assert(processed);
+		processed = false;
+		p.pause();
+		assert(!processed);
+		p.resume();
+		assert(!processed);
+		p.pause();
+		p.end("foo");
+		assert(!processed);
+		p.resume();
+		assert(processed);
+
+	});
+
+	it("should update the position", function(){
+		var p = new htmlparser2.Parser(null);
+
+		p.write("foo");
+
+		assert.equal(p.startIndex, 0);
+		assert.equal(p.endIndex, 2);
+
+		p.write("<bar>");
+
+		assert.equal(p.startIndex, 3);
+		assert.equal(p.endIndex, 7);
+	});
+
+	it("should update the position when a single tag is spread across multiple chunks", function(){
+		var p = new htmlparser2.Parser(null);
+
+		p.write("<div ");
+		p.write("foo=bar>");
+
+		assert.equal(p.startIndex, 0);
+		assert.equal(p.endIndex, 12);
+	});
+
+	it("should support custom tokenizer", function(){
+		function CustomTokenizer(options, cbs){
+			htmlparser2.Tokenizer.call(this, options, cbs);
+			return this;
+		}
+		CustomTokenizer.prototype = Object.create(htmlparser2.Tokenizer.prototype);
+		CustomTokenizer.prototype.constructor = CustomTokenizer;
+
+		var p = new htmlparser2.Parser({
+			onparserinit: function(parser){
+				assert(parser._tokenizer instanceof CustomTokenizer);
+			}
+		}, { Tokenizer: CustomTokenizer });
+		p.done();
+	});
+});
\ No newline at end of file
diff --git a/test/test-helper.js b/test/test-helper.js
new file mode 100644
index 0000000..3f39bf5
--- /dev/null
+++ b/test/test-helper.js
@@ -0,0 +1,83 @@
+var htmlparser2 = require(".."),
+    fs = require("fs"),
+    path = require("path"),
+    assert = require("assert"),
+    Parser = htmlparser2.Parser,
+    CollectingHandler = htmlparser2.CollectingHandler;
+
+exports.writeToParser = function(handler, options, data){
+	var parser = new Parser(handler, options);
+	//first, try to run the test via chunks
+	for(var i = 0; i < data.length; i++){
+		parser.write(data.charAt(i));
+	}
+	parser.end();
+	//then parse everything
+	parser.parseComplete(data);
+};
+
+//returns a tree structure
+exports.getEventCollector = function(cb){
+	var handler = new CollectingHandler({onerror: cb, onend: onend});
+
+	return handler;
+
+	function onend(){
+		cb(null, handler.events.reduce(eventReducer, []));
+	}
+};
+
+function eventReducer(events, arr){
+	if(arr[0] === "onerror" || arr[0] === "onend");
+	else if(arr[0] === "ontext" && events.length && events[events.length - 1].event === "text"){
+		events[events.length - 1].data[0] += arr[1];
+	} else {
+		events.push({
+			event: arr[0].substr(2),
+			data: arr.slice(1)
+		});
+	}
+
+	return events;
+}
+
+function getCallback(expected, done){
+	var repeated = false;
+
+	return function(err, actual){
+		assert.ifError(err);
+		try {
+			assert.deepEqual(expected, actual, "didn't get expected output");
+		} catch(e){
+			e.expected = JSON.stringify(expected, null, 2);
+			e.actual = JSON.stringify(actual, null, 2);
+			throw e;
+		}
+
+		if(repeated) done();
+		else repeated = true;
+	};
+}
+
+exports.mochaTest = function(name, root, test){
+	describe(name, readDir);
+
+	function readDir(){
+		var dir = path.join(root, name);
+
+		fs
+		.readdirSync(dir)
+		.filter(RegExp.prototype.test, /^[^\._]/) //ignore all files with a leading dot or underscore
+		.map(function(name){
+			return path.join(dir, name);
+		})
+		.map(require)
+		.forEach(runTest);
+	}
+
+	function runTest(file){
+		it(file.name, function(done){
+			test(file, getCallback(file.expected, done));
+		});
+	}
+};
diff --git a/test/unicode.js b/test/unicode.js
new file mode 100644
index 0000000..602b4ca
--- /dev/null
+++ b/test/unicode.js
@@ -0,0 +1,21 @@
+var htmlparser2 = require(".."),
+    assert = require("assert");
+
+describe("WritableStream", function(){
+
+	it("should decode fragmented unicode characters", function(){
+		var processed = false;
+		var stream = new htmlparser2.WritableStream({
+			ontext: function(text){
+				assert.equal(text, "€");
+				processed = true;
+			}
+		});
+
+		stream.write(new Buffer([0xE2, 0x82]));
+		stream.write(new Buffer([0xAC]));
+		stream.end();
+
+		assert(processed);
+	});
+});
\ No newline at end of file

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/node-htmlparser2_new.git



More information about the Pkg-javascript-commits mailing list