[Pkg-javascript-commits] [node-expat] 47/371: Imported Upstream version 1.2.0

Jonas Smedegaard dr at jones.dk
Sun Feb 28 09:59:44 UTC 2016


This is an automated email from the git hooks/post-receive script.

js pushed a commit to branch master
in repository node-expat.

commit 89528d4303f475c4c4cfe0381540f5a05134a430
Author: Jonas Smedegaard <dr at jones.dk>
Date:   Wed Jan 26 22:25:54 2011 +0100

    Imported Upstream version 1.2.0
---
 LICENSE         |  20 ++++
 README.markdown |  46 +++++++++
 bench.js        |  40 ++++++++
 install.sh      |   4 +
 node-expat.cc   | 303 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 package.json    |  18 ++++
 test.js         | 133 +++++++++++++++++++++++++
 wscript         |  16 +++
 8 files changed, 580 insertions(+)

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..907e293
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,20 @@
+Copyright (c) 2010 Stephan Maka
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
diff --git a/README.markdown b/README.markdown
new file mode 100644
index 0000000..573daaa
--- /dev/null
+++ b/README.markdown
@@ -0,0 +1,46 @@
+# node-expat #
+
+## Motivation ##
+
+You use [node.js](http://github.com/ry/node) for speed? You process
+XML streams? Then you want the fastest XML parser: [libexpat](http://expat.sourceforge.net/)!
+
+## Speed ##
+
+A stupid speed test is supplied in `bench.js`. We measure how many
+25-byte elements a SAX parser can process:
+
+- [node-xml](http://github.com/robrighter/node-xml) (pure JavaScript): 23,000 el/s
+- [libxmljs](http://github.com/polotek/libxmljs) (libxml2 binding): 77,000 el/s
+- [node-expat](http://github.com/astro/node-expat) (libexpat binding, this): 113,000 el/s
+
+These numbers were recorded on a Core 2 2400 MHz and may turn out to
+be bullshit, given my limited node.js experience.
+
+## Instructions ##
+
+    node-waf configure
+    node-waf build
+
+For using the library, make sure `build/default/expat.node` is in
+either `$NODE_PATH` or `require.paths`.
+
+Important events emitted by a parser:
+
+- *startElement* with `name, attrs`
+- *endElement* with `name`
+- *text* with `string`
+
+There are more. Use `test.js` for reference.
+
+## Error handling ##
+
+We don't emit an error event because libexpat doesn't use a callback
+either. Instead, check that `parse()` returns `true`. A descriptive
+string can be obtained via `getError()` to provide user feedback.
+
+## Namespace handling ##
+
+A word about special parsing of *xmlns* attributes: this is not necessary
+in a bare SAX parser like this, given that the DOM replacement you are
+using (if any) is not relevant to the parser.
diff --git a/bench.js b/bench.js
new file mode 100644
index 0000000..4db66f0
--- /dev/null
+++ b/bench.js
@@ -0,0 +1,40 @@
+var sys = require('sys');
+var node_xml = require("node-xml");
+var libxml = require("libxmljs");
+var expat = require('./build/default/node-expat');
+
+function NodeXmlParser() {
+    var parser = new node_xml.SaxParser(function(cb) { });
+    this.parse = function(s) {
+	parser.parseString(s);
+    };
+}
+function LibXmlJsParser() {
+    var parser = new libxml.SaxPushParser(function(cb) { });
+    this.parse = function(s) {
+	parser.push(s, false);
+    };
+}
+function ExpatParser() {
+    var parser = new expat.Parser();
+    this.parse = function(s) {
+	parser.parse(s, false);
+    };
+}
+
+//var p = new NodeXmlParser();
+//var p = new LibXmlJsParser();
+var p = new ExpatParser();
+p.parse("<r>");
+var nEl = 0;
+function d() {
+    p.parse("<foo bar='baz'>quux</foo>");
+    nEl++;
+    setTimeout(d, 0);
+}
+d();
+
+setInterval(function() {
+    sys.puts(nEl + " el/s");
+    nEl = 0;
+}, 1000);
\ No newline at end of file
diff --git a/install.sh b/install.sh
new file mode 100755
index 0000000..8c8a895
--- /dev/null
+++ b/install.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+# Configure and build the native addon with node-waf.
+# Stop at the first failing step so a failed configure does not fall
+# through to the build.
+set -e
+
+node-waf configure
+node-waf build
diff --git a/node-expat.cc b/node-expat.cc
new file mode 100644
index 0000000..3f16237
--- /dev/null
+++ b/node-expat.cc
@@ -0,0 +1,303 @@
+#include <node.h>
+#include <node_version.h>
+#include <node_events.h>
+#include <node_buffer.h>
+extern "C" {
+#include <expat.h>
+}
+
+using namespace v8;
+using namespace node;
+
+/* Event-name symbols. They are assigned in Parser::Initialize() and passed
+   to Emit() by the matching SAX callbacks of Parser. */
+static Persistent<String> sym_startElement, sym_endElement,
+  sym_startCdata, sym_endCdata,
+  sym_text, sym_processingInstruction,
+  sym_comment, sym_xmlDecl;
+
+/**
+ * Node.js binding around a single expat XML_Parser.
+ *
+ * Exposed to JavaScript as `Parser`. Data is pushed in with parse(); each
+ * expat SAX callback is forwarded as an event (startElement, endElement,
+ * startCdata, endCdata, text, processingInstruction, comment, xmlDecl).
+ */
+class Parser : public EventEmitter {
+public:
+  /** Install the `Parser` constructor on `target` and set up the event
+      name symbols used by the SAX callbacks. */
+  static void Initialize(Handle<Object> target)
+  {
+    HandleScope scope;
+    Local<FunctionTemplate> t = FunctionTemplate::New(New);
+
+    t->Inherit(EventEmitter::constructor_template);
+    t->InstanceTemplate()->SetInternalFieldCount(1);
+
+    NODE_SET_PROTOTYPE_METHOD(t, "parse", Parse);
+    NODE_SET_PROTOTYPE_METHOD(t, "setEncoding", SetEncoding);
+    NODE_SET_PROTOTYPE_METHOD(t, "getError", GetError);
+
+    target->Set(String::NewSymbol("Parser"), t->GetFunction());
+
+    sym_startElement = NODE_PSYMBOL("startElement");
+    sym_endElement = NODE_PSYMBOL("endElement");
+    sym_startCdata = NODE_PSYMBOL("startCdata");
+    sym_endCdata = NODE_PSYMBOL("endCdata");
+    sym_text = NODE_PSYMBOL("text");
+    sym_processingInstruction = NODE_PSYMBOL("processingInstruction");
+    sym_comment = NODE_PSYMBOL("comment");
+    sym_xmlDecl = NODE_PSYMBOL("xmlDecl");
+  }
+
+protected:
+  /*** Constructor ***/
+
+  /** JS constructor: `new Parser([encoding])`. The encoding is only
+      honoured when it is the sole argument and a string; otherwise expat
+      is created with a NULL encoding. */
+  static Handle<Value> New(const Arguments& args)
+  {
+    HandleScope scope;
+    Parser *parser;
+
+    if (args.Length() == 1 && args[0]->IsString())
+      {
+        /* AsciiValue sizes and NUL-terminates its own copy. The former
+           fixed 32-byte WriteAscii() buffer was left unterminated by
+           names of 32 or more characters. */
+        String::AsciiValue encoding(args[0]);
+        parser = new Parser(*encoding);
+      }
+    else
+      parser = new Parser(NULL);
+
+    parser->Wrap(args.This());
+    return args.This();
+  }
+
+  /** Create the expat parser and route all of its handlers to the static
+      callbacks below, with `this` as their user data. */
+  Parser(const XML_Char *encoding)
+    : EventEmitter()
+  {
+    parser = XML_ParserCreate(encoding);
+    assert(parser != NULL);
+
+    XML_SetUserData(parser, this);
+    XML_SetElementHandler(parser, StartElement, EndElement);
+    XML_SetCharacterDataHandler(parser, Text);
+    XML_SetCdataSectionHandler(parser, StartCdata, EndCdata);
+    XML_SetProcessingInstructionHandler(parser, ProcessingInstruction);
+    XML_SetCommentHandler(parser, Comment);
+    XML_SetXmlDeclHandler(parser, XmlDecl);
+  }
+
+  ~Parser()
+  {
+    XML_ParserFree(parser);
+  }
+
+  /*** parse() ***/
+
+  /** JS method `parse(buf[, isFinal])`.
+      Returns true/false for parse success; throws a TypeError when `buf`
+      is neither a String nor a Buffer. */
+  static Handle<Value> Parse(const Arguments& args)
+  {
+    HandleScope scope;
+    Parser *parser = ObjectWrap::Unwrap<Parser>(args.This());
+
+    /* Argument 2: isFinal :: Bool (only the value `true` enables it) */
+    int isFinal = 0;
+    if (args.Length() >= 2)
+      isFinal = args[1]->IsTrue();
+
+    /* Argument 1: buf :: String or Buffer */
+    if (args.Length() >= 1 && args[0]->IsString())
+      {
+        Local<String> str = args[0]->ToString();
+        return scope.Close(parser->parseString(**str, isFinal) ? True() : False());
+      }
+
+    if (args.Length() >= 1 && args[0]->IsObject())
+      {
+        Local<Object> obj = args[0]->ToObject();
+        if (Buffer::HasInstance(obj))
+          {
+#if NODE_MAJOR_VERSION == 0 && NODE_MINOR_VERSION < 3
+            Buffer *buffer = ObjectWrap::Unwrap<Buffer>(obj);
+            return scope.Close(parser->parseBuffer(*buffer, isFinal) ? True() : False());
+#else
+            return scope.Close(parser->parseBuffer(obj, isFinal) ? True() : False());
+#endif
+          }
+      }
+
+    /* Missing argument, or neither a String nor a Buffer */
+    return ThrowException(
+      Exception::TypeError(
+        String::New("Parse buffer must be String or Buffer")));
+  }
+
+  /** Parse a v8 String by first writing it, UTF-8 encoded, into the
+      expat parser's own buffer. Returns false on any failure. */
+  bool parseString(String &str, int isFinal)
+  {
+    int len = str.Utf8Length();
+    void *buf = XML_GetBuffer(parser, len);
+    /* Report a refused buffer as an ordinary parse failure instead of
+       aborting the process; getError() then describes expat's reason. */
+    if (buf == NULL)
+      return false;
+
+    /* The copy must happen outside assert(): with NDEBUG an assert()
+       argument is never evaluated and expat would parse an unwritten
+       buffer. */
+    int written = str.WriteUtf8(static_cast<char *>(buf), len);
+
+    return XML_ParseBuffer(parser, written, isFinal) != XML_STATUS_ERROR;
+  }
+
+  /** Parse a node.js Buffer directly */
+#if NODE_MAJOR_VERSION == 0 && NODE_MINOR_VERSION < 3
+  bool parseBuffer(Buffer &buffer, int isFinal)
+  {
+    return XML_Parse(parser, buffer.data(), buffer.length(), isFinal) != XML_STATUS_ERROR;
+  }
+#else
+  bool parseBuffer(Local<Object> buffer, int isFinal)
+  {
+    return XML_Parse(parser, Buffer::Data(buffer), Buffer::Length(buffer), isFinal) != XML_STATUS_ERROR;
+  }
+#endif
+
+  /*** setEncoding() ***/
+
+  /** JS method `setEncoding(name)`. Returns true when expat accepted the
+      encoding; false otherwise, including when the call does not have
+      exactly one string argument. */
+  static Handle<Value> SetEncoding(const Arguments& args)
+  {
+    HandleScope scope;
+    Parser *parser = ObjectWrap::Unwrap<Parser>(args.This());
+
+    if (args.Length() == 1 && args[0]->IsString())
+      {
+        /* Self-sizing, NUL-terminated copy; see New(). */
+        String::AsciiValue encoding(args[0]);
+        int status = parser->setEncoding(*encoding);
+
+        return scope.Close(status ? True() : False());
+      }
+    else
+      return False();
+  }
+
+  /** Returns nonzero when XML_SetEncoding() reported success. */
+  int setEncoding(XML_Char *encoding)
+  {
+    return XML_SetEncoding(parser, encoding) != 0;
+  }
+
+  /*** getError() ***/
+
+  /** JS method `getError()`: expat's message for the parser's current
+      error code, or null when expat has no message for it. */
+  static Handle<Value> GetError(const Arguments& args)
+  {
+    HandleScope scope;
+    Parser *parser = ObjectWrap::Unwrap<Parser>(args.This());
+
+    const XML_LChar *error = parser->getError();
+    if (error)
+      return scope.Close(String::New(error));
+    else
+      return scope.Close(Null());
+  }
+
+  const XML_LChar *getError()
+  {
+    enum XML_Error code;
+    code = XML_GetErrorCode(parser);
+    return XML_ErrorString(code);
+  }
+
+private:
+  /* expat instance */
+  XML_Parser parser;
+
+  /* no default ctor */
+  Parser();
+
+  /*** SAX callbacks ***/
+  /* expat calls these while Parse() is running. Each one opens its own
+     HandleScope so the handles it creates are released when it returns
+     instead of piling up in Parse()'s scope for the whole chunk. */
+
+  static void StartElement(void *userData,
+                           const XML_Char *name, const XML_Char **atts)
+  {
+    HandleScope scope;
+    Parser *parser = static_cast<Parser *>(userData);
+
+    /* Collect atts (a NULL-terminated list of name, value pairs) into a
+       JS object */
+    Local<Object> attr = Object::New();
+    for(const XML_Char **atts1 = atts; *atts1; atts1 += 2)
+      attr->Set(String::New(atts1[0]), String::New(atts1[1]));
+
+    /* Trigger event */
+    Handle<Value> argv[2] = { String::New(name), attr };
+    parser->Emit(sym_startElement, 2, argv);
+  }
+
+  static void EndElement(void *userData,
+                         const XML_Char *name)
+  {
+    HandleScope scope;
+    Parser *parser = static_cast<Parser *>(userData);
+
+    /* Trigger event */
+    Handle<Value> argv[1] = { String::New(name) };
+    parser->Emit(sym_endElement, 1, argv);
+  }
+
+  static void StartCdata(void *userData)
+  {
+    HandleScope scope;
+    Parser *parser = static_cast<Parser *>(userData);
+
+    /* Trigger event; no arguments, so no argv array is needed
+       (zero-length arrays are not standard C++) */
+    parser->Emit(sym_startCdata, 0, NULL);
+  }
+
+  static void EndCdata(void *userData)
+  {
+    HandleScope scope;
+    Parser *parser = static_cast<Parser *>(userData);
+
+    /* Trigger event; no arguments, so no argv array is needed */
+    parser->Emit(sym_endCdata, 0, NULL);
+  }
+
+  static void Text(void *userData,
+                   const XML_Char *s, int len)
+  {
+    HandleScope scope;
+    Parser *parser = static_cast<Parser *>(userData);
+
+    /* Trigger event; s is passed together with its explicit length */
+    Handle<Value> argv[1] = { String::New(s, len) };
+    parser->Emit(sym_text, 1, argv);
+  }
+
+  static void ProcessingInstruction(void *userData,
+                                    const XML_Char *target, const XML_Char *data)
+  {
+    HandleScope scope;
+    Parser *parser = static_cast<Parser *>(userData);
+
+    /* Trigger event */
+    Handle<Value> argv[2] = { String::New(target), String::New(data) };
+    parser->Emit(sym_processingInstruction, 2, argv);
+  }
+
+  static void Comment(void *userData,
+                      const XML_Char *data)
+  {
+    HandleScope scope;
+    Parser *parser = static_cast<Parser *>(userData);
+
+    /* Trigger event */
+    Handle<Value> argv[1] = { String::New(data) };
+    parser->Emit(sym_comment, 1, argv);
+  }
+
+  static void XmlDecl(void *userData,
+                      const XML_Char *version, const XML_Char *encoding,
+                      int standalone)
+  {
+    HandleScope scope;
+    Parser *parser = static_cast<Parser *>(userData);
+
+    /* Trigger event; absent version/encoding are reported as null.
+       NOTE(review): any nonzero `standalone` becomes true, including
+       the value expat passes when the declaration has no standalone
+       attribute; test.js expects true in that case. */
+    Handle<Value> argv[3] = { version ? String::New(version) : Null(),
+                              encoding ? String::New(encoding) : Null(),
+                              Boolean::New(standalone) };
+    parser->Emit(sym_xmlDecl, 3, argv);
+  }
+};
+
+
+
+/* Module entry point with C linkage: registers Parser on `target`. */
+extern "C" void
+init(Handle<Object> target)
+{
+  HandleScope scope;
+
+  Parser::Initialize(target);
+}
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..a47a9c8
--- /dev/null
+++ b/package.json
@@ -0,0 +1,18 @@
+{ "name": "node-expat"
+,"version": "1.2.0"
+,"main": "./build/default/node-expat"
+,"description": "NodeJS binding for fast XML parsing."
+,"scripts" : { "install" : "./install.sh" }
+,"dependencies": []
+,"repositories": [{ "type": "git"
+		   ,"path": "git://github.com/astro/node-expat.git"
+		  }]
+,"homepage": "http://github.com/astro/node-expat"
+,"bugs": "http://github.com/astro/node-expat/issues"
+,"maintainers": [{ "name": "Astro"
+		  ,"email": "astro at spaceboyz.net"
+		  ,"web": "http://spaceboyz.net/~astro/"
+		 }]
+,"licenses": [{ "type": "MIT" }]
+,"engine": "node"
+}
diff --git a/test.js b/test.js
new file mode 100644
index 0000000..4367f7e
--- /dev/null
+++ b/test.js
@@ -0,0 +1,133 @@
+var sys = require('sys');
+var expat = require('./build/default/node-expat');
+var Buffer = require('buffer').Buffer;
+
+function collapseTexts(evs) {
+    var r = [];
+    var t = "";
+    evs.forEach(function(ev) {
+	if (ev[0] == 'text')
+	    t += ev[1];
+	else {
+	    if (t != "")
+		r.push(['text', t]);
+	    t = "";
+	    r.push(ev);
+	}
+    });
+    if (t != "")
+	r.push(['text', t]);
+    return r;
+}
+
+var tests = 0, iterations = 0, fails = 0;
+function expect(s, evs_expected) {
+    tests++;
+    for(var step = s.length; step > 0; step--) {
+	iterations++;
+	var evs_received = [];
+	var p = new expat.Parser("UTF-8");
+	//p.setEncoding("UTF-8");
+	p.addListener('startElement', function(name, attrs) {
+	    evs_received.push(['startElement', name, attrs]);
+	});
+	p.addListener('endElement', function(name) {
+	    evs_received.push(['endElement', name]);
+	});
+	p.addListener('text', function(s) {
+	    evs_received.push(['text', s]);
+	});
+	p.addListener('processingInstruction', function(target, data) {
+	    evs_received.push(['processingInstruction', target, data]);
+	});
+	p.addListener('comment', function(s) {
+	    evs_received.push(['comment', s]);
+	});
+	p.addListener('xmlDecl', function(version, encoding, standalone) {
+	    evs_received.push(['xmlDecl', version, encoding, standalone]);
+	});
+	p.addListener('startCdata', function() {
+	    evs_received.push(['startCdata']);
+	});
+	p.addListener('endCdata', function() {
+	    evs_received.push(['endCdata']);
+	});
+	for(var l = 0; l < s.length; l += step)
+	{
+	    var end = l + step;
+	    if (end > s.length)
+		end = s.length;
+
+	    if (!p.parse(s.slice(l, end), false))
+		evs_received.push(['error']);
+	}
+
+	var expected = JSON.stringify(evs_expected);
+	var received = JSON.stringify(collapseTexts(evs_received));
+	if (expected != received) {
+	    fails++;
+	    sys.puts("Fail for: " + s + " (step=" + step + ")");
+	    sys.puts("Expected: " + expected);
+	    sys.puts("Received: " + received);
+	    return;  // don't try with smaller step size
+	}
+    }
+}
+
+expect("<r/>",
+       [['startElement', 'r', {}],
+	['endElement', 'r']]);
+expect("<r foo='bar'/>",
+       [['startElement', 'r', {foo: 'bar'}],
+	['endElement', 'r']]);
+expect("<r foo='bar' baz=\"quux\" test=\"tset\"/>",
+       [['startElement', 'r', {foo: 'bar', baz: 'quux', test: 'tset'}],
+	['endElement', 'r']]);
+expect("<r xmlns='http://localhost/' xmlns:x=\"http://example.com/\"></r>",
+       [['startElement', 'r', {xmlns: 'http://localhost/', 'xmlns:x': 'http://example.com/'}],
+	['endElement', 'r']]);
+expect("<r>foo</r>",
+       [['startElement', 'r', {}],
+	['text', "foo"],
+	['endElement', 'r']]);
+expect("<r>foo\nbar</r>",
+       [['startElement', 'r', {}],
+	['text', "foo\nbar"],
+	['endElement', 'r']]);
+expect("<r><![CDATA[<greeting>Hello, world!</greeting>]]></r>",
+       [['startElement', 'r', {}],
+	['startCdata'],
+	['text', "<greeting>Hello, world!</greeting>"],
+	['endCdata'],
+	['endElement', 'r']]);
+expect("<r>foo&bar</r>",
+       [['startElement', 'r', {}],
+	['text', "foo&bar"],
+	['endElement', 'r']]);
+expect("<r>ß</r>",
+       [['startElement', 'r', {}],
+	['text', "ß"],
+	['endElement', 'r']]);
+expect("<?i like xml?>",
+       [['processingInstruction', 'i', 'like xml']]);
+expect("<?dragons?>",
+       [['processingInstruction', 'dragons', '']]);
+expect("<!-- no comment -->",
+       [['comment', ' no comment ']]);
+expect("<&", [['error']]);
+expect("<?xml version='1.0' encoding='UTF-8'?>",
+       [['xmlDecl', '1.0', 'UTF-8', true]]);
+expect("<?xml version='1.0'?>",
+       [['xmlDecl', '1.0', null, true]]);
+expect(new Buffer('<foo>bar</foo>'),
+       [['startElement', 'foo', {}],
+	['text', 'bar'],
+	['endElement', 'foo']]);
+expect(new Buffer('<foo><![CDATA[bar]]></foo>'),
+       [['startElement', 'foo', {}],
+	['startCdata'],
+	['text', 'bar'],
+	['endCdata'],
+	['endElement', 'foo']]);
+
+sys.puts("Ran "+tests+" tests with "+iterations+" iterations: "+fails+" failures.");
diff --git a/wscript b/wscript
new file mode 100644
index 0000000..ff67c1e
--- /dev/null
+++ b/wscript
@@ -0,0 +1,16 @@
+srcdir = '.'
+blddir = 'build'
+VERSION = '1.1.0'
+
+def set_options(opt):
+  opt.tool_options('compiler_cxx')
+
+def configure(conf):
+  conf.check_tool('compiler_cxx')
+  conf.check_tool('node_addon')
+
+def build(bld):
+  obj = bld.new_task_gen('cxx', 'shlib', 'node_addon')
+  obj.target = 'node-expat'
+  obj.source = 'node-expat.cc'
+  obj.lib = 'expat'

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/node-expat.git



More information about the Pkg-javascript-commits mailing list