r2345 - in branches: . nekohtml nekohtml/upstream nekohtml/upstream/0.9.5 nekohtml/upstream/0.9.5/data nekohtml/upstream/0.9.5/data/html nekohtml/upstream/0.9.5/data/html/canonical nekohtml/upstream/0.9.5/doc nekohtml/upstream/0.9.5/doc/html nekohtml/upstream/0.9.5/src nekohtml/upstream/0.9.5/src/html nekohtml/upstream/0.9.5/src/html/META-INF nekohtml/upstream/0.9.5/src/html/META-INF/services nekohtml/upstream/0.9.5/src/html/org nekohtml/upstream/0.9.5/src/html/org/cyberneko nekohtml/upstream/0.9.5/src/html/org/cyberneko/html nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/parsers nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/res nekohtml/upstream/0.9.5/src/html/sample nekohtml/upstream/0.9.5/src/html/test

Marcus Better marcusb-guest at costa.debian.org
Tue Aug 22 16:00:32 UTC 2006


Author: marcusb-guest
Date: 2006-08-22 16:00:27 +0000 (Tue, 22 Aug 2006)
New Revision: 2345

Added:
   branches/nekohtml/
   branches/nekohtml/upstream/
   branches/nekohtml/upstream/0.9.5/
   branches/nekohtml/upstream/0.9.5/LICENSE
   branches/nekohtml/upstream/0.9.5/LICENSE_apache
   branches/nekohtml/upstream/0.9.5/README_html
   branches/nekohtml/upstream/0.9.5/TODO_html
   branches/nekohtml/upstream/0.9.5/build-html-test.xml
   branches/nekohtml/upstream/0.9.5/build-html.xml
   branches/nekohtml/upstream/0.9.5/build.bat
   branches/nekohtml/upstream/0.9.5/data/
   branches/nekohtml/upstream/0.9.5/data/html/
   branches/nekohtml/upstream/0.9.5/data/html/canonical/
   branches/nekohtml/upstream/0.9.5/data/html/canonical/README
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test00.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test01.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test02.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test03.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test04.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test05.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test06.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test07.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test08.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test09.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test10.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test100.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test101.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test102.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test103.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test11.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test12.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test13.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test14.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test15.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test16.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test17.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test18.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test19.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test20.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test21.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test22.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test23.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test24.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test25.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test26.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test27.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test28.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test29.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test30.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test31.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test32.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test33.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test34.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test35.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test36.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test37.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test38.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test39.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test40.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test41.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test42.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test43.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test44.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test45.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test46.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test47.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test48.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test49.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test50.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test51.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test52.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test53.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test54.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test55.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test56.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test57.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test58.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test59.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test60.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test61.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test62.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test63.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test64.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test65.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test66.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test67.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test68.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test69.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test70.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test71.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test72.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test73.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test74.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test75.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test76.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test77.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test78.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test79.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test80.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test81.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test82.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test83.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test84.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test85.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test86.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test87.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test88.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test89.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test90.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test91.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test92.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test93.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test94.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test95.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test96.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test97.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test98.html
   branches/nekohtml/upstream/0.9.5/data/html/canonical/test99.html
   branches/nekohtml/upstream/0.9.5/data/html/test00.html
   branches/nekohtml/upstream/0.9.5/data/html/test01.html
   branches/nekohtml/upstream/0.9.5/data/html/test02.html
   branches/nekohtml/upstream/0.9.5/data/html/test03.html
   branches/nekohtml/upstream/0.9.5/data/html/test04.html
   branches/nekohtml/upstream/0.9.5/data/html/test05.html
   branches/nekohtml/upstream/0.9.5/data/html/test06.html
   branches/nekohtml/upstream/0.9.5/data/html/test07.html
   branches/nekohtml/upstream/0.9.5/data/html/test08.html
   branches/nekohtml/upstream/0.9.5/data/html/test09.html
   branches/nekohtml/upstream/0.9.5/data/html/test10.html
   branches/nekohtml/upstream/0.9.5/data/html/test100.html
   branches/nekohtml/upstream/0.9.5/data/html/test101.html
   branches/nekohtml/upstream/0.9.5/data/html/test102.html
   branches/nekohtml/upstream/0.9.5/data/html/test102.html.settings
   branches/nekohtml/upstream/0.9.5/data/html/test103.html
   branches/nekohtml/upstream/0.9.5/data/html/test11.html
   branches/nekohtml/upstream/0.9.5/data/html/test12.html
   branches/nekohtml/upstream/0.9.5/data/html/test13.html
   branches/nekohtml/upstream/0.9.5/data/html/test14.html
   branches/nekohtml/upstream/0.9.5/data/html/test15.html
   branches/nekohtml/upstream/0.9.5/data/html/test16.html
   branches/nekohtml/upstream/0.9.5/data/html/test17.html
   branches/nekohtml/upstream/0.9.5/data/html/test18.html
   branches/nekohtml/upstream/0.9.5/data/html/test19.html
   branches/nekohtml/upstream/0.9.5/data/html/test20.html
   branches/nekohtml/upstream/0.9.5/data/html/test21.html
   branches/nekohtml/upstream/0.9.5/data/html/test22.html
   branches/nekohtml/upstream/0.9.5/data/html/test23.html
   branches/nekohtml/upstream/0.9.5/data/html/test24.html
   branches/nekohtml/upstream/0.9.5/data/html/test25.html
   branches/nekohtml/upstream/0.9.5/data/html/test26.html
   branches/nekohtml/upstream/0.9.5/data/html/test27.html
   branches/nekohtml/upstream/0.9.5/data/html/test28.html
   branches/nekohtml/upstream/0.9.5/data/html/test29.html
   branches/nekohtml/upstream/0.9.5/data/html/test30.html
   branches/nekohtml/upstream/0.9.5/data/html/test31.html
   branches/nekohtml/upstream/0.9.5/data/html/test32.html
   branches/nekohtml/upstream/0.9.5/data/html/test33.html
   branches/nekohtml/upstream/0.9.5/data/html/test34.html
   branches/nekohtml/upstream/0.9.5/data/html/test35.html
   branches/nekohtml/upstream/0.9.5/data/html/test36.html
   branches/nekohtml/upstream/0.9.5/data/html/test37.html
   branches/nekohtml/upstream/0.9.5/data/html/test38.html
   branches/nekohtml/upstream/0.9.5/data/html/test39.html
   branches/nekohtml/upstream/0.9.5/data/html/test40.html
   branches/nekohtml/upstream/0.9.5/data/html/test41.html
   branches/nekohtml/upstream/0.9.5/data/html/test42.html
   branches/nekohtml/upstream/0.9.5/data/html/test43.html
   branches/nekohtml/upstream/0.9.5/data/html/test44.html
   branches/nekohtml/upstream/0.9.5/data/html/test45.html
   branches/nekohtml/upstream/0.9.5/data/html/test46.html
   branches/nekohtml/upstream/0.9.5/data/html/test47.html
   branches/nekohtml/upstream/0.9.5/data/html/test48.html
   branches/nekohtml/upstream/0.9.5/data/html/test49.html
   branches/nekohtml/upstream/0.9.5/data/html/test50.html
   branches/nekohtml/upstream/0.9.5/data/html/test51.html
   branches/nekohtml/upstream/0.9.5/data/html/test52.html
   branches/nekohtml/upstream/0.9.5/data/html/test53.html
   branches/nekohtml/upstream/0.9.5/data/html/test54.html
   branches/nekohtml/upstream/0.9.5/data/html/test55.html
   branches/nekohtml/upstream/0.9.5/data/html/test56.html
   branches/nekohtml/upstream/0.9.5/data/html/test57.html
   branches/nekohtml/upstream/0.9.5/data/html/test58.html
   branches/nekohtml/upstream/0.9.5/data/html/test59.html
   branches/nekohtml/upstream/0.9.5/data/html/test60.html
   branches/nekohtml/upstream/0.9.5/data/html/test61.html
   branches/nekohtml/upstream/0.9.5/data/html/test62.html
   branches/nekohtml/upstream/0.9.5/data/html/test63.html
   branches/nekohtml/upstream/0.9.5/data/html/test64.html
   branches/nekohtml/upstream/0.9.5/data/html/test65.html
   branches/nekohtml/upstream/0.9.5/data/html/test66.html
   branches/nekohtml/upstream/0.9.5/data/html/test67.html
   branches/nekohtml/upstream/0.9.5/data/html/test68.html
   branches/nekohtml/upstream/0.9.5/data/html/test69.html
   branches/nekohtml/upstream/0.9.5/data/html/test70.html
   branches/nekohtml/upstream/0.9.5/data/html/test71.html
   branches/nekohtml/upstream/0.9.5/data/html/test72.html
   branches/nekohtml/upstream/0.9.5/data/html/test73.html
   branches/nekohtml/upstream/0.9.5/data/html/test74.html
   branches/nekohtml/upstream/0.9.5/data/html/test75.html
   branches/nekohtml/upstream/0.9.5/data/html/test76.html
   branches/nekohtml/upstream/0.9.5/data/html/test77.html
   branches/nekohtml/upstream/0.9.5/data/html/test78.html
   branches/nekohtml/upstream/0.9.5/data/html/test79.html
   branches/nekohtml/upstream/0.9.5/data/html/test80.html
   branches/nekohtml/upstream/0.9.5/data/html/test81.html
   branches/nekohtml/upstream/0.9.5/data/html/test82.html
   branches/nekohtml/upstream/0.9.5/data/html/test83.html
   branches/nekohtml/upstream/0.9.5/data/html/test84.html
   branches/nekohtml/upstream/0.9.5/data/html/test85.html
   branches/nekohtml/upstream/0.9.5/data/html/test86.html
   branches/nekohtml/upstream/0.9.5/data/html/test87.html
   branches/nekohtml/upstream/0.9.5/data/html/test88.html
   branches/nekohtml/upstream/0.9.5/data/html/test89.html
   branches/nekohtml/upstream/0.9.5/data/html/test90.html
   branches/nekohtml/upstream/0.9.5/data/html/test91.html
   branches/nekohtml/upstream/0.9.5/data/html/test92.html
   branches/nekohtml/upstream/0.9.5/data/html/test93.html
   branches/nekohtml/upstream/0.9.5/data/html/test94.html
   branches/nekohtml/upstream/0.9.5/data/html/test95.html
   branches/nekohtml/upstream/0.9.5/data/html/test95.html.settings
   branches/nekohtml/upstream/0.9.5/data/html/test96.html
   branches/nekohtml/upstream/0.9.5/data/html/test97.html
   branches/nekohtml/upstream/0.9.5/data/html/test97.html.settings
   branches/nekohtml/upstream/0.9.5/data/html/test98.html
   branches/nekohtml/upstream/0.9.5/data/html/test98.html.settings
   branches/nekohtml/upstream/0.9.5/data/html/test99.html
   branches/nekohtml/upstream/0.9.5/doc/
   branches/nekohtml/upstream/0.9.5/doc/html/
   branches/nekohtml/upstream/0.9.5/doc/html/.htaccess
   branches/nekohtml/upstream/0.9.5/doc/html/changes.html
   branches/nekohtml/upstream/0.9.5/doc/html/faq.html
   branches/nekohtml/upstream/0.9.5/doc/html/filters.html
   branches/nekohtml/upstream/0.9.5/doc/html/index.html
   branches/nekohtml/upstream/0.9.5/doc/html/settings.html
   branches/nekohtml/upstream/0.9.5/doc/html/software.html
   branches/nekohtml/upstream/0.9.5/doc/html/usage.html
   branches/nekohtml/upstream/0.9.5/doc/style.css
   branches/nekohtml/upstream/0.9.5/src/
   branches/nekohtml/upstream/0.9.5/src/html/
   branches/nekohtml/upstream/0.9.5/src/html/META-INF/
   branches/nekohtml/upstream/0.9.5/src/html/META-INF/services/
   branches/nekohtml/upstream/0.9.5/src/html/META-INF/services/org.apache.xerces.xni.parser.XMLParserConfiguration
   branches/nekohtml/upstream/0.9.5/src/html/org/
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLAugmentations.java
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLComponent.java
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLConfiguration.java
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLElements.java
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLEntities.java
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLErrorReporter.java
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLEventInfo.java
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLScanner.java
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLTagBalancer.java
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/ObjectFactory.java
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/SecuritySupport.java
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/SecuritySupport12.java
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/DefaultFilter.java
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/ElementRemover.java
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/Identity.java
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/NamespaceBinder.java
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/Purifier.java
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/Writer.java
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/parsers/
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/parsers/DOMFragmentParser.java
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/parsers/DOMParser.java
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/parsers/SAXParser.java
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/res/
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/res/ErrorMessages.properties
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/res/HTMLlat1.properties
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/res/HTMLspecial.properties
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/res/HTMLsymbol.properties
   branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/res/XMLbuiltin.properties
   branches/nekohtml/upstream/0.9.5/src/html/sample/
   branches/nekohtml/upstream/0.9.5/src/html/sample/HTMLSAXParser.java
   branches/nekohtml/upstream/0.9.5/src/html/sample/RemoveElements.java
   branches/nekohtml/upstream/0.9.5/src/html/sample/Script.java
   branches/nekohtml/upstream/0.9.5/src/html/sample/TestHTMLDOM.java
   branches/nekohtml/upstream/0.9.5/src/html/sample/TestHTMLDOMFragment.java
   branches/nekohtml/upstream/0.9.5/src/html/test/
   branches/nekohtml/upstream/0.9.5/src/html/test/Tester.java
   branches/nekohtml/upstream/0.9.5/src/html/test/UTF8BOMSkipper.java
   branches/nekohtml/upstream/0.9.5/src/html/test/Writer.java
Log:
Imported upstream sources.

Added: branches/nekohtml/upstream/0.9.5/LICENSE
===================================================================
--- branches/nekohtml/upstream/0.9.5/LICENSE	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/LICENSE	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,47 @@
+The CyberNeko Software License, Version 1.0
+
+ 
+(C) Copyright 2002-2005, Andy Clark.  All rights reserved.
+ 
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer. 
+
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in
+   the documentation and/or other materials provided with the
+   distribution.
+
+3. The end-user documentation included with the redistribution,
+   if any, must include the following acknowledgment:  
+     "This product includes software developed by Andy Clark."
+   Alternately, this acknowledgment may appear in the software itself,
+   if and wherever such third-party acknowledgments normally appear.
+
+4. The names "CyberNeko" and "NekoHTML" must not be used to endorse
+   or promote products derived from this software without prior 
+   written permission. For written permission, please contact 
+   andyc at cyberneko.net.
+
+5. Products derived from this software may not be called "CyberNeko",
+   nor may "CyberNeko" appear in their name, without prior written
+   permission of the author.
+
+THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, 
+OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT 
+OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 
+EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+====================================================================
+
+This license is based on the Apache Software License, version 1.1.
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/LICENSE_apache
===================================================================
--- branches/nekohtml/upstream/0.9.5/LICENSE_apache	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/LICENSE_apache	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,56 @@
+/*
+ * The Apache Software License, Version 1.1
+ *
+ *
+ * Copyright (c) 1999-2002 The Apache Software Foundation.  All rights 
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer. 
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:  
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Xerces" and "Apache Software Foundation" must
+ *    not be used to endorse or promote products derived from this
+ *    software without prior written permission. For written 
+ *    permission, please contact apache at apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    nor may "Apache" appear in their name, without prior written
+ *    permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation and was
+ * originally based on software copyright (c) 1999, International
+ * Business Machines, Inc., http://www.ibm.com.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */

Added: branches/nekohtml/upstream/0.9.5/README_html
===================================================================
--- branches/nekohtml/upstream/0.9.5/README_html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/README_html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,28 @@
+NekoHTML Readme
+===============
+
+Building
+--------
+
+You can build NekoHTML with the supplied build.bat file by
+typing the following:
+
+  > build -f build-html.xml (target ...)
+
+The default target will build the entire package.
+
+To build NekoHTML, you need Ant and Xerces2. The first time 
+you try to build, it will tell you what is required and where to 
+download the necessary packages.
+
+Documentation
+-------------
+
+The documentation for NekoHTML is located at the following URL:
+
+  doc/html/index.html
+
+Contact Information
+-------------------
+
+Andy Clark <andyc at apache.org>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/TODO_html
===================================================================
--- branches/nekohtml/upstream/0.9.5/TODO_html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/TODO_html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,6 @@
+TODO:
+ * scanner
+   * optimize (more)
+ * tag balancer
+   * optimize (more)
+   * incorporate ideas from JTidy

Added: branches/nekohtml/upstream/0.9.5/build-html-test.xml
===================================================================
--- branches/nekohtml/upstream/0.9.5/build-html-test.xml	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/build-html-test.xml	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,8 @@
+<project default='test'>
+ <taskdef name='tester' classname='test.Tester'/>
+ <target name='test'>
+  <tester canondir='data/html/canonical' outputdir='data/html/output'>
+   <fileset dir='data/html' includes='test*.html'/>
+  </tester>
+ </target>
+</project>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/build-html.xml
===================================================================
--- branches/nekohtml/upstream/0.9.5/build-html.xml	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/build-html.xml	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,186 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<!-- $Id: build-html.xml,v 1.22 2005/05/27 04:03:31 andyc Exp $ -->
+<project default='all' basedir='.'>
+
+ <!-- PROPERTIES -->
+ <property name='version'   value='0.9.5'/>
+ <property name='name'      value='nekohtml'/>
+ <property name='fullname'  value='${name}-${version}'/>
+ <property name='Title'     value='NekoHTML'/>
+ <property name='FullTitle' value='CyberNeko HTML Parser'/>
+ <property name='Name'      value='${Title} ${version}'/>
+ <property name='author'    value='Andy Clark'/>
+ <property name='copyright' value='(C) Copyright 2002-2005, ${author}. All rights reserved.'/>
+ <property name='URL'       value='http://www.apache.org/~andyc/neko/doc/html/index.html'/>
+  
+ <property name='jarfile'        value='${name}.jar'/>
+ <property name='jarfileXni'     value='${name}Xni.jar'/>
+ <property name='jarfileSamples' value='${name}Samples.jar'/>
+ <property name='jarfileTest'    value='${name}Test.jar'/>
+
+ <property name='zipfile' value='${fullname}.zip'/>
+ <property name='tarfile' value='${fullname}.tar'/>
+ <property name='tgzfile' value='${fullname}.tar.gz'/>
+
+ <property name='contents.misc'   value='LICENSE,LICENSE_apache,README_html,TODO_html,build.bat,build-html.xml,build-html-test.xml'/>
+ <property name='contents.jars'   value='${jarfile},${jarfileXni},${jarfileSamples}'/>
+ <property name='contents.source' value='src/html/META-INF/**,src/html/**/*.java,src/html/**/*.properties'/>
+ <property name='contents.docs'   value='doc/style.css,doc/html/**,data/html/test*.html*,data/html/canonical/*'/>
+ <property name='contents.libs'   value='lib/xercesMinimal.jar'/>
+ <property name='contents' 
+           value='${contents.misc},${contents.jars},${contents.source},${contents.docs},${contents.libs}'/>
+
+ <property name='package' value='org.cyberneko.html'/>
+
+ <property name='version.dir'      value='bin/html/src'/>
+ <property name='version.manifest' value='${version.dir}/MANIFEST_html'/>
+ <property name='version.dir.java' value='${version.dir}/org/cyberneko/html'/>
+ <property name='version.java'     value='${version.dir.java}/Version.java'/>
+ 
+ <!-- TARGETS --> 
+ <target name='universe' depends='full,all'/>
+
+ <target name='all' depends='zip,tgz'/>
+
+ <target name='full'>
+  <property name='contents.full' value='lib/**'/>
+ </target>
+
+ <target name='compile' depends='version'>
+  <mkdir dir="bin/html"/>
+  <javac srcdir='src/html' destdir='bin/html' includes='org/**,sample/**' debug='true'/>
+  <javac srcdir='bin/html/src' destdir='bin/html' includes='org/**'/>
+ </target>
+
+ <target name='jar' depends='compile'>
+  <copy todir='bin/html'>
+   <fileset dir='.' includes='LICENSE'/>
+   <fileset dir='src/html' includes='**/*.properties'/>
+  </copy>
+  <jar jarfile='${jarfile}' basedir='bin/html' 
+       manifest='${version.manifest}'
+       includes='LICENSE,org/**/*.class,org/**/*.properties'/>
+  <jar jarfile='${jarfileSamples}' basedir='bin/html'
+       includes='LICENSE,sample/**'/>
+ </target> 
+
+ <target name='jar-xni'>
+  <mkdir dir="bin/html"/>
+  <copy todir='bin/html'>
+   <fileset dir='.' includes='LICENSE'/>
+   <fileset dir='src/html' includes='META-INF/services/**'/>
+  </copy>
+  <jar jarfile='${jarfileXni}' basedir='bin/html'
+       includes='LICENSE,META-INF/services/**'/>
+ </target>
+
+ <target name='package' depends='jar,doc'>
+  <mkdir dir='bin/package/${fullname}'/>
+  <copy todir='bin/package/${fullname}'>
+   <fileset dir='.' includes='${contents},${contents.full}'/>
+   <fileset dir='bin' includes='${contents.jars}'/>
+  </copy>
+ </target>
+
+ <target name='package-nodir'>
+  <mkdir dir='bin/package-${name}'/>
+  <copy todir='bin/package-${name}'>
+   <fileset dir='bin/package/${fullname}' includes='**'/>
+  </copy>
+ </target>
+
+ <target name='zip' depends='package'>
+  <zip zipfile='${zipfile}' basedir='bin/package' includes='${fullname}/**'/>
+ </target>
+
+ <target name='tgz' depends='package'>
+  <tar tarfile='${tarfile}' basedir='bin/package' includes='${fullname}/**'/>
+  <gzip zipfile='${tgzfile}' src='${tarfile}'/>
+  <delete file='${tarfile}'/>
+ </target>
+
+ <target name='doc' unless='docs.done'>
+  <delete dir='doc/html/javadoc'/>
+  <mkdir dir='doc/html/javadoc'/>
+  <javadoc packagenames='${package},${package}.parsers,${package}.filters'
+           sourcepath='src/html' destdir='doc/html/javadoc'
+           author='true' version='true' use='true'
+           windowtitle="${Name} Implementation"
+           doctitle="${Name}"
+           bottom="${copyright}" 
+           />
+  <property name='docs.done' value='true'/>
+ </target>
+
+ <target name='version-init'>
+  <mkdir dir='${version.dir.java}/'/>
+  <dependset>
+   <srcfilelist dir='.' files='build-html.xml'/>
+   <targetfilelist dir='.' files='${version.manifest},${version.java}'/>
+  </dependset>
+  <available property='available.version' file='${version.java}'/>
+ </target>
+
+ <target name='version' depends='version-init' unless='available.version'>
+  <echo message='Generating ${version.java}'/>
+  <echo file='${version.java}'>/* ${copyright} */
+
+package org.cyberneko.html;
+
+/**
+ * This class holds version information for the ${FullTitle}.
+ *
+ * @author ${author}
+ */
+public class Version {
+
+    /** Returns the version string. */
+    public static String getVersion() { return "${Name}"; }
+
+    /** Prints the version string to standard output. */
+    public static void main(String[] argv) {
+        System.out.println(getVersion());
+    } // main(String[])
+
+} // class Version</echo>
+  <echo message='Generating ${version.manifest}'/>
+  <echo file='${version.manifest}'>
+Name: org/cyberneko/html/
+Implementation-Title: ${FullTitle}
+Implementation-Version: ${version}
+Implementation-Vendor: ${author}
+Implementation-URL: ${URL}
+Specification-Title: Hyper-Text Markup Language (HTML)
+Specification-Vendor: World Wide Web Consortium (W3C)
+Specification-Version: 4.01
+
+</echo>
+ </target>
+ 
+ <target name='test' depends='jar'>
+  <javac srcdir='src/html' destdir='bin/html' includes='test/**'/>
+  <jar jarfile='${jarfileTest}' basedir='bin/html' 
+       includes='test/**/*.class,test/**/*.properties'/>
+  <mkdir dir='data/html/output'/>
+  <java classname='org.apache.tools.ant.Main' fork='true' failonerror='true'>
+   <classpath>
+    <pathelement path='${java.class.path}'/>
+    <pathelement location='${jarfile}'/>
+    <pathelement location='${jarfileTest}'/>
+   </classpath>
+   <arg value='-f'/>
+   <arg value='build-html-test.xml'/>
+  </java>
+ </target>
+
+ <target name='clean'>
+  <delete dir='bin/html' quiet='true'/>
+  <delete dir='doc/html/javadoc' quiet='true'/>
+  <delete quiet='true'>
+   <fileset dir='.' includes='${name}*.jar,${name}*.zip,${name}*.tar.gz'/>
+  </delete>
+  <delete dir='bin/package' quiet='true'/>
+  <delete dir='bin/package-${name}' quiet='true'/>
+ </target>
+
+</project>

Added: branches/nekohtml/upstream/0.9.5/build.bat
===================================================================
--- branches/nekohtml/upstream/0.9.5/build.bat	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/build.bat	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,40 @@
+@echo off
+if "%JAVA_HOME%" == "" goto error
+if not exist lib\xalan.jar goto requirements
+set LOCALCLASSPATH=%JAVA_HOME%\lib\tools.jar;%JAVA_HOME%\jre\lib\rt.jar;lib\xml-apis.jar;lib\xalan.jar;lib\xercesImpl.jar;lib\ant.jar;lib\ant-launcher.jar;lib\jing.jar;lib\junit.jar
+"%JAVA_HOME%\bin\java" -classpath "%LOCALCLASSPATH%" org.apache.tools.ant.Main %1 %2 %3 %4 %5
+goto end
+:error
+echo error: JAVA_HOME not found in your environment.
+goto end
+:requirements
+echo error: Missing required jar files.
+echo.
+echo The Ant tool is required. Download Ant from the following URL
+echo http://jakarta.apache.org/ant/index.html and place the ant.jar
+echo file in the lib/ directory.
+echo.
+echo Please download Xalan2 from http://xml.apache.org/dist/xalan-j/
+echo and place the following files in the lib/ directory:
+echo.
+echo    xml-apis.jar
+echo    xalan.jar
+echo    xercesImpl.jar
+echo.
+echo Please download Xerces2 from http://xml.apache.org/dist/xerces-j/
+echo and place the following files in the lib/ directory:
+echo.
+echo    xercesSamples.jar
+echo.
+echo If building ManekiNeko, James Clark's Jing Relax NG validator 
+echo is also required. Please download the Jar file distribution 
+echo from http://www.thaiopensource.com/relaxng/jing.html and place
+echo the following file in the lib/ directory:
+echo.
+echo    jing.jar
+echo.
+if not exist lib md lib
+goto end
+:end
+set LOCALCLASSPATH=
+@echo on

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/README
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/README	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/README	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,14 @@
+The "canonical" output for the files in this directory is in
+a modified NSGMLS format, as described below. Each piece
+of information is conveyed on a separate line, encoded in
+UTF-8.
+
+  startElement ::= '(' name
+  attribute ::= 'A' name ' ' value
+  endElement ::= ')' name
+  characters ::= '"' text
+  comment ::= '#' text
+
+  text ::= Unicode chars, with tab, carriage return, and
+           newline escaped as \t, \r, and \n, respectively.
+  
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test00.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test00.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test00.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,2 @@
+(HTML
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test01.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test01.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test01.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,5 @@
+(HTML
+(BODY
+"Just text.
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test02.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test02.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test02.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,7 @@
+(HTML
+(BODY
+(H1
+"Header
+)H1
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test03.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test03.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test03.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,7 @@
+(HTML
+(BODY
+(P
+"Paragraph text.
+)P
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test04.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test04.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test04.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,10 @@
+(HTML
+(BODY
+(P
+"Paragraph text.\n
+)P
+(P
+"Additional text.
+)P
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test05.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test05.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test05.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,8 @@
+(HTML
+(HEAD
+(SCRIPT
+Atype text/javascript
+"\n This is a <b>test</b>. Don't go crazy! </i>\n
+)SCRIPT
+)HEAD
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test06.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test06.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test06.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,17 @@
+(HTML
+(BODY
+"This 
+(I
+"is 
+(B
+Aclass test
+"unbalanced
+)B
+)I
+(B
+Aclass test
+" content
+)B
+", dude!
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test07.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test07.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test07.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,15 @@
+(HTML
+(BODY
+(TABLE
+(TR
+(TD
+Anowrap 
+)TD
+(TD
+Aalign middle
+Anowrap 
+)TD
+)TR
+)TABLE
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test08.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test08.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test08.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,7 @@
+(HTML
+(HEAD
+(FOOBAR
+"Text
+)FOOBAR
+)HEAD
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test09.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test09.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test09.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,14 @@
+(HTML
+(HEAD
+(META
+Acontent text/html; charset=utf-8
+Ahttp-equiv content-type
+)META
+"\n
+)HEAD
+(BODY
+(H1
+"アニメ
+)H1
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test10.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test10.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test10.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,14 @@
+(HTML
+(HEAD
+(META
+Acontent text/html; charset=utf-16
+Ahttp-equiv content-type
+)META
+"\n
+)HEAD
+(BODY
+(H1
+"アニメ(LE)
+)H1
+)BODY
+)HTML

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test100.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test100.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test100.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,14 @@
+(HTML
+(BODY
+(P
+"\n
+(A
+Ahref link.htm
+(H3
+"Header
+)H3
+)A
+"\n
+)P
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test101.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test101.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test101.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,4 @@
+?base http://foo.bar
+?tags :noads:
+(HTML
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test102.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test102.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test102.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,12 @@
+(HTML
+(HEAD
+(META
+Acontent text/html;charset=iso-8859-1
+Ahttp-equiv content-type
+)META
+"\n
+)HEAD
+(BODY
+"—
+)BODY
+)HTML

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test103.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test103.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test103.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,3 @@
+(HTML
+A{http://www.w3.org/2000/xmlns/}xmlns:A NSa
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test11.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test11.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test11.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,14 @@
+(HTML
+(HEAD
+(META
+Acontent text/html; charset=utf-16
+Ahttp-equiv content-type
+)META
+"\n
+)HEAD
+(BODY
+(H1
+"アニメ(BE)
+)H1
+)BODY
+)HTML

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test12.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test12.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test12.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,37 @@
+(HTML
+(BODY
+(TABLE
+"\n 
+(TR
+"\n  
+(TD
+"\n   
+(TABLE
+"\n    
+(TR
+"\n     
+(TD
+"cell 1
+)TD
+"\n     
+(TD
+"\n   
+)TD
+)TR
+)TABLE
+"\n  
+)TD
+"\n 
+)TR
+"\n 
+(TR
+"\n  
+(TD
+"cell 2
+)TD
+"\n 
+)TR
+"\n
+)TABLE
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test13.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test13.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test13.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,23 @@
+(HTML
+(BODY
+(UL
+"\n 
+(LI
+"One\n 
+)LI
+(LI
+"Two\n 
+(UL
+"\n  
+(LI
+"Two.One\n 
+)LI
+)UL
+"\n 
+)LI
+(LI
+"Three\n
+)LI
+)UL
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test14.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test14.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test14.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,30 @@
+(HTML
+(BODY
+(TABLE
+"\n 
+(TR
+"\n  
+(TD
+"\n   
+(OBJECT
+"\n    
+(PARAM
+)PARAM
+"\n    
+(EMBED
+"\n    
+)EMBED
+"\n    
+(NOEMBED
+"\n    
+)NOEMBED
+"\n   
+)OBJECT
+"\n  
+)TD
+"\n 
+)TR
+"\n
+)TABLE
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test15.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test15.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test15.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,8 @@
+(HTML
+(BODY
+(A
+Ahref http://example.com/cgi-bin/redirect?s=www.candy.com&u=Andy
+"M & Ms
+)A
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test16.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test16.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test16.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,5 @@
+(HTML
+(BODY
+"&unknown1; & &unknown2;
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test17.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test17.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test17.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,14 @@
+(HTML
+(BODY
+(TABLE
+"\n
+(TR
+(TD
+(INPUT
+Atype text
+)INPUT
+)TD
+)TR
+)TABLE
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test18.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test18.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test18.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,8 @@
+(HTML
+(BODY
+(A
+Ahref /path/
+"blah
+)A
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test19.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test19.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test19.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,7 @@
+(HTML
+(BODY
+(IMG
+Asrc me.gif
+)IMG
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test20.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test20.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test20.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,8 @@
+(HTML
+(BODY
+(SPAN
+Aclass note
+"Look Out!
+)SPAN
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test21.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test21.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test21.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,5 @@
+(HTML
+(BODY
+"M & M
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test22.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test22.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test22.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,5 @@
+(HTML
+(BODY
+"&foo;
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test23.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test23.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test23.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,5 @@
+(HTML
+(BODY
+"<
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test24.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test24.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test24.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,6 @@
+(HTML
+(BODY
+(A
+)A
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test25.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test25.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test25.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,2 @@
+(HTML
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test26.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test26.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test26.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,2 @@
+(HTML
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test27.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test27.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test27.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,2 @@
+(HTML
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test28.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test28.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test28.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,5 @@
+(HTML
+(BODY
+"< =
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test29.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test29.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test29.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,5 @@
+(HTML
+(BODY
+"&#foo;
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test30.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test30.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test30.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,8 @@
+(HTML
+(BODY
+(A
+Ahref /cgi-bin/myscript
+"happy
+)A
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test31.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test31.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test31.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,8 @@
+(HTML
+(BODY
+(A
+Ahref /broken/
+"Too Much to Ask
+)A
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test32.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test32.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test32.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,9 @@
+(HTML
+(BODY
+(IMG
+Aalt this ain't a real quote
+Asrc aint.gif
+)IMG
+"\n"this & that" â„¢\n  -- Andy <andyc@apache.org>
+)BODY
+)HTML

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test33.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test33.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test33.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,8 @@
+(HTML
+(HEAD
+(SCRIPT
+Atype text/x-nekoscript
+"\n(h1\n"Header\n)h1\n
+)SCRIPT
+)HEAD
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test34.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test34.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test34.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,12 @@
+(HTML
+(BODY
+(LI
+"Item1
+(UL
+(LI
+"Item2
+)LI
+)UL
+)LI
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test35.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test35.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test35.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,31 @@
+(HTML
+(BODY
+(TABLE
+"\n  
+(TR
+"\n    
+(TH
+"foo\n    
+)TH
+(TH
+"bar\n    
+)TH
+(TH
+"baz\n  
+)TH
+)TR
+(TR
+"\n    
+(TD
+"foo\n    
+)TD
+(TD
+"bar\n    
+)TD
+(TD
+"baz\n
+)TD
+)TR
+)TABLE
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test36.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test36.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test36.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,13 @@
+(HTML
+(BODY
+(A
+Aname foo
+)A
+(P
+(A
+Aname foo
+"Blah
+)A
+)P
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test37.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test37.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test37.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,23 @@
+(HTML
+(BODY
+(TABLE
+"\n  
+(TR
+"\n    
+(TD
+"Alpha\n
+)TD
+)TR
+)TABLE
+"\n
+(TABLE
+"\n  
+(TR
+"\n    
+(TD
+"Beta\n
+)TD
+)TR
+)TABLE
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test38.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test38.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test38.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,27 @@
+(HTML
+"\n
+(BODY
+"\n  
+(P
+"Here we go! 
+(A
+Ahref http://bigidea.com/
+"Bob
+)A
+" 
+(BR
+)BR
+"\n  and 
+(A
+Ahref http://larryboy.com/
+" Larry 
+)A
+"\n  and friends 
+(A
+Ahref http://google.com/
+"Google
+)A
+"\n
+)P
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test39.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test39.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test39.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,15 @@
+(HTML
+(BODY
+(H1
+"Header1
+)H1
+"\n
+(SCRIPT
+"\ndocument.write('ABC');\n<!--\ndocument.write("<script>document.write('Hello, World')
+)SCRIPT
+"");\n//-->\ndocument.write('XYZ');\n\n
+(H2
+"Header2
+)H2
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test40.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test40.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test40.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,26 @@
+(HTML
+(BODY
+(TABLE
+"\n 
+(COL
+)COL
+"\n 
+(COL
+Astyle text-align: right
+)COL
+"\n 
+(TR
+"\n  
+(TD
+"This
+)TD
+"\n  
+(TD
+"That
+)TD
+"\n 
+)TR
+"\n
+)TABLE
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test41.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test41.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test41.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,18 @@
+(HTML
+(HEAD
+(TITLE
+"foo
+)TITLE
+)HEAD
+(BODY
+"\n
+(FORM
+Aid form1
+"\n
+(ISINDEX
+Aprompt enterSomeText
+)ISINDEX
+"\n
+)FORM
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test42.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test42.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test42.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,9 @@
+(HTML
+(HEAD
+(SCRIPT
+)SCRIPT
+(TITLE
+"Title
+)TITLE
+)HEAD
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test43.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test43.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test43.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,17 @@
+(HTML
+"\n
+(BODY
+"\n
+(P
+"\n
+(BUTTON
+" a button 
+)BUTTON
+" 
+(BR
+)BR
+"\n
+)P
+"\n
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test44.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test44.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test44.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,3 @@
+?target
+(HTML
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test45.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test45.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test45.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,3 @@
+?target data
+(HTML
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test46.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test46.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test46.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,3 @@
+?target data\t
+(HTML
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test47.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test47.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test47.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,3 @@
+?php print "Hello, World.\\n";\n
+(HTML
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test48.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test48.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test48.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,7 @@
+(HTML
+(BODY
+(ISINDEX
+Aprompt enterSomeText
+)ISINDEX
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test49.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test49.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test49.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,3 @@
+!
+(HTML
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test50.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test50.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test50.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,13 @@
+(HTML
+(BODY
+(A
+Ahref foo
+)A
+(P
+(A
+Ahref foo
+"Blah
+)A
+)P
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test51.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test51.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test51.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,16 @@
+(HTML
+(BODY
+(UL
+"\n
+(LI
+"Item 1\n 
+(P
+"Paragraph\n
+)P
+)LI
+(LI
+"Item 2\n
+)LI
+)UL
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test52.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test52.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test52.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,2 @@
+(HTML
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test53.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test53.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test53.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,19 @@
+(HTML
+"\n
+(BODY
+"\n
+(FORM
+"\n
+(SPAN
+Aid span1
+(SPAN
+Aid span2
+(SELECT
+)SELECT
+)SPAN
+)SPAN
+"\n
+)FORM
+"\n
+)BODY
+)HTML

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test54.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test54.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test54.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,10 @@
+(HTML
+(BODY
+(P
+(FORM
+(P
+)P
+)FORM
+)P
+)BODY
+)HTML

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test55.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test55.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test55.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,8 @@
+(HTML
+(BODY
+(FONT
+(SELECT
+)SELECT
+)FONT
+)BODY
+)HTML

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test56.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test56.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test56.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,12 @@
+(HTML
+(BODY
+(FORM
+(CENTER
+(SELECT
+(OPTION
+)OPTION
+)SELECT
+)CENTER
+)FORM
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test57.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test57.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test57.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,5 @@
+(HTML
+(BODY
+"Outside content
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test58.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test58.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test58.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,7 @@
+(HTML
+(BODY
+(TEXTAREA
+"&
+)TEXTAREA
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test59.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test59.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test59.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,7 @@
+(HTML
+(HEAD
+(SCRIPT
+"&
+)SCRIPT
+)HEAD
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test60.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test60.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test60.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,10 @@
+(HTML
+(BODY
+(A
+Ahref foo
+(FONT
+"text
+)FONT
+)A
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test61.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test61.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test61.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,23 @@
+(HTML
+"\n
+(HEAD
+(META
+Acontent no-cache
+Ahttp-equiv Pragma
+)META
+"\n\n
+(TITLE
+"Title
+)TITLE
+"\n
+(META
+Acontent text/html; charset=iso-8859-1
+Ahttp-equiv Content-Type
+)META
+"\n
+)HEAD
+"\n
+(BODY
+"\n\n
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test62.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test62.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test62.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,12 @@
+(HTML
+(HEAD
+"\n
+)HEAD
+(BODY
+(SPAN
+Aid cc
+Astyle behavior:url(#default#clientCaps)
+)SPAN
+"\n
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test63.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test63.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test63.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,16 @@
+(HTML
+(BODY
+(TABLE
+(TR
+(TD
+(TABLE
+(TR
+(TD
+)TD
+)TR
+)TABLE
+)TD
+)TR
+)TABLE
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test64.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test64.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test64.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,5 @@
+(HTML
+(BODY
+Abgcolor white
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test65.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test65.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test65.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,3 @@
+!
+(HTML
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test66.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test66.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test66.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,3 @@
+!HTML
+(HTML
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test67.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test67.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test67.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,4 @@
+!HTML
+ppublic_id
+(HTML
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test68.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test68.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test68.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,5 @@
+!HTML
+ppublic_id
+ssystem_id
+(HTML
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test69.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test69.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test69.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,5 @@
+!HTML
+ppublic_id
+ssystem_id
+(HTML
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test70.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test70.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test70.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,4 @@
+!HTML
+ssystem_id
+(HTML
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test71.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test71.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test71.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,3 @@
+!ROOT
+(HTML
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test72.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test72.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test72.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,7 @@
+(HTML
+(BODY
+(A
+Ahref a&b
+)A
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test73.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test73.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test73.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,11 @@
+(HTML
+(HEAD
+(SCRIPT
+)SCRIPT
+)HEAD
+(BODY
+(H1
+"Title
+)H1
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test74.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test74.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test74.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,15 @@
+!HTML
+p-//W3C//DTD HTML 4.01 Transitional//EN
+shttp://www.w3.o$ 
+(HTML
+(HEAD
+(TITLE
+"Title
+)TITLE
+)HEAD
+(BODY
+(H1
+"Header1
+)H1
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test75.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test75.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test75.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,9 @@
+(HTML
+(BODY
+(P
+?xml:namespace prefix = o ns = "urn:schemas-microsoft-com:office:office" 
+(IMG
+)IMG
+)P
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test76.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test76.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test76.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,17 @@
+(HTML
+(BODY
+(P
+"outer paragraph\n
+)P
+(P
+"inner paragraph
+(BR
+)BR
+"second line in inner paragraph\n
+)P
+"second line in outer paragrapth\n
+(P
+)P
+"outside paragraph tags
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test77.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test77.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test77.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,4 @@
+(HTML
+(FRAMESET
+)FRAMESET
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test78.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test78.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test78.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,3 @@
+#\na\nb\nc\n
+(HTML
+)HTML

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test79.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test79.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test79.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,3 @@
+(HTML
+Aa12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567
8901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890 
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test80.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test80.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test80.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,12 @@
+(HTML
+(BODY
+(P
+"P1
+#[CDATA[<h1>Header</h1>]]
+"\n
+)P
+(P
+"P2
+)P
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test81.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test81.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test81.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,12 @@
+#one\ntwo\n\nthree\n\n\n
+?target one\ntwo\n\nthree\n\n\n
+(HTML
+Aattr one two  three   
+"\n
+(BODY
+"\n[one\ntwo\n\nthree\n\n\n]\n
+(TEXTAREA
+"one\ntwo\n\nthree\n\n\n
+)TEXTAREA
+)BODY
+)HTML

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test82.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test82.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test82.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,5 @@
+(HTML
+(H:BODY
+A{http://www.w3.org/2000/xmlns/}xmlns:H http://www.w3.org/1999/xhtml
+)H:BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test83.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test83.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test83.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,22 @@
+(HTML
+(BODY
+(TABLE
+(TR
+(TD
+(TABLE
+(TR
+(TD
+(TABLE
+(TR
+(TD
+)TD
+)TR
+)TABLE
+)TD
+)TR
+)TABLE
+)TD
+)TR
+)TABLE
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test84.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test84.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test84.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,28 @@
+(HTML
+A{http://www.w3.org/2000/xmlns/}xmlns:A NSa
+A{http://www.w3.org/2000/xmlns/}xmlns:B NSb
+A{http://www.w3.org/2000/xmlns/}xmlns:C NSc
+A{http://www.w3.org/2000/xmlns/}xmlns:D NSd
+A{http://www.w3.org/2000/xmlns/}xmlns:E NSe
+A{http://www.w3.org/2000/xmlns/}xmlns:F NSf
+A{http://www.w3.org/2000/xmlns/}xmlns:G NSg
+A{http://www.w3.org/2000/xmlns/}xmlns:H NSh
+A{http://www.w3.org/2000/xmlns/}xmlns:I NSi
+A{http://www.w3.org/2000/xmlns/}xmlns:J NSj
+A{http://www.w3.org/2000/xmlns/}xmlns:K NSk
+A{http://www.w3.org/2000/xmlns/}xmlns:L NSl
+A{http://www.w3.org/2000/xmlns/}xmlns:M NSm
+A{http://www.w3.org/2000/xmlns/}xmlns:N NSn
+A{http://www.w3.org/2000/xmlns/}xmlns:O NSo
+A{http://www.w3.org/2000/xmlns/}xmlns:P NSp
+A{http://www.w3.org/2000/xmlns/}xmlns:Q NSq
+A{http://www.w3.org/2000/xmlns/}xmlns:R NSr
+A{http://www.w3.org/2000/xmlns/}xmlns:S NSs
+A{http://www.w3.org/2000/xmlns/}xmlns:T NSt
+A{http://www.w3.org/2000/xmlns/}xmlns:U NSu
+A{http://www.w3.org/2000/xmlns/}xmlns:V NSv
+A{http://www.w3.org/2000/xmlns/}xmlns:W NSw
+A{http://www.w3.org/2000/xmlns/}xmlns:X NSx
+A{http://www.w3.org/2000/xmlns/}xmlns:Y NSy
+A{http://www.w3.org/2000/xmlns/}xmlns:Z NSz
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test85.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test85.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test85.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,5 @@
+(HTML
+(BODY
+"&amp
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test86.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test86.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test86.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,5 @@
+(HTML
+(BODY
+"&#x
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test87.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test87.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test87.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,7 @@
+!HTML
+p-//W3C//DTD HTML 4.0 Transitional//EN
+(HTML
+(BODY
+"Hello
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test88.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test88.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test88.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,8 @@
+(HTML
+(BODY
+"Hello
+(P
+"World
+)P
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test89.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test89.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test89.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,5 @@
+(HTML
+(BODY
+"&
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test90.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test90.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test90.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,3 @@
+xversion 1.0
+(HTML
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test91.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test91.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test91.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,6 @@
+(HTML
+(BODY
+(P
+)P
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test92.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test92.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test92.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,11 @@
+(HTML
+(HEAD
+(SCRIPT
+"document.write("</SCRIPT\\>");
+)SCRIPT
+)HEAD
+(BODY
+(P
+)P
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test93.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test93.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test93.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,12 @@
+(HTML
+(BODY
+(TABLE
+(FORM
+(TR
+(TD
+)TD
+)TR
+)FORM
+)TABLE
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test94.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test94.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test94.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,13 @@
+(HTML
+(HEAD
+(SCRIPT
+"\n<!--\nhtml script content\n//-->\n
+)SCRIPT
+"\n
+)HEAD
+(BODY
+(H1
+"Foo
+)H1
+)BODY
+)HTML

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test95.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test95.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test95.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,13 @@
+(HTML
+(HEAD
+(SCRIPT
+"\nhtml script content\n//\n
+)SCRIPT
+"\n
+)HEAD
+(BODY
+(H1
+"Foo
+)H1
+)BODY
+)HTML

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test96.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test96.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test96.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,13 @@
+(HTML
+(HEAD
+(SCRIPT
+"\n<![CDATA[\nxhtml script content\n]]>\n
+)SCRIPT
+"\n
+)HEAD
+(BODY
+(H1
+"Foo
+)H1
+)BODY
+)HTML

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test97.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test97.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test97.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,13 @@
+(HTML
+(HEAD
+(SCRIPT
+"\nxhtml script content\n\n
+)SCRIPT
+"\n
+)HEAD
+(BODY
+(H1
+"Foo
+)H1
+)BODY
+)HTML

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test98.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test98.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test98.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,2 @@
+(HTML
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/canonical/test99.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/canonical/test99.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/canonical/test99.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,16 @@
+(HTML
+A{http://www.w3.org/2000/xmlns/}xmlns:IE 
+(HEAD
+(MAINA6
+(META
+Acontent text/html; charset=ISO-8859-1
+Ahttp-equiv Content-Type
+)META
+)MAINA6
+)HEAD
+(BODY
+(H1
+"Foo
+)H1
+)BODY
+)HTML
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test00.html
===================================================================

Added: branches/nekohtml/upstream/0.9.5/data/html/test01.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test01.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test01.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+Just text.
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test02.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test02.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test02.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<h1>Header</h1>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test03.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test03.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test03.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<p>Paragraph text.
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test04.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test04.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test04.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,2 @@
+<p>Paragraph text.
+<p>Additional text.
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test05.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test05.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test05.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,3 @@
+<script type=text/javascript>
+ This is a <b>test</b>. Don't go crazy! </i>
+</script>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test06.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test06.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test06.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+This <i>is <b class=test>unbalanced</i> content</b>, dude!
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test07.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test07.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test07.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<td nowrap><td nowrap align=middle>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test08.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test08.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test08.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<foobar>Text</foobar>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test09.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test09.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test09.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,2 @@
+<meta http-equiv='content-type' content='text/html; charset=utf-8'>
+<h1>アニメ</h1>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test10.html
===================================================================
(Binary files differ)


Property changes on: branches/nekohtml/upstream/0.9.5/data/html/test10.html
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: branches/nekohtml/upstream/0.9.5/data/html/test100.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test100.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test100.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,3 @@
+<p>
+<a href="link.htm"><h3>Header</h3></a>
+</p>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test101.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test101.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test101.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,2 @@
+<?base http://foo.bar/>
+<?tags :noads:?>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test102.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test102.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test102.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,2 @@
+<meta http-equiv='content-type' content='text/html;charset=iso-8859-1'>
+&#151;
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test102.html.settings
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test102.html.settings	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test102.html.settings	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+feature	http://cyberneko.org/html/features/scanner/fix-mswindows-refs	true
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test103.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test103.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test103.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<html xmlns:a='NSa'>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test11.html
===================================================================
(Binary files differ)


Property changes on: branches/nekohtml/upstream/0.9.5/data/html/test11.html
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: branches/nekohtml/upstream/0.9.5/data/html/test12.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test12.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test12.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,14 @@
+<table>
+ <tr>
+  <td>
+   <table>
+    <tr>
+     <td>cell 1</td>
+     <td>
+   </table>
+  </td>
+ </tr>
+ <tr>
+  <td>cell 2</td>
+ </tr>
+</table>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test13.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test13.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test13.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,8 @@
+<ul>
+ <li>One
+ <li>Two
+ <ul>
+  <li>Two.One
+ </ul>
+ <li>Three
+</ul>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test14.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test14.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test14.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,13 @@
+<table>
+ <tr>
+  <td>
+   <object>
+    <param>
+    <embed>
+    </embed>
+    <noembed>
+    </noembed>
+   </object>
+  </td>
+ </tr>
+</table>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test15.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test15.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test15.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<a href='http://example.com/cgi-bin/redirect?s=www.candy.com&u=Andy'>M & Ms</a>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test16.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test16.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test16.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+&unknown1;&#32;&amp;&#x20;&unknown2;
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test17.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test17.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test17.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,2 @@
+<table>
+<tr><td><input type=text>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test18.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test18.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test18.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<a href=/path/>blah</a>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test19.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test19.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test19.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<Img Src='me.gif'>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test20.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test20.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test20.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<Span CLaSS='note'>Look Out!</spaN>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test21.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test21.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test21.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+M & M
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test22.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test22.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test22.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+&foo;
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test23.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test23.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test23.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test24.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test24.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test24.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<A =
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test25.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test25.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test25.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<A href=
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test26.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test26.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test26.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<A href='index.html
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test27.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test27.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test27.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<A href='index.html'
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test28.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test28.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test28.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+< =
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test29.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test29.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test29.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+&#foo;
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test30.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test30.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test30.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<a href=/cgi-bin/myscript>happy</a>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test31.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test31.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test31.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<a href=/broken/>Too Much to Ask</a>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test32.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test32.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test32.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,3 @@
+<img alt='this ain&apos;t a real quote' src='aint.gif'>
+&quot;this&#32;&amp;&#x20;that" &trade;
+  -- Andy &lt;andyc at apache.org&gt;
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test33.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test33.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test33.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,5 @@
+<script type='text/x-nekoscript'>
+(h1
+"Header
+)h1
+</script>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test34.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test34.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test34.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<li>Item1<ul></li><li>Item2
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test35.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test35.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test35.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,10 @@
+<table>
+  <tr>
+    <th>foo
+    <th>bar
+    <th>baz
+  <tr>
+    <td>foo
+    <td>bar
+    <td>baz
+</table>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test36.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test36.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test36.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<a name=foo><p>Blah</p>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test37.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test37.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test37.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,8 @@
+<table>
+  <tr>
+    <td>Alpha
+</table>
+<table>
+  <tr>
+    <td>Beta
+</table>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test38.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test38.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test38.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,6 @@
+<html>
+<body>
+  <p>Here we go! <a href="http://bigidea.com/">Bob</a> <br/.</p>
+  and <a href="http://larryboy.com/"> Larry </a>
+  and friends <a href="http://google.com/">Google</a>
+</body></html>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test39.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test39.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test39.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,9 @@
+<h1>Header1</h1>
+<script>
+document.write('ABC');
+<!--
+document.write("<script>document.write('Hello, World')</script>");
+//-->
+document.write('XYZ');
+</script>
+<h2>Header2</h2>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test40.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test40.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test40.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,8 @@
+<table>
+ <col>
+ <col style="text-align: right">
+ <tr>
+  <td>This</td>
+  <td>That</td>
+ </tr>
+</table>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test41.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test41.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test41.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,4 @@
+<html><head><title>foo</title></head><body>
+<form id='form1'>
+<isindex prompt='enterSomeText'></isindex>
+</form></body></html>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test42.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test42.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test42.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<head><script/><title>Title</title>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test43.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test43.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test43.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,6 @@
+<html>
+<body>
+<p>
+<button> a button </button> <br>
+</p>
+</body></html>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test44.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test44.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test44.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<?target?>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test45.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test45.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test45.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<?target data?>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test46.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test46.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test46.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<?target	data	?>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test47.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test47.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test47.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,3 @@
+<?php
+print "Hello, World.\n";
+?>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test48.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test48.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test48.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<body><isindex prompt='enterSomeText'>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test49.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test49.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test49.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,22 @@
+<!DOCTYPE [
+0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+////////////////////////////////////////////////////////////////////////////////////////////////////
+]>

Added: branches/nekohtml/upstream/0.9.5/data/html/test50.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test50.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test50.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<a href=foo><p>Blah</p>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test51.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test51.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test51.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,5 @@
+<ul>
+<li>Item 1
+ <p>Paragraph
+<li>Item 2
+</ul>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test52.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test52.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test52.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<html></html
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test53.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test53.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test53.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,6 @@
+<html>
+<body>
+<form>
+<span id=span1><span id=span2><select></select></span></span>
+</form>
+</body></html>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test54.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test54.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test54.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<p><form><p>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test55.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test55.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test55.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<font><select></select></font>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test56.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test56.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test56.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<form><center><select><option>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test57.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test57.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test57.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<html><body></body></html>Outside content
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test58.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test58.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test58.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<textarea>&amp;</textarea>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test59.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test59.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test59.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<script>&</script>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test60.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test60.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test60.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<a href='foo'><font>text</font></a>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test61.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test61.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test61.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,10 @@
+<html>
+<meta http-equiv="Pragma" content="no-cache">
+<head>
+<title>Title</title>
+<meta http-equiv="Content-Type" content="text/html;
+charset=iso-8859-1">
+</head>
+<body>
+</body>
+</html>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test62.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test62.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test62.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,3 @@
+<html><head>
+<span style="behavior:url(#default#clientCaps)" id=cc></span></head>
+<body>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test63.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test63.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test63.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<table><tr><td><table><td>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test64.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test64.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test64.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<body bgcolor='white'>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test65.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test65.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test65.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<!DOCTYPE>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test66.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test66.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test66.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<!doctype html>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test67.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test67.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test67.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<!DOCTYPE html PUBLIC "public_id">
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test68.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test68.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test68.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<!DOCTYPE html PUBLIC "public_id" "system_id">
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test69.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test69.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test69.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<!DOCTYPE html PUBLIC 'public_id' 'system_id'>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test70.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test70.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test70.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<!DOCTYPE html SYSTEM "system_id">
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test71.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test71.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test71.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,2 @@
+<!doctype root>
+<!doctype html>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test72.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test72.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test72.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<a href=a&amp;b>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test73.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test73.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test73.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<script></script><html><h1>Title
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test74.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test74.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test74.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,3 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+"http://www.w3.o$
+<title>Title</title><h1>Header1</h1>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test75.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test75.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test75.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<P><?xml:namespace prefix = o ns = "urn:schemas-microsoft-com:office:office" /><IMG></P>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test76.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test76.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test76.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,4 @@
+<p>outer paragraph
+<p>inner paragraph<br>second line in inner paragraph
+</p>second line in outer paragrapth
+</p>outside paragraph tags
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test77.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test77.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test77.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<html><frameset>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test78.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test78.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test78.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,5 @@
+<!--
+a
+b
+c
+-
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test79.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test79.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test79.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<HTML A123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012
345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890></HTML>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test80.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test80.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test80.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,2 @@
+<p>P1<![CDATA[<h1>Header</h1>]]>
+<p>P2
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test81.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test81.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test81.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,6 @@
+<!--one
two

three


-->
+<?target one
two

three


?>
+<html attr="one
two

three


">
+<body>
+[one
two

three


]
+<textarea>one
two

three


</textarea>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test82.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test82.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test82.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<html><h:body xmlns:h='http://www.w3.org/1999/xhtml'>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test83.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test83.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test83.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<table><tr><td><table><tr><td><table><tr><td>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test84.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test84.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test84.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,28 @@
+<html
+xmlns:a='NSa'
+xmlns:b='NSb'
+xmlns:c='NSc'
+xmlns:d='NSd'
+xmlns:e='NSe'
+xmlns:f='NSf'
+xmlns:g='NSg'
+xmlns:h='NSh'
+xmlns:i='NSi'
+xmlns:j='NSj'
+xmlns:k='NSk'
+xmlns:l='NSl'
+xmlns:m='NSm'
+xmlns:n='NSn'
+xmlns:o='NSo'
+xmlns:p='NSp'
+xmlns:q='NSq'
+xmlns:r='NSr'
+xmlns:s='NSs'
+xmlns:t='NSt'
+xmlns:u='NSu'
+xmlns:v='NSv'
+xmlns:w='NSw'
+xmlns:x='NSx'
+xmlns:y='NSy'
+xmlns:z='NSz'
+>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test85.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test85.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test85.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+&amp
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test86.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test86.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test86.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+&#x
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test87.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test87.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test87.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" >Hello
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test88.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test88.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test88.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<HTML>Hello<p>World
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test89.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test89.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test89.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+&amp;
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test90.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test90.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test90.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<?xml version='1.0'?>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test91.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test91.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test91.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<p><?xml version="1.0"?>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test92.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test92.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test92.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<script>document.write("</SCRIPT\>");</script><p>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test93.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test93.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test93.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<table><form><td>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test94.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test94.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test94.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,6 @@
+<script>
+<!--
+html script content
+//-->
+</script>
+<h1>Foo</h1>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test95.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test95.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test95.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,6 @@
+<script>
+<!--
+html script content
+//-->
+</script>
+<h1>Foo</h1>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test95.html.settings
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test95.html.settings	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test95.html.settings	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+feature http://cyberneko.org/html/features/scanner/script/strip-comment-delims  true

Added: branches/nekohtml/upstream/0.9.5/data/html/test96.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test96.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test96.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,6 @@
+<script>
+<![CDATA[
+xhtml script content
+]]>
+</script>
+<h1>Foo</h1>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test97.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test97.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test97.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,6 @@
+<script>
+<![CDATA[
+xhtml script content
+]]>
+</script>
+<h1>Foo</h1>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test97.html.settings
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test97.html.settings	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test97.html.settings	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+feature http://cyberneko.org/html/features/scanner/script/strip-cdata-delims  true

Added: branches/nekohtml/upstream/0.9.5/data/html/test98.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test98.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test98.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+</html><h1>foo</h1>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test98.html.settings
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test98.html.settings	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test98.html.settings	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+feature	http://cyberneko.org/html/features/balance-tags/ignore-outside-content	true
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/data/html/test99.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/data/html/test99.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/data/html/test99.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+<HTML XMLNS:IE><head><mainA6><META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=ISO-8859-1"><h1>Foo
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/doc/html/.htaccess
===================================================================
--- branches/nekohtml/upstream/0.9.5/doc/html/.htaccess	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/doc/html/.htaccess	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,2 @@
+AddDefaultCharset Off
+

Added: branches/nekohtml/upstream/0.9.5/doc/html/changes.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/doc/html/changes.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/doc/html/changes.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,439 @@
+<title>NekoHTML | Change History</title>
+<link rel=stylesheet type=text/css href=../style.css>
+
+<h1>Change History</h1>
+<div class='navbar'>
+[<a href='../index.html'>Home</a>]
+[
+<a href='index.html'>Top</a>
+|
+<a href='usage.html'>Usage</a>
+|
+<a href='settings.html'>Settings</a>
+|
+<a href='filters.html'>Filters</a>
+|
+<a href='javadoc/index.html'>JavaDoc</a>
+|
+<a href='faq.html'>FAQ</a>
+|
+<a href='software.html'>Software</a>
+|
+Changes
+]
+</div>
+
+<h2>Releases</h2>
+<dl>
+ <dt>Version 0.9.5 (18 Jun 2005)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.9.5.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.9.5.tar.gz'>tgz</a>]
+ <dd>Added feature submitted by Asgeir Asgeirsson to allow scanner to fix 
+       character entity references for Microsoft Windows&reg; characters;
+     stopped building nekohtmlXni.jar file by default;
+     fixed handling of &lt;blockquote> reported by Joseph Walton
+       to better match browser behavior;
+     fixed tag-balancing bug for unknown elements reported by Marc
+       Guillemot and Vadim Tashlikovich;
+     fixed mapping of encoding name in <code>&lt;meta&gt;</code> element
+       reported by Marc Guillemot;
+     changed tag-balancing to allow headers inside of links suggested
+       by Laurens Fridael;
+     applied attribute namespace patch from Joseph Walton;
+     fixed namespace bug for "xml" prefixes reported by Asgeir
+       Asgeirsson;
+     fixed namespace bug for "xmlns" prefixes reported by
+       Johannes Koch;
+     and
+     fixed no-such-method exception bug when using augmentations feature
+       with older versions of Xerces2 reported by Hans Donner.
+ <dt>Version 0.9.4 (17 Nov 2004)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.9.4.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.9.4.tar.gz'>tgz</a>]
+ <dd>Fixed typo in proviso 5 of the license agreement; 
+     added features to strip CDATA delimiters (i.e. "&lt;![CDATA[" and
+       "]]>") from &lt;script> and &lt;style> elements suggested by Dan Sojka;
+     fixed tag-balancing problem reported by Egor Samarkhanov;
+     applied augmentations patches donated by Marc-André Morissette;
+     implemented augmentation performance enhancements inspired by
+       Marc-André Morissette;
+     fixed ignore-outside-content bug reported by Chris Erskine;
+     and
+     updated link to Xerces download site.
+ <dt>Version 0.9.3 (30 Jun 2004)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.9.3.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.9.3.tar.gz'>tgz</a>]
+ <dd>Implemented scanning of XML declaration;
+     fixed &lt;script&gt; tag scanning bug reported by Vasiliev Ivan;
+     added <code>Version</code> class and manifest entries to query 
+       product information;
+     and fixed some Javadoc errors.
+ <dt>Version 0.9.2 (31 Mar 2004)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.9.2.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.9.2.tar.gz'>tgz</a>]
+ <dd>Fixed entity reference scanning and tag-balancing bugs identified 
+       by Tommy Sandström;
+     fixed tag-balancing bug reported by Oliver Pfeiffer;
+     fixed doctype scanning bug reported by Jonathan Baxter;
+     updated Purifier filter to synthesize missing namespace bindings;
+     updated Writer filter to convert all known characters back to 
+       their entity names;
+     and
+     updated implementation to work with Xerces-J 2.6.2 that removed 
+       the <code>ObjectFactory</code> class in the 
+       <code>org.apache.xerces.util</code> package.
+ <dt>Version 0.9.1 (24 Feb 2004)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.9.1.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.9.1.tar.gz'>tgz</a>]
+ <dd>Fixed namespace binding bug reported by Jonathan Baxter.
+ <dt>Version 0.9 (19 Feb 2004)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.9.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.9.tar.gz'>tgz</a>]
+ <dd>Implemented scanning of CDATA sections;
+     implemented namespace processing;
+     added features to 
+       override namespace bindings,
+       insert namespace bindings if not present,
+       override doctype public and system identifiers, and
+       insert doctype declaration if not present;
+     added a filter to allow applications to "purify" the input, ensuring
+       that the output is well-formed XML;
+     added missing location augmentations from document type declaration
+       callback;
+     fixed newline scanning bugs reported by Jonathan Baxter;
+     and
+     fixed comment scanning bugs and infinite loop bug caused by extremely
+       long element and attribute names found by Ram Subbaroyan.
+ <dt>Version 0.8.3 (12 Dec 2003)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.8.3.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.8.3.tar.gz'>tgz</a>]
+ <dd>Fixed null pointer exception for &lt;frameset&gt; tags reported by
+       Dawid Weiss;
+     and
+     added missing file to xercesMinimal.jar file reported by Brent Beardsley.
+ <dt>Version 0.8.2 (14 Nov 2003)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.8.2.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.8.2.tar.gz'>tgz</a>]
+ <dd>Fixed array index out of bounds exception in special tags and
+       doctype scanning bug reported by Leo Galambos;
+     updated processing instruction scanning to handle weird PIs exported
+       from Microsoft products as reported by Gabriele Bulfon;
+     fixed erroneous reporting of missing whitespace before attributes
+       reported by Arno Schatz;
+     installed a default error handler that prints to standard error
+       suggested by Arno Schatz;
+     and
+     fixed handling of dangling &lt;/p&gt; reported by Gopi Murthy to
+       better match browser behavior.
+ <dt>Version 0.8.1 (30 Sep 2003)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.8.1.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.8.1.tar.gz'>tgz</a>]
+ <dd>Fixed bug reported by Yuan Ji that allowed multiple &lt;html&gt; tags;
+     fixed bug in stripping leading comments in &lt;script&gt; tags 
+       as reported by Lawrence McCartin;
+     added feature to be able to strip HTML comment delimiters (i.e. "&lt;!--" 
+       and "--&gt;") from &lt;style&gt; element content suggested by
+       Lawrence McCartin;
+     updated DOMParser to work around a bug in the Xerces HTML DOM
+       implementation when a doctype node was inserted into the document,
+       reported by Troy Waldrep;
+     updated the DOMFragmentParser to allow setting of features and
+       properties as requested by Paul Reeves;
+     changed the status of the document fragment parser from experimental
+       to <font color=green>supported</font>;
+     added feature to allow application to ignore a character encoding
+       specified in a &lt;meta http-equiv='Content-Type' 
+       content='text/html;charset=...'&gt; tag requested by Roger Fullerton;
+     and
+     changed feature identifier for document fragment tag balancing to
+       be more in line with other features (but retained old feature
+       identifier for backwards compatibility).
+ <dt>Version 0.8 (05 Aug 2003)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.8.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.8.tar.gz'>tgz</a>]
+ <dd>Implemented scanning of doctype declaration;
+     implemented non-normalized attribute value for XNI filters that want
+       to know original attribute value;
+     fixed bug scanning entity references inside of unquoted attributes;
+     fixed line counting bug in attribute values reported by Arno Schatz;
+     and
+     updated files in xercesMinimal.jar noted by Brent Beardsley.
+ <dt>Version 0.7.7 (25 Jun 2003)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.7.7.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.7.7.tar.gz'>tgz</a>]
+ <dd>Fixed handling of &lt;font&gt; tags reported by Dave King;
+     fixed bugs that caused multiple &lt;head&gt; and &lt;body&gt; tags
+       as reported by Mike Bowler;
+     fixed missing &lt;tr&gt; bug in nested tables reported by Troy Waldrep;
+     and
+     normalized newlines in attribute values to spaces.
+ <dt>Version 0.7.6 (06 May 2003)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.7.6.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.7.6.tar.gz'>tgz</a>]
+ <dd>Fixed infinite loop in special tags reported by Mike Bowler.
+ <dt>Version 0.7.5 (02 May 2003)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.7.5.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.7.5.tar.gz'>tgz</a>]
+ <dd>Fixed parsing of entity reference within &lt;textarea&gt; tags reported
+       by Mattias Jiderhamn;
+     changed behavior of tag balancer to not consume content after the end
+       &lt;body&gt; and &lt;html&gt; tags but retained old behavior through
+       new feature;
+     fixed &lt;noscript&gt; bug reported by Takashi Tomokiyo;
+     and
+     updated implementation for XNI changes introduced in Xerces-J 2.4.0.
+ <dt>Version 0.7.4 (03 Mar 2003)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.7.4.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.7.4.tar.gz'>tgz</a>]
+ <dd>Fixed &lt;form&gt; element balancing problem reported by Dan Rocco;
+     fixed null pointer exception reported by Michael Dynin that was
+       caused by a null XMLResourceIdentifier object passed to the 
+       startGeneralEntity method in the Xerces DOM parser classes;
+     fixed handling of &lt;font&gt; element as requested by Arno Schatz
+       to better match current browsers;
+     replaced generic catch exception blocks with explicit catch blocks
+       suggested by Arno Schatz;
+     fixed &lt;center&gt; tag-balancing problem reported by Russell Gold;
+     fixed null pointer exception caused by null namespace context
+       object passed to Xerces SAX parser class reported by David Leslie;
+     and
+     added FAQ entry describing how to insert custom filters before
+       the tag-balancer.
+ <dt>Version 0.7.3 (28 Jan 2003)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.7.3.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.7.3.tar.gz'>tgz</a>]
+ <dd>Updated implementation for XNI changes introduced in Xerces-J 2.3.0;
+     and
+     fixed hack string to accommodate XML4J build of Xerces included in
+       the Eclipse editor reported by Geoffrey Longman.
+ <dt>Version 0.7.2 (10 Jan 2003)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.7.2.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.7.2.tar.gz'>tgz</a>]
+ <dd>Fixed class-cast exception bug in DOMFragmentParser reported by
+       Joseph Artsimovich;
+     fixed &lt;span&gt; tag-balancing bug reported by Ron Cemer;
+     and
+     fixed handling of form tags missing a parent element reported by 
+       Russell Gold in order to better match browser behavior.
+ <dt>Version 0.7.1 (06 Dec 2002)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.7.1.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.7.1.tar.gz'>tgz</a>]
+ <dd>Fixed null pointer exception caused by null attributes object
+       passed to Xerces SAX parser class as reported by Kevin Huber;
+     and
+     fixed infinite loop condition when encountering "&lt;/html[eof]"
+       as reported by Matt Hurst.
+ <dt>Version 0.7 (27 Nov 2002)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.7.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.7.tar.gz'>tgz</a>]
+ <dd>Changed behavior of tag balancer for unbalanced elements
+       as requested by Troy Waldrep to make output match that
+       produced by browsers such as Mozilla;
+     fixed other tag balancing problems identified by a bug
+       reported by Laurens Fridael;
+     added <font color='red'>experimental</font> HTML fragment 
+       parsing feature and DOM fragment parser class;
+     fixed buffer boundary bug in skipMarkup method reported
+       by Mike Bowler;
+     added constructor to the Writer filter that accepts a
+       Java writer object parameter as requested by Alain
+       Gilbert;
+     fixed HTMLScanner class so that it can compile with JDK 1.1
+       as reported by Mikko Honkala;
+     and
+     fixed bug reported by Russell Gold that would ignore
+       the &lt;param&gt; element within an &lt;applet&gt;
+       element.
+ <dt>Version 0.6.8 (30 Sep 2002)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.6.8.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.6.8.tar.gz'>tgz</a>]
+ <dd>Implemented scanning of processing instructions;
+     improved performance of HTMLElements#getElement method inspired 
+       by Sam Cheung;
+     changed tag balancer algorithm as requested by Mike Bowler so 
+       that it does not close the &lt;body&gt; element to insert a
+       proper parent element;
+     fixed &lt;isindex&gt; proper parent bug and &lt;script&gt; empty 
+       element tag bug reported by Mike Bowler;
+     fixed bug reported by YingLCS that a &lt;form&gt; tag
+       would prematurely close a &lt;p&gt; tag;
+     and
+     updated implementation for XNI changes introduced in Xerces-J 2.2.0.
+ <dt>Version 0.6.7 (06 Sep 2002)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.6.7.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.6.7.tar.gz'>tgz</a>]
+ <dd>Added a FAQ section;
+     and
+     updated implementation for XNI changes introduced in Xerces-J 2.1.0.
+ <dt>Version 0.6.6 (25 Aug 2002)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.6.6.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.6.6.tar.gz'>tgz</a>]
+ <dd>Changed packaging to include product name and version in
+       directory name;
+     updated <code>HTMLConfiguration</code> to implement the
+       <code>XMLPullParserConfiguration</code> interface;
+     fixed bug reported by Martin Jericho to correct handling
+       of &lt;col&gt; element;
+     fixed bug reported by Dave King that would skip to end of 
+       document if bad markup was found;
+     fixed numerous bugs related to scanning &lt;script&gt; tags
+       reported by Sam Cheung;
+     added feature to be able to strip HTML comment delimiters (i.e.
+       "&lt;!--" and "--&gt;") from &lt;script&gt; element content;
+     changed the status of the feature to dynamically insert 
+       content from <em>experimental</em> to <font color=green>
+       supported</font>;
+     added code to be able to compare test files against canonical
+       output for regression testing;
+     and
+     fixed minor bugs found by the tests.     
+ <dt>Version 0.6.5 (17 Jul 2002)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.6.5.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.6.5.tar.gz'>tgz</a>]
+ <dd> Fixed bug in changing character encoding when "charset=..." is
+        not written in lowercase;
+      and
+      mark attributes as "specified".
+ <dt>Version 0.6.4 (15 Jun 2002)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.6.4.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.6.4.tar.gz'>tgz</a>]
+ <dd>Re-organized package contents for integration into the CyberNeko
+       Tools for XNI package;
+     fixed table closing bug reported by Oskar Liljeblad;
+     fixed newline bug reported by OtisG;
+     and
+     fixed line counting bug reported by Donald Ball.
+ <dt>Version 0.6.3 (29 May 2002)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.6.3.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.6.3.tar.gz'>tgz</a>]
+ <dd>Fixed bug in handling of &lt;th&gt; elements reported by
+       Oskar Liljeblad;
+     and
+     fixed various tag-balancing problems.
+ <dt>Version 0.6.2 (26 May 2002)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.6.2.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.6.2.tar.gz'>tgz</a>]
+ <dd>Changed scanner behavior as requested by Alexey Shananin to 
+       report malformed start elements (e.g. &lt;...&gt;) as 
+       characters;
+     and
+     fixed tag balancing bug introduced in previous version. Oops!
+ <dt>Version 0.6.1 (23 May 2002)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.6.1.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.6.1.tar.gz'>tgz</a>]
+ <dd>Changed tag balancer behavior to swallow events after the close 
+       of the &lt;html&gt; tag to ensure that the document stream 
+       remains well-formed;
+     added additional Ruby elements;
+     and
+     improved tag balancer performance.
+ <dt>Version 0.6 (12 May 2002)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.6.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.6.tar.gz'>tgz</a>]
+ <dd>Added property to allow custom document filters to be appended
+       to the default NekoHTML parser pipeline;
+     added convenience filters for serializing HTML documents and
+       removing elements from the document event stream;
+     added samples to demonstrate the filtering feature;
+     added <font color=red>experimental</font> functionality to
+       allow applications to dynamically insert content into the
+       HTML document stream;
+     added a minimal Xerces2 Jar file containing just the files
+       required for using the HTMLConfiguration class directly to
+       alleviate full dependence on Xerces2 distribution;
+     applied patch from Serge Proskuryakov to fix handling of 
+       misplaced &lt;title&gt; within &lt;body&gt;;
+     fixed minor tag balancing bug;
+     and
+     re-organized and added new documentation.
+ <dt>Version 0.5 (07 May 2002)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.5.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.5.tar.gz'>tgz</a>]
+ <dd>Fixed some location reporting information bugs and added 
+       feature to report character boundaries of events via the
+       associated augmentations object;
+     added feature to disable tag balancing;
+     and
+     added features to notify handlers of start and end of character
+       and built-in XML and HTML entity references.
+ <dt>Version 0.4.1 (03 May 2002)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.4.1.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.4.1.tar.gz'>tgz</a>]
+ <dd>Fixed some unquoted attribute value scanning bugs reported
+       by Xiaowei Jiang;
+     fixed hack for Xerces-J 2.0.1 reported by Ron Cemer;
+     now passing locator object to <code>startDocument</code>
+       method;
+     and
+     celebrated opening of the Spider-Man movie.
+ <dt>Version 0.4 (14 Apr 2002)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.4.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.4.tar.gz'>tgz</a>]
+ <dd>Added properties to control case of element and attribute names;
+     changed behavior of parser so that only known HTML elements
+       have their names modified according to the properties &mdash; all
+       unknown tags are left as-is;
+     added property to set default encoding;
+     added feature to augment infoset to report "synthesized" events;
+     added feature to be able to report errors and localized the error
+       messages;
+     implemented the locator so that location information can be
+     reported;
+     and
+     fixed element information so that more elements are properly
+     scanned as "special".
+ <dt>Version 0.3.3 (02 Apr 2002)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.3.3.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.3.3.tar.gz'>tgz</a>]
+ <dd>Separated <tt>META-INF/services/*</tt> files into a separate Jar
+     so that HTML parser configuration selection can be controlled 
+     more explicitly; added DOM and SAX parser classes for 
+     convenience; and fixed bug so that parser now obeys the
+     encoding specified in the input source.
+ <dt>Version 0.3.2 (15 Mar 2002)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.3.2.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.3.2.tar.gz'>tgz</a>]
+ <dd>Fixed problem with bare &lt;input&gt; elements appearing outside
+     of &lt;form&gt; tag.
+ <dt>Version 0.3.1 (07 Mar 2002)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.3.1.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.3.1.tar.gz'>tgz</a>]
+ <dd>Fixed handling of bare ampersands in content and attribute
+     values.
+ <dt>Version 0.3 (25 Feb 2002)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.3.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.3.tar.gz'>tgz</a>]
+ <dd>Changed license to an Apache style license and fixed a
+     few bugs.
+ <dt>Version 0.2.3 (19 Feb 2002)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.2.3.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.2.3.tar.gz'>tgz</a>]
+ <dd>Nested tables bug fix.
+ <dt>Version 0.2.2 (17 Feb 2002)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.2.2.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.2.2.tar.gz'>tgz</a>]
+ <dd>More bug fixes to allow the parser to be used with Xalan
+     2.3.0. The parser wasn't keeping track of features and
+     properties and without namespaces turned on, Xalan would
+     not correctly transform the SAX events emitted using 
+     NekoHTML.
+ <dt>Version 0.2.1 (16 Feb 2002)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.2.1.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.2.1.tar.gz'>tgz</a>]
+ <dd>Minor bug fix to work around problem in Xerces-J 2.0.0 SAX 
+     parser that drops attributes when parser configuration 
+     doesn't have a symbol table.
+ <dt>Version 0.2 (14 Feb 2002)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.2.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.2.tar.gz'>tgz</a>]
+ <dd>Added support for UTF-8, UTF-16, and other 8-bit encodings
+     supported by Java.
+ <dt>Version 0.1 (04 Feb 2002)
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.1.zip'>zip</a>]
+     [<a href='http://www.apache.org/~andyc/neko/nekohtml-0.1.tar.gz'>tgz</a>]
+ <dd>Initial writing.
+</dl>
+
+<div class='copyright'>
+(C) Copyright 2002-2005, Andy Clark. All rights reserved.
+</div>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/doc/html/faq.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/doc/html/faq.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/doc/html/faq.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,352 @@
+<title>NekoHTML | Frequently Asked Questions</title>
+<link rel=stylesheet type=text/css href=../style.css>
+
+<h1>Frequently Asked Questions</h1>
+<div class='navbar'>
+[<a href='../index.html'>Home</a>]
+[
+<a href='index.html'>Top</a>
+|
+<a href='usage.html'>Usage</a>
+|
+<a href='settings.html'>Settings</a>
+|
+<a href='filters.html'>Filters</a>
+|
+<a href='javadoc/index.html'>JavaDoc</a>
+|
+FAQ
+|
+<a href='software.html'>Software</a>
+|
+<a href='changes.html'>Changes</a>
+]
+</div>
+
+<h2>Table of Contents</h2>
+
+<ul>
+<li><a href='#uppercase'>Why are the DOM element names always uppercase?</a>
+<li><a href='#hierarchy'>Why do I get a hierarchy request error using DOM?</a>
+<li><a href='#prefilter'>How do I add filters <em>before</em> the tag balancer?</a>
+<li><a href='#fragments'>How do I parse HTML document fragments?</a>
+<li><a href='#offsets'>How can I get the location of document information?</a>
+<li><a href='#xerces2'>Do I have to use all of Xerces2?</a>
+<li><a href='#version'>What version of NekoHTML am I using?</a>
+</ul>
+
+<hr>
+
+<a name='uppercase'></a>
+<h3>Why are the DOM element names always uppercase?</h3>
+
+<p>
+The <a href='http://www.w3.org/TR/1998/REC-DOM-Level-1-19981001/level-one-html.html'>HTML 
+DOM</a> specification explicitly states that element and
+attribute names follow the semantics, including case-sensitivity,
+specified in the <a href='http://www.w3.org/TR/html4/'>HTML
+4</a> specification.  In addition,
+<a href='http://www.w3.org/TR/html4/about.html#h-1.2.1'>section
+1.2.1</a> of the HTML 4.01 specification states:
+<blockquote>
+Element names are written in uppercase letters (e.g., BODY). 
+Attribute names are written in lowercase letters (e.g., lang, onsubmit). 
+</blockquote>
+<p>
+The Xerces HTML DOM implementation (used by default in the
+NekoHTML <code>DOMParser</code> class) follows this convention.
+Therefore, even if the 
+"http://cyberneko.org/html/properties/names/elems" property is
+set to "lower", the DOM will still uppercase the element names.
+<p>
+To get around this problem, instantiate a Xerces2 <code>DOMParser</code>
+object using the NekoHTML parser configuration. By default, the
+Xerces DOM parser class creates a standard XML DOM tree, not
+an HTML DOM tree. Therefore, the element and attribute names
+will follow the settings for the
+"http://cyberneko.org/html/properties/names/elems" and
+"http://cyberneko.org/html/properties/names/attrs" properties.
+However, realize that the application will not be able to cast
+the document nodes to the HTML DOM interfaces for accessing the 
+document's information.
+<p>
+The following sample code shows how to instantiate a DOM
+parser using the NekoHTML parser configuration:
+<pre class='code'>
+<span class='code-comment'>// import org.apache.xerces.parsers.DOMParser;
+// import org.cyberneko.html.HTMLConfiguration;</span>
+
+DOMParser parser <span class='code-punct'>=</span> <span class='code-keyword'>new</span> DOMParser<span class='code-punct'>(</span><span class='code-keyword'>new</span> HTMLConfiguration<span class='code-punct'>());</span>
+</pre>
+
+<a name='hierarchy'></a>
+<h3>Why do I get a hierarchy request error using DOM?</h3>
+
+<p>
+Using the NekoHTML DOM parser to parse HTML documents with 
+namespace information can result in a hierarchy request error
+being thrown. For example:
+<blockquote>
+org.w3c.dom.DOMException: HIERARCHY_REQUEST_ERR: An attempt was made 
+to insert a node where it is not permitted.
+</blockquote>
+<p>
+The Xerces HTML DOM implementation does not support namespaces
+and cannot represent XHTML documents with namespace information.
+Therefore, in order to use the default HTML DOM implementation
+with NekoHTML's <code>DOMParser</code> to parse XHTML documents,
+you must turn off namespace processing. For example:
+<pre class='code'>
+<span class='code-comment'>// import org.cyberneko.html.parsers.DOMParser;</span>
+
+DOMParser parser <span class='code-punct'>=</span> <span class='code-keyword'>new</span> DOMParser<span class='code-punct'>();</span>
+parser<span class='code-punct'>.</span><span class='code-func'>setFeature</span><span class='code-punct'>(</span><span class='code-string'>"http://xml.org/sax/features/namespaces"</span><span class='code-punct'>,</span> <span class='code-keyword'>false</span><span class='code-punct'>);</span>
+</pre>
+<p>
+If your application requires namespace processing to be turned
+on <em>and</em> uses the DOM API, another option is to add a
+custom filter to the parsing pipeline to remove namespace
+information before the <code>DOMParser</code> constructs the
+document. For example:
+<pre class='code'>
+<span class='code-comment'>// import org.cyberneko.html.filters.DefaultFilter;
+// import org.cyberneko.html.parsers.DOMParser;
+// import org.apache.xerces.xni.*;
+// import org.apache.xerces.xni.parser.XMLDocumentFilter;</span>
+
+DOMParser parser <span class='code-punct'>=</span> <span class='code-keyword'>new</span> DOMParser<span class='code-punct'>();</span>
+parser<span class='code-punct'>.</span><span class='code-func'>setProperty</span><span class='code-punct'>(</span><span class='code-string'>"http://cyberneko.org/html/properties/filters"</span><span class='code-punct'>,</span> 
+  <span class='code-keyword'>new</span> XMLDocumentFilter<span class='code-punct'>[] {</span> <span class='code-keyword'>new</span> DefaultFilter<span class='code-punct'>() {</span>
+    <span class='code-keyword'>public void</span> <span class='code-func'>startElement</span><span class='code-punct'>(</span>QName element<span class='code-punct'>,</span> XMLAttributes attrs<span class='code-punct'>,</span>
+                             Augmentations augs<span class='code-punct'>)</span> <span class='code-keyword'>throws</span> XNIException <span class='code-punct'>{</span>
+      element<span class='code-punct'>.</span>uri <span class='code-punct'>=</span> <span class='code-keyword'>null</span><span class='code-punct'>;</span>
+      <span class='code-keyword'>super</span><span class='code-punct'>.</span><span class='code-func'>startElement</span><span class='code-punct'>(</span>element<span class='code-punct'>,</span> attrs<span class='code-punct'>,</span> augs<span class='code-punct'>);</span>
+    <span class='code-punct'>}</span>
+    <span class='code-comment'>// ...etc...</span>
+  <span class='code-punct'>}
+});</span>
+</pre>
+
+<a name='prefilter'></a>
+<h3>How do I add filters <em>before</em> the tag balancer?</h3>
+
+<p>
+The NekoHTML parser has a property that allows you to append 
+custom filter components at the end of the parser pipeline as 
+detailed in the <a href='filters.html'>Pipeline Filters</a> 
+documentation. But this means that processing occurs 
+<em>after</em> the tag-balancer does its job. However, the same 
+property can also be used to insert custom components before 
+the tag-balancer.
+<p>
+The secret is to <em>disable</em> the tag-balancing feature and 
+then add another instance of the <code>HTMLTagBalancer</code> 
+component at the end of your custom filter pipeline. The following
+example shows how to add a custom filter before the tag-balancer
+in the DOM parser. (This also works on all other types of parsers
+that use the <code>HTMLConfiguration</code>.)
+<pre class='code'>
+<span class='code-comment'>// import org.cyberneko.html.HTMLConfiguration;
+// import org.cyberneko.html.parsers.DOMParser;
+// import org.apache.xerces.xni.parser.XMLDocumentFilter;</span>
+
+DOMParser parser <span class='code-punct'>=</span> <span class='code-keyword'>new</span> DOMParser<span class='code-punct'>();</span>
+parser<span class='code-punct'>.</span><span class='code-func'>setFeature</span><span class='code-punct'>(</span><span class='code-string'>"http://cyberneko.org/html/features/balance-tags"</span><span class='code-punct'>,</span> <span class='code-keyword'>false</span><span class='code-punct'>);</span>
+XMLDocumentFilter<span class='code-punct'>[]</span> filters <span class='code-punct'>= {</span> <span class='code-keyword'>new</span> MyFilter<span class='code-punct'>(),</span> <span class='code-keyword'>new</span> HTMLTagBalancer<span class='code-punct'>() };</span>
+parser<span class='code-punct'>.</span><span class='code-func'>setProperty</span><span class='code-punct'>(</span><span class='code-string'>"http://cyberneko.org/html/properties/filters"</span><span class='code-punct'>,</span> filters<span class='code-punct'>);</span>
+</pre>
+
+<a name='fragments'></a>
+<h3>How do I parse HTML document fragments?</h3>
+
+<p>
+Frequently, HTML is used within applications and online forms
+to allow users to enter rich-text. In these situations, it is
+useful to be able to parse the entered text as a document
+<i>fragment</i>. In other words, the entered text represents
+content within the HTML &lt;body&gt; element &mdash; it is
+<em>not</em> a full HTML document.
+<p>
+Starting with version 0.7.0, NekoHTML has added a feature that 
+allows the application to parse HTML document fragments. Setting 
+the "<code>http://cyberneko.org/html/features/document-fragment</code>" 
+feature to <code>true</code> instructs the tag-balancer to 
+balance only tags found within the HTML &lt;body&gt; element. 
+The surrounding &lt;body&gt; and &lt;html&gt; elements are not
+inserted.
+<p>
+<strong>Note:</strong>
+The document-fragment feature should <strong>not</strong> be
+used on the <code>DOMParser</code> class since it relies on
+balanced elements in order to correctly construct the DOM
+tree. However, a new parser class has been added to NekoHTML
+to allow you to parse DOM document fragments. Please refer to
+the <a href='usage.html#convenience'>Usage Instructions</a>
+for more information.
+
+<a name='offsets'></a>
+<h3>How can I get the location of document information?</h3>
+
+<p>
+Many applications are interested in knowing where elements,
+attributes, and character data appear within the source
+document. To aid these applications, NekoHTML has a feature
+that reports the starting and ending character offsets of
+each piece of information in the document.
+<p>
+In order to tell NekoHTML to report the character offsets
+for document information, the 
+<a href='settings.html#augmentations'>augmentations</a>
+feature needs to be turned on. For example:
+<p>
+<pre class='code'>
+<span class='code-comment'>// import org.cyberneko.html.parsers.SAXParser;</span>
+
+String AUGMENTATIONS <span class='code-punct'>=</span> <span class='code-string'>"http://cyberneko.org/html/features/augmentations"</span><span class='code-punct'>;</span>
+
+SAXParser parser <span class='code-punct'>=</span> <span class='code-keyword'>new</span> <span class='code-func'>SAXParser</span><span class='code-punct'>();</span>
+parser<span class='code-punct'>.</span><span class='code-func'>setFeature</span><span class='code-punct'>(</span>AUGMENTATIONS<span class='code-punct'>,</span> <span class='code-keyword'>true</span><span class='code-punct'>);</span>
+</pre>
+<p>
+Once the feature is enabled, the location information can be
+obtained by querying the 
+<code><a href='javadoc/org/cyberneko/html/HTMLEventInfo.html'>HTMLEventInfo</a></code> 
+object in the <code>Augmentations</code> parameter passed to
+all XNI callbacks. This dependency on XNI is required because DOM
+and SAX lack the ability to communicate this detailed 
+information to the application.
+<p>
+The XNI dependence does not restrict applications to only
+using the Xerces Native Interface, however. The best way to
+use this information is by extending one of the parsers in the
+<code>org.cyberneko.html.parsers</code> package and overriding
+the methods of interest. The following example extends the
+<code>SAXParser</code> class to retrieve the event information
+for start elements:
+<p>
+<pre class='code'>
+<span class='code-keyword'>public class</span> MySAXParser <span class='code-keyword'>extends</span> SAXParser <span class='code-punct'>{</span>
+
+    <span class='code-keyword'>static final</span> String AUGMENTATIONS <span class='code-punct'>=</span>
+        <span class='code-string'>"http://cyberneko.org/html/features/augmentations"</span><span class='code-punct'>;</span>
+
+    <span class='code-keyword'>public</span> <span class='code-func'>MySAXParser</span><span class='code-punct'>() {</span>
+        <span class='code-func'>setFeature</span><span class='code-punct'>(</span>AUGMENTATIONS<span class='code-punct'>,</span> <span class='code-keyword'>true</span><span class='code-punct'>);
+    }</span>
+
+    <span class='code-keyword'>public void</span> <span class='code-func'>startElement</span><span class='code-punct'>(</span>QName element<span class='code-punct'>,</span> XMLAttributes attrs<span class='code-punct'>,</span>
+                             Augmentations augs<span class='code-punct'>)</span> <span class='code-keyword'>throws</span> XNIException <span class='code-punct'>{</span>
+
+        <span class='code-comment'>// get offset information</span>
+        HTMLEventInfo info <span class='code-punct'>=
+           (</span>HTMLEventInfo<span class='code-punct'>)</span>augs<span class='code-punct'>.</span><span class='code-func'>getItem</span><span class='code-punct'>(</span>AUGMENTATIONS<span class='code-punct'>);</span>
+
+        <span class='code-keyword'>boolean</span> synthesized <span class='code-punct'>=</span> info<span class='code-punct'>.</span><span class='code-func'>isSynthesized</span><span class='code-punct'>();</span>
+        <span class='code-keyword'>int</span> beginRow <span class='code-punct'>=</span> info<span class='code-punct'>.</span><span class='code-func'>getBeginLineNumber</span><span class='code-punct'>();</span>
+        <span class='code-keyword'>int</span> beginCol <span class='code-punct'>=</span> info<span class='code-punct'>.</span><span class='code-func'>getBeginColumnNumber</span><span class='code-punct'>();</span>
+        <span class='code-keyword'>int</span> endRow <span class='code-punct'>=</span> info<span class='code-punct'>.</span><span class='code-func'>getEndLineNumber</span><span class='code-punct'>();</span>
+        <span class='code-keyword'>int</span> endCol <span class='code-punct'>=</span> info<span class='code-punct'>.</span><span class='code-func'>getEndColumnNumber</span><span class='code-punct'>();</span>
+
+        <span class='code-comment'>// perform default processing</span>
+        <span class='code-keyword'>super</span><span class='code-punct'>.</span><span class='code-func'>startElement</span><span class='code-punct'>(</span>element<span class='code-punct'>,</span> attrs<span class='code-punct'>,</span> augs<span class='code-punct'>);
+    }
+
+}</span>
+</pre>
+<p>
+<strong>Note:</strong>
+The NekoHTML parser reports character offsets and is unable 
+to report the byte offsets that map to the resulting characters.
+The parser takes advantage of the character decoders present in
+the JVM which do not report byte offsets. And because these
+decoders buffer blocks of bytes internally for performance
+reasons, it is not possible to write a custom input stream to
+perform this mapping between byte and character offsets. If you
+control the source documents and can restrict them to a single
+character encoding, then writing a custom reader to perform 
+this mapping is more feasible.
+<p>
+<strong>Note:</strong>
+Currently, only the start and end row and column information
+can be queried. In the future, NekoHTML will be able
+to report character offsets from the beginning of the file.
+This does not, however, mean that byte offsets will also be
+supported at a future date.
+
+<a name='xerces2'></a>
+<h3>Do I have to use all of Xerces2?</h3>
+
+<p>
+While NekoHTML is a rather small library, many users complain
+about the size of the Xerces2 library. However, the full
+Xerces2 library is <em>not</em> required in order to use the
+NekoHTML parser. Because the CyberNeko HTML parser is written 
+using the Xerces Native Interface (XNI) framework that forms 
+the foundation of the Xerces2 implementation, only that part
+is required to write applications using NekoHTML.
+<p>
+For convenience, a small Jar file containing only the necessary 
+parts of the framework and utility classes from Xerces2 is
+distributed with the NekoHTML package. The Jar file, called
+<code>xercesMinimal.jar</code>, can be found in the
+<code>lib/</code> directory of the distribution. Simply add
+this file to your classpath along with <code>nekohtml.jar</code>.
+<p>
+However, there are a few restrictions if you choose to use
+the <code>xercesMinimal.jar</code> file instead of the full
+Xerces2 package. First, you cannot use the DOM and SAX parsers
+included with NekoHTML because they use the Xerces2 base
+classes. Second, because you cannot use the convenience
+parser classes, your application must be written using the
+XNI framework. However, using the XNI framework is not 
+difficult for programmers familiar with SAX. [Note: future 
+versions of NekoHTML may include custom implementations of
+the DOM and SAX parsers to avoid this dependence on the 
+Xerces2 library.]
+<p>
+Most users of the CyberNeko HTML parser will not have a
+problem including the full Xerces2 package because the
+application is likely to need an XML parser implementation.
+However, for those users who are concerned about Jar file
+size, using the <code>xercesMinimal.jar</code> file
+may be a useful alternative.
+
+<a name='version'></a>
+<h3>What version of NekoHTML am I using?</h3>
+
+<p>
+Since version 0.9.3, NekoHTML includes a class that can be
+used to query the product version within application code. 
+The <code>Version</code> class in the
+<code>org.cyberneko.html</code> package contains a method,
+<code>getVersion</code>, that returns the NekoHTML version
+as a string. For example:
+<pre class='code'>
+<span class='code-comment'>// import org.cyberneko.html.Version;</span>
+
+System<span class='code-punct'>.</span>err<span class='code-punct'>.</span><span class='code-func'>println</span><span class='code-punct'>(</span>Version<span class='code-punct'>.</span><span class='code-func'>getVersion</span><span class='code-punct'>());</span>
+</pre>
+<p>
+The <code>Version</code> class also includes a <code>main</code> 
+method that prints the version information to standard output.
+<p>
+The version and product information can also be queried using
+the Java package API. For example:
+<pre class='code'>
+Class cls <span class='code-punct'>=</span> Class<span class='code-punct'>.</span><span class='code-func'>forName</span><span class='code-punct'>(</span><span class='code-string'>"org.cyberneko.html.HTMLConfiguration"</span><span class='code-punct'>);</span>
+Package pkg <span class='code-punct'>=</span> cls<span class='code-punct'>.</span><span class='code-func'>getPackage</span><span class='code-punct'>();</span>
+
+String name <span class='code-punct'>=</span> pkg<span class='code-punct'>.</span><span class='code-func'>getName</span><span class='code-punct'>();</span>
+
+String specTitle   <span class='code-punct'>=</span> pkg<span class='code-punct'>.</span><span class='code-func'>getSpecificationTitle</span><span class='code-punct'>();</span>
+String specVendor  <span class='code-punct'>=</span> pkg<span class='code-punct'>.</span><span class='code-func'>getSpecificationVendor</span><span class='code-punct'>();</span>
+String specVersion <span class='code-punct'>=</span> pkg<span class='code-punct'>.</span><span class='code-func'>getSpecificationVersion</span><span class='code-punct'>();</span>
+
+String implTitle   <span class='code-punct'>=</span> pkg<span class='code-punct'>.</span><span class='code-func'>getImplementationTitle</span><span class='code-punct'>();</span>
+String implVendor  <span class='code-punct'>=</span> pkg<span class='code-punct'>.</span><span class='code-func'>getImplementationVendor</span><span class='code-punct'>();</span>
+String implVersion <span class='code-punct'>=</span> pkg<span class='code-punct'>.</span><span class='code-func'>getImplementationVersion</span><span class='code-punct'>();</span>
+</pre>
+
+<div class='copyright'>
+(C) Copyright 2002-2005, Andy Clark. All rights reserved.
+</div>

Added: branches/nekohtml/upstream/0.9.5/doc/html/filters.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/doc/html/filters.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/doc/html/filters.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,373 @@
+<title>NekoHTML | Pipeline Filters</title>
+<link rel=stylesheet type=text/css href=../style.css>
+
+<h1>Pipeline Filters</h1>
+<div class='navbar'>
+[<a href='../index.html'>Home</a>]
+[
+<a href='index.html'>Top</a>
+|
+<a href='usage.html'>Usage</a>
+|
+<a href='settings.html'>Settings</a>
+|
+Filters
+|
+<a href='javadoc/index.html'>JavaDoc</a>
+|
+<a href='faq.html'>FAQ</a>
+|
+<a href='software.html'>Software</a>
+|
+<a href='changes.html'>Changes</a>
+]
+</div>
+
+<h2>Table of Contents</h2>
+<ul>
+<li><a href='#overview'>Overview</a>
+ <ul>
+ <li><a href='#overview.create'>Creating a New Filter</a>
+ <li><a href='#overview.append'>Appending Filters to the Pipeline</a>
+ </ul>
+<li><a href='#filters'>Sample Filters</a>
+ <ul>
+ <li><a href='#filters.serialize'>Serializing HTML Documents</a>
+ <li><a href='#filters.namespaces'>Namespace Processing</a>
+ <li><a href='#filters.well-formedness'>Ensuring XML Well-Formedness</a>
+ <li><a href='#filters.removing'>Removing Elements</a>
+ <li><a href='#filters.identity'>Performing Identity Transform</a>
+ <li><a href='#filters.dynamic'>Dynamically Inserting Content</a>
+ </ul>
+</ul>
+
+<hr>
+
+<a name='overview'></a>
+<h2>Overview</h2>
+<p>
+The Xerces Native Interface (XNI) defines a parser configuration
+framework in which parsers can be written as a pipeline of
+modular components. This allows new parser configurations to be
+constructed by re-arranging existing components and/or writing
+custom components. And because the NekoHTML parser is written using
+this modular framework, new functionality can be quickly and
+easily added to the parser by appending custom document filters 
+to the end of the default NekoHTML parsing pipeline.
+
+<a name='overview.create'></a>
+<h3>Creating a New Filter</h3>
+<p>
+To write a custom filter, simply write a new class that implements
+the <code>XMLDocumentFilter</code> interface from the
+<code>org.apache.xerces.xni.parser</code> package of Xerces2. This
+interface allows the component to be both the <em>handler</em> of
+document events from the previous stage in the pipeline as well as 
+the <em>source</em> for the next stage in the pipeline. The 
+implementation of the new filter is completely arbitrary; it can 
+remove events from the document stream, generate new events, or 
+anything else you want!
+<p>
+NekoHTML includes a base filter class to simplify the creation of
+custom filters. To write a new filter, simply extend the 
+<code>DefaultFilter</code> class located in the
+<code>org.cyberneko.html.filters</code> package and override the
+relevant methods to add your own behavior. Once done, the only
+thing you need to do is append the filter to the end of the
+parser pipeline.
+
+<a name='overview.append'></a>
+<h3>Appending Filters to the Pipeline</h3>
+<p>
+The NekoHTML parser has a <a href='settings.html#filters'>filters
+property</a> that allows you to append custom document filters to
+the end of the default parser pipeline. The value of this property
+is an array of objects that implement the <code>XMLDocumentFilter</code>
+interface in XNI. For example, the following code instantiates a
+default filter and appends it to the parser pipeline:
+<pre class='code'>
+XMLDocumentFilter noop <span class='code-punct'>=</span> <span class='code-keyword'>new</span> DefaultFilter<span class='code-punct'>();</span>
+XMLDocumentFilter<span class='code-punct'>[]</span> filters <span class='code-punct'>= {</span> noop <span class='code-punct'>};</span>
+
+XMLParserConfiguration parser <span class='code-punct'>=</span> <span class='code-keyword'>new</span> HTMLConfiguration<span class='code-punct'>();</span>
+parser<span class='code-punct'>.</span>setProperty<span class='code-punct'>(</span><span class='code-string'>"http://cyberneko.org/html/properties/filters"</span><span class='code-punct'>,</span> filters<span class='code-punct'>);</span>
+</pre>
+
+<a name='filters'></a>
+<h2>Sample Filters</h2>
+<p>
+This section describes a few of the basic document filters 
+that are included with the NekoHTML parser. The included filters 
+enable applications to perform a variety of operations, including:
+<ul>
+<li>serializing HTML documents;
+<li>ensuring XML well-formedness;
+    and
+<li>performing identity transform.
+</ul>
+
+<a name='filters.serialize'></a>
+<h3>Serializing HTML Documents</h3>
+<p>
+NekoHTML includes a simple HTML serializer written as a filter. 
+The <code>Writer</code> class is located in the 
+<code>org.cyberneko.html.filters</code> package and contains two
+different constructors. The default constructor creates a writer
+that prints to the standard output. The other constructor allows 
+the application to control the output stream and the encoding.
+For example:
+<pre class='code'>
+<span class='code-comment'>// write to standard output using UTF-8</span>
+XMLDocumentFilter writer <span class='code-punct'>=</span> <span class='code-keyword'>new</span> Writer<span class='code-punct'>();</span>
+
+<span class='code-comment'>// write to file with specified encoding</span>
+OutputStream stream <span class='code-punct'>=</span> <span class='code-keyword'>new</span> FileOutputStream<span class='code-punct'>(</span><span class='code-string'>"index.html"</span><span class='code-punct'>);</span>
+String encoding <span class='code-punct'>=</span> <span class='code-string'>"ISO-8859-1"</span><span class='code-punct'>;</span>
+XMLDocumentFilter writer <span class='code-punct'>=</span> <span class='code-keyword'>new</span> Writer<span class='code-punct'>(</span>stream, encoding<span class='code-punct'>);</span>
+</pre>
+<p>
+Besides serializing the HTML event stream, the writer also passes 
+the document events to the next stage in the pipeline. This allows 
+applications to insert writer filters between other custom filters 
+for debugging purposes.
+<p>
+Since an HTML document may have specified its encoding using the
+&lt;META&gt; tag and http-equiv/content attributes, the writer will
+automatically change any character set specified in this tag to
+match the encoding of the output stream. Therefore, the character
+encoding name used to construct the writer should be an official
+<a href='http://www.iana.org/assignments/character-sets'>IANA</a>
+encoding name and not a Java encoding name.
+<strong>Note:</strong>
+The modified character set in the &lt;META&gt; tag is <em>not</em>
+propagated to the next stage in the pipeline. The changed value is
+only output to the stream; the original value is sent to the next
+stage in the pipeline.
+<p>
+For convenience, the <code>Writer</code> class contains a 
+<code>main</code> method that allows you to run it as a program.
+This can be used for debugging purposes in order to see what the
+NekoHTML parser is generating as well as converting the character
+encoding of existing documents. 
+<p>
+The following table shows the standard usage of the writer:
+<table cellspacing='0' cellpadding='3'>
+<tr><th style='border-bottom: 0'>Usage:
+<td style='border-bottom: 0'><tt>java org.cyberneko.html.filters.Writer (options) file ...</tt>
+<tr><th style='border-bottom: solid black 1'>Options:
+<td><pre>
+  -e name  Specify IANA name of output encoding.
+  -i       Perform identity transform.
+  -p       Purify output to ensure XML well-formedness.
+  -h       Display help screen.</pre>
+</td>
+</tr>
+</table>
+
+<a name='filters.namespaces'></a>
+<h3>Namespace Processing</h3>
+<p>
+A filter to perform namespace processing is included with NekoHTML,
+for convenience. You do not need to add this filter manually because
+it is automatically added to the parsing pipeline if the SAX namespaces 
+feature is enabled. However, if you are interested, the
+<code>NamespaceBinder</code> component is included in the 
+<code>org.cyberneko.html.filters</code> package.
+<p>
+<strong>Note:</strong>
+This component does not perform <em>any</em> namespace processing
+unless the SAX namespaces feature, 
+"http://xml.org/sax/features/namespaces", is enabled.
+
+<a name='filters.well-formedness'></a>
+<h3>Ensuring XML Well-Formedness</h3>
+<p>
+HTML allows documents to be less strict than XML documents which 
+means that most HTML documents cannot be parsed with an XML parser. 
+But even if an HTML document can be parsed and accessed by
+applications using standard XML programming interfaces, many
+applications need to produce well-formed output. Not only do tags
+need to be balanced properly, but the document content must also
+be legal according to the XML specification. Therefore, the 
+NekoHTML parser provides a filter that "purifies" the input, 
+ensuring that the output is well-formed XML.
+<p>
+The <code>Purifier</code> class in the
+<code>org.cyberneko.html.filters</code> package lets the application
+convert the HTML input into well-formed XML output. Some of the
+changes that the Purifier performs are:
+<ul>
+<li>fixing illegal element and attribute names;
+<li>ensuring the string "--" does not appear in the content of
+    a comment;
+<li>escaping illegal characters appearing in the document;
+<li>etc.
+</ul>
+
+<a name='filters.removing'></a>
+<h3>Removing Elements</h3>
+<p>
+The NekoHTML parser also provides a basic document filter capable 
+of removing specified elements from the processing stream. The
+<code>ElementRemover</code> class is located in the 
+<code>org.cyberneko.html.filters</code> package and provides
+two options for processing document elements:
+<ul>
+<li>specifying those elements which should be accepted and,
+    optionally, which attributes of that element should be
+    kept; and
+<li>specifying those elements whose tags and content should be
+    completely removed from the event stream.
+</ul>
+<p>
+The first option allows the application to specify which elements
+appearing in the event stream should be accepted and, therefore,
+passed on to the next stage in the pipeline. All elements 
+<em>not</em> in the list of acceptable elements have their start 
+and end tags stripped from the event stream <em>unless</em> those
+elements appear in the list of elements to be removed. 
+<p>
+The second option allows the application to specify which elements
+should be completely removed from the event stream. When an element
+appears that is to be removed, the element's start and end tag as
+well as all of that element's content is removed from the event
+stream.
+<p>
+A common use of this filter would be to only allow rich-text
+and linking elements as well as the character content to pass 
+through the filter &mdash; all other elements would be stripped.
+The following code shows how to configure this filter to perform
+this task:
+<pre class='code'>
+ElementRemover remover <span class='code-punct'>=</span> <span class='code-keyword'>new</span> ElementRemover<span class='code-punct'>();</span>
+remover<span class='code-punct'>.</span>acceptElement<span class='code-punct'>(</span><span class='code-string'>"b"</span><span class='code-punct'>,</span> <span class='code-keyword'>null</span><span class='code-punct'>);</span>
+remover<span class='code-punct'>.</span>acceptElement<span class='code-punct'>(</span><span class='code-string'>"i"</span><span class='code-punct'>,</span> <span class='code-keyword'>null</span><span class='code-punct'>);</span>
+remover<span class='code-punct'>.</span>acceptElement<span class='code-punct'>(</span><span class='code-string'>"u"</span><span class='code-punct'>,</span> <span class='code-keyword'>null</span><span class='code-punct'>);</span>
+remover<span class='code-punct'>.</span>acceptElement<span class='code-punct'>(</span><span class='code-string'>"a"</span><span class='code-punct'>,</span> <span class='code-keyword'>new</span> String<span class='code-punct'>[] {</span> <span class='code-string'>"href"</span> <span class='code-punct'>});</span>
+</pre>
+<p>
+However, this would still allow the text content of other
+elements to pass through, which may not be desirable. In order
+to further "clean" the input, the <code>removeElement</code>
+option can be used. The following piece of code adds the ability
+to completely remove any &lt;SCRIPT&gt; tags and content 
+from the stream.
+<pre class='code'>
+remover<span class='code-punct'>.</span>removeElement<span class='code-punct'>(</span><span class='code-string'>"script"</span><span class='code-punct'>);</span>
+</pre>
+<p>
+This source code is included in the <code>src/html/sample/</code>
+directory in the file named <code>RemoveElements.java</code>.
+<p>
+<strong>Note:</strong>
+When an element is "stripped", its start and end tags are
+removed from the event stream. However, all of the element's
+text content and elements (that are accepted) are not stripped.
+To completely remove an element's content, use the
+<code>removeElement</code> method.
+<p>
+<strong>Note:</strong>
+Care should be taken when using this filter because the output
+may not be a well-balanced tree. Specifically, if the application
+removes the &lt;HTML&gt; element (with or without retaining its
+children), the resulting document event stream will no longer be
+well-formed.
+
+<a name='filters.identity'></a>
+<h3>Performing Identity Transform</h3>
+<p>
+An identity filter is provided that performs an identity 
+operation of the original document event stream generated by the 
+HTML scanner by removing events that are synthesized by the tag 
+balancer. This operation is essentially the same as turning off 
+tag-balancing in the parser. However, this filter is useful when 
+you want the tag balancer to report "errors" but do not want the 
+synthesized events in the output.
+<p>
+<strong>Note:</strong>
+This filter requires the augmentations feature to be turned on.
+For example:
+<pre class='code'>
+XMLParserConfiguration parser <span class='code-punct'>=</span> <span class='code-keyword'>new</span> HTMLConfiguration<span class='code-punct'>();</span>
+parser<span class='code-punct'>.</span>setFeature<span class='code-punct'>(</span><span class='code-string'>"http://cyberneko.org/html/features/augmentations"</span><span class='code-punct'>,</span> <span class='code-keyword'>true</span><span class='code-punct'>);</span>
+</pre>
+<p>
+<strong>Note:</strong>
+This isn't <em>exactly</em> the identity transform because the
+element and attribute names may have been modified from the
+original document. For example, by default, NekoHTML converts
+element names to upper-case and attribute names to lower-case.
+
+<a name='filters.dynamic'></a>
+<h3>Dynamically Inserting Content</h3>
+<p>
+The NekoHTML parser has the ability to dynamically insert content
+into the parsed HTML document. This functionality can be used to
+insert the result of an embedded script (e.g. JavaScript) into the 
+HTML document in place of the script element. <strong>Note:</strong>
+NekoHTML does not provide a scripting engine &mdash; only the 
+ability to insert content to be parsed.
+<p>
+To insert content into the HTML document stream, call the
+<code>pushInputSource</code> method on the NekoHTML parser
+configuration class. This method takes an <code>XMLInputSource</code>
+object as a parameter. At the moment, the character stream 
+(java.io.Reader) of the input source <strong>must</strong> be 
+set or else the implementation will throw an illegal argument 
+exception.
+<p>
+A sample program called <code>Script</code> is included in the 
+<tt>src/html/sample/</tt> directory that demonstrates the use of the 
+<code>pushInputSource</code> method of the HTMLConfiguration in order 
+to dynamically insert content into the HTML stream. 
+This sample defines a new script language called "NekoScript" 
+that is a modified subset of the 
+<a href='http://www.jclark.com/sp/sgmlsout.htm'>NSGMLS format</a>. 
+In this format, each line specifies a new <i>command</i> where each 
+command may indicate a start element tag, an attribute value, 
+character content, an end element tag, etc. The following table 
+enumerates the NSGMLS features supported by the NekoScript
+language:
+<table border='1' cellspacing='0' cellpadding='3'>
+<tr>
+<th style='font-weight:normal;border-bottom:solid black 1'><tt>(<i>name</i></tt>
+<td>A start element with the specified <i>name</i>.
+<tr>
+<th style='font-weight:normal;border-bottom:solid black 1'><tt>"<i>text</i></tt>
+<td>Character content with the specified <i>text</i>.
+<tr>
+<th style='font-weight:normal;border-bottom:solid black 1'><tt>)<i>name</i></tt>
+<td>An end element with the specified <i>name</i>.
+</table>
+<p>
+When processed with the <code>Script</code> filter, the following 
+document:
+<pre class='document'>
+&lt;script type='text/x-nekoscript'&gt;
+(h1
+"Header
+)h1
+&lt;/script&gt;
+</pre>
+<p>
+is equivalent to:
+<pre class='document'>
+&lt;H1&gt;Header&lt;/H1&gt;
+</pre>
+<p>
+as seen by the document handler registered with the parser.
+<p>
+The <code>Script</code> class implements a <code>main</code>
+method so that it can be run as a program. Running the program
+produces the following output: [<strong>Note:</strong> The command
+should be contiguous. It is split among separate lines in this 
+example to make it easier to read.]
+<pre class='cmdline'>
+<span class='cmdline-prompt'>&gt;</span> <span class='cmdline-cmd'>java -cp nekohtml.jar;nekohtmlSamples.jar;lib/xercesMinimal.jar 
+       sample.Script data/test33.html</span>
+&lt;H1&gt;Header&lt;/H1&gt;
+</pre>
+
+<div class='copyright'>
+(C) Copyright 2002-2005, Andy Clark. All rights reserved.
+</div>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/doc/html/index.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/doc/html/index.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/doc/html/index.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,105 @@
+<title>NekoHTML</title>
+<link rel=stylesheet type=text/css href=../style.css>
+
+<h1>CyberNeko HTML Parser <sub>0.9.5</sub></h1>
+<div class='navbar'>
+[<a href='../index.html'>Home</a>]
+[
+Top
+|
+<a href='usage.html'>Usage</a>
+|
+<a href='settings.html'>Settings</a>
+|
+<a href='filters.html'>Filters</a>
+|
+<a href='javadoc/index.html'>JavaDoc</a>
+|
+<a href='faq.html'>FAQ</a>
+|
+<a href='software.html'>Software</a>
+|
+<a href='changes.html'>Changes</a>
+]
+</div>
+
+<h2>About</h2>
+<p>
+NekoHTML is a simple <a href='http://www.w3.org/TR/html4/'>HTML</a>
+scanner and tag balancer that enables application programmers to 
+parse HTML documents and access the information using standard XML 
+interfaces. The parser can scan HTML files and "fix up" many common 
+mistakes that human (and computer) authors make in writing HTML 
+documents. NekoHTML adds missing parent elements; automatically 
+closes elements with optional end tags; and can handle mismatched 
+inline element tags.
+<p>
+NekoHTML is written using the 
+<a href='http://xml.apache.org/xerces2-j/xni.html'>Xerces 
+Native Interface</a> (XNI) that is the foundation of the 
+<a href='http://xml.apache.org/xerces2-j/'>Xerces2</a> 
+implementation. This enables you to use the NekoHTML parser
+with existing XNI tools without modification or rewriting code.
+
+<h2>License Agreement</h2>
+<p>
+The NekoHTML parser is distributed under an Apache-style license
+and is currently being considered as a sub-project of the Apache 
+Xerces project. If accepted, the license and development of NekoHTML
+will move to Apache. However, this will not affect the ability of
+the parser to be freely used as specified by the current license.
+<p>
+For specific license details, please refer to the 
+<a href='../../LICENSE'>LICENSE</a> file.
+
+<h2>Download</h2>
+<p>
+The NekoHTML parser includes complete Java source code and 
+documentation. You can download the latest version from the 
+following location:
+<ul>
+<li>NekoHTML 
+    [<a href='http://www.apache.org/~andyc/neko/nekohtml-latest.zip'>zip</a>]
+    [<a href='http://www.apache.org/~andyc/neko/nekohtml-latest.tar.gz'>tgz</a>] 
+</ul>
+
+<h2>Requirements and Limitations</h2>
+<p>
+This version of NekoHTML requires the following:
+<ul>
+<li><a href='http://java.sun.com/products/'>Java 1.1</a> (or 
+    higher)
+<li><a href='http://xml.apache.org/xerces2-j/download.cgi'>Xerces 2.0.0</a>
+    (or higher) 
+    [<a href='http://archive.apache.org/dist/xml/xerces-j/'>archive</a>]
+</ul>
+<p>
+This version has the following limitations:
+<ul>
+<li>There are HTML documents for which NekoHTML cannot properly 
+    generate a well-formed XML document event stream. For example,
+    documents with multiple &lt;html&gt; tags are inherently 
+    ill-formed because XML documents may only have a single root
+    element.
+<li>Code added to the core DOM implementation in Xerces-J 2.0.1
+    introduced a bug in the HTML DOM implementation based on it.
+    The bug causes the element nodes in the resultant HTML document 
+    object to be of type <code>org.apache.xerces.dom.ElementNSImpl</code> 
+    instead of the appropriate HTML DOM element objects. The problem
+    affects NekoHTML users who use the parser with Xerces-J 2.0.1 and
+    anyone using the HTML DOM implementation in Xerces-J 2.0.1.    
+<li>There are no other known major limitations with this release. 
+    However, additional work can always be done to improve 
+    performance, fix bugs, and add functionality.
+</ul>
+<p>
+For a more complete list of items to be done, please refer to
+the <a href='../../TODO_html'>Todo Items</a>.
+
+<h2>Contact Information</h2>
+<p>
+Andy Clark &lt;<a href='mailto:andyc at apache.org'>andyc at apache.org</a>&gt;
+
+<div class='copyright'>
+(C) Copyright 2002-2005, Andy Clark. All rights reserved.
+</div>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/doc/html/settings.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/doc/html/settings.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/doc/html/settings.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,412 @@
+<title>NekoHTML | Parser Settings</title>
+<link rel=stylesheet type=text/css href=../style.css>
+<style type='text/css'>
+.see {
+ margin: 0; margin-top: 0.25em;
+ font-size: 0.8em; 
+}
+</style>
+
+<h1>Parser Settings</h1>
+<div class='navbar'>
+[<a href='../index.html'>Home</a>]
+[
+<a href='index.html'>Top</a>
+|
+<a href='usage.html'>Usage</a>
+|
+Settings
+|
+<a href='filters.html'>Filters</a>
+|
+<a href='javadoc/index.html'>JavaDoc</a>
+|
+<a href='faq.html'>FAQ</a>
+|
+<a href='software.html'>Software</a>
+|
+<a href='changes.html'>Changes</a>
+]
+</div>
+
+<h2>Configuring Parser</h2>
+<p>
+The application can set a variety of NekoHTML settings to more
+precisely control the behavior of the parser. These settings
+can be set directly on the <code>HTMLConfiguration</code> class
+or on the supplied parser classes by calling the
+<code>setFeature</code> and <code>setProperty</code> methods.
+For example:
+<pre class='code'>
+<span class='code-comment'>// settings on HTMLConfiguration</span>
+org<span class='code-punct'>.</span>apache<span class='code-punct'>.</span>xerces<span class='code-punct'>.</span>xni<span class='code-punct'>.</span>parser<span class='code-punct'>.</span>XMLParserConfiguration config <span class='code-punct'>=</span>
+  <span class='code-keyword'>new</span> org<span class='code-punct'>.</span>cyberneko<span class='code-punct'>.</span>html<span class='code-punct'>.</span>HTMLConfiguration<span class='code-punct'>();</span>
+config<span class='code-punct'>.</span>setFeature<span class='code-punct'>(</span><span class='code-string'>"http://cyberneko.org/html/features/augmentations"</span><span class='code-punct'>,</span> <span class='code-keyword'>true</span><span class='code-punct'>);</span>
+config<span class='code-punct'>.</span>setProperty<span class='code-punct'>(</span><span class='code-string'>"http://cyberneko.org/html/properties/names/elems"</span><span class='code-punct'>,</span> <span class='code-string'>"lower"</span><span class='code-punct'>);</span>
+
+<span class='code-comment'>// settings on DOMParser</span>
+org<span class='code-punct'>.</span>cyberneko<span class='code-punct'>.</span>html<span class='code-punct'>.</span>parsers<span class='code-punct'>.</span>DOMParser parser <span class='code-punct'>=</span> 
+  <span class='code-keyword'>new</span> org<span class='code-punct'>.</span>cyberneko<span class='code-punct'>.</span>html<span class='code-punct'>.</span>parsers<span class='code-punct'>.</span>DOMParser<span class='code-punct'>();</span>
+parser<span class='code-punct'>.</span>setFeature<span class='code-punct'>(</span><span class='code-string'>"http://cyberneko.org/html/features/augmentations"</span><span class='code-punct'>,</span> <span class='code-keyword'>true</span><span class='code-punct'>);</span>
+parser<span class='code-punct'>.</span>setProperty<span class='code-punct'>(</span><span class='code-string'>"http://cyberneko.org/html/properties/names/elems"</span><span class='code-punct'>,</span> <span class='code-string'>"lower"</span><span class='code-punct'>);</span>
+</pre>
+
+<h2>Features</h2>
+The NekoHTML parser supports the following features:
+<table cellpadding='4' cellspacing='0'>
+ <tr><th>Feature Id / Description<th>Default
+ <tr>
+  <td>
+   <a name='namespaces'></a>
+   <span class='id'>http://xml.org/sax/features/namespaces</span>
+   <br>
+   Specifies if the NekoHTML parser should perform namespace
+   processing. If enabled, namespace binding attributes are
+   processed and elements and attributes are bound to the defined
+   namespaces.
+   <p class='see'>
+   See:&nbsp;<a href='#override-namespaces'>http://cyberneko.org/html/features/override-namespaces</a>
+  <td align='center'>true
+ <tr>
+  <td>
+   <a name='balance-tags'></a>
+   <span class='id'>http://cyberneko.org/html/features/balance-tags</span>
+   <br>
+   Specifies if the NekoHTML parser should attempt to balance
+   the tags in the parsed document. Balancing the tags fixes up many
+   common mistakes by adding missing parent elements, automatically
+   closing elements with optional end tags, and correcting unbalanced
+   inline element tags. In order to process HTML documents as XML, this
+   feature should <strong>not</strong> be turned off. This feature is
+   provided as a performance enhancement for applications that only
+   care about the appearance of specific elements, attributes, and/or
+   content regardless of the document's ill-formed structure.
+  <td align='center'>true
+ <tr>
+  <td>
+   <a name='override-doctype'></a>
+   <span class='id'>http://cyberneko.org/html/features/override-doctype</span>
+   <br>
+   Specifies whether the NekoHTML parser should override the public
+   and system identifier values specified in the document type declaration.
+   <p class='see'>
+   See:&nbsp;<a href='#doctype-pubid'>http://cyberneko.org/html/properties/doctype/pubid</a>
+   <br>
+   See:&nbsp;<a href='#doctype-sysid'>http://cyberneko.org/html/properties/doctype/sysid</a>
+  <td align='center'>false
+  <tr>
+   <td>
+    <a name='insert-doctype'></a>
+    <span class='id'>http://cyberneko.org/html/features/insert-doctype</span>
+    <br>
+    Specifies whether the NekoHTML parser should insert a document type
+    declaration into the document handler callbacks. The values for the
+    public and system identifiers are taken from the sysid and pubid
+    properties. Therefore, those properties should be set if this
+    feature is turned on. Also, setting this feature to <code>true</code> 
+    will cause the parser to ignore any document type declaration that 
+    appears in the document.
+    <p class='see'>
+    See:&nbsp;<a href='#doctype-pubid'>http://cyberneko.org/html/properties/doctype/pubid</a>
+    <br>
+    See:&nbsp;<a href='#doctype-sysid'>http://cyberneko.org/html/properties/doctype/sysid</a>
+   <td align='center'>false
+ <tr>
+  <td>
+   <a name='override-namespaces'></a>
+   <span class='id'>http://cyberneko.org/html/features/override-namespaces</span>
+   <br>
+   Specifies whether the NekoHTML parser should override the namespace 
+   URI bound to HTML elements and attributes.
+   <p class='see'>
+   See:&nbsp;<a href='#namespaces-uri'>http://cyberneko.org/html/properties/namespaces-uri</a>
+  <td align='center'>false
+  <tr>
+   <td>
+    <a name='insert-namespaces'></a>
+    <span class='id'>http://cyberneko.org/html/features/insert-namespaces</span>
+    <br>
+    Specifies whether the NekoHTML parser should insert namespace URI 
+    bindings to HTML elements and attributes. The value for the
+    namespace URI is taken from the namespaces property. Therefore, 
+    that property should be set if this feature is turned on. 
+    <p class='see'>
+    See:&nbsp;<a href='#namespaces-uri'>http://cyberneko.org/html/properties/namespaces-uri</a>
+   <td align='center'>false
+ <tr>
+  <td>
+   <a name='ignore-outside-content'></a>
+   <span class='id'>http://cyberneko.org/html/features/balance-tags/ignore-outside-content</span>
+   <br>
+   Specifies if the NekoHTML parser should ignore content after the end 
+   of the document root element. If this feature is set to true, all 
+   elements and character content appearing outside of the document body 
+   are consumed. If set to false, the end elements for the &lt;body&gt;
+   and &lt;html&gt; are ignored, allowing content appearing outside of
+   the document to be parsed and communicated to the application.
+  <td align='center'>false
+ <tr>
+  <td>
+   <a name='document-fragment'></a>
+   <span class='id'>http://cyberneko.org/html/features/balance-tags/document-fragment</span>
+   <br>
+   Specifies if the tag balancer should operate as if a fragment
+   of HTML is being parsed. With this feature set, the tag balancer
+   will not attempt to insert missing body elements around content
+   and markup. However, proper parents for elements contained within
+   the &lt;body&gt; element will still be inserted. This feature should 
+   <strong>not</strong> be used when using the <code>DOMParser</code>
+   class. In order to parse a DOM <code>DocumentFragment</code>, use the
+   <code>DOMFragmentParser</code> class.
+  <td align='center'>false
+ <tr>
+  <td>
+   <a name='cdata-sections'></a>
+   <span class='id'>http://cyberneko.org/html/features/scanner/cdata-sections</span>
+   <br>
+   Specifies whether CDATA sections are reported as character content.
+   If set to <code>false</code>, CDATA sections are reported as comments. 
+   When reported as comments, the comment text is prefixed with "[CDATA[" 
+   and ends with "]]". This prefix and suffix are <em>not</em>
+   included when reported as character content.
+  <td align='center'>false
+ <tr>
+  <td>
+   <a name='notify-char-refs'></a>
+   <span class='id'>http://apache.org/xml/features/scanner/notify-char-refs</span>
+   <br>
+   Specifies whether character entity references (e.g. &amp;#32;, &amp;#x20;, 
+   etc) should be reported to the registered document handler. The name of 
+   the entity reported will contain the leading pound sign and optional 'x' 
+   character. For example, the name of the character entity reference
+   <code>&amp;#x20;</code> will be reported as "#x20".
+  <td align='center'>false
+ <tr>
+  <td>
+   <a name='notify-builtin-xml-refs'></a>
+   <span class='id'>http://apache.org/xml/features/scanner/notify-builtin-refs</span>
+   <br>
+   Specifies whether the XML built-in entity references (e.g. &amp;amp;, 
+   &amp;lt;, etc) should be reported to the registered document handler.
+   This only applies to the five pre-defined XML general entities --
+   specifically, "amp", "lt", "gt", "quot", and "apos". This is done for
+   compatibility with the Xerces feature.
+   To be notified of the built-in entity references in HTML, set the 
+   <code>http://cyberneko.org/html/features/scanner/notify-builtin-refs</code> 
+   feature to <code>true</code>.
+  <td align='center'>false
+ <tr>
+  <td>
+   <a name='notify-builtin-html-refs'></a>
+   <span class='id'>http://cyberneko.org/html/features/scanner/notify-builtin-refs</span>
+   <br>
+   Specifies whether the HTML built-in entity references (e.g. &amp;nbsp;, 
+   &amp;copy;, etc) should be reported to the registered document
+   handler. This <em>includes</em> the five pre-defined XML general 
+   entities.
+  <td align='center'>false
+ <tr>
+  <td>
+   <a name='fix-mswindows-refs'></a>
+   <span class='id'>http://cyberneko.org/html/features/scanner/fix-mswindows-refs</span>
+   <br>
+   Specifies whether to fix character entity references for Microsoft 
+   Windows&reg; characters as described at
+   <a href='http://www.cs.tut.fi/~jkorpela/www/windows-chars.html'>http://www.cs.tut.fi/~jkorpela/www/windows-chars.html</a>.
+  <td align='center'>false
+ <tr>
+  <td>
+   <a name='ignore-specified-charset'></a>
+   <span class='id'>http://cyberneko.org/html/features/scanner/ignore-specified-charset</span>
+   <br>
+   Specifies whether to ignore the character encoding specified within the 
+   &lt;meta http-equiv='Content-Type' content='text/html;charset=...'&gt; 
+   tag. By default, NekoHTML checks this tag for a charset and changes the 
+   character encoding of the scanning reader object. Setting this feature 
+   to <code>true</code> allows the application to override this behavior.
+   <p class='see'>
+   See: <a href='#default-encoding'>http://cyberneko.org/html/properties/default-encoding</a>
+  <td align='center'>false
+ <tr>
+  <td>
+   <a name='script-strip-comment-delims'></a>
+   <span class='id'>http://cyberneko.org/html/features/scanner/script/strip-comment-delims</span>
+   <br>
+   Specifies whether the scanner should strip HTML comment delimiters
+   (i.e. "&lt;!--" and "--&gt;") from &lt;script&gt; element content.
+   <p class='see'>
+   See: <a href='#style-strip-comment-delims'>http://cyberneko.org/html/features/scanner/style/strip-comment-delims</a>
+   <br>
+   See: <a href='#script-strip-cdata-delims'>http://cyberneko.org/html/features/scanner/script/strip-cdata-delims</a>
+  <td align='center'>false
+ <tr>
+  <td>
+   <a name='script-strip-cdata-delims'></a>
+   <span class='id'>http://cyberneko.org/html/features/scanner/script/strip-cdata-delims</span>
+   <br>
+   Specifies whether the scanner should strip XHTML CDATA delimiters
+   (i.e. "&lt;![CDATA[" and "]]&gt;") from &lt;script&gt; element content.
+   <p class='see'>
+   See: <a href='#style-strip-cdata-delims'>http://cyberneko.org/html/features/scanner/style/strip-cdata-delims</a>
+   <br>
+   See: <a href='#script-strip-comment-delims'>http://cyberneko.org/html/features/scanner/script/strip-comment-delims</a>
+  <td align='center'>false
+ <tr>
+  <td>
+   <a name='style-strip-comment-delims'></a>
+   <span class='id'>http://cyberneko.org/html/features/scanner/style/strip-comment-delims</span>
+   <br>
+   Specifies whether the scanner should strip HTML comment delimiters
+   (i.e. "&lt;!--" and "--&gt;") from &lt;style&gt; element content.
+   <p class='see'>
+   See: <a href='#script-strip-comment-delims'>http://cyberneko.org/html/features/scanner/script/strip-comment-delims</a>
+   <br>
+   See: <a href='#style-strip-cdata-delims'>http://cyberneko.org/html/features/scanner/style/strip-cdata-delims</a>
+  <td align='center'>false
+ <tr>
+  <td>
+   <a name='style-strip-cdata-delims'></a>
+   <span class='id'>http://cyberneko.org/html/features/scanner/style/strip-cdata-delims</span>
+   <br>
+   Specifies whether the scanner should strip XHTML CDATA delimiters
+   (i.e. "&lt;![CDATA[" and "]]&gt;") from &lt;style&gt; element content.
+   <p class='see'>
+   See: <a href='#script-strip-cdata-delims'>http://cyberneko.org/html/features/scanner/script/strip-cdata-delims</a>
+   <br>
+   See: <a href='#style-strip-comment-delims'>http://cyberneko.org/html/features/scanner/style/strip-comment-delims</a>
+  <td align='center'>false
+ <tr>
+  <td>
+   <a name='augmentations'></a>
+   <span class='id'>http://cyberneko.org/html/features/augmentations</span>
+   <br>
+   Specifies whether infoset items that correspond to the
+   HTML events are included in the parsing pipeline. If
+   included, the augmented item will implement the
+   <code>HTMLEventInfo</code> interface found in the
+   <code>org.cyberneko.html</code> package. The augmentations 
+   can be queried in XNI by calling the <code>getItem</code> 
+   method with the key 
+   "http://cyberneko.org/html/features/augmentations".
+   Currently, the HTML event info augmentation can report event
+   character boundaries and whether the event is synthesized.
+  <td align='center'>false
+ <tr>
+  <td>
+   <a name='report-errors'></a>
+   <span class='id'>http://cyberneko.org/html/features/report-errors</span>
+   <br>
+   Specifies whether errors should be reported to the registered error
+   handler. Since HTML applications are supposed to permit the
+   liberal use (and abuse) of HTML documents, errors should
+   normally be handled silently. However, if the application wants
+   to know about errors in the parsed HTML document, this feature
+   can be set to <code>true</code>.
+  <td align='center'>false
+</table>
+
+<h2>Properties</h2>
+The NekoHTML parser supports the following properties:
+<table cellpadding='4' cellspacing='0'>
+ <tr><th>Property Id / Description<th>Values<th>Default
+ <tr>
+  <td>
+   <a name='filters'></a>
+   <span class='id'>http://cyberneko.org/html/properties/filters</span>
+   <br>
+   This property allows applications to append custom document
+   processing components to the end of the default NekoHTML parser 
+   pipeline. The value of this property must be an array of type
+   <code>org.apache.xerces.xni.parser.XMLDocumentFilter</code>
+   and no value of this array is allowed to be null. The document 
+   filters are appended to the parser pipeline in array order.
+   Please refer to the <a href='filters.html'>filters</a>
+   documentation for more information.
+  <td align='center'><nobr>XMLDocumentFilter[]</nobr>
+  <td align='center'>null
+ <tr>
+  <td>
+   <a name='default-encoding'></a>
+   <span class='id'>http://cyberneko.org/html/properties/default-encoding</span>
+   <br>
+   Sets the default encoding the NekoHTML scanner should use
+   when parsing documents. In the absence of an
+   <code>http-equiv</code> directive in the source document,
+   this setting is important because the parser does not
+   have any support to <i>auto-detect</i> the encoding.
+   <p class='see'>
+   See:&nbsp;<a href='#ignore-specified-charset'>http://cyberneko.org/html/features/scanner/ignore-specified-charset</a>
+  <td align='center'>
+   <a href='http://www.iana.org/assignments/character-sets'>IANA</a> 
+   encoding names
+  <td align='center'><nobr>"Windows-1252"</nobr>
+ <tr>
+  <td>
+   <a name='elem-names'></a>
+   <span class='id'>http://cyberneko.org/html/properties/names/elems</span>
+   <br>
+   Specifies how the NekoHTML components should modify recognized
+   element names. Names can be converted to upper-case, converted
+   to lower-case, or left as-is. The value of "match" specifies
+   that element names are to be left as-is but the end tag name will
+   be modified to match the start tag name. This is required to
+   ensure that the parser generates a well-formed XML document.
+  <td align='center'>"upper"<br>"lower"<br>"match"
+  <td align='center'>"upper"
+ <tr>
+  <td>
+   <a name='attr-names'></a>
+   <span class='id'>http://cyberneko.org/html/properties/names/attrs</span>
+   <br>
+   Specifies how the NekoHTML components should modify attribute names
+   of recognized elements. Names can be converted to upper-case, 
+   converted to lower-case, or left as-is.
+  <td align='center'>"upper"<br>"lower"<br><nobr>"no-change"</nobr>
+  <td align='center'>"lower"
+ <tr>
+  <td>
+   <a name='doctype-pubid'></a>
+   <span class='id'>http://cyberneko.org/html/properties/doctype/pubid</span>
+   <br>
+   Specifies the document type declaration public identifier if the
+   <code>http://cyberneko.org/html/features/override-doctype</code>
+   feature is set to <code>true</code>. The default value is the HTML
+   4.01 transitional public identifier, "-//W3C//DTD HTML 4.01 Transitional//EN".
+   <p class='see'>
+   See:&nbsp;<a href='#override-doctype'>http://cyberneko.org/html/features/override-doctype</a>
+  <td align='center'>String
+  <td align='center'>HTML 4.01 transitional public identifier
+ <tr>
+  <td>
+   <a name='doctype-sysid'></a>
+   <span class='id'>http://cyberneko.org/html/properties/doctype/sysid</span>
+   <br>
+   Specifies the document type declaration system identifier if the
+   <code>http://cyberneko.org/html/features/override-doctype</code>
+   feature is set to <code>true</code>. The default value is the HTML
+   4.01 transitional system identifier, "http://www.w3.org/TR/html4/loose.dtd".
+   <p class='see'>
+   See:&nbsp;<a href='#override-doctype'>http://cyberneko.org/html/features/override-doctype</a>
+  <td align='center'>String
+  <td align='center'>HTML 4.01 transitional system identifier
+ <tr>
+  <td>
+   <a name='namespaces-uri'></a>
+   <span class='id'>http://cyberneko.org/html/properties/namespaces-uri</span>
+   <br>
+   Specifies the namespace binding URI if the
+   <code>http://cyberneko.org/html/features/override-namespaces</code>
+   feature is set to <code>true</code>. The default value is the XHTML
+   1.0 namespace, "http://www.w3.org/1999/xhtml". This property does
+   <em>not</em> affect the case of element and attribute names and
+   does <em>not</em> ensure that the output of the NekoHTML parser is
+   valid according to the XHTML specification.
+   <p class='see'>
+   See:&nbsp;<a href='#override-namespaces'>http://cyberneko.org/html/features/override-namespaces</a>
+  <td align='center'>String
+  <td align='center'>XHTML 1.0 namespace URI
+</table>
+
+<div class='copyright'>
+(C) Copyright 2002-2005, Andy Clark. All rights reserved.
+</div>

Added: branches/nekohtml/upstream/0.9.5/doc/html/software.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/doc/html/software.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/doc/html/software.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,209 @@
+<title>NekoHTML | Software</title>
+<meta http-equiv='content-type' content='text/html;charset=utf-8'>
+<link rel=stylesheet type=text/css href=../style.css>
+<style type='text/css'>
+.desc { font-style: italic }
+.note { font-style: italic }
+</style>
+
+<h1>Software Projects</h1>
+<div class='navbar'>
+[<a href='../index.html'>Home</a>]
+[
+<a href='index.html'>Top</a>
+|
+<a href='usage.html'>Usage</a>
+|
+<a href='settings.html'>Settings</a>
+|
+<a href='filters.html'>Filters</a>
+|
+<a href='javadoc/index.html'>JavaDoc</a>
+|
+<a href='faq.html'>FAQ</a>
+|
+Software
+|
+<a href='changes.html'>Changes</a>
+]
+</div>
+
+<h2>Available Projects</h2>
+<p>
+The world is full of HTML pages and there's a good chance that, at some
+time, your software project will need to read and process these documents.
+Perhaps NekoHTML can help! And, depending on what you need to do, there
+may already be a solution to your problem. This page lists a number of 
+projects that use the NekoHTML parser.
+<p>
+Here are some software projects you may find useful, presented in 
+alphabetical order:
+<table cellpadding='4' cellspacing='0'>
+<tr>
+<th rowspan='2'>Celware WebRecorder
+<td class='desc'>
+Celware WebRecorder rapidly and easily exposes information on existing 
+Web sites to other IT systems via a programmatic service, using the same 
+interface as a standard Web browser. If you can see the information on 
+the Web, Celware WebRecorder can learn by example and create a service 
+to include that information in your IT environment—without making any 
+changes to the Web server!
+<tr>
+<td>
+<a href='http://www.celcorp.com/webrecorder.html'>http://www.celcorp.com/webrecorder.html</a>
+<tr>
+<th rowspan='2'>CVSGrab
+<td class='desc'>
+CVSGrab allows you to checkout files from a public CVS repository even 
+if you are behind a corporate firewall that blocks access to the pserver. 
+It provides read-only access to public CVS repositories through the
+ViewCVS web interface.
+<tr>
+<td>
+<a href='http://cvsgrab.sourceforge.net/'>http://cvsgrab.sourceforge.net/</a>
+<tr>
+<th rowspan='2'>Egothor
+<td class='desc'>
+Egothor is an open source, high-performance, full-featured text search 
+engine written entirely in Java. It can be configured as a standalone 
+engine, metasearcher, peer-to-peer HUB, and, moreover, it can be used 
+as a library for an application that needs full-text search.
+<!--
+<br><br>
+<font size='-1'>Note: NekoHTML is used in Egothor starting with RC-1.2.4 
+which is available from 16 Oct 2003.</font>
+-->
+<tr>
+<td>
+<a href='http://www.egothor.org/'>http://www.egothor.org/</a>
+<tr>
+<th rowspan='2'>HtmlUnit
+<td class='desc'>
+HtmlUnit is a Java unit testing framework for testing web based applications. 
+HtmlUnit models the returned document so that you deal with pages, forms, 
+and tables.
+<tr>
+<td>
+<a href='http://htmlunit.sourceforge.net/'>http://htmlunit.sourceforge.net/</a>
+<tr>
+<th rowspan='2'>HttpUnit
+<td class='desc'>
+HttpUnit is a free, open source Java API for accessing web sites 
+without a browser, and is ideally suited for automated unit testing of 
+web sites when combined with a Java unit test framework such as JUnit.
+<tr>
+<td>
+<a href='http://httpunit.sourceforge.net/'>http://httpunit.sourceforge.net/</a>
+<tr>
+<th rowspan='2'>Jakarta Jelly
+<td class='desc'>
+Jelly is a Java and XML based scripting and processing engine for turning 
+XML into executable code. Jelly can be used as a more flexible and powerful 
+front end to Ant such as in the Maven project, as a testing framework such 
+as JellyUnit, in an integration or workflow system such as werkflow or as 
+a page templating system inside engines like Cocoon.
+<tr>
+<td>
+<a href='http://jakarta.apache.org/commons/sandbox/jelly/'>http://jakarta.apache.org/commons/sandbox/jelly/</a>
+<tr>
+<!-- JPluck removed NekoHTML as of version 0.9. Oh well...
+<th rowspan='2'>JPluck
+<td class='desc'>
+JPluck is a Java-based toolkit for creating Plucker documents. JPluck 
+is the perfect companion to the Plucker Viewer for the Palm OS platform. 
+Together they provide a free solution for offline reading of web sites 
+on Palm handhelds.
+<tr>
+<td>
+<a href='http://jpluck.sourceforge.net/'>http://jpluck.sourceforge.net/</a>
+-->
+<tr>
+<th rowspan='2'>Jivan</th>
+<td class='desc'>
+Jivan is a new Java web presentation technology that aids in the
+programming of dynamic web pages by separating program code from
+presentation layout. It enables you to use the W3C DOM API to 
+push content into your HTML template. By directly copying 
+unchanged sections of the source documents during page 
+serialization, Jivan provides great performance to the web 
+developer.
+<tr>
+<td>
+<a href='http://www.jivan.net/'>http://www.jivan.net/</a>
+<tr>
+<th rowspan='2'>jWebUnit
+<td class='desc'>
+jWebUnit is a Java framework that facilitates creation of acceptance tests 
+for web applications. It provides a high-level API for navigating a 
+web application combined with a set of assertions to verify the application's 
+correctness. This includes navigation via links, form entry and submission, 
+validation of table contents, and other typical business web application 
+features.
+<tr>
+<td>
+<a href='http://jwebunit.sourceforge.net/'>http://jwebunit.sourceforge.net/</a>
+<tr>
+<th rowspan='2'>LingPipe
+<td class='desc'>
+LingPipe is a suite of Java tools designed to perform linguistic analysis on 
+natural language data. While fast and robust enough to be used in a commercial 
+system, LingPipe's flexibility and included source make it appropriate for 
+research use. Tools include a statistical named-entity detector, a heuristic 
+sentence boundary detector, and a heuristic within-document coreference 
+resolution engine. Named entity extraction models are included for English 
+news and English genomics domains, and can be trained for other languages 
+and genres.
+<tr>
+<td>
+<a href='http://www.alias-i.com/lingpipe/'>http://www.alias-i.com/lingpipe/</a>
+<tr>
+<th rowspan='2'>Mockrunner
+<td class='desc'>
+Mockrunner is a lightweight framework for unit testing applications 
+in the J2EE environment. It supports Struts  actions and forms, 
+servlets, filters and tag classes. Furthermore it includes a JDBC and 
+a JMS test framework. The JDBC test framework can be used standalone 
+or in conjunction with MockEJB  to test EJB based applications.
+<tr>
+<td>
+<a href='http://mockrunner.sourceforge.net/'>http://mockrunner.sourceforge.net/</a>
+<tr>
+<th rowspan='2'>Pasta
+<td>
+Pastaは、ウェブ開発のためのフレームワークです。 このフレームワークは主に、
+独自のテンプレート言語「CLS」と、バックグラウンド (リレーショナルデータベースなど) 
+とテンプレート言語を接続するための「メッセージキュー」から構成されています。
+...
+<br>
+<strong>Note:</strong>
+<span class='desc'>The documentation for Pasta is only available in 
+Japanese at this time.</span>
+<tr>
+<td>
+<a href='http://www.port4.info/pasta/'>http://www.port4.info/pasta/</a>
+<tr>
+<th rowspan='2'>X-Smiles
+<td class='desc'>
+X-Smiles is  a Java based XML browser. It is intended for both desktop 
+use and embedded network devices and to support multimedia services.
+<tr>
+<td>
+<a href='http://www.x-smiles.org/'>http://www.x-smiles.org/</a>
+</table>
+<p class='note'>
+<strong>Note:</strong> The author of NekoHTML does not officially endorse, 
+recommend, or support any of the above software &mdash; they are merely 
+presented for the benefit of the user. All questions and comments should 
+be directed to the respective project owners. Mail sent to the author of
+NekoHTML regarding these projects will be ignored.
+
+<h2>Adding Your Project</h2>
+<p>
+If your project incorporates NekoHTML in some way and you think
+it would be useful to other users, please 
+<a href='mailto:andyc at apache.org'>contact me</a> and I will add
+it to this list.
+
+<div class='copyright'>
+(C) Copyright 2002-2005, Andy Clark. All rights reserved.
+</div>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/doc/html/usage.html
===================================================================
--- branches/nekohtml/upstream/0.9.5/doc/html/usage.html	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/doc/html/usage.html	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,212 @@
+<title>NekoHTML | Usage Instructions</title>
+<link rel=stylesheet type=text/css href=../style.css>
+<style type='text/css'>
+.note {
+  margin-left: 2em; margin-right: 2em;
+  padding: .25em;
+  border: 1px solid black;
+  background-color: #fdd;
+}
+</style>
+
+<h1>Usage Instructions</h1>
+<div class='navbar'>
+[<a href='../index.html'>Home</a>]
+[
+<a href='index.html'>Top</a>
+|
+Usage
+|
+<a href='settings.html'>Settings</a>
+|
+<a href='filters.html'>Filters</a>
+|
+<a href='javadoc/index.html'>JavaDoc</a>
+|
+<a href='faq.html'>FAQ</a>
+|
+<a href='software.html'>Software</a>
+|
+<a href='changes.html'>Changes</a>
+]
+</div>
+
+<a name='transparent'></a>
+<h2>Transparent Parser Construction</h2>
+<p>
+NekoHTML is designed to be as lightweight and simple to use as
+possible. Using the Xerces 2.0.0 parser as a foundation, NekoHTML 
+can be transparent for applications that instantiate parser objects 
+with the <a href='http://java.sun.com/xml/jaxp/index.html'>Java 
+API for XML Processing</a> (JAXP). Just put the appropriate NekoHTML 
+jar files in the classpath <em>before</em> the Xerces jar files. For 
+example (on Windows): [<strong>Note:</strong> The classpath should be 
+contiguous. It is split among separate lines in this example to make 
+it easier to read.]
+<pre class='cmdline'>
+<span class='cmdline-prompt'>&gt;</span> <span class='cmdline-cmd'>java -cp nekohtml.jar;nekohtmlXni.jar;
+           xmlParserAPIs.jar;xercesImpl.jar;xercesSamples.jar 
+       sax.Counter doc/html/index.html</span>
+doc/html/index.html: 10 ms (49 elems, 21 attrs, 0 spaces, 2652 chars)
+</pre>
+<p>
+The Xerces2 implementation dynamically instantiates the default
+parser configuration to construct parser objects via the Jar
+service facility. The Jar file <code>nekohtmlXni.jar</code> 
+contains a <code>META-INF/services</code> file that is read by the
+Xerces2 implementation for this purpose. Therefore, as long as
+this Jar file appears <em>before</em> the Xerces2 Jar files,
+the NekoHTML parser configuration will be used instead of the
+Xerces2 standard configuration.
+<p>
+Using this method will cause <em>every</em> Xerces2 parser
+constructed (using standard APIs) in the same JVM to use the
+HTML parser configuration. If this is not what you want to do,
+you should create the NekoHTML parser explicitly even though 
+you parse and access the document contents using standard XML 
+APIs. The following sections describe this method in more
+detail.
+<p class='note'>
+<strong>Note:</strong>
+The nekohtmlXni.jar file is no longer built by default. This
+change was made to alleviate confusion about which Jar files
+to add to the JVM classpath. If you still want to use this
+Jar file, you must build it using the "jar-xni" Ant task.
+</p>
+
+<a name='convenience'></a>
+<h2>Convenience Parser Classes</h2>
+<p>
+If you don't want to override the default Xerces2 parser 
+instantiation mechanism, separate DOM and SAX parser classes are 
+included in the <code>org.cyberneko.html.parsers</code> package 
+for convenience. Both parsers use the <code>HTMLConfiguration</code> 
+class to be able to parse HTML documents. In addition, the DOM 
+parser uses the Xerces HTML DOM implementation so that the
+returned documents are of type 
+<code>org.w3c.dom.html.HTMLDocument</code>. The following example 
+shows how to use the NekoHTML <code>DOMParser</code> directly:
+<pre class='code'>
+<span class='code-keyword'>package</span> sample<span class='code-punct'>;</span>
+
+<span class='code-keyword'>import</span> org.cyberneko.html.parsers.DOMParser<span class='code-punct'>;</span>
+<span class='code-keyword'>import</span> org.w3c.dom.Document<span class='code-punct'>;</span>
+<span class='code-keyword'>import</span> org.w3c.dom.Node<span class='code-punct'>;</span>
+
+<span class='code-keyword'>public class</span> TestHTMLDOM <span class='code-punct'>{</span>
+    <span class='code-keyword'>public static void</span> <span class='code-func'>main</span><span class='code-punct'>(</span>String<span class='code-punct'>[]</span> argv<span class='code-punct'>)</span> <span class='code-keyword'>throws</span> Exception <span class='code-punct'>{</span>
+        DOMParser parser <span class='code-punct'>=</span> <span class='code-keyword'>new</span> DOMParser<span class='code-punct'>();</span>
+        <span class='code-keyword'>for</span> <span class='code-punct'>(</span><span class='code-keyword'>int</span> i <span class='code-punct'>=</span> 0<span class='code-punct'>;</span> i <span class='code-punct'>&lt;</span> argv<span class='code-punct'>.</span>length<span class='code-punct'>;</span> i<span class='code-punct'>++) {</span>
+            parser<span class='code-punct'>.</span><span class='code-func'>parse</span><span class='code-punct'>(</span>argv<span class='code-punct'>[</span>i<span class='code-punct'>]);</span>
+            <span class='code-func'>print</span><span class='code-punct'>(</span>parser<span class='code-punct'>.</span><span class='code-func'>getDocument</span><span class='code-punct'>(),</span> <span class='code-string'>""</span><span class='code-punct'>);</span>
+        <span class='code-punct'>}</span>
+    <span class='code-punct'>}</span>
+    <span class='code-keyword'>public static void</span> <span class='code-func'>print</span><span class='code-punct'>(</span>Node node<span class='code-punct'>,</span> String indent<span class='code-punct'>) {</span>
+        System<span class='code-punct'>.</span>out<span class='code-punct'>.</span><span class='code-func'>println</span><span class='code-punct'>(</span>indent<span class='code-punct'>+</span>node<span class='code-punct'>.</span><span class='code-func'>getClass</span><span class='code-punct'>().</span><span class='code-func'>getName</span><span class='code-punct'>());</span>
+        Node child <span class='code-punct'>=</span> node<span class='code-punct'>.</span><span class='code-func'>getFirstChild</span><span class='code-punct'>();</span>
+        <span class='code-keyword'>while</span> <span class='code-punct'>(</span>child <span class='code-punct'>!=</span> <span class='code-keyword'>null</span><span class='code-punct'>) {</span>
+            print<span class='code-punct'>(</span>child<span class='code-punct'>,</span> indent<span class='code-punct'>+</span><span class='code-string'>" "</span><span class='code-punct'>);</span>
+            child <span class='code-punct'>=</span> child<span class='code-punct'>.</span><span class='code-func'>getNextSibling</span><span class='code-punct'>();</span>
+        <span class='code-punct'>}
+    }</span>
+<span class='code-punct'>}</span>
+</pre>
+<p>
+Running this program produces the following output:
+[<strong>Note:</strong> The classpath should be 
+contiguous. It is split among separate lines in this example to make 
+it easier to read.]
+<pre class='cmdline'>
+<span class='cmdline-prompt'>&gt;</span> <span class='cmdline-cmd'>java -cp nekohtml.jar;nekohtmlSamples.jar;
+           xmlParserAPIs.jar;xercesImpl.jar
+       sample.TestHTMLDOM data/html/test01.html</span>
+org.apache.html.dom.HTMLDocumentImpl
+ org.apache.html.dom.HTMLHtmlElementImpl
+  org.apache.html.dom.HTMLBodyElementImpl
+   org.apache.xerces.dom.TextImpl
+</pre>
+<p>
+This source code is included in the <code>src/html/sample/</code> directory.
+<p>
+In addition to the provided DOM and SAX parser classes, NekoHTML
+also provides a DOM fragment parser class. The <code>DOMFragmentParser</code>
+class, found in the <code>org.cyberneko.html.parsers</code>
+package, can be used to parse fragments of HTML documents 
+into their corresponding DOM nodes. The following example shows 
+how to use the NekoHTML <code>DOMFragmentParser</code> directly:
+<pre class='code'>
+<span class='code-keyword'>package</span> sample<span class='code-punct'>;</span>
+
+<span class='code-keyword'>import</span> org.cyberneko.html.parsers.DOMFragmentParser<span class='code-punct'>;</span>
+<span class='code-keyword'>import</span> org.apache.html.dom.HTMLDocumentImpl<span class='code-punct'>;</span>
+<span class='code-keyword'>import</span> org.w3c.dom.Document<span class='code-punct'>;</span>
+<span class='code-keyword'>import</span> org.w3c.dom.DocumentFragment<span class='code-punct'>;</span>
+<span class='code-keyword'>import</span> org.w3c.dom.Node<span class='code-punct'>;</span>
+<span class='code-keyword'>import</span> org.w3c.dom.html.HTMLDocument<span class='code-punct'>;</span>
+
+<span class='code-keyword'>public class</span> TestHTMLDOMFragment <span class='code-punct'>{</span>
+    <span class='code-keyword'>public static void</span> <span class='code-func'>main</span><span class='code-punct'>(</span>String<span class='code-punct'>[]</span> argv<span class='code-punct'>)</span> <span class='code-keyword'>throws</span> Exception <span class='code-punct'>{</span>
+        DOMFragmentParser parser <span class='code-punct'>=</span> <span class='code-keyword'>new</span> DOMFragmentParser<span class='code-punct'>();</span>
+        HTMLDocument document <span class='code-punct'>=</span> <span class='code-keyword'>new</span> HTMLDocumentImpl<span class='code-punct'>();</span>
+        <span class='code-keyword'>for</span> <span class='code-punct'>(</span><span class='code-keyword'>int</span> i <span class='code-punct'>=</span> 0<span class='code-punct'>;</span> i <span class='code-punct'>&lt;</span> argv<span class='code-punct'>.</span>length<span class='code-punct'>;</span> i<span class='code-punct'>++) {</span>
+            DocumentFragment fragment <span class='code-punct'>=</span> document<span class='code-punct'>.</span><span class='code-func'>createDocumentFragment</span><span class='code-punct'>();</span>
+            parser<span class='code-punct'>.</span><span class='code-func'>parse</span><span class='code-punct'>(</span>argv<span class='code-punct'>[</span>i<span class='code-punct'>],</span> fragment<span class='code-punct'>);</span>
+            <span class='code-func'>print</span><span class='code-punct'>(</span>fragment<span class='code-punct'>,</span> <span class='code-string'>""</span><span class='code-punct'>);</span>
+        <span class='code-punct'>}</span>
+    <span class='code-punct'>}</span>
+    <span class='code-keyword'>public static void</span> <span class='code-func'>print</span><span class='code-punct'>(</span>Node node<span class='code-punct'>,</span> String indent<span class='code-punct'>) {</span>
+        System<span class='code-punct'>.</span>out<span class='code-punct'>.</span><span class='code-func'>println</span><span class='code-punct'>(</span>indent<span class='code-punct'>+</span>node<span class='code-punct'>.</span><span class='code-func'>getClass</span><span class='code-punct'>().</span><span class='code-func'>getName</span><span class='code-punct'>());</span>
+        Node child <span class='code-punct'>=</span> node<span class='code-punct'>.</span><span class='code-func'>getFirstChild</span><span class='code-punct'>();</span>
+        <span class='code-keyword'>while</span> <span class='code-punct'>(</span>child <span class='code-punct'>!=</span> <span class='code-keyword'>null</span><span class='code-punct'>) {</span>
+            <span class='code-func'>print</span><span class='code-punct'>(</span>child<span class='code-punct'>,</span> indent<span class='code-punct'>+</span><span class='code-string'>" "</span><span class='code-punct'>);</span>
+            child <span class='code-punct'>=</span> child<span class='code-punct'>.</span><span class='code-func'>getNextSibling</span><span class='code-punct'>();</span>
+        <span class='code-punct'>}
+    }</span>
+<span class='code-punct'>}</span>
+</pre>
+<p>
+This source code is included in the <code>src/html/sample/</code> 
+directory.
+<p>
+Notice that the application parses a document fragment a little
+bit differently than parsing a complete document. Instead of 
+initiating a parse by passing in a system identifier (or an
+input source), parsing an HTML document fragment requires the
+application to pass a DOM <code>DocumentFragment</code> object
+to the <code>parse</code> method. The DOM fragment parser will
+use the owner document of the <code>DocumentFragment</code> as 
+the factory for parsed nodes. These nodes are then appended in
+document order to the document fragment object.
+<p>
+<strong>Note:</strong>
+In order for HTML DOM objects to be created, the document fragment 
+object passed to the <code>parse</code> method should be created from 
+a DOM document object of type <code>org.w3c.dom.html.HTMLDocument</code>. 
+
+<a name='custom'></a>
+<h2>Custom Parser Classes</h2>
+<p>
+Alternatively, you can construct any XNI-based parser class
+using the <code>HTMLConfiguration</code> parser configuration class
+found in the <code>org.cyberneko.html</code> package. The following
+example shows how to extend the abstract SAX parser provided with
+the Xerces2 implementation by passing the NekoHTML parser 
+configuration to the base class in the constructor.
+<pre class='code'>
+<span class='code-keyword'>package</span> sample<span class='code-punct'>;</span>
+
+<span class='code-keyword'>import</span> org.apache.xerces.parsers.AbstractSAXParser<span class='code-punct'>;</span>
+<span class='code-keyword'>import</span> org.cyberneko.html.HTMLConfiguration<span class='code-punct'>;</span>
+
+<span class='code-keyword'>public class</span> HTMLSAXParser <span class='code-keyword'>extends</span> AbstractSAXParser <span class='code-punct'>{</span>
+    <span class='code-keyword'>public</span> HTMLSAXParser<span class='code-punct'>() {</span>
+        <span class='code-keyword'>super</span><span class='code-punct'>(</span><span class='code-keyword'>new</span> HTMLConfiguration<span class='code-punct'>());</span>
+    <span class='code-punct'>}</span>
+<span class='code-punct'>}</span>
+</pre>
+<p>
+This source code is included in the <code>src/html/sample/</code> directory.
+
+<div class='copyright'>
+(C) Copyright 2002-2005, Andy Clark. All rights reserved.
+</div>
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/doc/style.css
===================================================================
--- branches/nekohtml/upstream/0.9.5/doc/style.css	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/doc/style.css	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,74 @@
+BODY { background: #EEEEEE;
+       margin: 0; padding: 10;  }
+H1 { text-align: center; background: #DDDDFF;
+     border-style: solid; border-color: black; border-width: 0; 
+     border-top-width: 1; border-bottom-width: 1;
+     margin: -10; padding: 5 }
+H2 H3 H4 H5 H6 { }
+P { margin: 15; text-align: justify }
+UL { margin-right: 15 }
+OL { margin-right: 15 }
+TD DL { margin: 0 }
+DL { margin: 15 }
+DT { font-weight: bold }
+DD { font-style: italic }
+OL UL { margin: 15 }
+HR { background: black; color: black; height: 1;
+     border: none;
+     margin: 0; padding: 0 }
+
+TABLE { border-style: solid; border-color: black; border-width: 2;
+        border-right-width: 1; border-bottom-width: 1;
+        margin: 15; margin-left: 30; margin-right: 30 }
+TR { background: white; vertical-align: top }
+TH { border-style: solid; border-color: black; border-width: 0;
+     border-bottom-width: 1; border-right-width: 1;
+     background: #DDDDFF }
+TD { border-style: solid; border-color: black; border-width: 0;
+     border-right-width: 1; border-bottom-width: 1 }
+
+PRE.cmdline { background: black; color: silver;
+              border-style: solid; border-color: white; border-width: 1;
+              margin: 15; margin-left: 30; margin-right: 30; padding: 5 }
+.cmdline-prompt { color: magenta }
+.cmdline-cmd { color: yellow }
+
+PRE.code { background: darkblue; color: #00FF00;
+           border-style: solid; border-color: black; border-width: 1; 
+           margin: 15; margin-left: 30; margin-right: 30; padding: 5 }
+.code-keyword { color: yellow }
+.code-punct { color: white }
+.code-number { color: silver }
+.code-string { color: cyan }
+.code-comment { color: silver }
+.code-func { font-weight: bold }
+
+PRE.document,PRE.xml { background: white; color: black;
+                       border-style: solid; border-color: black; border-width: 1;
+                       margin: 15; margin-left: 30; margin-right: 30;
+                       padding: 5 }
+.xml-markup { color: blue }
+.xml-comment { color: silver }
+.xml-ename { color: maroon }
+.xml-aname { color: maroon }
+.xml-avalue { font-weight: bold }
+.xml-aname-xmlns { color: red }
+.xml-avalue-xmlns { color: red; font-weight: bold }
+.xml-chars { font-weight: bold }
+
+.id { font-family: sans-serif; 
+      text-decoration: underline;
+      white-space: nowrap }
+
+.navbar { margin: 10; text-align: center }
+DIV.navbar A { text-decoration: none }
+
+.new { color: red; font-weight: bold }
+.updated { color: orange; font-weight: bold }
+
+.copyright { text-align: center; 
+             background: #DDDDFF;
+             border-style: solid; border-color: black; border-width: 0;
+             border-top-width: 1; border-bottom-width: 1;
+             margin: -10; margin-top: 10 }
+

Added: branches/nekohtml/upstream/0.9.5/src/html/META-INF/services/org.apache.xerces.xni.parser.XMLParserConfiguration
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/META-INF/services/org.apache.xerces.xni.parser.XMLParserConfiguration	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/META-INF/services/org.apache.xerces.xni.parser.XMLParserConfiguration	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1 @@
+org.cyberneko.html.HTMLConfiguration

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLAugmentations.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLAugmentations.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLAugmentations.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,108 @@
+/* 
+ * (C) Copyright 2004-2005, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ */
+
+package org.cyberneko.html;
+
+import org.apache.xerces.xni.Augmentations;
+
+import java.util.Enumeration;
+import java.util.Hashtable;
+
+/**
+ * This class is here to overcome the XNI changes to the 
+ * <code>Augmentations</code> interface. In early versions of XNI, the
+ * augmentations interface contained a <code>clear()</code> method to
+ * remove all of the items from the augmentations instance. A later
+ * version of XNI changed this method to <code>removeAllItems()</code>.
+ * Therefore, this class implements the augmentations interface and
+ * explicitly provides both of these methods.
+ * <p>
+ * Items are kept in a <code>java.util.Hashtable</code>, so neither a
+ * <code>null</code> key nor a <code>null</code> item can be stored;
+ * the underlying table throws <code>NullPointerException</code> for
+ * either.
+ * <p>
+ * <strong>Note:</strong>
+ * This code is inspired by performance enhancements submitted by
+ * Marc-André Morissette.
+ * 
+ * @author Andy Clark
+ */
+public class HTMLAugmentations
+    implements Augmentations {
+
+    //
+    // Data
+    //
+
+    /** Augmentation items, keyed by the identifier passed to putItem. */
+    protected Hashtable fItems = new Hashtable();
+
+    //
+    // Public methods
+    //
+
+    // since Xerces 2.3.0
+
+    /**
+     * Removes all of the elements in this augmentations object.
+     * Does the same work as <code>clear()</code>; both simply empty
+     * the backing table.
+     */
+    public void removeAllItems() {
+        fItems.clear();
+    } // removeAllItems()
+
+    // from Xerces 2.0.0 (beta4) until 2.3.0
+
+    /**
+     * Removes all of the elements in this augmentations object.
+     * Does the same work as <code>removeAllItems()</code>; both simply
+     * empty the backing table.
+     */
+    public void clear() {
+        fItems.clear();
+    } // clear()
+
+    //
+    // Augmentations methods
+    //
+
+    /**
+     * Add additional information identified by a key to the Augmentations 
+     * structure.
+     * 
+     * @param key    Identifier, can't be <code>null</code>
+     * @param item   Additional information, can't be <code>null</code>
+     *               because the items are stored in a Hashtable
+     *
+     * @return The previous value of the specified key in the Augmentations 
+     *         structure, or <code>null</code> if it did not have one.
+     */
+    public Object putItem(String key, Object item) {
+        return fItems.put(key, item);
+    } // putItem(String, Object):Object
+
+
+    /**
+     * Get information identified by a key from the Augmentations structure.
+     * 
+     * @param key    Identifier, can't be <code>null</code>
+     *
+     * @return The value to which the key is mapped in the Augmentations 
+     *         structure; <code>null</code> if the key is not mapped to any 
+     *         value.
+     */
+    public Object getItem(String key) {
+        return fItems.get(key);
+    } // getItem(String):Object
+    
+    /**
+     * Remove additional info from the Augmentations structure
+     * 
+     * @param key    Identifier, can't be <code>null</code>
+     * @return The previous value of the specified key in the Augmentations 
+     *         structure, or <code>null</code> if it did not have one.
+     */
+    public Object removeItem(String key) {
+        return fItems.remove(key);
+    } // removeItem(String):Object
+
+    /**
+     * Returns an enumeration of the keys in the Augmentations structure.
+     *
+     * @return The keys currently held by the backing table; the
+     *         enumeration has no elements when no items are stored.
+     */
+    public Enumeration keys() {
+        return fItems.keys();
+    } // keys():Enumeration
+
+} // class HTMLAugmentations

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLComponent.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLComponent.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLComponent.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,42 @@
+/* 
+ * (C) Copyright 2002-2005, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ */
+
+package org.cyberneko.html;
+
+import org.apache.xerces.xni.parser.XMLComponent;
+
+/**
+ * This interface extends the XNI <code>XMLComponent</code> interface
+ * to add methods that allow the preferred default values for features
+ * and properties to be queried.
+ *
+ * @author Andy Clark
+ *
+ * @version $Id: HTMLComponent.java,v 1.4 2005/02/14 03:56:54 andyc Exp $
+ */
+public interface HTMLComponent 
+    extends XMLComponent {
+
+    //
+    // HTMLComponent methods
+    //
+
+    /** 
+     * Returns the default state for a feature, or null if this
+     * component does not want to report a default value for this
+     * feature.
+     *
+     * @param featureId The identifier of the feature being queried.
+     *
+     * @return The preferred default state, or <code>null</code> when
+     *         the component reports no default for the feature.
+     */
+    public Boolean getFeatureDefault(String featureId);
+
+    /** 
+     * Returns the default state for a property, or null if this
+     * component does not want to report a default value for this
+     * property. 
+     *
+     * @param propertyId The identifier of the property being queried.
+     *
+     * @return The preferred default value, or <code>null</code> when
+     *         the component reports no default for the property.
+     */
+    public Object getPropertyDefault(String propertyId);
+
+} // interface HTMLComponent

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLConfiguration.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLConfiguration.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLConfiguration.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,696 @@
+/* 
+ * (C) Copyright 2002-2005, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ */
+
+package org.cyberneko.html;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.IOException;
+import java.lang.reflect.Method;
+import java.lang.reflect.InvocationTargetException;
+import java.text.MessageFormat;
+import java.util.Locale;
+import java.util.MissingResourceException;
+import java.util.Properties;
+import java.util.ResourceBundle;
+import java.util.Vector;
+                                                                               
+import org.cyberneko.html.filters.NamespaceBinder;
+
+import org.apache.xerces.util.DefaultErrorHandler;
+import org.apache.xerces.util.ParserConfigurationSettings;
+import org.apache.xerces.xni.XMLDocumentHandler;
+import org.apache.xerces.xni.XMLDTDHandler;
+import org.apache.xerces.xni.XMLDTDContentModelHandler;
+import org.apache.xerces.xni.XNIException;
+import org.apache.xerces.xni.parser.XMLConfigurationException;
+import org.apache.xerces.xni.parser.XMLDocumentFilter;
+import org.apache.xerces.xni.parser.XMLDocumentSource;
+import org.apache.xerces.xni.parser.XMLEntityResolver;
+import org.apache.xerces.xni.parser.XMLErrorHandler;
+import org.apache.xerces.xni.parser.XMLInputSource;
+import org.apache.xerces.xni.parser.XMLParseException;
+import org.apache.xerces.xni.parser.XMLPullParserConfiguration;
+                                      
+/**
+ * An XNI-based parser configuration that can be used to parse HTML 
+ * documents. This configuration can be used directly in order to
+ * parse HTML documents or can be used in conjunction with any XNI
+ * based tools, such as the Xerces2 implementation.
+ * <p>
+ * This configuration recognizes the following features:
+ * <ul>
+ * <li>http://cyberneko.org/html/features/augmentations
+ * <li>http://cyberneko.org/html/features/report-errors
+ * <li>http://cyberneko.org/html/features/report-errors/simple
+ * <li>http://cyberneko.org/html/features/balance-tags
+ * <li><i>and</i>
+ * <li>the features supported by the scanner and tag balancer components.
+ * </ul>
+ * <p>
+ * This configuration recognizes the following properties:
+ * <ul>
+ * <li>http://cyberneko.org/html/properties/names/elems
+ * <li>http://cyberneko.org/html/properties/names/attrs
+ * <li>http://cyberneko.org/html/properties/filters
+ * <li>http://cyberneko.org/html/properties/error-reporter
+ * <li><i>and</i>
+ * <li>the properties supported by the scanner and tag balancer.
+ * </ul>
+ * <p>
+ * For complete usage information, refer to the documentation.
+ *
+ * @see HTMLScanner
+ * @see HTMLTagBalancer
+ * @see HTMLErrorReporter
+ *
+ * @author Andy Clark
+ *
+ * @version $Id: HTMLConfiguration.java,v 1.9 2005/02/14 03:56:54 andyc Exp $
+ */
+public class HTMLConfiguration 
+    extends ParserConfigurationSettings
+    implements XMLPullParserConfiguration {
+
+    //
+    // Constants
+    //
+
+    // features
+
+    /** Feature: namespaces; on by default, puts the namespace binder in the pipeline. */
+    protected static final String NAMESPACES = "http://xml.org/sax/features/namespaces";
+
+    /** Feature: include infoset augmentations; off by default. */
+    protected static final String AUGMENTATIONS = "http://cyberneko.org/html/features/augmentations";
+
+    /** Feature: report errors; off by default. */
+    protected static final String REPORT_ERRORS = "http://cyberneko.org/html/features/report-errors";
+
+    /** Feature: format messages as domain#key plus arguments, skipping the bundle; off by default. */
+    protected static final String SIMPLE_ERROR_FORMAT = "http://cyberneko.org/html/features/report-errors/simple";
+
+    /** Feature: balance tags; on by default, puts the tag balancer in the pipeline. */
+    protected static final String BALANCE_TAGS = "http://cyberneko.org/html/features/balance-tags";
+
+    // properties
+
+    /** Property: modify HTML element names: { "upper", "lower", "default" }; initially "upper". */
+    protected static final String NAMES_ELEMS = "http://cyberneko.org/html/properties/names/elems";
+
+    /** Property: modify HTML attribute names: { "upper", "lower", "default" }; initially "lower". */
+    protected static final String NAMES_ATTRS = "http://cyberneko.org/html/properties/names/attrs";
+    
+    /** Property: array of XMLDocumentFilter objects appended to the pipeline on reset. */
+    protected static final String FILTERS = "http://cyberneko.org/html/properties/filters";
+
+    /** Property: error reporter; initially this configuration's own ErrorReporter. */
+    protected static final String ERROR_REPORTER = "http://cyberneko.org/html/properties/error-reporter";
+
+    // other
+
+    /** Domain passed to the error handler with every reported warning and error. */
+    protected static final String ERROR_DOMAIN = "http://cyberneko.org/html";
+
+    // private
+
+    /** Parameter types used to look up setDocumentSource reflectively on filters. */
+    private static final Class[] DOCSOURCE = { XMLDocumentSource.class };
+
+    //
+    // Data
+    //
+
+    // handlers
+
+    /** Document handler attached to the end of the pipeline on reset. */
+    protected XMLDocumentHandler fDocumentHandler;
+
+    /** DTD handler; stored and returned by the accessors only. */
+    protected XMLDTDHandler fDTDHandler;
+
+    /** DTD content model handler; stored and returned by the accessors only. */
+    protected XMLDTDContentModelHandler fDTDContentModelHandler;
+
+    /** Error handler that receives warnings and errors from the error reporter. */
+    protected XMLErrorHandler fErrorHandler = new DefaultErrorHandler();
+
+    // other settings
+
+    /** Entity resolver; stored and returned by the accessors only. */
+    protected XMLEntityResolver fEntityResolver;
+
+    /** Locale used to select the error message bundle. */
+    protected Locale fLocale = Locale.getDefault();
+
+    // state
+
+    /** 
+     * True if the input source supplied neither a byte nor a character
+     * stream; the parser opens the stream itself and must close it manually.
+     */
+    protected boolean fCloseStream;
+
+    // components
+
+    /** Registered HTMLComponent instances, in registration order. */
+    protected Vector fHTMLComponents = new Vector(2);
+
+    // pipeline
+
+    /** Document scanner; always the first stage of the pipeline. */
+    protected HTMLScanner fDocumentScanner = new HTMLScanner();
+
+    /** HTML tag balancer; in the pipeline only when BALANCE_TAGS is on. */
+    protected HTMLTagBalancer fTagBalancer = new HTMLTagBalancer();
+
+    /** Namespace binder; in the pipeline only when NAMESPACES is on. */
+    protected NamespaceBinder fNamespaceBinder = new NamespaceBinder();
+
+    // other components
+
+    /** Error reporter published under the ERROR_REPORTER property. */
+    protected HTMLErrorReporter fErrorReporter = new ErrorReporter();
+
+    // HACK: works around Xerces 2.0.x problems
+
+    /** Parser version is Xerces 2.0.0 (set by the static initializer). */
+    protected static boolean XERCES_2_0_0 = false;
+
+    /** Parser version is Xerces 2.0.1 (set by the static initializer). */
+    protected static boolean XERCES_2_0_1 = false;
+
+    /** Parser version is XML4J 4.0.x (set by the static initializer). */
+    protected static boolean XML4J_4_0_x = false;
+
+    //
+    // Static initializer
+    //
+
+    static {
+        try {
+            String VERSION = "org.apache.xerces.impl.Version";
+            Object version = ObjectFactory.createObject(VERSION, VERSION);
+            java.lang.reflect.Field field = version.getClass().getField("fVersion");
+            String versionStr = String.valueOf(field.get(version));
+            XERCES_2_0_0 = versionStr.equals("Xerces-J 2.0.0");
+            XERCES_2_0_1 = versionStr.equals("Xerces-J 2.0.1");
+            XML4J_4_0_x = versionStr.startsWith("XML4J 4.0.");
+        }
+        catch (Throwable e) {
+            // ignore
+        }
+    } // <clinit>()
+
+    //
+    // Constructors
+    //
+
+    /** Default constructor. */
+    public HTMLConfiguration() {
+
+        // add components
+        addComponent(fDocumentScanner);
+        addComponent(fTagBalancer);
+        addComponent(fNamespaceBinder);
+
+        //
+        // features
+        //
+
+        // recognized features
+        String VALIDATION = "http://xml.org/sax/features/validation";
+        String[] recognizedFeatures = {
+            AUGMENTATIONS,
+            NAMESPACES,
+            VALIDATION,
+            REPORT_ERRORS,
+            SIMPLE_ERROR_FORMAT,
+            BALANCE_TAGS,
+        };
+        addRecognizedFeatures(recognizedFeatures);
+        setFeature(AUGMENTATIONS, false);
+        setFeature(NAMESPACES, true);
+        setFeature(VALIDATION, false);
+        setFeature(REPORT_ERRORS, false);
+        setFeature(SIMPLE_ERROR_FORMAT, false);
+        setFeature(BALANCE_TAGS, true);
+
+        // HACK: Xerces 2.0.0
+        if (XERCES_2_0_0) {
+            // NOTE: These features should not be required but it causes a
+            //       problem if they're not there. This will be fixed in 
+            //       subsequent releases of Xerces.
+            recognizedFeatures = new String[] {
+                "http://apache.org/xml/features/scanner/notify-builtin-refs",
+            };
+            addRecognizedFeatures(recognizedFeatures);
+        }
+        
+        // HACK: Xerces 2.0.1
+        if (XERCES_2_0_0 || XERCES_2_0_1 || XML4J_4_0_x) {
+            // NOTE: These features should not be required but it causes a
+            //       problem if they're not there. This should be fixed in 
+            //       subsequent releases of Xerces.
+            recognizedFeatures = new String[] {
+                "http://apache.org/xml/features/validation/schema/normalized-value",
+                "http://apache.org/xml/features/scanner/notify-char-refs",
+            };
+            addRecognizedFeatures(recognizedFeatures);
+        }
+        
+        //
+        // properties
+        //
+
+        // recognized properties
+        String[] recognizedProperties = {
+            NAMES_ELEMS,
+            NAMES_ATTRS,
+            FILTERS,
+            ERROR_REPORTER,
+        };
+        addRecognizedProperties(recognizedProperties);
+        setProperty(NAMES_ELEMS, "upper");
+        setProperty(NAMES_ATTRS, "lower");
+        setProperty(ERROR_REPORTER, fErrorReporter);
+        
+        // HACK: Xerces 2.0.0
+        if (XERCES_2_0_0) {
+            // NOTE: This is a hack to get around a problem in the Xerces 2.0.0
+            //       AbstractSAXParser. If it uses a parser configuration that
+            //       does not have a SymbolTable, then it will remove *all*
+            //       attributes. This will be fixed in subsequent releases of 
+            //       Xerces.
+            String SYMBOL_TABLE = "http://apache.org/xml/properties/internal/symbol-table";
+            recognizedProperties = new String[] {
+                SYMBOL_TABLE,
+            };
+            addRecognizedProperties(recognizedProperties);
+            Object symbolTable = ObjectFactory.createObject("org.apache.xerces.util.SymbolTable",
+                                                            "org.apache.xerces.util.SymbolTable");
+            setProperty(SYMBOL_TABLE, symbolTable);
+        }
+
+    } // <init>()
+
+    //
+    // Public methods
+    //
+
+    /** 
+     * Pushes an input source onto the document scanner's entity stack.
+     * This enables the scanner to transparently scan new content (e.g. 
+     * the output written by an embedded script). At the end of the
+     * current entity, the scanner resumes where it left off at the
+     * time this entity source was pushed.
+     * <p>
+     * <strong>Hint:</strong>
+     * To use this feature to insert the output of &lt;SCRIPT&gt;
+     * tags, remember to buffer the <em>entire</em> output of the
+     * processed instructions before pushing a new input source.
+     * Otherwise, events may appear out of sequence.
+     *
+     * @param inputSource The new input source to start scanning.
+     */
+    public void pushInputSource(XMLInputSource inputSource) {
+        fDocumentScanner.pushInputSource(inputSource);
+    } // pushInputSource(XMLInputSource)
+
+    // XMLParserConfiguration methods
+    //
+
+    /** Stores the feature state, then forwards it to every registered component. */
+    public void setFeature(String featureId, boolean state)
+        throws XMLConfigurationException {
+        super.setFeature(featureId, state);
+        // the component count is read once, before any component is notified
+        final int componentCount = fHTMLComponents.size();
+        for (int index = 0; index < componentCount; index++) {
+            ((HTMLComponent)fHTMLComponents.elementAt(index)).setFeature(featureId, state);
+        }
+    } // setFeature(String,boolean)
+
+    /** Stores the property, registers any component filters, then notifies all components. */
+    public void setProperty(String propertyId, Object value)
+        throws XMLConfigurationException {
+        super.setProperty(propertyId, value);
+
+        // filters that are themselves HTML components join the component list
+        XMLDocumentFilter[] newFilters = null;
+        if (propertyId.equals(FILTERS)) {
+            newFilters = (XMLDocumentFilter[])getProperty(FILTERS);
+        }
+        for (int f = 0; newFilters != null && f < newFilters.length; f++) {
+            XMLDocumentFilter candidate = newFilters[f];
+            if (candidate instanceof HTMLComponent) {
+                addComponent((HTMLComponent)candidate);
+            }
+        }
+
+        // forward the value to every component, including any just added
+        final int componentCount = fHTMLComponents.size();
+        for (int c = 0; c < componentCount; c++) {
+            ((HTMLComponent)fHTMLComponents.elementAt(c)).setProperty(propertyId, value);
+        }
+    } // setProperty(String,Object)
+
+    /** Sets the document handler; reset() attaches it to the end of the pipeline. */
+    public void setDocumentHandler(XMLDocumentHandler handler) {
+        fDocumentHandler = handler;
+    } // setDocumentHandler(XMLDocumentHandler)
+
+    /** Returns the document handler last set, or null if none was set. */
+    public XMLDocumentHandler getDocumentHandler() {
+        return fDocumentHandler;
+    } // getDocumentHandler():XMLDocumentHandler
+
+    /** Stores the DTD handler; this class only keeps the reference. */
+    public void setDTDHandler(XMLDTDHandler handler) {
+        fDTDHandler = handler;
+    } // setDTDHandler(XMLDTDHandler)
+
+    /** Returns the DTD handler last set, or null if none was set. */
+    public XMLDTDHandler getDTDHandler() {
+        return fDTDHandler;
+    } // getDTDHandler():XMLDTDHandler
+
+    /** Stores the DTD content model handler; this class only keeps the reference. */
+    public void setDTDContentModelHandler(XMLDTDContentModelHandler handler) {
+        fDTDContentModelHandler = handler;
+    } // setDTDContentModelHandler(XMLDTDContentModelHandler)
+
+    /** Returns the DTD content model handler last set, or null if none was set. */
+    public XMLDTDContentModelHandler getDTDContentModelHandler() {
+        return fDTDContentModelHandler;
+    } // getDTDContentModelHandler():XMLDTDContentModelHandler
+
+    /** Sets the handler that receives reported warnings and errors; null disables reporting. */
+    public void setErrorHandler(XMLErrorHandler handler) {
+        fErrorHandler = handler;
+    } // setErrorHandler(XMLErrorHandler)
+
+    /** Returns the error handler; a DefaultErrorHandler unless it was replaced. */
+    public XMLErrorHandler getErrorHandler() {
+        return fErrorHandler;
+    } // getErrorHandler():XMLErrorHandler
+
+    /** Stores the entity resolver; this class only keeps the reference. */
+    public void setEntityResolver(XMLEntityResolver resolver) {
+        fEntityResolver = resolver;
+    } // setEntityResolver(XMLEntityResolver)
+
+    /** Returns the entity resolver last set, or null if none was set. */
+    public XMLEntityResolver getEntityResolver() {
+        return fEntityResolver;
+    } // getEntityResolver():XMLEntityResolver
+
+    /** Sets the locale used to look up error messages; null selects the default locale. */
+    public void setLocale(Locale locale) {
+        if (locale == null) {
+            locale = Locale.getDefault();
+        }
+        fLocale = locale;
+    } // setLocale(Locale)
+
+    /** Returns the locale used to look up error messages. */
+    public Locale getLocale() {
+        return fLocale;
+    } // getLocale():Locale
+
+    /** Parses a whole document: sets the input source, then scans it to completion. */
+    public void parse(XMLInputSource source) throws XNIException, IOException {
+        setInputSource(source);
+        parse(true);
+    } // parse(XMLInputSource)
+
+    //
+    // XMLPullParserConfiguration methods
+    //
+
+    // parsing
+
+    /**
+     * Resets the configuration and hands the scanner the document to parse.
+     *
+     * @param inputSource The document's input source.
+     *
+     * @exception XMLConfigurationException Thrown if there is a 
+     *                        configuration error when resetting the parser.
+     * @exception IOException Thrown on I/O error.
+     *
+     * @see #parse(boolean)
+     */
+    public void setInputSource(XMLInputSource inputSource)
+        throws XMLConfigurationException, IOException {
+        reset();
+        // no caller-supplied stream: the parser opens one and must close it
+        fCloseStream = !(inputSource.getByteStream() != null
+                         || inputSource.getCharacterStream() != null);
+        fDocumentScanner.setInputSource(inputSource);
+    } // setInputSource(XMLInputSource)
+
+    /**
+     * Advances the pull parse and cleans up once parsing ends or fails.
+     *
+     * @param complete True if the pull parser should parse the
+     *                 remaining document completely.
+     *
+     * @return True if there is more document to parse.
+     *
+     * @exception XNIException Any XNI exception, possibly wrapping 
+     *                         another exception.
+     * @exception IOException  An IO exception raised while reading the
+     *                         underlying byte or character stream.
+     *
+     * @see #setInputSource
+     */
+    public boolean parse(boolean complete) throws XNIException, IOException {
+        try {
+            if (fDocumentScanner.scanDocument(complete)) {
+                return true;
+            }
+            // nothing left to scan: release the scanner's resources
+            cleanup();
+            return false;
+        }
+        catch (XNIException xniFailure) {
+            cleanup();
+            throw xniFailure;
+        }
+        catch (IOException ioFailure) {
+            cleanup();
+            throw ioFailure;
+        }
+    } // parse(boolean):boolean
+
+    /**
+     * Frees any resource allocated during parsing (for example, opened
+     * streams) by delegating to the document scanner. Called by parse(boolean)
+     * when parsing ends or fails; call it directly to stop parsing early.
+     */
+    public void cleanup() {
+        fDocumentScanner.cleanup(fCloseStream);
+    } // cleanup()
+    
+    //
+    // Protected methods
+    //
+
+    /** Registers a component and applies the defaults it reports. */
+    protected void addComponent(HTMLComponent component) {
+
+        // remember the component so later settings are forwarded to it
+        fHTMLComponents.addElement(component);
+
+        // declare the component's features, then apply any non-null defaults
+        String[] featureIds = component.getRecognizedFeatures();
+        addRecognizedFeatures(featureIds);
+        for (int f = 0; featureIds != null && f < featureIds.length; f++) {
+            Boolean defaultState = component.getFeatureDefault(featureIds[f]);
+            if (defaultState == null) {
+                continue;
+            }
+            setFeature(featureIds[f], defaultState.booleanValue());
+        }
+
+        // same again for the component's properties
+        String[] propertyIds = component.getRecognizedProperties();
+        addRecognizedProperties(propertyIds);
+        for (int p = 0; propertyIds != null && p < propertyIds.length; p++) {
+            Object defaultValue = component.getPropertyDefault(propertyIds[p]);
+            if (defaultValue == null) {
+                continue;
+            }
+            setProperty(propertyIds[p], defaultValue);
+        }
+
+    } // addComponent(HTMLComponent)
+
+    /** Resets every component, then rebuilds the pipeline from scanner to document handler. */
+    protected void reset() throws XMLConfigurationException {
+
+        // reset components
+        int size = fHTMLComponents.size();
+        for (int i = 0; i < size; i++) {
+            HTMLComponent component = (HTMLComponent)fHTMLComponents.elementAt(i);
+            component.reset(this);
+        }
+
+        // configure pipeline: each stage's source is whatever stage precedes it
+        XMLDocumentSource lastSource = fDocumentScanner;
+        if (getFeature(BALANCE_TAGS)) {
+            lastSource.setDocumentHandler(fTagBalancer);
+            fTagBalancer.setDocumentSource(fDocumentScanner);
+            lastSource = fTagBalancer;
+        }
+        if (getFeature(NAMESPACES)) {
+            lastSource.setDocumentHandler(fNamespaceBinder);
+            fNamespaceBinder.setDocumentSource(lastSource); // scanner when balancing is off
+            lastSource = fNamespaceBinder;
+        }
+        XMLDocumentFilter[] filters = (XMLDocumentFilter[])getProperty(FILTERS);
+        if (filters != null) {
+            for (int i = 0; i < filters.length; i++) {
+                XMLDocumentFilter filter = filters[i];
+                Class filterClass = filter.getClass();
+                try {
+                    Method filterMethod = filterClass.getMethod("setDocumentSource", DOCSOURCE);
+                    if (filterMethod != null) {
+                        filterMethod.invoke(filter, new Object[] { lastSource });
+                    }
+                }
+                catch (IllegalAccessException e) {
+                    // best effort: the filter is chained without a document source
+                } 
+                catch (InvocationTargetException e) {
+                    // best effort: the filter is chained without a document source
+                } 
+                catch (NoSuchMethodException e) {
+                    // filter has no public setDocumentSource(XMLDocumentSource)
+                }
+                lastSource.setDocumentHandler(filter);
+                lastSource = filter;
+            }
+        }
+        lastSource.setDocumentHandler(fDocumentHandler);
+
+    } // reset()
+
+    //
+    // Interfaces
+    //
+
+    /**
+     * Defines an error reporter for reporting HTML errors. There is no such 
+     * thing as a fatal error in parsing HTML. I/O errors are fatal but should 
+     * throw an <code>IOException</code> directly instead of reporting an error.
+     * <p>
+     * When used in a configuration, the error reporter instance should be
+     * set as a property with the following property identifier:
+     * <pre>
+     * "http://cyberneko.org/html/internal/error-reporter" in the
+     * </pre>
+     * Components in the configuration can query the error reporter using this
+     * property identifier.
+     * <p>
+     * <strong>Note:</strong>
+     * All reported errors are within the domain "http://cyberneko.org/html". 
+     *
+     * @author Andy Clark
+     */
+    protected class ErrorReporter
+        implements HTMLErrorReporter {
+
+        //
+        // Data
+        //
+
+        /** Last locale. */
+        protected Locale fLastLocale;
+
+        /** Error messages. */
+        protected ResourceBundle fErrorMessages;
+
+        //
+        // HTMLErrorReporter methods
+        //
+
+        /** Format message without reporting error. */
+        public String formatMessage(String key, Object[] args) {
+            if (!getFeature(SIMPLE_ERROR_FORMAT)) {
+                if (!fLocale.equals(fLastLocale)) {
+                    fErrorMessages = null;
+                    fLastLocale = fLocale;
+                }
+                if (fErrorMessages == null) {
+                    fErrorMessages = 
+                        ResourceBundle.getBundle("org/cyberneko/html/res/ErrorMessages",
+                                                 fLocale);
+                }
+                try {
+                    String value = fErrorMessages.getString(key);
+                    String message = MessageFormat.format(value, args);
+                    return message;
+                }
+                catch (MissingResourceException e) {
+                    // ignore and return a simple format
+                }
+            }
+            return formatSimpleMessage(key, args);
+        } // formatMessage(String,Object[]):String
+
+        /** Reports a warning. */
+        public void reportWarning(String key, Object[] args)
+            throws XMLParseException {
+            if (fErrorHandler != null) {
+                fErrorHandler.warning(ERROR_DOMAIN, key, createException(key, args));
+            }
+        } // reportWarning(String,Object[])
+
+        /** Reports an error. */
+        public void reportError(String key, Object[] args)
+            throws XMLParseException {
+            if (fErrorHandler != null) {
+                fErrorHandler.error(ERROR_DOMAIN, key, createException(key, args));
+            }
+        } // reportError(String,Object[])
+
+        //
+        // Protected methods
+        //
+
+        /** Creates parse exception. */
+        protected XMLParseException createException(String key, Object[] args) {
+            String message = formatMessage(key, args);
+            return new XMLParseException(fDocumentScanner, message);
+        } // createException(String,Object[]):XMLParseException
+
+        /** Format simple message. */
+        protected String formatSimpleMessage(String key, Object[] args) {
+            StringBuffer str = new StringBuffer();
+            str.append(ERROR_DOMAIN);
+            str.append('#');
+            str.append(key);
+            if (args != null && args.length > 0) {
+                str.append('\t');
+                for (int i = 0; i < args.length; i++) {
+                    if (i > 0) {
+                        str.append('\t');
+                    }
+                    str.append(String.valueOf(args[i]));
+                }
+            }
+            return str.toString();
+        } // formatSimpleMessage(String,
+
+    } // class ErrorReporter
+
+} // class HTMLConfiguration

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLElements.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLElements.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLElements.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,752 @@
+/* 
+ * (C) Copyright 2002-2005, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ */
+
+package org.cyberneko.html;
+
+/**
+ * Collection of HTML element information.
+ *
+ * @author Andy Clark
+ *
+ * @version $Id: HTMLElements.java,v 1.12 2005/02/14 07:16:59 andyc Exp $
+ */
+public class HTMLElements {
+
+    //
+    // Constants
+    //
+    
+    // element codes
+
+    // NOTE: The element codes *must* start with 0 and increment in
+    //       sequence. The parent and closes references depends on 
+    //       this assumption. -Ac
+
+    public static final short A = 0;
+    public static final short ABBR = A+1;
+    public static final short ACRONYM = ABBR+1;
+    public static final short ADDRESS = ACRONYM+1;
+    public static final short APPLET = ADDRESS+1;
+    public static final short AREA = APPLET+1;
+    public static final short B = AREA+1;
+    public static final short BASE = B+1;
+    public static final short BASEFONT = BASE+1;
+    public static final short BDO = BASEFONT+1;
+    public static final short BGSOUND = BDO+1;
+    public static final short BIG = BGSOUND+1;
+    public static final short BLINK = BIG+1;
+    public static final short BLOCKQUOTE = BLINK+1;
+    public static final short BODY = BLOCKQUOTE+1;
+    public static final short BR = BODY+1;
+    public static final short BUTTON = BR+1;
+    public static final short CAPTION = BUTTON+1;
+    public static final short CENTER = CAPTION+1;
+    public static final short CITE = CENTER+1;
+    public static final short CODE = CITE+1;
+    public static final short COL = CODE+1;
+    public static final short COLGROUP = COL+1;
+    public static final short COMMENT = COLGROUP+1;
+    public static final short DEL = COMMENT+1;
+    public static final short DFN = DEL+1;
+    public static final short DIR = DFN+1;
+    public static final short DIV = DIR+1;
+    public static final short DD = DIV+1;
+    public static final short DL = DD+1;
+    public static final short DT = DL+1;
+    public static final short EM = DT+1;
+    public static final short EMBED = EM+1;
+    public static final short FIELDSET = EMBED+1;
+    public static final short FONT = FIELDSET+1;
+    public static final short FORM = FONT+1;
+    public static final short FRAME = FORM+1;
+    public static final short FRAMESET = FRAME+1;
+    public static final short H1 = FRAMESET+1;
+    public static final short H2 = H1+1;
+    public static final short H3 = H2+1;
+    public static final short H4 = H3+1;
+    public static final short H5 = H4+1;
+    public static final short H6 = H5+1;
+    public static final short HEAD = H6+1;
+    public static final short HR = HEAD+1;
+    public static final short HTML = HR+1;
+    public static final short I = HTML+1;
+    public static final short IFRAME = I+1;
+    public static final short ILAYER = IFRAME+1;
+    public static final short IMG = ILAYER+1;
+    public static final short INPUT = IMG+1;
+    public static final short INS = INPUT+1;
+    public static final short ISINDEX = INS+1;
+    public static final short KBD = ISINDEX+1;
+    public static final short KEYGEN = KBD+1;
+    public static final short LABEL = KEYGEN+1;
+    public static final short LAYER = LABEL+1;
+    public static final short LEGEND = LAYER+1;
+    public static final short LI = LEGEND+1;
+    public static final short LINK = LI+1;
+    public static final short LISTING = LINK+1;
+    public static final short MAP = LISTING+1;
+    public static final short MARQUEE = MAP+1;
+    public static final short MENU = MARQUEE+1;
+    public static final short META = MENU+1;
+    public static final short MULTICOL = META+1;
+    public static final short NEXTID = MULTICOL+1;
+    public static final short NOBR = NEXTID+1;
+    public static final short NOEMBED = NOBR+1;
+    public static final short NOFRAMES = NOEMBED+1;
+    public static final short NOLAYER = NOFRAMES+1;
+    public static final short NOSCRIPT = NOLAYER+1;
+    public static final short OBJECT = NOSCRIPT+1;
+    public static final short OL = OBJECT+1;
+    public static final short OPTION = OL+1;
+    public static final short OPTGROUP = OPTION+1;
+    public static final short P = OPTGROUP+1;
+    public static final short PARAM = P+1;
+    public static final short PLAINTEXT = PARAM+1;
+    public static final short PRE = PLAINTEXT+1;
+    public static final short Q = PRE+1;
+    public static final short RB = Q+1;
+    public static final short RBC = RB+1;
+    public static final short RP = RBC+1;
+    public static final short RT = RP+1;
+    public static final short RTC = RT+1;
+    public static final short RUBY = RTC+1;
+    public static final short S = RUBY+1;
+    public static final short SAMP = S+1;
+    public static final short SCRIPT = SAMP+1;
+    public static final short SELECT = SCRIPT+1;
+    public static final short SMALL = SELECT+1;
+    public static final short SOUND = SMALL+1;
+    public static final short SPACER = SOUND+1;
+    public static final short SPAN = SPACER+1;
+    public static final short STRIKE = SPAN+1;
+    public static final short STRONG = STRIKE+1;
+    public static final short STYLE = STRONG+1;
+    public static final short SUB = STYLE+1;
+    public static final short SUP = SUB+1;
+    public static final short TABLE = SUP+1;
+    public static final short TBODY = TABLE+1;
+    public static final short TD = TBODY+1;
+    public static final short TEXTAREA = TD+1;
+    public static final short TFOOT = TEXTAREA+1;
+    public static final short TH = TFOOT+1;
+    public static final short THEAD = TH+1;
+    public static final short TITLE = THEAD+1;
+    public static final short TR = TITLE+1;
+    public static final short TT = TR+1;
+    public static final short U = TT+1;
+    public static final short UL = U+1;
+    public static final short VAR = UL+1;
+    public static final short WBR = VAR+1;
+    public static final short XML = WBR+1;
+    public static final short XMP = XML+1;
+    public static final short UNKNOWN = XMP+1;
+
+    // information
+
+    /** Element information organized by first letter. */
+    protected static final Element[][] ELEMENTS_ARRAY = new Element[26][];
+
+    /** Element information as a contiguous list. */
+    protected static final ElementList ELEMENTS = new ElementList();
+
+    /** No such element. */
+    public static final Element NO_SUCH_ELEMENT = new Element(UNKNOWN, "", 0, new short[]{HEAD,BODY}/*HTML*/, null);
+
+    //
+    // Static initializer
+    //
+
+    /**
+     * Initializes the element information.
+     * <p>
+     * <strong>Note:</strong>
+     * <code>getElement(short)</code> indexes the contiguous list by element
+     * code, so elements <em>must</em> be listed here in the same order as
+     * their code constants are numbered; keep new entries in that order.
+     */
+    static {
+        // <!ENTITY % heading "H1|H2|H3|H4|H5|H6">
+        // <!ENTITY % fontstyle "TT | I | B | BIG | SMALL">
+        // <!ENTITY % phrase "EM | STRONG | DFN | CODE | SAMP | KBD | VAR | CITE | ABBR | ACRONYM" >
+        // <!ENTITY % special "A | IMG | OBJECT | BR | SCRIPT | MAP | Q | SUB | SUP | SPAN | BDO">
+        // <!ENTITY % formctrl "INPUT | SELECT | TEXTAREA | LABEL | BUTTON">
+        // <!ENTITY % inline "#PCDATA | %fontstyle; | %phrase; | %special; | %formctrl;">
+        // <!ENTITY % block "P | %heading; | %list; | %preformatted; | DL | DIV | NOSCRIPT | BLOCKQUOTE | FORM | HR | TABLE | FIELDSET | ADDRESS">
+        // <!ENTITY % flow "%block; | %inline;">
+
+        // initialize array of element information
+        ELEMENTS_ARRAY['A'-'A'] = new Element[] {
+            // A - - (%inline;)* -(A)
+            new Element(A, "A", Element.INLINE, BODY, null),
+            // ABBR - - (%inline;)*
+            new Element(ABBR, "ABBR", Element.INLINE, BODY, null),
+            // ACRONYM - - (%inline;)*
+            new Element(ACRONYM, "ACRONYM", Element.INLINE, BODY, null),
+            // ADDRESS - - (%inline;)*
+            new Element(ADDRESS, "ADDRESS", Element.BLOCK, BODY, null),
+            // APPLET
+            new Element(APPLET, "APPLET", 0, BODY, null),
+            // AREA - O EMPTY
+            new Element(AREA, "AREA", Element.EMPTY, MAP, null),
+        };
+        ELEMENTS_ARRAY['B'-'A'] = new Element[] {
+            // B - - (%inline;)*
+            new Element(B, "B", Element.INLINE, BODY, null),
+            // BASE - O EMPTY
+            new Element(BASE, "BASE", Element.EMPTY, HEAD, null),
+            // BASEFONT
+            new Element(BASEFONT, "BASEFONT", 0, HEAD, null),
+            // BDO - - (%inline;)*
+            new Element(BDO, "BDO", Element.INLINE, BODY, null),
+            // BGSOUND
+            new Element(BGSOUND, "BGSOUND", Element.EMPTY, HEAD, null),
+            // BIG - - (%inline;)*
+            new Element(BIG, "BIG", Element.INLINE, BODY, null),
+            // BLINK
+            new Element(BLINK, "BLINK", Element.INLINE, BODY, null),
+            // BLOCKQUOTE - - (%block;|SCRIPT)+
+            new Element(BLOCKQUOTE, "BLOCKQUOTE", Element.BLOCK, BODY, new short[]{P}),
+            // BODY O O (%block;|SCRIPT)+ +(INS|DEL)
+            new Element(BODY, "BODY", 0, HTML, new short[]{HEAD}),
+            // BR - O EMPTY
+            new Element(BR, "BR", Element.EMPTY, BODY, null),
+            // BUTTON - - (%flow;)* -(A|%formctrl;|FORM|FIELDSET)
+            new Element(BUTTON, "BUTTON", 0, BODY, null),
+        };
+        ELEMENTS_ARRAY['C'-'A'] = new Element[] {
+            // CAPTION - - (%inline;)*
+            new Element(CAPTION, "CAPTION", Element.INLINE, TABLE, null),
+            // CENTER, 
+            new Element(CENTER, "CENTER", 0, BODY, null),
+            // CITE - - (%inline;)*
+            new Element(CITE, "CITE", Element.INLINE, BODY, null),
+            // CODE - - (%inline;)*
+            new Element(CODE, "CODE", Element.INLINE, BODY, null),
+            // COL - O EMPTY
+            new Element(COL, "COL", Element.EMPTY, TABLE, null),
+            // COLGROUP - O (COL)*
+            new Element(COLGROUP, "COLGROUP", 0, TABLE, new short[]{COL,COLGROUP}),
+            // COMMENT
+            new Element(COMMENT, "COMMENT", Element.SPECIAL, HTML, null),
+        };
+        ELEMENTS_ARRAY['D'-'A'] = new Element[] {
+            // DEL - - (%flow;)*
+            new Element(DEL, "DEL", 0, BODY, null),
+            // DFN - - (%inline;)*
+            new Element(DFN, "DFN", Element.INLINE, BODY, null),
+            // DIR
+            new Element(DIR, "DIR", 0, BODY, null),
+            // DIV - - (%flow;)*
+            new Element(DIV, "DIV", Element.BLOCK, BODY, null),
+            // DD - O (%flow;)*
+            new Element(DD, "DD", 0, DL, new short[]{DT,DD}),
+            // DL - - (DT|DD)+
+            new Element(DL, "DL", Element.BLOCK, BODY, null),
+            // DT - O (%inline;)*
+            new Element(DT, "DT", 0, DL, new short[]{DT,DD}),
+        };
+        ELEMENTS_ARRAY['E'-'A'] = new Element[] {
+            // EM - - (%inline;)*
+            new Element(EM, "EM", Element.INLINE, BODY, null),
+            // EMBED
+            new Element(EMBED, "EMBED", 0, BODY, null),
+        };
+        ELEMENTS_ARRAY['F'-'A'] = new Element[] {
+            // FIELDSET - - (#PCDATA,LEGEND,(%flow;)*)
+            new Element(FIELDSET, "FIELDSET", 0, BODY, null),
+            // FONT
+            new Element(FONT, "FONT", Element.CONTAINER, BODY, null),
+            // FORM - - (%block;|SCRIPT)+ -(FORM)
+            new Element(FORM, "FORM", Element.CONTAINER, new short[]{BODY,TD,P,DIV}, new short[]{FORM,BUTTON}),
+            // FRAME - O EMPTY
+            new Element(FRAME, "FRAME", Element.EMPTY, FRAMESET, null),
+            // FRAMESET - - ((FRAMESET|FRAME)+ & NOFRAMES?)
+            new Element(FRAMESET, "FRAMESET", 0, HTML, null),
+        };
+        ELEMENTS_ARRAY['H'-'A'] = new Element[] {
+            // (H1|H2|H3|H4|H5|H6) - - (%inline;)*
+            new Element(H1, "H1", Element.BLOCK, new short[]{BODY,A}, new short[]{H1,H2,H3,H4,H5,H6,P}),
+            new Element(H2, "H2", Element.BLOCK, new short[]{BODY,A}, new short[]{H1,H2,H3,H4,H5,H6,P}),
+            new Element(H3, "H3", Element.BLOCK, new short[]{BODY,A}, new short[]{H1,H2,H3,H4,H5,H6,P}),
+            new Element(H4, "H4", Element.BLOCK, new short[]{BODY,A}, new short[]{H1,H2,H3,H4,H5,H6,P}),
+            new Element(H5, "H5", Element.BLOCK, new short[]{BODY,A}, new short[]{H1,H2,H3,H4,H5,H6,P}),
+            new Element(H6, "H6", Element.BLOCK, new short[]{BODY,A}, new short[]{H1,H2,H3,H4,H5,H6,P}),
+            // HEAD O O (%head.content;) +(%head.misc;)
+            new Element(HEAD, "HEAD", 0, HTML, null),
+            // HR - O EMPTY
+            new Element(HR, "HR", Element.EMPTY, BODY, new short[]{P}),
+            // HTML O O (%html.content;)
+            new Element(HTML, "HTML", 0, null, null),
+        };
+        ELEMENTS_ARRAY['I'-'A'] = new Element[] {
+            // I - - (%inline;)*
+            new Element(I, "I", Element.INLINE, BODY, null),
+            // IFRAME
+            new Element(IFRAME, "IFRAME", Element.BLOCK, BODY, null),
+            // ILAYER
+            new Element(ILAYER, "ILAYER", Element.BLOCK, BODY, null),
+            // IMG - O EMPTY
+            new Element(IMG, "IMG", Element.EMPTY, BODY, null),
+            // INPUT - O EMPTY
+            new Element(INPUT, "INPUT", Element.EMPTY, BODY, null),
+            // INS - - (%flow;)*
+            new Element(INS, "INS", 0, BODY, null),
+            // ISINDEX
+            new Element(ISINDEX, "ISINDEX", 0, HEAD, null),
+        };
+        ELEMENTS_ARRAY['K'-'A'] = new Element[] {
+            // KBD - - (%inline;)*
+            new Element(KBD, "KBD", Element.INLINE, BODY, null),
+            // KEYGEN
+            new Element(KEYGEN, "KEYGEN", 0, BODY, null),
+        };
+        ELEMENTS_ARRAY['L'-'A'] = new Element[] {
+            // LABEL - - (%inline;)* -(LABEL)
+            new Element(LABEL, "LABEL", 0, BODY, null),
+            // LAYER
+            new Element(LAYER, "LAYER", Element.BLOCK, BODY, null),
+            // LEGEND - - (%inline;)*
+            new Element(LEGEND, "LEGEND", Element.INLINE, FIELDSET, null),
+            // LI - O (%flow;)*
+            new Element(LI, "LI", 0, new short[]{BODY,UL,OL}, new short[]{LI}),
+            // LINK - O EMPTY
+            new Element(LINK, "LINK", Element.EMPTY, HEAD, null),
+            // LISTING
+            new Element(LISTING, "LISTING", 0, BODY, null),
+        };
+        ELEMENTS_ARRAY['M'-'A'] = new Element[] {
+            // MAP - - ((%block;) | AREA)+
+            new Element(MAP, "MAP", Element.INLINE, BODY, null),
+            // MARQUEE
+            new Element(MARQUEE, "MARQUEE", 0, BODY, null),
+            // MENU
+            new Element(MENU, "MENU", 0, BODY, null),
+            // META - O EMPTY
+            new Element(META, "META", Element.EMPTY, HEAD, new short[]{STYLE,TITLE}),
+            // MULTICOL
+            new Element(MULTICOL, "MULTICOL", 0, BODY, null),
+        };
+        ELEMENTS_ARRAY['N'-'A'] = new Element[] {
+            // NEXTID
+            new Element(NEXTID, "NEXTID", Element.EMPTY, BODY, null),
+            // NOBR
+            new Element(NOBR, "NOBR", Element.INLINE, BODY, null),
+            // NOEMBED
+            new Element(NOEMBED, "NOEMBED", 0, BODY, null),
+            // NOFRAMES - - (BODY) -(NOFRAMES)
+            new Element(NOFRAMES, "NOFRAMES", 0, FRAMESET, null),
+            // NOLAYER
+            new Element(NOLAYER, "NOLAYER", 0, BODY, null),
+            // NOSCRIPT - - (%block;)+
+            new Element(NOSCRIPT, "NOSCRIPT", 0, new short[]{HEAD,BODY}, null),
+        };
+        ELEMENTS_ARRAY['O'-'A'] = new Element[] {
+            // OBJECT - - (PARAM | %flow;)*
+            new Element(OBJECT, "OBJECT", 0, BODY, null),
+            // OL - - (LI)+
+            new Element(OL, "OL", Element.BLOCK, BODY, null),
+            // OPTGROUP - - (OPTION)+
+            new Element(OPTGROUP, "OPTGROUP", 0, SELECT, new short[]{OPTION}),
+            // OPTION - O (#PCDATA)
+            new Element(OPTION, "OPTION", 0, SELECT, new short[]{OPTION}),
+        };
+        ELEMENTS_ARRAY['P'-'A'] = new Element[] {
+            // P - O (%inline;)*
+            new Element(P, "P", 0, BODY, new short[]{P}),
+            // PARAM - O EMPTY
+            new Element(PARAM, "PARAM", Element.EMPTY, new short[]{OBJECT,APPLET}, null),
+            // PLAINTEXT
+            new Element(PLAINTEXT, "PLAINTEXT", Element.SPECIAL, BODY, null),
+            // PRE - - (%inline;)* -(%pre.exclusion;)
+            new Element(PRE, "PRE", 0, BODY, null),
+        };
+        ELEMENTS_ARRAY['Q'-'A'] = new Element[] {
+            // Q - - (%inline;)*
+            new Element(Q, "Q", Element.INLINE, BODY, null),
+        };
+        ELEMENTS_ARRAY['R'-'A'] = new Element[] {
+            // RB
+            new Element(RB, "RB", Element.INLINE, RUBY, new short[]{RB}),
+            // RBC
+            new Element(RBC, "RBC", 0, RUBY, null),
+            // RP
+            new Element(RP, "RP", Element.INLINE, RUBY, new short[]{RB}),
+            // RT
+            new Element(RT, "RT", Element.INLINE, RUBY, new short[]{RB,RP}),
+            // RTC
+            new Element(RTC, "RTC", 0, RUBY, new short[]{RBC}),
+            // RUBY
+            new Element(RUBY, "RUBY", 0, BODY, new short[]{RUBY}),
+        };
+        ELEMENTS_ARRAY['S'-'A'] = new Element[] {
+            // S
+            new Element(S, "S", 0, BODY, null),
+            // SAMP - - (%inline;)*
+            new Element(SAMP, "SAMP", Element.INLINE, BODY, null),
+            // SCRIPT - - %Script;
+            new Element(SCRIPT, "SCRIPT", Element.SPECIAL, new short[]{HEAD,BODY}, null),
+            // SELECT - - (OPTGROUP|OPTION)+
+            new Element(SELECT, "SELECT", 0, BODY, new short[]{SELECT}),
+            // SMALL - - (%inline;)*
+            new Element(SMALL, "SMALL", Element.INLINE, BODY, null),
+            // SOUND
+            new Element(SOUND, "SOUND", Element.EMPTY, HEAD, null),
+            // SPACER
+            new Element(SPACER, "SPACER", Element.EMPTY, BODY, null),
+            // SPAN - - (%inline;)*
+            new Element(SPAN, "SPAN", Element.CONTAINER, BODY, null),
+            // STRIKE
+            new Element(STRIKE, "STRIKE", Element.INLINE, BODY, null),
+            // STRONG - - (%inline;)*
+            new Element(STRONG, "STRONG", Element.INLINE, BODY, null),
+            // STYLE - - %StyleSheet;
+            new Element(STYLE, "STYLE", Element.SPECIAL, new short[]{HEAD,BODY}, new short[]{STYLE,TITLE,META}),
+            // SUB - - (%inline;)*
+            new Element(SUB, "SUB", Element.INLINE, BODY, null),
+            // SUP - - (%inline;)*
+            new Element(SUP, "SUP", Element.INLINE, BODY, null),
+        };
+        ELEMENTS_ARRAY['T'-'A'] = new Element[] {
+            // TABLE - - (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
+            new Element(TABLE, "TABLE", Element.BLOCK|Element.CONTAINER, BODY, null),
+            // TBODY O O (TR)+
+            new Element(TBODY, "TBODY", 0, TABLE, new short[]{THEAD,TD,TH,TR,COLGROUP}),
+            // TD - O (%flow;)*
+            new Element(TD, "TD", 0, TR, TABLE, new short[]{TD,TH}),
+            // TEXTAREA - - (#PCDATA)
+            new Element(TEXTAREA, "TEXTAREA", Element.SPECIAL, BODY, null),
+            // TFOOT - O (TR)+
+            new Element(TFOOT, "TFOOT", 0, TABLE, new short[]{THEAD,TBODY,TD,TH,TR}),
+            // TH - O (%flow;)*
+            new Element(TH, "TH", 0, TR, TABLE, new short[]{TD,TH}),
+            // THEAD - O (TR)+
+            new Element(THEAD, "THEAD", 0, TABLE, new short[]{COLGROUP}),
+            // TITLE - - (#PCDATA) -(%head.misc;)
+            new Element(TITLE, "TITLE", 0, new short[]{HEAD,BODY}, null),
+            // TR - O (TH|TD)+
+            new Element(TR, "TR", Element.BLOCK, TABLE, new short[]{TD,TH,TR,COLGROUP}),
+            // TT - - (%inline;)*
+            new Element(TT, "TT", Element.INLINE, BODY, null),
+        };
+        ELEMENTS_ARRAY['U'-'A'] = new Element[] {
+            // U, 
+            new Element(U, "U", Element.INLINE, BODY, null),
+            // UL - - (LI)+
+            new Element(UL, "UL", Element.BLOCK, BODY, null),
+        };
+        ELEMENTS_ARRAY['V'-'A'] = new Element[] {
+            // VAR - - (%inline;)*
+            new Element(VAR, "VAR", Element.INLINE, BODY, null),
+        };
+        ELEMENTS_ARRAY['W'-'A'] = new Element[] {
+            // WBR
+            new Element(WBR, "WBR", Element.EMPTY, BODY, null),
+        };
+        ELEMENTS_ARRAY['X'-'A'] = new Element[] {
+            // XML
+            new Element(XML, "XML", 0, BODY, null),
+            // XMP
+            new Element(XMP, "XMP", Element.SPECIAL, BODY, null),
+        };
+
+        // keep contiguous list of elements for lookups by code
+        for (int i = 0; i < ELEMENTS_ARRAY.length; i++) {
+            Element[] elements = ELEMENTS_ARRAY[i];
+            if (elements != null) {
+                for (int j = 0; j < elements.length; j++) {
+                    Element element = elements[j];
+                    ELEMENTS.addElement(element);
+                }
+            }
+        }
+        ELEMENTS.addElement(NO_SUCH_ELEMENT);
+
+        // initialize cross references to parent elements
+        for (int i = 0; i < ELEMENTS.size; i++) {
+            Element element = ELEMENTS.data[i];
+            if (element.parentCodes != null) {
+                element.parent = new Element[element.parentCodes.length];
+                for (int j = 0; j < element.parentCodes.length; j++) {
+                    element.parent[j] = ELEMENTS.data[element.parentCodes[j]];
+                }
+                element.parentCodes = null;
+            }
+        }
+
+    } // <clinit>()
+
+    //
+    // Public static methods
+    //
+
+    /**
+     * Returns the element information for the specified element code,
+     * using the code directly as an index into the contiguous element list.
+     * @param code The element code; it is not range-checked.
+     */
+    public static final Element getElement(short code) {
+        return ELEMENTS.data[code];
+    } // getElement(short):Element
+
+    /**
+     * Returns the element information for the specified element name,
+     * passing <code>NO_SUCH_ELEMENT</code> as the default for unknown names.
+     * @param ename The element name.
+     */
+    public static final Element getElement(String ename) {
+        return getElement(ename, NO_SUCH_ELEMENT);
+    } // getElement(String):Element
+
+    /**
+     * Looks up the element information for an element name, ignoring case.
+     *
+     * @param ename The element name.
+     * @param element The default element to return if not found.
+     */
+    public static final Element getElement(String ename, Element element) {
+
+        // an empty name can never match
+        if (ename.length() == 0) {
+            return element;
+        }
+        // fold an ASCII lowercase first letter to uppercase to pick the bucket
+        int first = ename.charAt(0);
+        if (first >= 'a' && first <= 'z') {
+            first -= 'a' - 'A';
+        }
+        if (first < 'A' || first > 'Z') {
+            return element;
+        }
+        Element[] bucket = ELEMENTS_ARRAY[first - 'A'];
+        for (int k = 0; bucket != null && k < bucket.length; k++) {
+            if (bucket[k].name.equalsIgnoreCase(ename)) {
+                return bucket[k];
+            }
+        }
+        return element;
+    } // getElement(String,Element):Element
+
+    //
+    // Classes
+    //
+
+    /**
+     * Element information.
+     *
+     * @author Andy Clark
+     */
+    public static class Element {
+
+        //
+        // Constants
+        //
+
+        /** Inline element. */
+        public static final int INLINE = 0x01;
+
+        /** Block element. */
+        public static final int BLOCK = 0x02;
+
+        /** Empty element. */
+        public static final int EMPTY = 0x04;
+
+        /** Container element. */
+        public static final int CONTAINER = 0x08;
+
+        /** Special element. */
+        public static final int SPECIAL = 0x10;
+
+        /** Empty array. */
+        private static final short[] EMPTY_ARRAY = new short[0];
+
+        //
+        // Data
+        //
+
+        /** The element code. */
+        public short code;
+
+        /** The element name. */
+        public String name;
+
+        /** Informational flags. */
+        public int flags;
+
+        /** Parent elements. */
+        public short[] parentCodes;
+
+        /** Parent elements. */
+        public Element[] parent;
+
+        /** The bounding element code. */
+        public short bounds;
+
+        /** List of elements this element can close. */
+        public short[] closes;
+
+        //
+        // Constructors
+        //
+
+        /** 
+         * Constructs an element object.
+         *
+         * @param code The element code.
+         * @param name The element name.
+         * @param flags Informational flags
+         * @param parent Natural closing parent name.
+         * @param closes List of elements this element can close.
+         */
+        public Element(short code, String name, int flags, 
+                       short parent, short[] closes) {
+            this(code, name, flags, new short[]{parent}, (short)-1, closes);
+        } // <init>(short,String,int,short,short[]);
+
+        /** 
+         * Constructs an element object with a single parent and a bounding element.
+         * @param code The element code.
+         * @param name The element name.
+         * @param flags Informational flags
+         * @param parent Code of the natural closing parent element.
+         * @param bounds The bounding element code.
+         * @param closes List of elements this element can close.
+         */
+        public Element(short code, String name, int flags, 
+                       short parent, short bounds, short[] closes) {
+            this(code, name, flags, new short[]{parent}, bounds, closes);
+        } // <init>(short,String,int,short,short,short[])
+
+        /** 
+         * Constructs an element object.
+         *
+         * @param code The element code.
+         * @param name The element name.
+         * @param flags Informational flags
+         * @param parents Natural closing parent names.
+         * @param closes List of elements this element can close.
+         */
+        public Element(short code, String name, int flags, 
+                       short[] parents, short[] closes) {
+            this(code, name, flags, parents, (short)-1, closes);
+        } // <init>(short,String,int,short[],short[])
+
+        /** 
+         * Constructs an element object; the other constructors delegate here.
+         * @param code The element code.
+         * @param name The element name.
+         * @param flags Informational flags
+         * @param parents Codes of the natural closing parent elements.
+         * @param bounds The bounding element code (overloads without it pass -1).
+         * @param closes List of elements this element can close.
+         */
+        public Element(short code, String name, int flags, 
+                       short[] parents, short bounds, short[] closes) {
+            this.code = code;
+            this.name = name;
+            this.flags = flags;
+            this.parentCodes = parents;
+            this.parent = null; // filled in from parentCodes by the HTMLElements static initializer
+            this.bounds = bounds;
+            this.closes = closes;
+        } // <init>(short,String,int,short[],short,short[])
+
+        //
+        // Public methods
+        //
+
+        /** Returns true if this element is an inline element. */
+        public final boolean isInline() {
+            return (flags & INLINE) != 0;
+        } // isInline():boolean
+
+        /** Returns true if this element is a block element. */
+        public final boolean isBlock() {
+            return (flags & BLOCK) != 0;
+        } // isBlock():boolean
+
+        /** Returns true if this element is an empty element. */
+        public final boolean isEmpty() {
+            return (flags & EMPTY) != 0;
+        } // isEmpty():boolean
+
+        /** Returns true if this element is a container element. */
+        public final boolean isContainer() {
+            return (flags & CONTAINER) != 0;
+        } // isContainer():boolean
+
+        /** 
+         * Returns true if this element is special -- if its content
+         * should be parsed ignoring markup.
+         */
+        public final boolean isSpecial() {
+            return (flags & SPECIAL) != 0;
+        } // isSpecial():boolean
+
+        /**
+         * Returns true if this element can close the specified Element.
+         *
+         * @param tag The element.
+         */
+        public boolean closes(short tag) {
+
+            if (closes != null) {
+                for (int i = 0; i < closes.length; i++) {
+                    if (closes[i] == tag) {
+                        return true;
+                    }
+                }
+            }
+            return false;
+
+        } // closes(short):boolean
+
+        //
+        // Object methods
+        //
+
+        /** Returns a hash code for this object. */
+        public int hashCode() {
+            return name.hashCode();
+        } // hashCode():int
+
+        /** Returns true if <code>o</code> is an Element with the same name (a name String is still accepted, as before). */
+        public boolean equals(Object o) {
+            return name.equals(o instanceof Element ? ((Element)o).name : o);
+        } // equals(Object):boolean
+
+    } // class Element
+
+    /** Unsynchronized, array-backed list of elements with public size and storage. */
+    public static class ElementList {
+
+        //
+        // Data
+        //
+
+        /** The number of elements stored in <code>data</code>. */
+        public int size;
+
+        /** The backing array; slots at index <code>size</code> and above are unused. */
+        public Element[] data = new Element[120];
+
+        //
+        // Public methods
+        //
+
+        /** Appends an element, growing the backing array by 20 slots when it is full. */
+        public void addElement(Element element) {
+            if (size == data.length) {
+                Element[] newarray = new Element[size + 20];
+                System.arraycopy(data, 0, newarray, 0, size);
+                data = newarray;
+            }
+            data[size++] = element;
+        } // addElement(Element)
+
+    } // class ElementList
+
+} // class HTMLElements

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLEntities.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLEntities.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLEntities.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,126 @@
+/* 
+ * (C) Copyright 2002-2005, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ */
+
+package org.cyberneko.html;
+
+import java.io.IOException;
+import java.util.Enumeration;
+import java.util.Properties;
+                           
+/**
+ * Pre-defined HTML entities.
+ *
+ * @author Andy Clark
+ *
+ * @version $Id: HTMLEntities.java,v 1.5 2005/02/14 03:56:54 andyc Exp $
+ */
+public class HTMLEntities {
+
+    //
+    // Constants
+    //
+
+    /** Entities. */
+    protected static final Properties ENTITIES = new Properties();
+
+    /** Reverse mapping from characters to names. */
+    protected static final IntProperties SEITITNE = new IntProperties();
+
+    //
+    // Static initialization
+    //
+
+    static {
+        // load entities; every resource is loaded into the same ENTITIES table
+        load0("res/HTMLlat1.properties");
+        load0("res/HTMLspecial.properties");
+        load0("res/HTMLsymbol.properties");
+        load0("res/XMLbuiltin.properties");
+
+        // store reverse mappings for single-character values only; if several names share a character, the one enumerated last wins
+        Enumeration keys = ENTITIES.propertyNames();
+        while (keys.hasMoreElements()) {
+            String key = (String)keys.nextElement();
+            String value = ENTITIES.getProperty(key);
+            if (value.length() == 1) {
+                int ivalue = value.charAt(0);
+                SEITITNE.put(ivalue, key);
+            }
+        }
+    }
+
+    //
+    // Public static methods
+    //
+
+    /** 
+     * Returns the first character of the value associated to the given entity
+     * name, or -1 if the name is not known or its value is empty.
+     */
+    public static int get(String name) {
+        String value = (String)ENTITIES.get(name);
+        return value != null && value.length() > 0 ? value.charAt(0) : -1;
+    } // get(String):int
+
+    /** 
+     * Returns the name associated to the given character or null if
+     * the character is not known.
+     */
+    public static String get(int c) {
+        return SEITITNE.get(c);
+    } // get(int):String
+
+    //
+    // Private static methods
+    //
+
+    /** Loads the entity values from the given class-relative resource; an IOException is only reported on stderr. NOTE(review): the stream is neither null-checked nor closed here -- confirm the resource always exists. */
+    private static void load0(String filename) {
+        try {
+            ENTITIES.load(HTMLEntities.class.getResourceAsStream(filename));
+        }
+        catch (IOException e) {
+            System.err.println("error: unable to load resource \""+filename+"\"");
+        }
+    } // load0(String)
+
+    //
+    // Classes
+    //
+
+    /** Chained hash table from int keys to names; the sign bit is masked so negative keys (e.g. -1) cannot index out of bounds. */
+    static class IntProperties {
+        private Entry[] entries = new Entry[101];
+        public void put(int key, String value) { // no replacement of duplicates: entries are only ever added
+            int hash = (key & 0x7FFFFFFF) % entries.length;
+            Entry entry = new Entry(key, value, entries[hash]); // prepend, so the newest mapping for a key shadows older ones
+            entries[hash] = entry;
+        }
+        public String get(int key) { // returns the most recently stored value for key, or null if none
+            int hash = (key & 0x7FFFFFFF) % entries.length;
+            Entry entry = entries[hash];
+            while (entry != null) {
+                if (entry.key == key) {
+                    return entry.value;
+                }
+                entry = entry.next;
+            }
+            return null;
+        }
+        static class Entry { // singly linked bucket node
+            public int key;
+            public String value;
+            public Entry next;
+            public Entry(int key, String value, Entry next) {
+                this.key = key;
+                this.value = value;
+                this.next = next;
+            }
+        }
+    }
+
+} // class HTMLEntities

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLErrorReporter.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLErrorReporter.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLErrorReporter.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,47 @@
+/* 
+ * (C) Copyright 2002-2005, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ */
+
+package org.cyberneko.html;
+
+import org.apache.xerces.xni.parser.XMLParseException;
+
+/**
+ * Defines an error reporter for reporting HTML errors. There is no such 
+ * thing as a fatal error in parsing HTML. I/O errors are fatal but should 
+ * throw an <code>IOException</code> directly instead of reporting an error.
+ * <p>
+ * When used in a configuration, the error reporter instance should be
+ * set as a property with the following property identifier:
+ * <pre>
+ * "http://cyberneko.org/html/internal/error-reporter"
+ * </pre>
+ * Components in the configuration can query the error reporter using this
+ * property identifier.
+ * <p>
+ * <strong>Note:</strong>
+ * All reported errors are within the domain "http://cyberneko.org/html". 
+ *
+ * @author Andy Clark
+ *
+ * @version $Id: HTMLErrorReporter.java,v 1.4 2005/02/14 03:56:54 andyc Exp $
+ */
+public interface HTMLErrorReporter {
+    
+    //
+    // HTMLErrorReporter methods
+    //
+
+    /** Formats the message for the given key and arguments without reporting an error. */
+    public String formatMessage(String key, Object[] args);
+
+    /** Reports a warning identified by the given message key and arguments. */
+    public void reportWarning(String key, Object[] args) throws XMLParseException;
+
+    /** Reports an error identified by the given message key and arguments. */
+    public void reportError(String key, Object[] args) throws XMLParseException;
+
+} // interface HTMLErrorReporter

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLEventInfo.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLEventInfo.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLEventInfo.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,95 @@
+/* 
+ * (C) Copyright 2002-2005, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ */
+
+package org.cyberneko.html;
+
+/**
+ * This interface is used to pass augmented information to the
+ * application through the XNI pipeline.
+ *
+ * @author Andy Clark
+ *
+ * @version $Id: HTMLEventInfo.java,v 1.4 2005/02/14 03:56:54 andyc Exp $
+ */
+public interface HTMLEventInfo {
+
+    //
+    // HTMLEventInfo methods
+    //
+
+    // location information
+
+    /** Returns the line number of the beginning of this event. */
+    public int getBeginLineNumber();
+
+    /** Returns the column number of the beginning of this event. */
+    public int getBeginColumnNumber();
+
+    /** Returns the line number of the end of this event. */
+    public int getEndLineNumber();
+
+    /** Returns the column number of the end of this event. */
+    public int getEndColumnNumber();
+
+    // other information
+
+    /** Returns true if the corresponding event was synthesized. */
+    public boolean isSynthesized();
+
+    /**
+     * Synthesized infoset item: reports -1 for every location and true
+     * from <code>isSynthesized()</code>.
+     * @author Andy Clark
+     */
+    public static class SynthesizedItem
+        implements HTMLEventInfo {
+
+        //
+        // HTMLEventInfo methods
+        //
+
+        // location information
+
+        /** Returns -1: a synthesized event has no beginning line number. */
+        public int getBeginLineNumber() {
+            return -1;
+        } // getBeginLineNumber():int
+
+        /** Returns -1: a synthesized event has no beginning column number. */
+        public int getBeginColumnNumber() { 
+            return -1;
+        } // getBeginColumnNumber():int
+
+        /** Returns -1: a synthesized event has no end line number. */
+        public int getEndLineNumber() {
+            return -1;
+        } // getEndLineNumber():int
+
+        /** Returns -1: a synthesized event has no end column number. */
+        public int getEndColumnNumber() {
+            return -1;
+        } // getEndColumnNumber():int
+
+        // other information
+
+        /** Always returns true. */
+        public boolean isSynthesized() {
+            return true;
+        } // isSynthesized():boolean
+
+        //
+        // Object methods
+        //
+
+        /** Returns the fixed string "synthesized". */
+        public String toString() {
+            return "synthesized";
+        } // toString():String
+
+    } // class SynthesizedItem
+
+} // interface HTMLEventInfo

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLScanner.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLScanner.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLScanner.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,3277 @@
+/* 
+ * (C) Copyright 2002-2005, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ *
+ * NOTE: The URI fixing code in this source was taken from the Apache
+ *       Xerces parser which is distributed under the Apache license.
+ *       Refer to the LICENSE_apache file for details.
+ */
+
+package org.cyberneko.html;
+
+import java.io.EOFException;
+import java.io.FileInputStream;
+import java.io.FilterInputStream;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.IOException;
+import java.io.PushbackReader;
+import java.io.Reader;
+import java.io.UnsupportedEncodingException;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.net.URL;
+import java.util.Stack;
+
+import org.apache.xerces.util.EncodingMap;
+import org.apache.xerces.util.NamespaceSupport;
+import org.apache.xerces.util.URI;
+import org.apache.xerces.util.XMLAttributesImpl;
+import org.apache.xerces.util.XMLResourceIdentifierImpl;
+import org.apache.xerces.util.XMLStringBuffer;
+import org.apache.xerces.xni.Augmentations;
+import org.apache.xerces.xni.NamespaceContext;
+import org.apache.xerces.xni.QName;
+import org.apache.xerces.xni.XMLAttributes;
+import org.apache.xerces.xni.XMLDocumentHandler;
+import org.apache.xerces.xni.XMLLocator;
+import org.apache.xerces.xni.XMLResourceIdentifier;
+import org.apache.xerces.xni.XMLString;
+import org.apache.xerces.xni.XNIException;
+import org.apache.xerces.xni.parser.XMLComponentManager;
+import org.apache.xerces.xni.parser.XMLConfigurationException;
+import org.apache.xerces.xni.parser.XMLDocumentScanner;
+import org.apache.xerces.xni.parser.XMLInputSource;
+
+/**
+ * A simple HTML scanner. This scanner makes no attempt to balance tags
+ * or fix other problems in the source document &mdash; it just scans what 
+ * it can and generates XNI document "events", ignoring errors of all 
+ * kinds.
+ * <p>
+ * This component recognizes the following features:
+ * <ul>
+ * <li>http://cyberneko.org/html/features/augmentations
+ * <li>http://cyberneko.org/html/features/report-errors
+ * <li>http://apache.org/xml/features/scanner/notify-char-refs
+ * <li>http://apache.org/xml/features/scanner/notify-builtin-refs
+ * <li>http://cyberneko.org/html/features/scanner/notify-builtin-refs
+ * <li>http://cyberneko.org/html/features/scanner/fix-mswindows-refs
+ * <li>http://cyberneko.org/html/features/scanner/script/strip-cdata-delims
+ * <li>http://cyberneko.org/html/features/scanner/script/strip-comment-delims
+ * <li>http://cyberneko.org/html/features/scanner/style/strip-cdata-delims
+ * <li>http://cyberneko.org/html/features/scanner/style/strip-comment-delims
+ * <li>http://cyberneko.org/html/features/scanner/ignore-specified-charset
+ * <li>http://cyberneko.org/html/features/scanner/cdata-sections
+ * <li>http://cyberneko.org/html/features/override-doctype
+ * <li>http://cyberneko.org/html/features/insert-doctype
+ * </ul>
+ * <p>
+ * This component recognizes the following properties:
+ * <ul>
+ * <li>http://cyberneko.org/html/properties/names/elems
+ * <li>http://cyberneko.org/html/properties/names/attrs
+ * <li>http://cyberneko.org/html/properties/default-encoding
+ * <li>http://cyberneko.org/html/properties/error-reporter
+ * <li>http://cyberneko.org/html/properties/doctype/pubid
+ * <li>http://cyberneko.org/html/properties/doctype/sysid
+ * </ul>
+ *
+ * @see HTMLElements
+ * @see HTMLEntities
+ *
+ * @author Andy Clark
+ *
+ * @version $Id: HTMLScanner.java,v 1.19 2005/06/14 05:52:37 andyc Exp $
+ */
+public class HTMLScanner 
+    implements XMLDocumentScanner, XMLLocator, HTMLComponent {
+
+    //
+    // Constants
+    //
+
+    // doctype info: HTML 4.01 strict
+
+    /** HTML 4.01 strict public identifier ("-//W3C//DTD HTML 4.01//EN"). */
+    public static final String HTML_4_01_STRICT_PUBID = "-//W3C//DTD HTML 4.01//EN";
+
+    /** HTML 4.01 strict system identifier ("http://www.w3.org/TR/html4/strict.dtd"). */
+    public static final String HTML_4_01_STRICT_SYSID = "http://www.w3.org/TR/html4/strict.dtd";
+
+    // doctype info: HTML 4.01 loose
+
+    /** HTML 4.01 transitional public identifier ("-//W3C//DTD HTML 4.01 Transitional//EN"). */
+    public static final String HTML_4_01_TRANSITIONAL_PUBID = "-//W3C//DTD HTML 4.01 Transitional//EN";
+
+    /** HTML 4.01 transitional system identifier ("http://www.w3.org/TR/html4/loose.dtd"). */
+    public static final String HTML_4_01_TRANSITIONAL_SYSID = "http://www.w3.org/TR/html4/loose.dtd";
+
+    // doctype info: HTML 4.01 frameset
+
+    /** HTML 4.01 frameset public identifier ("-//W3C//DTD HTML 4.01 Frameset//EN"). */
+    public static final String HTML_4_01_FRAMESET_PUBID = "-//W3C//DTD HTML 4.01 Frameset//EN";
+
+    /** HTML 4.01 frameset system identifier ("http://www.w3.org/TR/html4/frameset.dtd"). */
+    public static final String HTML_4_01_FRAMESET_SYSID = "http://www.w3.org/TR/html4/frameset.dtd";
+
+    // features
+
+    /** Include infoset augmentations. */
+    protected static final String AUGMENTATIONS = "http://cyberneko.org/html/features/augmentations";
+
+    /** Report errors. */
+    protected static final String REPORT_ERRORS = "http://cyberneko.org/html/features/report-errors";
+
+    /** Notify character entity references (e.g. &amp;#32;, &amp;#x20;, etc). */
+    public static final String NOTIFY_CHAR_REFS = "http://apache.org/xml/features/scanner/notify-char-refs";
+
+    /** 
+     * Notify handler of built-in entity references (e.g. &amp;amp;, 
+     * &amp;lt;, etc).
+     * <p>
+     * <strong>Note:</strong>
+     * This only applies to the five pre-defined XML general entities.
+     * Specifically, "amp", "lt", "gt", "quot", and "apos". This is done 
+     * for compatibility with the Xerces feature.
+     * <p>
+     * To be notified of the built-in entity references in HTML, set the 
+     * <code>http://cyberneko.org/html/features/scanner/notify-builtin-refs</code> 
+     * feature to <code>true</code>.
+     */
+    public static final String NOTIFY_XML_BUILTIN_REFS = "http://apache.org/xml/features/scanner/notify-builtin-refs";
+
+    /** 
+     * Notify handler of built-in entity references (e.g. &amp;nbsp;, 
+     * &amp;copy;, etc).
+     * <p>
+     * <strong>Note:</strong>
+     * This <em>includes</em> the five pre-defined XML general entities.
+     */
+    public static final String NOTIFY_HTML_BUILTIN_REFS = "http://cyberneko.org/html/features/scanner/notify-builtin-refs";
+
+    /** Fix Microsoft Windows&reg; character entity references. */
+    public static final String FIX_MSWINDOWS_REFS = "http://cyberneko.org/html/features/scanner/fix-mswindows-refs";
+
+    /** 
+     * Strip HTML comment delimiters ("&lt;!&minus;&minus;" and 
+     * "&minus;&minus;&gt;") from SCRIPT tag contents.
+     */
+    public static final String SCRIPT_STRIP_COMMENT_DELIMS = "http://cyberneko.org/html/features/scanner/script/strip-comment-delims";
+
+    /** 
+     * Strip XHTML CDATA delimiters ("&lt;![CDATA[" and "]]&gt;") from 
+     * SCRIPT tag contents.
+     */
+    public static final String SCRIPT_STRIP_CDATA_DELIMS = "http://cyberneko.org/html/features/scanner/script/strip-cdata-delims";
+
+    /** 
+     * Strip HTML comment delimiters ("&lt;!&minus;&minus;" and 
+     * "&minus;&minus;&gt;") from STYLE tag contents.
+     */
+    public static final String STYLE_STRIP_COMMENT_DELIMS = "http://cyberneko.org/html/features/scanner/style/strip-comment-delims";
+
+    /** 
+     * Strip XHTML CDATA delimiters ("&lt;![CDATA[" and "]]&gt;") from 
+     * STYLE tag contents.
+     */
+    public static final String STYLE_STRIP_CDATA_DELIMS = "http://cyberneko.org/html/features/scanner/style/strip-cdata-delims";
+
+    /**
+     * Ignore specified charset found in the &lt;meta http-equiv='Content-Type'
+     * content='text/html;charset=&hellip;'&gt; tag.
+     */
+    public static final String IGNORE_SPECIFIED_CHARSET = "http://cyberneko.org/html/features/scanner/ignore-specified-charset";
+
+    /** Scan CDATA sections. */
+    public static final String CDATA_SECTIONS = "http://cyberneko.org/html/features/scanner/cdata-sections";
+
+    /** Override doctype declaration public and system identifiers. */
+    public static final String OVERRIDE_DOCTYPE = "http://cyberneko.org/html/features/override-doctype";
+
+    /** Insert document type declaration. */
+    public static final String INSERT_DOCTYPE = "http://cyberneko.org/html/features/insert-doctype";
+
+    /**
+     * Recognized features. The order matters: the entry at index <i>i</i>
+     * is paired with the entry at the same index in
+     * <code>RECOGNIZED_FEATURES_DEFAULTS</code>.
+     */
+    private static final String[] RECOGNIZED_FEATURES = {
+        AUGMENTATIONS,
+        REPORT_ERRORS,
+        NOTIFY_CHAR_REFS,
+        NOTIFY_XML_BUILTIN_REFS,
+        NOTIFY_HTML_BUILTIN_REFS,
+        FIX_MSWINDOWS_REFS,
+        SCRIPT_STRIP_CDATA_DELIMS,
+        SCRIPT_STRIP_COMMENT_DELIMS,
+        STYLE_STRIP_CDATA_DELIMS,
+        STYLE_STRIP_COMMENT_DELIMS,
+        IGNORE_SPECIFIED_CHARSET,
+        CDATA_SECTIONS,
+        OVERRIDE_DOCTYPE,
+        INSERT_DOCTYPE,
+    };
+
+    /**
+     * Recognized features defaults, parallel to
+     * <code>RECOGNIZED_FEATURES</code>: the entry at index <i>i</i> is the
+     * default for the feature at the same index. A <code>null</code> entry
+     * means this component supplies no default for that feature.
+     */
+    private static final Boolean[] RECOGNIZED_FEATURES_DEFAULTS = {
+        null,          // AUGMENTATIONS
+        null,          // REPORT_ERRORS
+        Boolean.FALSE, // NOTIFY_CHAR_REFS
+        Boolean.FALSE, // NOTIFY_XML_BUILTIN_REFS
+        Boolean.FALSE, // NOTIFY_HTML_BUILTIN_REFS
+        Boolean.FALSE, // FIX_MSWINDOWS_REFS
+        Boolean.FALSE, // SCRIPT_STRIP_CDATA_DELIMS
+        Boolean.FALSE, // SCRIPT_STRIP_COMMENT_DELIMS
+        Boolean.FALSE, // STYLE_STRIP_CDATA_DELIMS
+        Boolean.FALSE, // STYLE_STRIP_COMMENT_DELIMS
+        Boolean.FALSE, // IGNORE_SPECIFIED_CHARSET
+        Boolean.FALSE, // CDATA_SECTIONS
+        Boolean.FALSE, // OVERRIDE_DOCTYPE
+        Boolean.FALSE, // INSERT_DOCTYPE
+    };
+
+    // properties
+
+    /** Modify HTML element names: { "upper", "lower", "default" }. */
+    protected static final String NAMES_ELEMS = "http://cyberneko.org/html/properties/names/elems";
+
+    /** Modify HTML attribute names: { "upper", "lower", "default" }. */
+    protected static final String NAMES_ATTRS = "http://cyberneko.org/html/properties/names/attrs";
+    
+    /** Default encoding. */
+    protected static final String DEFAULT_ENCODING = "http://cyberneko.org/html/properties/default-encoding";
+    
+    /** Error reporter. */
+    protected static final String ERROR_REPORTER = "http://cyberneko.org/html/properties/error-reporter";
+
+    /** Doctype declaration public identifier. */
+    protected static final String DOCTYPE_PUBID = "http://cyberneko.org/html/properties/doctype/pubid";
+
+    /** Doctype declaration system identifier. */
+    protected static final String DOCTYPE_SYSID = "http://cyberneko.org/html/properties/doctype/sysid";
+
+    /**
+     * Recognized properties. The order matters: the entry at index <i>i</i>
+     * is paired with the entry at the same index in
+     * <code>RECOGNIZED_PROPERTIES_DEFAULTS</code>.
+     */
+    private static final String[] RECOGNIZED_PROPERTIES = {
+        NAMES_ELEMS,
+        NAMES_ATTRS,
+        DEFAULT_ENCODING,
+        ERROR_REPORTER,
+        DOCTYPE_PUBID,
+        DOCTYPE_SYSID,
+    };
+
+    /**
+     * Recognized properties defaults, parallel to
+     * <code>RECOGNIZED_PROPERTIES</code>: the entry at index <i>i</i> is
+     * the default for the property at the same index. A <code>null</code>
+     * entry means this component supplies no default for that property.
+     */
+    private static final Object[] RECOGNIZED_PROPERTIES_DEFAULTS = {
+        null,                         // NAMES_ELEMS
+        null,                         // NAMES_ATTRS
+        "Windows-1252",               // DEFAULT_ENCODING
+        null,                         // ERROR_REPORTER
+        HTML_4_01_TRANSITIONAL_PUBID, // DOCTYPE_PUBID
+        HTML_4_01_TRANSITIONAL_SYSID, // DOCTYPE_SYSID
+    };
+
+    // states
+
+    /** State: content. */
+    protected static final short STATE_CONTENT = 0;
+
+    /** State: markup bracket. */
+    protected static final short STATE_MARKUP_BRACKET = 1;
+
+    /** State: start document. */
+    protected static final short STATE_START_DOCUMENT = 10;
+
+    /** State: end document. */
+    protected static final short STATE_END_DOCUMENT = 11;
+
+    // modify HTML names
+
+    /** Don't modify HTML names. */
+    protected static final short NAMES_NO_CHANGE = 0;
+
+    /** Uppercase HTML names. */
+    protected static final short NAMES_UPPERCASE = 1;
+
+    /** Lowercase HTML names. */
+    protected static final short NAMES_LOWERCASE = 2;
+
+    // defaults
+
+    /** Default buffer size. */
+    protected static final int DEFAULT_BUFFER_SIZE = 2048;
+
+    // debugging
+
+    /** Set to true to debug changes in the scanner. */
+    private static final boolean DEBUG_SCANNER = false;
+
+    /** Set to true to debug changes in the scanner state. */
+    private static final boolean DEBUG_SCANNER_STATE = false;
+
+    /** Set to true to debug the buffer. */
+    private static final boolean DEBUG_BUFFER = false;
+
+    /** Set to true to debug character encoding handling. */
+    private static final boolean DEBUG_CHARSET = false;
+
+    /** Set to true to debug callbacks. */
+    protected static final boolean DEBUG_CALLBACKS = false;
+
+    // static vars
+
+    /** Synthesized event info item. */
+    protected static final HTMLEventInfo SYNTHESIZED_ITEM = 
+        new HTMLEventInfo.SynthesizedItem();
+
+    //
+    // Data
+    //
+
+    // features
+
+    /** Augmentations. */
+    protected boolean fAugmentations;
+
+    /** Report errors. */
+    protected boolean fReportErrors;
+
+    /** Notify character entity references. */
+    protected boolean fNotifyCharRefs;
+
+    /** Notify XML built-in general entity references. */
+    protected boolean fNotifyXmlBuiltinRefs;
+
+    /** Notify HTML built-in general entity references. */
+    protected boolean fNotifyHtmlBuiltinRefs;
+
+    /** Fix Microsoft Windows&reg; character entity references. */
+    protected boolean fFixWindowsCharRefs;
+
+    /** Strip CDATA delimiters from SCRIPT tags. */
+    protected boolean fScriptStripCDATADelims;
+
+    /** Strip comment delimiters from SCRIPT tags. */
+    protected boolean fScriptStripCommentDelims;
+
+    /** Strip CDATA delimiters from STYLE tags. */
+    protected boolean fStyleStripCDATADelims;
+
+    /** Strip comment delimiters from STYLE tags. */
+    protected boolean fStyleStripCommentDelims;
+
+    /** Ignore specified character set. */
+    protected boolean fIgnoreSpecifiedCharset;
+
+    /** CDATA sections. */
+    protected boolean fCDATASections;
+
+    /** Override doctype declaration public and system identifiers. */
+    protected boolean fOverrideDoctype;
+
+    /** Insert document type declaration. */
+    protected boolean fInsertDoctype;
+
+    // properties
+
+    /** Modify HTML element names. */
+    protected short fNamesElems;
+
+    /** Modify HTML attribute names. */
+    protected short fNamesAttrs;
+
+    /** Default encoding. */
+    protected String fDefaultIANAEncoding;
+
+    /** Error reporter. */
+    protected HTMLErrorReporter fErrorReporter;
+
+    /** Doctype declaration public identifier. */
+    protected String fDoctypePubid;
+
+    /** Doctype declaration system identifier. */
+    protected String fDoctypeSysid;
+
+    // boundary locator information
+
+    /** Beginning line number. */
+    protected int fBeginLineNumber;
+
+    /** Beginning column number. */
+    protected int fBeginColumnNumber;
+
+    /** Ending line number. */
+    protected int fEndLineNumber;
+
+    /** Ending column number. */
+    protected int fEndColumnNumber;
+
+    // state
+
+    /** The playback byte stream. */
+    protected PlaybackInputStream fByteStream;
+
+    /** Current entity. */
+    protected CurrentEntity fCurrentEntity;
+    
+    /** The current entity stack. */
+    protected final Stack fCurrentEntityStack = new Stack();
+
+    /** The current scanner. */
+    protected Scanner fScanner;
+
+    /** The current scanner state. */
+    protected short fScannerState;
+
+    /** The document handler. */
+    protected XMLDocumentHandler fDocumentHandler;
+
+    /** Auto-detected IANA encoding. */
+    protected String fIANAEncoding;
+
+    /** Auto-detected Java encoding. */
+    protected String fJavaEncoding;
+
+    /** True if the encoding matches "ISO-8859-*". */
+    protected boolean fIso8859Encoding;
+
+    /** Element count. */
+    protected int fElementCount;
+
+    /** Element depth. */
+    protected int fElementDepth;
+
+    // scanners
+
+    /** Content scanner. */
+    protected Scanner fContentScanner = new ContentScanner();
+
+    /** 
+     * Special scanner used for elements whose content needs to be scanned 
+     * as plain text, ignoring markup such as elements and entity references.
+     * For example: &lt;SCRIPT&gt; and &lt;COMMENT&gt;.
+     */
+    protected SpecialScanner fSpecialScanner = new SpecialScanner();
+
+    // temp vars
+
+    /** String. */
+    protected final XMLString fString = new XMLString();
+
+    /** String buffer. */
+    protected final XMLStringBuffer fStringBuffer = new XMLStringBuffer(1024);
+
+    /** String buffer. */
+    private final XMLStringBuffer fStringBuffer2 = new XMLStringBuffer(1024);
+
+    /** Non-normalized attribute string buffer. */
+    private final XMLStringBuffer fNonNormAttr = new XMLStringBuffer(128);
+
+    /** Augmentations. */
+    private final HTMLAugmentations fInfosetAugs = new HTMLAugmentations();
+
+    /** Location infoset item. */
+    private final LocationItem fLocationItem = new LocationItem();
+
+    /** Single boolean array. */
+    private final boolean[] fSingleBoolean = { false };
+
+    /** Resource identifier. */
+    private final XMLResourceIdentifierImpl fResourceId = new XMLResourceIdentifierImpl();
+
+    //
+    // Public methods
+    //
+
+    /**
+     * Makes the given input source the current entity, saving the entity
+     * that was being scanned on the entity stack. This lets the scanner
+     * transparently read new content (e.g. the output written by an
+     * embedded script); at the end of the pushed entity the scanner
+     * returns to where it left off when the source was pushed.
+     * <p>
+     * <strong>Note:</strong>
+     * This functionality is experimental at this time and is
+     * subject to change in future releases of NekoHTML.
+     *
+     * @param inputSource The new input source to start scanning. It must
+     *                    provide a character stream.
+     *
+     * @throws IllegalArgumentException Thrown if the input source has no
+     *                                  character stream.
+     */
+    public void pushInputSource(XMLInputSource inputSource) {
+        final Reader characterStream = inputSource.getCharacterStream();
+        if (characterStream == null) {
+            throw new IllegalArgumentException("pushed input source has no reader");
+        }
+
+        // save the entity being scanned so it can be resumed later
+        fCurrentEntityStack.push(fCurrentEntity);
+
+        // gather the location information of the new entity
+        final String enc = inputSource.getEncoding();
+        final String pubid = inputSource.getPublicId();
+        final String baseSysid = inputSource.getBaseSystemId();
+        final String sysid = inputSource.getSystemId();
+        final String expandedSysid = expandSystemId(sysid, baseSysid);
+
+        fCurrentEntity = new CurrentEntity(characterStream, enc,
+                                           pubid, baseSysid,
+                                           sysid, expandedSysid);
+    } // pushInputSource(XMLInputSource)
+
+    /**
+     * Cleans up used resources. For example, if scanning is terminated
+     * early, then this method ensures all remaining open streams are
+     * closed.
+     *
+     * @param closeall Close all streams, including the original.
+     *                 This is used in cases when the application has
+     *                 opened the original document stream and should
+     *                 be responsible for closing it.
+     */
+    public void cleanup(boolean closeall) {
+        int size = fCurrentEntityStack.size();
+        if (size > 0) {
+            // current entity is not the original, so close it
+            if (fCurrentEntity != null) {
+                try {
+                    fCurrentEntity.stream.close();
+                }
+                catch (IOException e) {
+                    // ignore
+                }
+            }
+            // close remaining streams
+            for (int i = closeall ? 0 : 1; i < size; i++) {
+                fCurrentEntity = (CurrentEntity)fCurrentEntityStack.pop();
+                try {
+                    fCurrentEntity.stream.close();
+                }
+                catch (IOException e) {
+                    // ignore
+                }
+            }
+        }
+        else if (closeall && fCurrentEntity != null) {
+            try {
+                fCurrentEntity.stream.close();
+            }
+            catch (IOException e) {
+                // ignore
+            }
+        }
+    } // cleanup(boolean)
+
+    //
+    // XMLLocator methods
+    //
+
+    /** Returns the encoding of the current entity, or null if there is none. */
+    public String getEncoding() {
+        if (fCurrentEntity == null) {
+            return null;
+        }
+        return fCurrentEntity.encoding;
+    } // getEncoding():String
+
+    /** Returns the public identifier of the current entity, or null if there is none. */
+    public String getPublicId() {
+        if (fCurrentEntity == null) {
+            return null;
+        }
+        return fCurrentEntity.publicId;
+    } // getPublicId():String
+
+    /** Returns the base system identifier of the current entity, or null if there is none. */
+    public String getBaseSystemId() {
+        if (fCurrentEntity == null) {
+            return null;
+        }
+        return fCurrentEntity.baseSystemId;
+    } // getBaseSystemId():String
+
+    /** Returns the literal system identifier of the current entity, or null if there is none. */
+    public String getLiteralSystemId() {
+        if (fCurrentEntity == null) {
+            return null;
+        }
+        return fCurrentEntity.literalSystemId;
+    } // getLiteralSystemId():String
+
+    /** Returns the expanded system identifier of the current entity, or null if there is none. */
+    public String getExpandedSystemId() {
+        if (fCurrentEntity == null) {
+            return null;
+        }
+        return fCurrentEntity.expandedSystemId;
+    } // getExpandedSystemId():String
+
+    /** Returns the line number of the current entity, or -1 if there is none. */
+    public int getLineNumber() {
+        if (fCurrentEntity == null) {
+            return -1;
+        }
+        return fCurrentEntity.lineNumber;
+    } // getLineNumber():int
+
+    /** Returns the column number of the current entity, or -1 if there is none. */
+    public int getColumnNumber() {
+        if (fCurrentEntity == null) {
+            return -1;
+        }
+        return fCurrentEntity.columnNumber;
+    } // getColumnNumber():int
+
+    //
+    // HTMLComponent methods
+    //
+
+    /**
+     * Looks up the default state of a feature.
+     *
+     * @param featureId The feature identifier.
+     *
+     * @return The default paired with the feature in the recognized
+     *         features list; null if the feature has a null default or
+     *         is not in the list.
+     */
+    public Boolean getFeatureDefault(String featureId) {
+        for (int index = 0; index < RECOGNIZED_FEATURES.length; index++) {
+            final String candidate = RECOGNIZED_FEATURES[index];
+            if (candidate.equals(featureId)) {
+                return RECOGNIZED_FEATURES_DEFAULTS[index];
+            }
+        }
+        // not a feature of this component
+        return null;
+    } // getFeatureDefault(String):Boolean
+
+    /**
+     * Looks up the default value of a property.
+     *
+     * @param propertyId The property identifier.
+     *
+     * @return The default paired with the property in the recognized
+     *         properties list; null if the property has a null default
+     *         or is not in the list.
+     */
+    public Object getPropertyDefault(String propertyId) {
+        for (int index = 0; index < RECOGNIZED_PROPERTIES.length; index++) {
+            final String candidate = RECOGNIZED_PROPERTIES[index];
+            if (candidate.equals(propertyId)) {
+                return RECOGNIZED_PROPERTIES_DEFAULTS[index];
+            }
+        }
+        // not a property of this component
+        return null;
+    } // getPropertyDefault(String):Object
+
+    //
+    // XMLComponent methods
+    //
+
+    /**
+     * Returns the identifiers of the features recognized by this component.
+     * <p>
+     * A copy is returned so that callers cannot modify the shared static
+     * list that <code>getFeatureDefault</code> relies on.
+     *
+     * @return A new array holding the recognized feature identifiers.
+     */
+    public String[] getRecognizedFeatures() {
+        return (String[])RECOGNIZED_FEATURES.clone();
+    } // getRecognizedFeatures():String[]
+
+    /**
+     * Returns the identifiers of the properties recognized by this component.
+     * <p>
+     * A copy is returned so that callers cannot modify the shared static
+     * list that <code>getPropertyDefault</code> relies on.
+     *
+     * @return A new array holding the recognized property identifiers.
+     */
+    public String[] getRecognizedProperties() {
+        return (String[])RECOGNIZED_PROPERTIES.clone();
+    } // getRecognizedProperties():String[]
+
+    /**
+     * Resets the component by reloading every recognized feature and
+     * property from the given component manager into the scanner's
+     * fields.
+     * <p>
+     * <strong>Note:</strong>
+     * String-valued properties are converted with
+     * <code>String.valueOf</code>, so a property whose value is
+     * <code>null</code> is stored as the text "null" rather than as a
+     * null reference.
+     *
+     * @param manager The component manager to query.
+     *
+     * @throws XMLConfigurationException Propagated from the manager's
+     *                                   feature and property lookups.
+     */
+    public void reset(XMLComponentManager manager)
+        throws XMLConfigurationException {
+
+        // get features
+        fAugmentations = manager.getFeature(AUGMENTATIONS);
+        fReportErrors = manager.getFeature(REPORT_ERRORS);
+        fNotifyCharRefs = manager.getFeature(NOTIFY_CHAR_REFS);
+        fNotifyXmlBuiltinRefs = manager.getFeature(NOTIFY_XML_BUILTIN_REFS);
+        fNotifyHtmlBuiltinRefs = manager.getFeature(NOTIFY_HTML_BUILTIN_REFS);
+        fFixWindowsCharRefs = manager.getFeature(FIX_MSWINDOWS_REFS);
+        fScriptStripCDATADelims = manager.getFeature(SCRIPT_STRIP_CDATA_DELIMS);
+        fScriptStripCommentDelims = manager.getFeature(SCRIPT_STRIP_COMMENT_DELIMS);
+        fStyleStripCDATADelims = manager.getFeature(STYLE_STRIP_CDATA_DELIMS);
+        fStyleStripCommentDelims = manager.getFeature(STYLE_STRIP_COMMENT_DELIMS);
+        fIgnoreSpecifiedCharset = manager.getFeature(IGNORE_SPECIFIED_CHARSET);
+        fCDATASections = manager.getFeature(CDATA_SECTIONS);
+        fOverrideDoctype = manager.getFeature(OVERRIDE_DOCTYPE);
+        fInsertDoctype = manager.getFeature(INSERT_DOCTYPE);
+
+        // get properties
+        fNamesElems = getNamesValue(String.valueOf(manager.getProperty(NAMES_ELEMS)));
+        fNamesAttrs = getNamesValue(String.valueOf(manager.getProperty(NAMES_ATTRS)));
+        fDefaultIANAEncoding = String.valueOf(manager.getProperty(DEFAULT_ENCODING));
+        // the cast fails if the configured reporter is not an HTMLErrorReporter
+        fErrorReporter = (HTMLErrorReporter)manager.getProperty(ERROR_REPORTER);
+        fDoctypePubid = String.valueOf(manager.getProperty(DOCTYPE_PUBID));
+        fDoctypeSysid = String.valueOf(manager.getProperty(DOCTYPE_SYSID));
+    
+    } // reset(XMLComponentManager)
+
+    /**
+     * Sets a feature on this component. Every feature that
+     * <code>reset</code> loads from the component manager can also be
+     * set here; identifiers that are not recognized are silently ignored.
+     *
+     * @param featureId The feature identifier.
+     * @param state     The new state of the feature.
+     *
+     * @throws XMLConfigurationException Declared for the XMLComponent
+     *                                   contract; this implementation
+     *                                   does not throw it.
+     */
+    public void setFeature(String featureId, boolean state)
+        throws XMLConfigurationException {
+
+        if (featureId.equals(AUGMENTATIONS)) {
+            fAugmentations = state;
+        }
+        else if (featureId.equals(REPORT_ERRORS)) {
+            fReportErrors = state;
+        }
+        else if (featureId.equals(NOTIFY_CHAR_REFS)) {
+            fNotifyCharRefs = state;
+        }
+        else if (featureId.equals(NOTIFY_XML_BUILTIN_REFS)) {
+            fNotifyXmlBuiltinRefs = state;
+        }
+        else if (featureId.equals(NOTIFY_HTML_BUILTIN_REFS)) {
+            fNotifyHtmlBuiltinRefs = state;
+        }
+        else if (featureId.equals(FIX_MSWINDOWS_REFS)) {
+            fFixWindowsCharRefs = state;
+        }
+        else if (featureId.equals(SCRIPT_STRIP_CDATA_DELIMS)) {
+            fScriptStripCDATADelims = state;
+        }
+        else if (featureId.equals(SCRIPT_STRIP_COMMENT_DELIMS)) {
+            fScriptStripCommentDelims = state;
+        }
+        else if (featureId.equals(STYLE_STRIP_CDATA_DELIMS)) {
+            fStyleStripCDATADelims = state;
+        }
+        else if (featureId.equals(STYLE_STRIP_COMMENT_DELIMS)) {
+            fStyleStripCommentDelims = state;
+        }
+        else if (featureId.equals(IGNORE_SPECIFIED_CHARSET)) {
+            fIgnoreSpecifiedCharset = state;
+        }
+        else if (featureId.equals(CDATA_SECTIONS)) {
+            fCDATASections = state;
+        }
+        else if (featureId.equals(OVERRIDE_DOCTYPE)) {
+            fOverrideDoctype = state;
+        }
+        else if (featureId.equals(INSERT_DOCTYPE)) {
+            fInsertDoctype = state;
+        }
+
+    } // setFeature(String,boolean)
+
+    /**
+     * Sets a property on this component. Every property that
+     * <code>reset</code> loads from the component manager can also be
+     * set here, using the same conversions; identifiers that are not
+     * recognized are silently ignored.
+     *
+     * @param propertyId The property identifier.
+     * @param value      The new value of the property.
+     *
+     * @throws XMLConfigurationException Declared for the XMLComponent
+     *                                   contract; this implementation
+     *                                   does not throw it.
+     */
+    public void setProperty(String propertyId, Object value)
+        throws XMLConfigurationException {
+
+        if (propertyId.equals(NAMES_ELEMS)) {
+            fNamesElems = getNamesValue(String.valueOf(value));
+            return;
+        }
+
+        if (propertyId.equals(NAMES_ATTRS)) {
+            fNamesAttrs = getNamesValue(String.valueOf(value));
+            return;
+        }
+
+        if (propertyId.equals(DEFAULT_ENCODING)) {
+            fDefaultIANAEncoding = String.valueOf(value);
+            return;
+        }
+
+        if (propertyId.equals(ERROR_REPORTER)) {
+            // same cast as in reset(XMLComponentManager)
+            fErrorReporter = (HTMLErrorReporter)value;
+            return;
+        }
+
+        if (propertyId.equals(DOCTYPE_PUBID)) {
+            fDoctypePubid = String.valueOf(value);
+            return;
+        }
+
+        if (propertyId.equals(DOCTYPE_SYSID)) {
+            fDoctypeSysid = String.valueOf(value);
+            return;
+        }
+
+    } // setProperty(String,Object)
+
+    //
+    // XMLDocumentScanner methods
+    //
+
+    /**
+     * Sets the input source and prepares the scanner for a new document.
+     * <p>
+     * The element counters, the entity stack and the boundary locator
+     * information are reset. If the source supplies a character stream
+     * it is used as is. Otherwise a byte stream is taken from the source
+     * (or opened from the expanded system identifier) and wrapped in a
+     * reader. The encoding for that reader is the one declared by the
+     * source, else the one detected from the stream, else the default
+     * encoding.
+     *
+     * @param source The input source to scan.
+     *
+     * @throws IOException Thrown if the stream cannot be opened or the
+     *                     chosen encoding is not supported.
+     */
+    public void setInputSource(XMLInputSource source) throws IOException {
+
+        // reset state
+        fElementCount = 0;
+        fElementDepth = -1;
+        fByteStream = null;
+        fCurrentEntityStack.removeAllElements();
+
+        fBeginLineNumber = 1;
+        fBeginColumnNumber = 1;
+        fEndLineNumber = fBeginLineNumber;
+        fEndColumnNumber = fBeginColumnNumber;
+
+        // reset encoding information
+        fIANAEncoding = fDefaultIANAEncoding;
+        fJavaEncoding = fIANAEncoding;
+
+        // get location information
+        String encoding = source.getEncoding();
+        String publicId = source.getPublicId();
+        String baseSystemId = source.getBaseSystemId();
+        String literalSystemId = source.getSystemId();
+        String expandedSystemId = expandSystemId(literalSystemId, baseSystemId);
+
+        // open stream
+        Reader reader = source.getCharacterStream();
+        if (reader == null) {
+            InputStream inputStream = source.getByteStream();
+            if (inputStream == null) {
+                URL url = new URL(expandedSystemId);
+                inputStream = url.openStream();
+            }
+            fByteStream = new PlaybackInputStream(inputStream);
+
+            // encodings[0] is the IANA name, encodings[1] the Java name
+            String[] encodings = new String[2];
+            if (encoding == null) {
+                fByteStream.detectEncoding(encodings);
+            }
+            else {
+                encodings[0] = encoding;
+            }
+            if (encodings[0] == null) {
+                // nothing declared or detected: fall back to the default
+                encodings[0] = fDefaultIANAEncoding;
+                if (fReportErrors) {
+                    fErrorReporter.reportWarning("HTML1000", null);
+                }
+            }
+            if (encodings[1] == null) {
+                // Encoding names are ASCII, so upper-case them with a fixed
+                // locale; the default locale may change the letters (under
+                // a Turkish locale "i" becomes a dotted capital I) and make
+                // the lookup fail.
+                encodings[1] = EncodingMap.getIANA2JavaMapping(
+                    encodings[0].toUpperCase(java.util.Locale.ENGLISH));
+                if (encodings[1] == null) {
+                    // no known mapping: try the IANA name as the Java name
+                    encodings[1] = encodings[0];
+                    if (fReportErrors) {
+                        fErrorReporter.reportWarning("HTML1001", new Object[]{encodings[0]});
+                    }
+                }
+            }
+            fIANAEncoding = encodings[0];
+            fJavaEncoding = encodings[1];
+            /* PATCH: Asgeir Asgeirsson */
+            fIso8859Encoding = fIANAEncoding == null 
+                            || fIANAEncoding.toUpperCase(java.util.Locale.ENGLISH).startsWith("ISO-8859")
+                            || fIANAEncoding.equalsIgnoreCase(fDefaultIANAEncoding);
+            encoding = fIANAEncoding;
+            reader = new InputStreamReader(fByteStream, fJavaEncoding);
+        }
+        fCurrentEntity = new CurrentEntity(reader, encoding,
+                                           publicId, baseSystemId,
+                                           literalSystemId, expandedSystemId);
+
+        // set scanner and state
+        setScanner(fContentScanner);
+        setScannerState(STATE_START_DOCUMENT);
+
+    } // setInputSource(XMLInputSource)
+
+    /**
+     * Scans the document using the current scanner.
+     *
+     * @param complete True to keep scanning until the current scanner
+     *                 reports that it cannot continue; false to perform
+     *                 a single scanning step.
+     *
+     * @return The result of the single step when <code>complete</code>
+     *         is false; always false when <code>complete</code> is true,
+     *         since scanning then only stops once a step returns false.
+     */
+    public boolean scanDocument(boolean complete) throws XNIException, IOException {
+        if (!complete) {
+            // one step only
+            return fScanner.scan(false);
+        }
+        // the current scanner may change between steps, so the field
+        // is read again on every pass
+        while (fScanner.scan(true)) {
+            // keep scanning
+        }
+        return false;
+    } // scanDocument(boolean):boolean
+
+    /**
+     * Registers the document handler.
+     *
+     * @param handler The new document handler.
+     */
+    public void setDocumentHandler(XMLDocumentHandler handler) {
+        this.fDocumentHandler = handler;
+    } // setDocumentHandler(XMLDocumentHandler)
+
+    // @since Xerces 2.1.0
+
+    /**
+     * Returns the document handler last set with
+     * <code>setDocumentHandler</code>, or null if none has been set.
+     */
+    public XMLDocumentHandler getDocumentHandler() {
+        return this.fDocumentHandler;
+    } // getDocumentHandler():XMLDocumentHandler
+
+    //
+    // Protected static methods
+    //
+
+    /** Returns the value of the specified attribute, ignoring case. */
+    protected static String getValue(XMLAttributes attrs, String aname) {
+        int length = attrs != null ? attrs.getLength() : 0;
+        for (int i = 0; i < length; i++) {
+            if (attrs.getQName(i).equalsIgnoreCase(aname)) {
+                return attrs.getValue(i);
+            }
+        }
+        return null;
+    } // getValue(XMLAttributes,String):String
+
+    /**
+     * Expands a system id against a base system id and returns the
+     * result as a URI string. If the system id is null, empty, or
+     * already parses as a URI, or if the expansion fails because of
+     * a malformed URI, the system id is returned unchanged.
+     *
+     * @param systemId     The systemId to be expanded.
+     * @param baseSystemId The base against which systemId is resolved.
+     *                     If it is null, empty, or equal to systemId,
+     *                     a "file" URI built from the "user.dir"
+     *                     system property is used as the base instead.
+     *
+     * @return Returns the URI string representing the expanded system
+     *         identifier, or the given system identifier itself when
+     *         no expansion was performed.
+     *
+     */
+    public static String expandSystemId(String systemId, String baseSystemId) {
+
+        // nothing to expand: hand a null or empty id straight back
+        if (systemId == null || systemId.length() == 0) {
+            return systemId;
+        }
+        // if id already expanded, return
+        try {
+            URI uri = new URI(systemId);
+            if (uri != null) { // always true once the constructor has returned
+                return systemId;
+            }
+        }
+        catch (URI.MalformedURIException e) {
+            // not a complete URI by itself; resolve it against a base below
+        }
+        // normalize id
+        String id = fixURI(systemId);
+
+        // normalize base
+        URI base = null;
+        URI uri = null;
+        try {
+            if (baseSystemId == null || baseSystemId.length() == 0 ||
+                baseSystemId.equals(systemId)) {
+                String dir;
+                try {
+                    dir = fixURI(System.getProperty("user.dir"));
+                }
+                catch (SecurityException se) {
+                    dir = ""; // property not readable: fall back to an empty directory
+                }
+                if (!dir.endsWith("/")) {
+                    dir = dir + "/";
+                }
+                base = new URI("file", "", dir, null, null);
+            }
+            else {
+                try {
+                    base = new URI(fixURI(baseSystemId));
+                }
+                catch (URI.MalformedURIException e) {
+                    String dir;
+                    try {
+                        dir = fixURI(System.getProperty("user.dir"));
+                    }
+                    catch (SecurityException se) {
+                        dir = "";
+                    }
+                    if (baseSystemId.indexOf(':') != -1) {
+                        // for xml schemas we might have baseURI with
+                        // a specified drive
+                        base = new URI("file", "", fixURI(baseSystemId), null, null);
+                    }
+                    else {
+                        if (!dir.endsWith("/")) {
+                            dir = dir + "/";
+                        }
+                        dir = dir + fixURI(baseSystemId); // base is relative to the working directory
+                        base = new URI("file", "", dir, null, null);
+                    }
+                }
+             }
+             // expand id
+             uri = new URI(base, id);
+        }
+        catch (URI.MalformedURIException e) {
+            // let it go through; uri stays null and systemId is returned as is
+        }
+
+        if (uri == null) {
+            return systemId;
+        }
+        return uri.toString();
+
+    } // expandSystemId(String,String):String
+
+    /**
+     * Fixes a platform dependent filename to standard URI form.
+     *
+     * @param str The string to fix.
+     *
+     * @return Returns the fixed URI string.
+     */
+    protected static String fixURI(String str) {
+
+        // handle platform dependent strings
+        str = str.replace(java.io.File.separatorChar, '/');
+
+        // Windows fix
+        if (str.length() >= 2) {
+            char ch1 = str.charAt(1);
+            // change "C:blah" to "/C:blah"
+            if (ch1 == ':') {
+                char ch0 = Character.toUpperCase(str.charAt(0));
+                if (ch0 >= 'A' && ch0 <= 'Z') {
+                    str = "/" + str;
+                }
+            }
+            // change "//blah" to "file://blah"
+            else if (ch1 == '/' && str.charAt(0) == '/') {
+                str = "file:" + str;
+            }
+        }
+
+        // done
+        return str;
+
+    } // fixURI(String):String
+
+    /**
+     * Modifies the given name based on the specified mode.
+     * <p>
+     * The case conversion uses a fixed English locale so that the
+     * result does not depend on the default locale of the JVM; under
+     * a Turkish default locale, for example, the no-argument
+     * conversions map "i" to a dotted capital I and "I" to a dotless
+     * small i, which would corrupt HTML names such as "title".
+     *
+     * @param name The name to modify.
+     * @param mode One of NAMES_UPPERCASE or NAMES_LOWERCASE; any other
+     *             value leaves the name unchanged.
+     *
+     * @return Returns the converted name, or the name itself if the
+     *         mode requests no change.
+     */
+    protected static final String modifyName(String name, short mode) {
+        switch (mode) {
+            case NAMES_UPPERCASE: return name.toUpperCase(java.util.Locale.ENGLISH);
+            case NAMES_LOWERCASE: return name.toLowerCase(java.util.Locale.ENGLISH);
+        }
+        return name;
+    } // modifyName(String,short):String
+
+    /**
+     * Maps the string form of an HTML names setting to its constant:
+     * "lower" selects lower-casing, "upper" selects upper-casing and
+     * every other value selects no change.
+     *
+     * @see #NAMES_NO_CHANGE
+     * @see #NAMES_LOWERCASE
+     * @see #NAMES_UPPERCASE
+     */
+    protected static final short getNamesValue(String value) {
+        short mode = NAMES_NO_CHANGE;
+        if (value.equals("lower")) {
+            mode = NAMES_LOWERCASE;
+        }
+        else if (value.equals("upper")) {
+            mode = NAMES_UPPERCASE;
+        }
+        return mode;
+    } // getNamesValue(String):short
+
+    /**
+     * Fixes Microsoft Windows&reg; specific characters.
+     * <p>
+     * Numeric values in the range 128-159 that Windows code page 1252
+     * assigns to printable characters are mapped to the corresponding
+     * Unicode code points. Values without such an assignment (129,
+     * 141, 143, 144 and 157) and all values outside that range are
+     * returned unchanged.
+     * <p>
+     * Details about this common problem can be found at 
+     * <a href='http://www.cs.tut.fi/~jkorpela/www/windows-chars.html'>http://www.cs.tut.fi/~jkorpela/www/windows-chars.html</a>
+     *
+     * @param origChar The character value as it appeared in the
+     *                 character reference.
+     *
+     * @return Returns the Unicode code point to use instead, or
+     *         origChar itself if no mapping applies.
+     */
+    protected int fixWindowsCharacter(int origChar) {
+        /* PATCH: Asgeir Asgeirsson */
+        switch(origChar) {
+            case 128: return 8364; // euro sign
+            case 130: return 8218;
+            case 131: return 402;
+            case 132: return 8222;
+            case 133: return 8230;
+            case 134: return 8224;
+            case 135: return 8225;
+            case 136: return 710;
+            case 137: return 8240;
+            case 138: return 352;
+            case 139: return 8249;
+            case 140: return 338;
+            case 142: return 381; // capital Z with caron
+            case 145: return 8216;
+            case 146: return 8217;
+            case 147: return 8220;
+            case 148: return 8221;
+            case 149: return 8226;
+            case 150: return 8211;
+            case 151: return 8212;
+            case 152: return 732;
+            case 153: return 8482;
+            case 154: return 353;
+            case 155: return 8250;
+            case 156: return 339;
+            case 158: return 382; // small z with caron
+            case 159: return 376;
+        }
+        return origChar;
+    } // fixWindowsCharacter(int):int
+
+    //
+    // Protected methods
+    //
+
+    // i/o
+
+    /**
+     * Reads a single character from the current entity's buffer,
+     * loading a new chunk from the underlying stream first when the
+     * buffer has been fully consumed. The column number is advanced
+     * by one; line numbers are not touched here.
+     *
+     * @return Returns the character read, or -1 if no additional
+     *         characters could be loaded.
+     *
+     * @throws IOException Thrown if I/O error occurs.
+     */
+    protected int read() throws IOException {
+        if (DEBUG_BUFFER) { 
+            System.out.print("(read: ");
+            printBuffer();
+            System.out.println();
+        }
+        if (fCurrentEntity.offset == fCurrentEntity.length) { // buffer exhausted
+            if (load(0) == -1) {
+                if (DEBUG_BUFFER) { 
+                    System.out.println(")read: -> -1");
+                }
+                return -1;
+            }
+        }
+        int c = fCurrentEntity.buffer[fCurrentEntity.offset++];
+        fCurrentEntity.columnNumber++;
+        if (DEBUG_BUFFER) { 
+            System.out.print(")read: ");
+            printBuffer();
+            System.out.print(" -> ");
+            System.out.print(c);
+            System.out.println();
+        }
+        return c;
+    } // read():int
+
+    /** 
+     * Loads a new chunk of data into the buffer and returns the number of
+     * characters loaded or -1 if no additional characters were loaded.
+     * <p>
+     * Characters before the given offset are kept; everything from the
+     * offset onwards is overwritten. Afterwards the entity's offset is
+     * set to the given offset and its length to the offset plus the
+     * number of characters read (or just the offset at end of stream).
+     *
+     * @param offset The offset at which new characters should be loaded.
+     *
+     * @return Returns the count reported by the underlying stream.
+     *
+     * @throws IOException Thrown if I/O error occurs.
+     */
+    protected int load(int offset) throws IOException {
+        if (DEBUG_BUFFER) { 
+            System.out.print("(load: ");
+            printBuffer();
+            System.out.println();
+        }
+        // resize buffer, if needed: no room is left after the preserved
+        // prefix, so grow the buffer by a quarter of its current size
+        if (offset == fCurrentEntity.buffer.length) {
+            int adjust = fCurrentEntity.buffer.length / 4;
+            char[] array = new char[fCurrentEntity.buffer.length + adjust];
+            System.arraycopy(fCurrentEntity.buffer, 0, array, 0, fCurrentEntity.length);
+            fCurrentEntity.buffer = array;
+        }
+        // read a block of characters
+        int count = fCurrentEntity.stream.read(fCurrentEntity.buffer, offset, fCurrentEntity.buffer.length - offset);
+        fCurrentEntity.length = count != -1 ? count + offset : offset; // at end of stream only the prefix remains
+        fCurrentEntity.offset = offset;
+        if (DEBUG_BUFFER) { 
+            System.out.print(")load: ");
+            printBuffer();
+            System.out.print(" -> ");
+            System.out.print(count);
+            System.out.println();
+        }
+        return count;
+    } // load():int
+
+    // debugging
+
+    /**
+     * Installs the scanner to be used from now on and, when scanner
+     * debugging is enabled, prints its class name to standard out.
+     *
+     * @param scanner The new scanner; may be null.
+     */
+    protected void setScanner(Scanner scanner) {
+        fScanner = scanner;
+        if (!DEBUG_SCANNER) {
+            return;
+        }
+        String scannerName = "null";
+        if (scanner != null) {
+            scannerName = scanner.getClass().getName();
+        }
+        System.out.print("$$$ setScanner(");
+        System.out.print(scannerName);
+        System.out.println(");");
+    } // setScanner(Scanner)
+    
+    /**
+     * Records the new scanner state and, when scanner state debugging
+     * is enabled, prints the symbolic name of the state to standard
+     * out. States without a known name print an empty name.
+     *
+     * @param state The new scanner state.
+     */
+    protected void setScannerState(short state) {
+        fScannerState = state;
+        if (!DEBUG_SCANNER_STATE) {
+            return;
+        }
+        String label = "";
+        if (fScannerState == STATE_CONTENT) {
+            label = "STATE_CONTENT";
+        }
+        else if (fScannerState == STATE_MARKUP_BRACKET) {
+            label = "STATE_MARKUP_BRACKET";
+        }
+        else if (fScannerState == STATE_START_DOCUMENT) {
+            label = "STATE_START_DOCUMENT";
+        }
+        else if (fScannerState == STATE_END_DOCUMENT) {
+            label = "STATE_END_DOCUMENT";
+        }
+        System.out.print("$$$ setScannerState(");
+        System.out.print(label);
+        System.out.println(");");
+    } // setScannerState(short)
+
+    // scanning
+
+    /**
+     * Scans a DOCTYPE line.
+     * <p>
+     * Reads the root element name and the optional PUBLIC or SYSTEM
+     * identifiers, skips the remainder of the declaration and reports
+     * the result through the document handler's doctypeDecl callback.
+     * When fOverrideDoctype is set, the scanned public and system
+     * identifiers are replaced by fDoctypePubid and fDoctypeSysid.
+     * <p>
+     * NOTE(review): presumably called with the input positioned just
+     * after the DOCTYPE keyword; the caller is not visible here.
+     *
+     * @throws IOException Thrown if I/O error occurs.
+     */
+    protected void scanDoctype() throws IOException {
+        String root = null;
+        String pubid = null;
+        String sysid = null;
+
+        if (skipSpaces()) {
+            root = scanName();
+            if (root == null) {
+                if (fReportErrors) {
+                    fErrorReporter.reportError("HTML1014", null);
+                }
+            }
+            else {
+                root = modifyName(root, fNamesElems);
+            }
+            if (skipSpaces()) {
+                if (skip("PUBLIC", false)) { // keyword is matched ignoring case
+                    skipSpaces();
+                    pubid = scanLiteral();
+                    if (skipSpaces()) {
+                        sysid = scanLiteral();
+                    }
+                }
+                else if (skip("SYSTEM", false)) {
+                    skipSpaces();
+                    sysid = scanLiteral();
+                }
+            }
+        }
+        int c;
+        while ((c = read()) != -1) { // consume the rest of the declaration
+            if (c == '<') { // start of other markup: push it back and stop
+                fCurrentEntity.offset--;
+                fCurrentEntity.columnNumber--;
+                break;
+            }
+            if (c == '>') {
+                break;
+            }
+            if (c == '[') { // bracketed section: skip it with '<' / '>' balancing
+                skipMarkup(true);
+                break;
+            }
+        }
+
+        if (fDocumentHandler != null) {
+            if (fOverrideDoctype) {
+                pubid = fDoctypePubid;
+                sysid = fDoctypeSysid;
+            }
+            fEndLineNumber = fCurrentEntity.lineNumber;
+            fEndColumnNumber = fCurrentEntity.columnNumber;
+            fDocumentHandler.doctypeDecl(root, pubid, sysid, locationAugs());
+        }
+
+    } // scanDoctype()
+
+    /**
+     * Scans a quoted literal.
+     * <p>
+     * The literal must start with a single or double quote and ends
+     * at the matching quote. A '&lt;' inside the literal also ends it;
+     * that character is pushed back and the text collected so far is
+     * returned. Each run of newlines is replaced by one space.
+     *
+     * @return Returns the literal's content without the quotes, or
+     *         null if the next character is not a quote (in which
+     *         case that character is pushed back).
+     *
+     * @throws EOFException Thrown if the input ends inside the literal.
+     * @throws IOException Thrown if I/O error occurs.
+     */
+    protected String scanLiteral() throws IOException {
+        int quote = read();
+        if (quote == '\'' || quote == '"') {
+            StringBuffer str = new StringBuffer();
+            int c;
+            while ((c = read()) != -1) {
+                if (c == quote) {
+                    break;
+                }
+                if (c == '\r' || c == '\n') {
+                    fCurrentEntity.offset--; // push the newline back so skipNewlines() counts it
+                    fCurrentEntity.columnNumber--;
+                    // NOTE: This collapses newlines to a single space.
+                    //       [Q] Is this the right thing to do here? -Ac
+                    skipNewlines();
+                    str.append(' ');
+                }
+                else if (c == '<') {
+                    fCurrentEntity.offset--; // leave the '<' for the caller
+                    fCurrentEntity.columnNumber--;
+                    break;
+                }
+                else {
+                    str.append((char)c);
+                }
+            }
+            if (c == -1) {
+                if (fReportErrors) {
+                    fErrorReporter.reportError("HTML1007", null);
+                }
+                throw new EOFException();
+            }
+            return str.toString();
+        }
+        else {
+            fCurrentEntity.offset--; // not a quote: undo the read
+            fCurrentEntity.columnNumber--;
+        }
+        return null;
+    } // scanLiteral():String
+
+    /**
+     * Scans a name.
+     * <p>
+     * A name is the longest run of characters that are letters,
+     * digits, '-', '.', ':' or '_'. When the run reaches the end of
+     * the buffer, the part scanned so far is moved to the front of
+     * the buffer and more data is loaded behind it, so the whole name
+     * is contiguous when it is finally extracted.
+     *
+     * @return Returns the name, or null if no name character is
+     *         present at the current position or the input is at its
+     *         end.
+     *
+     * @throws IOException Thrown if I/O error occurs.
+     */
+    protected String scanName() throws IOException {
+        if (DEBUG_BUFFER) {
+            System.out.print("(scanName: ");
+            printBuffer();
+            System.out.println();
+        }
+        if (fCurrentEntity.offset == fCurrentEntity.length) {
+            if (load(0) == -1) {
+                if (DEBUG_BUFFER) {
+                    System.out.print(")scanName: ");
+                    printBuffer();
+                    System.out.println(" -> null");
+                }
+                return null;
+            }
+        }
+        int offset = fCurrentEntity.offset; // buffer index where the name starts
+        while (true) {
+            while (fCurrentEntity.offset < fCurrentEntity.length) {
+                char c = fCurrentEntity.buffer[fCurrentEntity.offset];
+                if (!Character.isLetterOrDigit(c) &&
+                    !(c == '-' || c == '.' || c == ':' || c == '_')) {
+                    break;
+                }
+                fCurrentEntity.offset++;
+                fCurrentEntity.columnNumber++;
+            }
+            if (fCurrentEntity.offset == fCurrentEntity.length) {
+                int length = fCurrentEntity.length - offset;
+                System.arraycopy(fCurrentEntity.buffer, offset, fCurrentEntity.buffer, 0, length); // keep the partial name at the front
+                int count = load(length);
+                offset = 0; // the name now starts at the front of the buffer
+                if (count == -1) {
+                    break;
+                }
+            }
+            else {
+                break;
+            }
+        }
+        int length = fCurrentEntity.offset - offset;
+        String name = length > 0 ? new String(fCurrentEntity.buffer, offset, length) : null;
+        if (DEBUG_BUFFER) {
+            System.out.print(")scanName: ");
+            printBuffer();
+            System.out.print(" -> \"");
+            System.out.print(name);
+            System.out.println('"');
+        }
+        return name;
+    } // scanName():String
+
+    /**
+     * Scans an entity reference. The leading '&amp;' is expected to
+     * have been read already; it is re-created in the given buffer so
+     * that the original text can be reported when the reference
+     * cannot be resolved.
+     * <p>
+     * Numeric character references accept both "#x" and "#X" as the
+     * hexadecimal prefix, and code points above U+FFFF are reported
+     * as a UTF-16 surrogate pair.
+     *
+     * @param str     Scratch buffer that receives the reference text
+     *                and, on success, the replacement characters.
+     * @param content True if the reference occurs in content, in which
+     *                case the result (or the unresolved text) is sent
+     *                to the document handler as characters.
+     *
+     * @return Returns the character value of the reference, or -1 if
+     *         the reference is unterminated, malformed or unknown.
+     *
+     * @throws IOException Thrown if I/O error occurs.
+     */
+    protected int scanEntityRef(XMLStringBuffer str, boolean content) 
+        throws IOException {
+        str.clear();
+        str.append('&');
+        while (true) {
+            int c = read();
+            if (c == ';') {
+                str.append(';');
+                break;
+            }
+            if (c == -1) {
+                if (fReportErrors) {
+                    fErrorReporter.reportWarning("HTML1004", null);
+                }
+                reportEntityText(str, content);
+                return -1;
+            }
+            if (!Character.isLetterOrDigit((char)c) && c != '#') {
+                if (fReportErrors) {
+                    fErrorReporter.reportWarning("HTML1004", null);
+                }
+                // the offending character is not part of the reference
+                fCurrentEntity.offset--;
+                fCurrentEntity.columnNumber--;
+                reportEntityText(str, content);
+                return -1;
+            }
+            str.append((char)c);
+        }
+        // defensive only: the loop above exits solely after appending
+        // ';', so the buffer holds at least "&;" at this point
+        if (str.length == 1) {
+            reportEntityText(str, content);
+            return -1;
+        }
+
+        String name = str.toString().substring(1, str.length-1);
+        if (name.startsWith("#")) {
+            int value = -1;
+            try {
+                if (name.startsWith("#x") || name.startsWith("#X")) {
+                    value = Integer.parseInt(name.substring(2), 16);
+                }
+                else {
+                    value = Integer.parseInt(name.substring(1));
+                }
+                /* PATCH: Asgeir Asgeirsson */
+                if (fFixWindowsCharRefs && fIso8859Encoding) {
+                    value = fixWindowsCharacter(value);
+                }
+                if (content && fDocumentHandler != null && fElementCount >= fElementDepth) {
+                    fEndLineNumber = fCurrentEntity.lineNumber;
+                    fEndColumnNumber = fCurrentEntity.columnNumber;
+                    if (fNotifyCharRefs) {
+                        XMLResourceIdentifier id = resourceId();
+                        String encoding = null;
+                        fDocumentHandler.startGeneralEntity(name, id, encoding, locationAugs());
+                    }
+                    str.clear();
+                    if (value >= 0x10000 && value <= 0x10FFFF) {
+                        // supplementary code point: a single char cannot
+                        // hold it, so emit the UTF-16 surrogate pair
+                        int supplementary = value - 0x10000;
+                        str.append((char)(0xD800 + (supplementary >> 10)));
+                        str.append((char)(0xDC00 + (supplementary & 0x3FF)));
+                    }
+                    else {
+                        str.append((char)value);
+                    }
+                    fDocumentHandler.characters(str, locationAugs());
+                    if (fNotifyCharRefs) {
+                        fDocumentHandler.endGeneralEntity(name, locationAugs());
+                    }
+                }
+            }
+            catch (NumberFormatException e) {
+                if (fReportErrors) {
+                    fErrorReporter.reportError("HTML1005", new Object[]{name});
+                }
+                // the buffer still holds the original reference text
+                reportEntityText(str, content);
+            }
+            return value;
+        }
+
+        int c = HTMLEntities.get(name);
+        if (c == -1) {
+            if (fReportErrors) {
+                fErrorReporter.reportWarning("HTML1006", new Object[]{name});
+            }
+            reportEntityText(str, content);
+            return -1;
+        }
+        if (content && fDocumentHandler != null && fElementCount >= fElementDepth) {
+            fEndLineNumber = fCurrentEntity.lineNumber;
+            fEndColumnNumber = fCurrentEntity.columnNumber;
+            boolean notify = fNotifyHtmlBuiltinRefs || (fNotifyXmlBuiltinRefs && builtinXmlRef(name));
+            if (notify) {
+                XMLResourceIdentifier id = resourceId();
+                String encoding = null;
+                fDocumentHandler.startGeneralEntity(name, id, encoding, locationAugs());
+            }
+            str.clear();
+            str.append((char)c);
+            fDocumentHandler.characters(str, locationAugs());
+            if (notify) {
+                fDocumentHandler.endGeneralEntity(name, locationAugs());
+            }
+        }
+        return c;
+
+    } // scanEntityRef(XMLStringBuffer,boolean):int
+
+    /** Sends the unresolved reference text to the document handler as characters, if content is being reported. */
+    private void reportEntityText(XMLStringBuffer str, boolean content) {
+        if (content && fDocumentHandler != null && fElementCount >= fElementDepth) {
+            fEndLineNumber = fCurrentEntity.lineNumber;
+            fEndColumnNumber = fCurrentEntity.columnNumber;
+            fDocumentHandler.characters(str, locationAugs());
+        }
+    } // reportEntityText(XMLStringBuffer,boolean)
+
+    /**
+     * Returns true if the specified text is present and is skipped.
+     * <p>
+     * When the text does not match, the characters consumed during the
+     * attempt are pushed back and the column number is restored, so a
+     * failed attempt leaves the reported position unchanged.
+     *
+     * @param s             The text to look for; null or empty text
+     *                      trivially matches.
+     * @param caseSensitive True if the comparison is case sensitive;
+     *                      otherwise both sides are upper-cased before
+     *                      being compared.
+     *
+     * @return Returns true if the text was found at the current
+     *         position and consumed.
+     *
+     * @throws IOException Thrown if I/O error occurs.
+     */
+    protected boolean skip(String s, boolean caseSensitive) throws IOException {
+        int length = s != null ? s.length() : 0;
+        for (int i = 0; i < length; i++) {
+            if (fCurrentEntity.offset == fCurrentEntity.length) {
+                // keep the i characters matched so far at the front of
+                // the buffer so that they can still be pushed back
+                System.arraycopy(fCurrentEntity.buffer, fCurrentEntity.offset - i, fCurrentEntity.buffer, 0, i);
+                if (load(i) == -1) {
+                    fCurrentEntity.offset = 0;
+                    fCurrentEntity.columnNumber -= i;
+                    return false;
+                }
+            }
+            char c0 = s.charAt(i);
+            char c1 = fCurrentEntity.buffer[fCurrentEntity.offset++];
+            fCurrentEntity.columnNumber++;
+            if (!caseSensitive) {
+                c0 = Character.toUpperCase(c0);
+                c1 = Character.toUpperCase(c1);
+            }
+            if (c0 != c1) {
+                // undo the i matched characters plus the mismatching one
+                fCurrentEntity.offset -= i + 1;
+                fCurrentEntity.columnNumber -= i + 1;
+                return false;
+            }
+        }
+        return true;
+    } // skip(String):boolean
+
+    /**
+     * Skips markup up to and including the closing '&gt;', or up to
+     * the end of the input if no closing bracket is found.
+     * <p>
+     * Newlines inside the skipped markup are handed to skipNewlines()
+     * so that the line number stays accurate.
+     *
+     * @param balance True if nested '&lt;' characters should be
+     *                balanced against '&gt;' characters, so that only
+     *                the matching closing bracket ends the markup.
+     *
+     * @return Returns true if a "/&gt;" sequence was seen while
+     *         skipping.
+     *
+     * @throws IOException Thrown if I/O error occurs.
+     */
+    protected boolean skipMarkup(boolean balance) throws IOException {
+        if (DEBUG_BUFFER) {
+            System.out.print("(skipMarkup: ");
+            printBuffer();
+            System.out.println();
+        }
+        int depth = 1;
+        boolean slashgt = false;
+        OUTER: while (true) {
+            if (fCurrentEntity.offset == fCurrentEntity.length) {
+                if (load(0) == -1) {
+                    break OUTER;
+                }
+            }
+            while (fCurrentEntity.offset < fCurrentEntity.length) {
+                char c = fCurrentEntity.buffer[fCurrentEntity.offset++];
+                fCurrentEntity.columnNumber++;
+                if (balance && c == '<') {
+                    depth++;
+                }
+                else if (c == '>') {
+                    depth--;
+                    if (depth == 0) {
+                        break OUTER;
+                    }
+                }
+                else if (c == '/') {
+                    // look ahead one character for "/>"
+                    if (fCurrentEntity.offset == fCurrentEntity.length) {
+                        if (load(0) == -1) {
+                            break OUTER;
+                        }
+                    }
+                    c = fCurrentEntity.buffer[fCurrentEntity.offset++];
+                    fCurrentEntity.columnNumber++;
+                    if (c == '>') {
+                        slashgt = true;
+                        depth--;
+                        if (depth == 0) {
+                            break OUTER;
+                        }
+                    }
+                    else {
+                        fCurrentEntity.offset--;
+                        fCurrentEntity.columnNumber--;
+                    }
+                }
+                else if (c == '\r' || c == '\n') {
+                    // push the newline back first: skipNewlines() only
+                    // counts the newlines it consumes itself, so a line
+                    // break already consumed here would go uncounted
+                    fCurrentEntity.offset--;
+                    fCurrentEntity.columnNumber--;
+                    skipNewlines();
+                }
+            }
+        }
+        if (DEBUG_BUFFER) {
+            System.out.print(")skipMarkup: ");
+            printBuffer();
+            System.out.print(" -> "+slashgt);
+            System.out.println();
+        }
+        return slashgt;
+    } // skipMarkup():boolean
+
+    /**
+     * Skips whitespace, including newlines. Newlines are consumed via
+     * skipNewlines() so that line numbers are kept up to date; other
+     * whitespace advances the column number by one per character.
+     *
+     * @return Returns true if at least one whitespace character was
+     *         found at the current position.
+     *
+     * @throws IOException Thrown if I/O error occurs.
+     */
+    protected boolean skipSpaces() throws IOException {
+        if (DEBUG_BUFFER) {
+            System.out.print("(skipSpaces: ");
+            printBuffer();
+            System.out.println();
+        }
+        boolean spaces = false;
+        while (true) {
+            if (fCurrentEntity.offset == fCurrentEntity.length) {
+                if (load(0) == -1) {
+                    break;
+                }
+            }
+            char c = fCurrentEntity.buffer[fCurrentEntity.offset]; // peek only; consumed below
+            if (!Character.isSpace(c)) { // NOTE(review): Character.isSpace is deprecated in the JDK
+                break;
+            }
+            spaces = true;
+            if (c == '\r' || c == '\n') {
+                skipNewlines();
+                continue;
+            }
+            fCurrentEntity.offset++;
+            fCurrentEntity.columnNumber++;
+        }
+        if (DEBUG_BUFFER) {
+            System.out.print(")skipSpaces: ");
+            printBuffer();
+            System.out.print(" -> ");
+            System.out.print(spaces);
+            System.out.println();
+        }
+        return spaces;
+    } // skipSpaces()
+
+    /**
+     * Skips newlines and returns the number of newlines skipped.
+     * Equivalent to calling skipNewlines(int) with Integer.MAX_VALUE
+     * as the maximum number of lines.
+     *
+     * @throws IOException Thrown if I/O error occurs.
+     */
+    protected int skipNewlines() throws IOException {
+        return skipNewlines(Integer.MAX_VALUE);
+    } // skipNewlines():int
+
+    /**
+     * Skips newlines and returns the number of newlines skipped.
+     * <p>
+     * A carriage return, a line feed, and a carriage return directly
+     * followed by a line feed each count as one newline. If any
+     * newline is skipped, the entity's line number is advanced by the
+     * number of newlines and its column number is reset to 1.
+     *
+     * @param maxlines The loop stops once this many newlines have been
+     *                 counted.
+     *
+     * @return Returns the number of newlines skipped; 0 if the current
+     *         character is not a newline or the input is at its end.
+     *
+     * @throws IOException Thrown if I/O error occurs.
+     */
+    protected int skipNewlines(int maxlines) throws IOException {
+        if (DEBUG_BUFFER) {
+            System.out.print("(skipNewlines: ");
+            printBuffer();
+            System.out.println();
+        }
+        if (fCurrentEntity.offset == fCurrentEntity.length) {
+            if (load(0) == -1) {
+                if (DEBUG_BUFFER) {
+                    System.out.print(")skipNewlines: ");
+                    printBuffer();
+                    System.out.println();
+                }
+                return 0;
+            }
+        }
+        char c = fCurrentEntity.buffer[fCurrentEntity.offset];
+        int newlines = 0;
+        int offset = fCurrentEntity.offset; // NOTE(review): updated below but never read in this method
+        if (c == '\n' || c == '\r') {
+            do {
+                c = fCurrentEntity.buffer[fCurrentEntity.offset++];
+                if (c == '\r') {
+                    newlines++;
+                    if (fCurrentEntity.offset == fCurrentEntity.length) {
+                        offset = 0;
+                        fCurrentEntity.offset = newlines;
+                        if (load(newlines) == -1) { // refill behind the first 'newlines' buffer slots
+                            break;
+                        }
+                    }
+                    if (fCurrentEntity.buffer[fCurrentEntity.offset] == '\n') { // "\r\n" counts as one newline
+                        fCurrentEntity.offset++;
+                        offset++;
+                    }
+                }
+                else if (c == '\n') {
+                    newlines++;
+                    if (fCurrentEntity.offset == fCurrentEntity.length) {
+                        offset = 0;
+                        fCurrentEntity.offset = newlines;
+                        if (load(newlines) == -1) {
+                            break;
+                        }
+                    }
+                }
+                else {
+                    fCurrentEntity.offset--; // not a newline: push it back and stop
+                    break;
+                }
+            } while (newlines < maxlines &&
+                     fCurrentEntity.offset < fCurrentEntity.length - 1);
+            fCurrentEntity.lineNumber += newlines;
+            fCurrentEntity.columnNumber = 1;
+        }
+        if (DEBUG_BUFFER) {
+            System.out.print(")skipNewlines: ");
+            printBuffer();
+            System.out.print(" -> ");
+            System.out.print(newlines);
+            System.out.println();
+        }
+        return newlines;
+    } // skipNewlines(int):int
+
+    // infoset utility methods
+
+    /**
+     * Builds the augmentations that carry the current begin and end
+     * line and column numbers. The shared infoset augmentations
+     * object is emptied and reused on every call.
+     *
+     * @return Returns the augmentations holding the location item, or
+     *         null if augmentations are turned off.
+     */
+    protected final Augmentations locationAugs() {
+        if (!fAugmentations) {
+            return null;
+        }
+        fLocationItem.setValues(fBeginLineNumber, fBeginColumnNumber, 
+                                fEndLineNumber, fEndColumnNumber);
+        final HTMLAugmentations infoset = fInfosetAugs;
+        infoset.removeAllItems();
+        infoset.putItem(AUGMENTATIONS, fLocationItem);
+        return infoset;
+    } // locationAugs():Augmentations
+
+    /**
+     * Builds the augmentations that mark an event as synthesized. The
+     * shared infoset augmentations object is emptied and reused on
+     * every call.
+     *
+     * @return Returns the augmentations holding the synthesized item,
+     *         or null if augmentations are turned off.
+     */
+    protected final Augmentations synthesizedAugs() {
+        if (!fAugmentations) {
+            return null;
+        }
+        final HTMLAugmentations infoset = fInfosetAugs;
+        infoset.removeAllItems();
+        infoset.putItem(AUGMENTATIONS, SYNTHESIZED_ITEM);
+        return infoset;
+    } // synthesizedAugs():Augmentations
+
+    /**
+     * Returns an empty resource identifier. The shared fResourceId
+     * object is cleared and returned, so every call hands out the
+     * same instance. The commented-out alternative at the end of the
+     * body would return null instead; see the note there for why it
+     * is disabled.
+     */
+    protected final XMLResourceIdentifier resourceId() {
+        /***/
+        fResourceId.clear();
+        return fResourceId;
+        /***
+        // NOTE: Unfortunately, the Xerces DOM parser classes expect a
+        //       non-null resource identifier object to be passed to
+        //       startGeneralEntity. -Ac
+        return null;
+        /***/
+    } // resourceId():XMLResourceIdentifier
+
+    //
+    // Protected static methods
+    //
+
+    /** Returns true if the name is a built-in XML general entity reference. */
+    protected static boolean builtinXmlRef(String name) {
+        return name.equals("amp") || name.equals("lt") || name.equals("gt") ||
+               name.equals("quot") || name.equals("apos");
+    } // builtinXmlRef(String):boolean
+
+    //
+    // Private methods
+    //
+
+    /** Prints the contents of the character buffer to standard out. */
+    private void printBuffer() {
+        if (DEBUG_BUFFER) {
+            System.out.print('[');
+            System.out.print(fCurrentEntity.length);
+            System.out.print(' ');
+            System.out.print(fCurrentEntity.offset);
+            if (fCurrentEntity.length > 0) {
+                System.out.print(" \"");
+                for (int i = 0; i < fCurrentEntity.length; i++) {
+                    if (i == fCurrentEntity.offset) {
+                        System.out.print('^');
+                    }
+                    char c = fCurrentEntity.buffer[i];
+                    switch (c) {
+                        case '\r': {
+                            System.out.print("\\r");
+                            break;
+                        }
+                        case '\n': {
+                            System.out.print("\\n");
+                            break;
+                        }
+                        case '\t': {
+                            System.out.print("\\t");
+                            break;
+                        }
+                        case '"': {
+                            System.out.print("\\\"");
+                            break;
+                        }
+                        default: {
+                            System.out.print(c);
+                        }
+                    }
+                }
+                if (fCurrentEntity.offset == fCurrentEntity.length) {
+                    System.out.print('^');
+                }
+                System.out.print('"');
+            }
+            System.out.print(']');
+        }
+    } // printBuffer()
+
+    //
+    // Interfaces
+    //
+
+    /**
+     * Basic scanner interface. The scanner currently in effect is
+     * installed through setScanner(Scanner); ContentScanner in this
+     * file is one implementation.
+     *
+     * @author Andy Clark
+     */
+    public interface Scanner {
+
+        //
+        // Scanner methods
+        //
+
+        /** 
+         * Scans part of the document. This interface allows scanning to
+         * be performed in a pulling manner.
+         *
+         * @param complete True if the scanner should not return until
+         *                 scanning is complete.
+         *
+         * @return True if additional scanning is required.
+         *
+         * @throws IOException Thrown if I/O error occurs.
+         */
+        public boolean scan(boolean complete) throws IOException;
+
+    } // interface Scanner
+
+    //
+    // Classes
+    //
+
+    /**
+     * Current entity: the character stream being scanned together
+     * with its identifiers, its position (line and column) and the
+     * buffer that the scanning methods read from. All members are
+     * public fields that the scanner manipulates directly.
+     *
+     * @author Andy Clark
+     */
+    public static class CurrentEntity {
+
+        //
+        // Data
+        //
+
+        /** Character stream. */
+        public Reader stream;
+
+        /** Encoding. */
+        public String encoding;
+
+        /** Public identifier. */
+        public String publicId;
+
+        /** Base system identifier. */
+        public String baseSystemId;
+
+        /** Literal system identifier. */
+        public String literalSystemId;
+
+        /** Expanded system identifier. */
+        public String expandedSystemId;
+
+        /** Line number; starts at 1. */
+        public int lineNumber = 1;
+
+        /** Column number; starts at 1. */
+        public int columnNumber = 1;
+
+        // buffer
+
+        /** Character buffer, initially DEFAULT_BUFFER_SIZE characters long. */
+        public char[] buffer = new char[DEFAULT_BUFFER_SIZE];
+
+        /** Offset into character buffer of the next character to read. */
+        public int offset = 0;
+
+        /** Length of characters read into character buffer. */
+        public int length = 0;
+
+        //
+        // Constructors
+        //
+
+        /**
+         * Constructs an entity from the specified stream.
+         *
+         * @param stream           The character stream to read from.
+         * @param encoding         The encoding.
+         * @param publicId         The public identifier.
+         * @param baseSystemId     The base system identifier.
+         * @param literalSystemId  The literal system identifier.
+         * @param expandedSystemId The expanded system identifier.
+         */
+        public CurrentEntity(Reader stream, String encoding, 
+                             String publicId, String baseSystemId,
+                             String literalSystemId, String expandedSystemId) {
+            this.stream = stream;
+            this.encoding = encoding;
+            this.publicId = publicId;
+            this.baseSystemId = baseSystemId;
+            this.literalSystemId = literalSystemId;
+            this.expandedSystemId = expandedSystemId;
+        } // <init>(Reader,String,String,String,String,String)
+
+    } // class CurrentEntity
+
+    /**
+     * The primary HTML document scanner.
+     *
+     * @author Andy Clark
+     */
+    public class ContentScanner 
+        implements Scanner {
+
+        //
+        // Data
+        //
+
+        // temp vars
+
+        /**
+         * Scratch qualified name, reused for each element and attribute
+         * name that is handed to the attribute list or document handler.
+         */
+        private final QName fQName = new QName();
+
+        /**
+         * Scratch attribute list, cleared and refilled for every start
+         * element and for the pseudo attributes of an XML declaration.
+         */
+        private final XMLAttributesImpl fAttributes = new XMLAttributesImpl();
+
+        //
+        // Scanner methods
+        //
+
+        /**
+         * Runs the content scanner's state machine. Each pass through the
+         * loop dispatches on fScannerState: it scans character content,
+         * scans the markup that follows a '&lt;', or fires the start or
+         * end document callbacks.
+         * <p>
+         * An EOFException raised while scanning ends the current entity:
+         * the previous entity is popped from fCurrentEntityStack or, when
+         * that stack is empty, the state becomes STATE_END_DOCUMENT.
+         *
+         * @param complete True to keep looping until the end of the
+         *                 document has been handled; false to return as
+         *                 soon as a pass finishes without requesting an
+         *                 immediate follow-up pass.
+         *
+         * @return False once STATE_END_DOCUMENT has been handled; true
+         *         otherwise, including when scanning is handed over to
+         *         the special scanner.
+         *
+         * @throws IOException Thrown if an I/O error occurs.
+         */
+        public boolean scan(boolean complete) throws IOException {
+            boolean next;
+            do {
+                try {
+                    next = false;
+                    switch (fScannerState) {
+                        case STATE_CONTENT: {
+                            fBeginLineNumber = fCurrentEntity.lineNumber;
+                            fBeginColumnNumber = fCurrentEntity.columnNumber;
+                            int c = read();
+                            if (c == '<') {
+                                setScannerState(STATE_MARKUP_BRACKET);
+                                next = true; // handle the markup within this same call
+                            }
+                            else if (c == '&') {
+                                scanEntityRef(fStringBuffer, true);
+                            }
+                            else if (c == -1) {
+                                throw new EOFException();
+                            }
+                            else {
+                                fCurrentEntity.offset--; // push the character back for scanCharacters()
+                                fCurrentEntity.columnNumber--;
+                                scanCharacters();
+                            }
+                            break;
+                        }
+                        case STATE_MARKUP_BRACKET: {
+                            int c = read();
+                            if (c == '!') {
+                                if (skip("--", false)) {
+                                    scanComment();
+                                }
+                                else if (skip("[CDATA[", false)) {
+                                    scanCDATA();
+                                }
+                                else if (skip("DOCTYPE", false)) {
+                                    scanDoctype();
+                                }
+                                else {
+                                    if (fReportErrors) {
+                                        fErrorReporter.reportError("HTML1002", null);
+                                    }
+                                    skipMarkup(true);
+                                }
+                            }
+                            else if (c == '?') {
+                                scanPI();
+                            }
+                            else if (c == '/') {
+                                scanEndElement();
+                            }
+                            else if (c == -1) {
+                                if (fReportErrors) {
+                                    fErrorReporter.reportError("HTML1003", null);
+                                }
+                                if (fDocumentHandler != null && fElementCount >= fElementDepth) {
+                                    fStringBuffer.clear();
+                                    fStringBuffer.append('<'); // input ended right after '<': report it as text
+                                    fDocumentHandler.characters(fStringBuffer, null);
+                                }
+                                throw new EOFException();
+                            }
+                            else {
+                                fCurrentEntity.offset--; // push the character back for scanStartElement()
+                                fCurrentEntity.columnNumber--;
+                                fElementCount++;
+                                fSingleBoolean[0] = false; // second return value of scanStartElement(): empty tag?
+                                String ename = scanStartElement(fSingleBoolean);
+                                if (ename != null && !fSingleBoolean[0] &&
+                                    HTMLElements.getElement(ename).isSpecial()) {
+                                    setScanner(fSpecialScanner.setElementName(ename)); // content of this element is scanned by the special scanner
+                                    setScannerState(STATE_CONTENT);
+                                    return true;
+                                }
+                            }
+                            setScannerState(STATE_CONTENT);
+                            break;
+                        }
+                        case STATE_START_DOCUMENT: {
+                            if (fDocumentHandler != null && fElementCount >= fElementDepth) {
+                                if (DEBUG_CALLBACKS) {
+                                    System.out.println("startDocument()");
+                                }
+                                XMLLocator locator = HTMLScanner.this;
+                                String encoding = fIANAEncoding;
+                                Augmentations augs = locationAugs();
+                                try {
+                                    // NOTE: Hack to allow the default filter to work with
+                                    //       old and new versions of the XNI document handler
+                                    //       interface. -Ac
+                                    Class cls = fDocumentHandler.getClass();
+                                    Class[] types = {
+                                        XMLLocator.class, String.class,
+                                        NamespaceContext.class, Augmentations.class
+                                    };
+                                    Method method = cls.getMethod("startDocument", types);
+                                    NamespaceContext nscontext = new NamespaceSupport();
+                                    Object[] params = {
+                                        locator, encoding, 
+                                        nscontext, augs
+                                    };
+                                    method.invoke(fDocumentHandler, params);
+                                }
+                                catch (IllegalAccessException e) {
+                                    throw new XNIException(e);
+                                } 
+                                catch (InvocationTargetException e) {
+                                    throw new XNIException(e);
+                                } 
+                                catch (NoSuchMethodException e) { // no four-argument startDocument: try the three-argument form
+                                    try {
+                                        // NOTE: Hack to allow the default filter to work with
+                                        //       old and new versions of the XNI document handler
+                                        //       interface. -Ac
+                                        Class cls = fDocumentHandler.getClass();
+                                        Class[] types = {
+                                            XMLLocator.class, String.class, Augmentations.class
+                                        };
+                                        Method method = cls.getMethod("startDocument", types);
+                                        Object[] params = {
+                                            locator, encoding, augs
+                                        };
+                                        method.invoke(fDocumentHandler, params);
+                                    }
+                                    catch (IllegalAccessException ex) {
+                                        // NOTE: Should never reach here!
+                                        throw new XNIException(ex);
+                                    } 
+                                    catch (InvocationTargetException ex) {
+                                        // NOTE: Should never reach here!
+                                        throw new XNIException(ex);
+                                    } 
+                                    catch (NoSuchMethodException ex) {
+                                        // NOTE: Should never reach here!
+                                        throw new XNIException(ex);
+                                    }
+                                }
+                            }
+                            if (fInsertDoctype && fDocumentHandler != null) {
+                                String root = HTMLElements.getElement(HTMLElements.HTML).name;
+                                root = modifyName(root, fNamesElems);
+                                String pubid = fDoctypePubid;
+                                String sysid = fDoctypeSysid;
+                                fDocumentHandler.doctypeDecl(root, pubid, sysid,
+                                                             synthesizedAugs());
+                            }
+                            setScannerState(STATE_CONTENT);
+                            break;
+                        }
+                        case STATE_END_DOCUMENT: {
+                            if (fDocumentHandler != null && fElementCount >= fElementDepth) {
+                                if (DEBUG_CALLBACKS) {
+                                    System.out.println("endDocument()");
+                                }
+                                fEndLineNumber = fCurrentEntity.lineNumber;
+                                fEndColumnNumber = fCurrentEntity.columnNumber;
+                                fDocumentHandler.endDocument(locationAugs());
+                            }
+                            return false; // nothing is left to scan
+                        }
+                        default: {
+                            throw new RuntimeException("unknown scanner state: "+fScannerState);
+                        }
+                    }
+                }
+                catch (EOFException e) {
+                    if (fCurrentEntityStack.empty()) {
+                        setScannerState(STATE_END_DOCUMENT);
+                    }
+                    else {
+                        fCurrentEntity = (CurrentEntity)fCurrentEntityStack.pop(); // resume the entity that was being scanned before
+                    }
+                    next = true; // always take another pass after an entity ends
+                }
+            } while (next || complete);
+            return true;
+        } // scan(boolean):boolean
+
+        //
+        // Protected methods
+        //
+
+        /** Scans characters. */
+        protected void scanCharacters() throws IOException {
+            if (DEBUG_BUFFER) {
+                System.out.print("(scanCharacters: ");
+                printBuffer();
+                System.out.println();
+            }
+            int newlines = skipNewlines();
+            if (newlines == 0 && fCurrentEntity.offset == fCurrentEntity.length) {
+                if (DEBUG_BUFFER) {
+                    System.out.print(")scanCharacters: ");
+                    printBuffer();
+                    System.out.println();
+                }
+                return;
+            }
+            char c;
+            int offset = fCurrentEntity.offset - newlines;
+            for (int i = offset; i < fCurrentEntity.offset; i++) {
+                fCurrentEntity.buffer[i] = '\n';
+            }
+            while (fCurrentEntity.offset < fCurrentEntity.length) {
+                c = fCurrentEntity.buffer[fCurrentEntity.offset];
+                if (c == '<' || c == '&' || c == '\n' || c == '\r') {
+                    break;
+                }
+                fCurrentEntity.offset++;
+                fCurrentEntity.columnNumber++;
+            }
+            if (fCurrentEntity.offset > offset && 
+                fDocumentHandler != null && fElementCount >= fElementDepth) {
+                fString.setValues(fCurrentEntity.buffer, offset, fCurrentEntity.offset - offset);
+                if (DEBUG_CALLBACKS) {
+                    System.out.println("characters("+fString+")");
+                }
+                fEndLineNumber = fCurrentEntity.lineNumber;
+                fEndColumnNumber = fCurrentEntity.columnNumber;
+                fDocumentHandler.characters(fString, locationAugs());
+            }
+            if (DEBUG_BUFFER) {
+                System.out.print(")scanCharacters: ");
+                printBuffer();
+                System.out.println();
+            }
+        } // scanCharacters()
+
+        /** Scans a CDATA section. */
+        protected void scanCDATA() throws IOException {
+            if (DEBUG_BUFFER) {
+                System.out.print("(scanCDATA: ");
+                printBuffer();
+                System.out.println();
+            }
+            fStringBuffer.clear();
+            if (fCDATASections) {
+                if (fDocumentHandler != null && fElementCount >= fElementDepth) {
+                    fEndLineNumber = fCurrentEntity.lineNumber;
+                    fEndColumnNumber = fCurrentEntity.columnNumber;
+                    if (DEBUG_CALLBACKS) {
+                        System.out.println("startCDATA()");
+                    }
+                    fDocumentHandler.startCDATA(locationAugs());
+                }
+            }
+            else {
+                fStringBuffer.append("[CDATA[");
+            }
+            boolean eof = scanMarkupContent(fStringBuffer, ']');
+            if (!fCDATASections) {
+                fStringBuffer.append("]]");
+            }
+            if (fDocumentHandler != null && fElementCount >= fElementDepth) {
+                fEndLineNumber = fCurrentEntity.lineNumber;
+                fEndColumnNumber = fCurrentEntity.columnNumber;
+                if (fCDATASections) {
+                    if (DEBUG_CALLBACKS) {
+                        System.out.println("characters("+fStringBuffer+")");
+                    }
+                    fDocumentHandler.characters(fStringBuffer, locationAugs());
+                    if (DEBUG_CALLBACKS) {
+                        System.out.println("endCDATA()");
+                    }
+                    fDocumentHandler.endCDATA(locationAugs());
+                }
+                else {
+                    if (DEBUG_CALLBACKS) {
+                        System.out.println("comment("+fStringBuffer+")");
+                    }
+                    fDocumentHandler.comment(fStringBuffer, locationAugs());
+                }
+            }
+            if (DEBUG_BUFFER) {
+                System.out.print(")scanCDATA: ");
+                printBuffer();
+                System.out.println();
+            }
+            if (eof) {
+                throw new EOFException();
+            }
+        } // scanCDATA()
+        
+        /** Scans a comment. */
+        protected void scanComment() throws IOException {
+            if (DEBUG_BUFFER) {
+                System.out.print("(scanComment: ");
+                printBuffer();
+                System.out.println();
+            }
+            fStringBuffer.clear();
+            boolean eof = scanMarkupContent(fStringBuffer, '-');
+            if (fDocumentHandler != null && fElementCount >= fElementDepth) {
+                if (DEBUG_CALLBACKS) {
+                    System.out.println("comment("+fStringBuffer+")");
+                }
+                fEndLineNumber = fCurrentEntity.lineNumber;
+                fEndColumnNumber = fCurrentEntity.columnNumber;
+                fDocumentHandler.comment(fStringBuffer, locationAugs());
+            }
+            if (DEBUG_BUFFER) {
+                System.out.print(")scanComment: ");
+                printBuffer();
+                System.out.println();
+            }
+            if (eof) {
+                throw new EOFException();
+            }
+        } // scanComment()
+
+        /**
+         * Scans the content of a markup section into the given buffer, up
+         * to the terminator formed by two or more <code>cend</code>
+         * characters followed by '&gt;'. It is called with '-' for
+         * comments and with ']' for CDATA sections.
+         * <p>
+         * The last two <code>cend</code> characters and the '&gt;' of the
+         * terminator are not added to the buffer. Newlines are appended as
+         * one '\n' for each newline counted by skipNewlines().
+         *
+         * @param buffer The buffer that receives the scanned content.
+         * @param cend   The character that ends the section when it
+         *               occurs at least twice in a row and is followed
+         *               by '&gt;'.
+         *
+         * @return True if the end of the input was reached before the
+         *         terminator was found.
+         *
+         * @throws IOException Thrown if an I/O error occurs.
+         */
+        protected boolean scanMarkupContent(XMLStringBuffer buffer, 
+                                            char cend) throws IOException {
+            int c = -1;
+            OUTER: while (true) {
+                c = read();
+                if (c == cend) {
+                    int count = 1; // length of the current run of cend characters
+                    while (true) {
+                        c = read();
+                        if (c == cend) {
+                            count++;
+                            continue;
+                        }
+                        break;
+                    }
+                    if (c == -1) {
+                        if (fReportErrors) {
+                            fErrorReporter.reportError("HTML1007", null);
+                        }
+                        break OUTER; // the pending run of cend characters is not added to the buffer
+                    }
+                    if (count < 2) {
+                        buffer.append(cend); // a single cend is ordinary content
+                        //if (c != -1) {
+                        fCurrentEntity.offset--; // push back the character that followed it
+                        fCurrentEntity.columnNumber--;
+                        //}
+                        continue;
+                    }
+                    if (c != '>') {
+                        for (int i = 0; i < count; i++) { // run not followed by '>': all of it is content
+                            buffer.append(cend);
+                        }
+                        fCurrentEntity.offset--;
+                        fCurrentEntity.columnNumber--;
+                        continue;
+                    }
+                    for (int i = 0; i < count - 2; i++) { // terminator found: keep all but the last two cend characters
+                        buffer.append(cend);
+                    }
+                    break;
+                }
+                else if (c == '\n' || c == '\r') {
+                    fCurrentEntity.offset--; // push the newline character back for skipNewlines()
+                    fCurrentEntity.columnNumber--;
+                    int newlines = skipNewlines();
+                    for (int i = 0; i < newlines; i++) {
+                        buffer.append('\n');
+                    }
+                    continue;
+                }
+                else if (c == -1) {
+                    if (fReportErrors) {
+                        fErrorReporter.reportError("HTML1007", null);
+                    }
+                    break;
+                }
+                buffer.append((char)c);
+            }
+            return c == -1;
+        } // scanMarkupContent(XMLStringBuffer,char):boolean
+
+        /**
+         * Scans a processing instruction or an XML/text declaration; the
+         * leading "&lt;?" has already been consumed by the caller. Warning
+         * HTML1008 is reported first when error reporting is enabled.
+         * <p>
+         * If a target name is scanned and it is not "xml" (compared
+         * ignoring case), the whitespace after the target is skipped, the
+         * data up to "?&gt;" or "/&gt;" (or the end of the input) is
+         * collected with newlines stored as '\n', and the result is
+         * reported through processingInstruction().
+         * <p>
+         * Otherwise, which includes the case where no target name could
+         * be scanned, the pseudo attributes are scanned, their names are
+         * lower-cased, and the values of "version", "encoding" and
+         * "standalone" are reported through xmlDecl().
+         * <p>
+         * NOTE(review): the other callbacks in this class are guarded by
+         * fElementCount &gt;= fElementDepth, but the processingInstruction()
+         * and xmlDecl() calls made here only check that a document handler
+         * is set. Confirm that this difference is intended.
+         *
+         * @throws IOException Thrown if an I/O error occurs.
+         */
+        protected void scanPI() throws IOException {
+            if (DEBUG_BUFFER) {
+                System.out.print("(scanPI: ");
+                printBuffer();
+                System.out.println();
+            }
+            if (fReportErrors) {
+                fErrorReporter.reportWarning("HTML1008", null);
+            }
+
+            // scan processing instruction
+            String target = scanName();
+            if (target != null && !target.equalsIgnoreCase("xml")) {
+                while (true) { // skip the whitespace between the target and the data
+                    int c = read();
+                    if (c == '\r' || c == '\n') {
+                        fCurrentEntity.lineNumber++;
+                        fCurrentEntity.columnNumber = 1;
+                        if (c == '\r') {
+                            c = read();
+                            if (c != '\n') {
+                                fCurrentEntity.offset--; // lone '\r': push the following character back
+                            }
+                        }
+                        continue;
+                    }
+                    if (c == -1) {
+                        break;
+                    }
+                    if (c != ' ' && c != '\t') {
+                        fCurrentEntity.offset--; // first character of the data: push it back
+                        fCurrentEntity.columnNumber--;
+                        break;
+                    }
+                }
+                fStringBuffer.clear();
+                while (true) { // collect the data
+                    int c = read();
+                    if (c == '?' || c == '/') {
+                        char c0 = (char)c;
+                        c = read();
+                        if (c == '>') { // "?>" or "/>" ends the processing instruction
+                            break;
+                        }
+                        else {
+                            fStringBuffer.append(c0); // not the end: keep c0 as data and re-read the next character
+                            fCurrentEntity.offset--;
+                            fCurrentEntity.columnNumber--;
+                            continue;
+                        }
+                    }
+                    else if (c == '\r' || c == '\n') {
+                        fStringBuffer.append('\n'); // "\r\n", '\r' and '\n' are all stored as a single '\n'
+                        fCurrentEntity.lineNumber++;
+                        fCurrentEntity.columnNumber = 1;
+                        if (c == '\r') {
+                            c = read();
+                            if (c != '\n') {
+                                fCurrentEntity.offset--;
+                            }
+                        }
+                        continue;
+                    }
+                    else if (c == -1) {
+                        break;
+                    }
+                    else {
+                        fStringBuffer.append((char)c);
+                    }
+                }
+                XMLString data = fStringBuffer;
+                if (fDocumentHandler != null) {
+                    fEndLineNumber = fCurrentEntity.lineNumber;
+                    fEndColumnNumber = fCurrentEntity.columnNumber;
+                    fDocumentHandler.processingInstruction(target, data, locationAugs());
+                }
+            }
+
+            // scan xml/text declaration
+            else {
+                int beginLineNumber = fBeginLineNumber; // saved because attribute scanning overwrites the begin position
+                int beginColumnNumber = fBeginColumnNumber;
+                fAttributes.removeAllAttributes();
+                int aindex = 0;
+                while (scanPseudoAttribute(fAttributes)) {
+                    fAttributes.getName(aindex,fQName);
+                    fQName.rawname = fQName.rawname.toLowerCase(); // so the getValue() lookups below match regardless of case
+                    fAttributes.setName(aindex,fQName);
+                    aindex++;
+                }
+                if (fDocumentHandler != null) {
+                    String version = fAttributes.getValue("version");
+                    String encoding = fAttributes.getValue("encoding");
+                    String standalone = fAttributes.getValue("standalone");
+
+                    fBeginLineNumber = beginLineNumber;
+                    fBeginColumnNumber = beginColumnNumber;
+                    fEndLineNumber = fCurrentEntity.lineNumber;
+                    fEndColumnNumber = fCurrentEntity.columnNumber;
+                    fDocumentHandler.xmlDecl(version, encoding, standalone,
+                                             locationAugs());
+                }
+            }
+
+            if (DEBUG_BUFFER) {
+                System.out.print(")scanPI: ");
+                printBuffer();
+                System.out.println();
+            }
+        } // scanPI()
+
+        /**
+         * Scans a start element and reports it to the document handler.
+         * <p>
+         * If no name starting with an ASCII letter follows the '&lt;', the
+         * markup is not treated as a start tag: error HTML1009 is reported
+         * (when error reporting is enabled) and the '&lt;' together with
+         * whatever name was scanned is passed on as character content.
+         * <p>
+         * While the byte stream is still being buffered (fByteStream is
+         * set and fElementDepth is -1), a META element with
+         * http-equiv="content-type" whose content attribute contains
+         * "charset=" switches the reader to that encoding, plays the
+         * buffered bytes back and resets the buffer position and the
+         * line/column numbers, unless fIgnoreSpecifiedCharset is set.
+         * A BODY element, or an element whose first listed parent is
+         * BODY, clears the buffered bytes instead.
+         *
+         * @param empty Is used for a second return value to indicate whether
+         *              the start element tag is empty (e.g. "/&gt;").
+         *
+         * @return The element name after modifyName() has been applied,
+         *         or null if the markup was not a valid start tag.
+         *
+         * @throws IOException Thrown if an I/O error occurs.
+         */
+        protected String scanStartElement(boolean[] empty) throws IOException {
+            String ename = scanName();
+            int length = ename != null ? ename.length() : 0;
+            int c = length > 0 ? ename.charAt(0) : -1;
+            if (length == 0 || !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) {
+                if (fReportErrors) {
+                    fErrorReporter.reportError("HTML1009", null);
+                }
+                if (fDocumentHandler != null && fElementCount >= fElementDepth) {
+                    fStringBuffer.clear();
+                    fStringBuffer.append('<');
+                    if (length > 0) {
+                        fStringBuffer.append(ename);
+                    }
+                    fDocumentHandler.characters(fStringBuffer, null);
+                }
+                return null;
+            }
+            ename = modifyName(ename, fNamesElems);
+            fAttributes.removeAllAttributes();
+            // scanning the attributes overwrites the begin position, so
+            // save the position of the tag itself and restore it afterwards
+            int beginLineNumber = fBeginLineNumber;
+            int beginColumnNumber = fBeginColumnNumber;
+            while (scanAttribute(fAttributes, empty)) {
+                // do nothing
+            }
+            fBeginLineNumber = beginLineNumber;
+            fBeginColumnNumber = beginColumnNumber;
+            if (fByteStream != null && fElementDepth == -1) {
+                if (ename.equalsIgnoreCase("META")) {
+                    if (DEBUG_CHARSET) {
+                        System.out.println("+++ <META>");
+                    }
+                    String httpEquiv = getValue(fAttributes, "http-equiv");
+                    if (httpEquiv != null && httpEquiv.equalsIgnoreCase("content-type")) {
+                        if (DEBUG_CHARSET) {
+                            System.out.println("+++ @content-type: \""+httpEquiv+'"');
+                        }
+                        String content = getValue(fAttributes, "content");
+                        // NOTE: Case mapping uses a fixed locale so that the
+                        //       result does not depend on the default locale
+                        //       (e.g. Turkish maps 'i' to a dotted capital I).
+                        int index1 = content != null
+                                   ? content.toLowerCase(java.util.Locale.ENGLISH).indexOf("charset=")
+                                   : -1;
+                        if (index1 != -1 && !fIgnoreSpecifiedCharset) {
+                            int index2 = content.indexOf(';', index1);
+                            String charset = index2 != -1 ? content.substring(index1+8, index2) : content.substring(index1+8);
+                            // NOTE: Whitespace around the name (e.g. "charset= utf-8")
+                            //       would otherwise make the encoding unsupported.
+                            charset = charset.trim();
+                            try {
+                                String ianaEncoding = charset;
+                                String ianaEncodingUpper = ianaEncoding.toUpperCase(java.util.Locale.ENGLISH);
+                                String javaEncoding = EncodingMap.getIANA2JavaMapping(ianaEncodingUpper);
+                                if (DEBUG_CHARSET) {
+                                    System.out.println("+++ ianaEncoding: "+ianaEncoding);
+                                    System.out.println("+++ javaEncoding: "+javaEncoding);
+                                }
+                                if (javaEncoding == null) {
+                                    javaEncoding = ianaEncoding;
+                                    if (fReportErrors) {
+                                        fErrorReporter.reportError("HTML1001", new Object[]{ianaEncoding});
+                                    }
+                                }
+                                // NOTE: Create the reader first. If the encoding is
+                                //       not supported this throws before any scanner
+                                //       state, fIso8859Encoding included, is changed.
+                                fCurrentEntity.stream = new InputStreamReader(fByteStream, javaEncoding);
+                                fIso8859Encoding = ianaEncodingUpper.startsWith("ISO-8859")
+                                                || ianaEncoding.equalsIgnoreCase(fDefaultIANAEncoding);
+                                fByteStream.playback();
+                                // start over: callbacks stay suppressed until the
+                                // element count reaches the depth reached so far
+                                fElementDepth = fElementCount;
+                                fElementCount = 0;
+                                fCurrentEntity.offset = fCurrentEntity.length = 0;
+                                fCurrentEntity.lineNumber = 1;
+                                fCurrentEntity.columnNumber = 1;
+                            }
+                            catch (UnsupportedEncodingException e) {
+                                if (fReportErrors) {
+                                    fErrorReporter.reportError("HTML1010", new Object[]{charset});
+                                }
+                                // NOTE: If the encoding change doesn't work, 
+                                //       then there's no point in continuing to 
+                                //       buffer the input stream.
+                                fByteStream.clear();
+                            }
+                        }
+                    }
+                }
+                else if (ename.equalsIgnoreCase("BODY")) {
+                    fByteStream.clear();
+                }
+                else {
+                    HTMLElements.Element element = HTMLElements.getElement(ename);
+                    if (element.parent != null && element.parent.length > 0) {
+                        if (element.parent[0].code == HTMLElements.BODY) {
+                            fByteStream.clear();
+                        }
+                    }
+                }
+            }
+            if (fDocumentHandler != null && fElementCount >= fElementDepth) {
+                fQName.setValues(null, ename, ename, null);
+                if (DEBUG_CALLBACKS) {
+                    System.out.println("startElement("+fQName+','+fAttributes+")");
+                }
+                fEndLineNumber = fCurrentEntity.lineNumber;
+                fEndColumnNumber = fCurrentEntity.columnNumber;
+                if (empty[0]) {
+                    fDocumentHandler.emptyElement(fQName, fAttributes, locationAugs());
+                }
+                else {
+                    fDocumentHandler.startElement(fQName, fAttributes, locationAugs());
+                }
+            }
+            return ename;
+        } // scanStartElement():ename
+
+        /** 
+         * Scans a real attribute. 
+         *
+         * @param attributes The list of attributes.
+         * @param empty      Is used for a second return value to indicate 
+         *                   whether the start element tag is empty 
+         *                   (e.g. "/&gt;").
+         */
+        protected boolean scanAttribute(XMLAttributesImpl attributes,
+                                        boolean[] empty)
+            throws IOException {
+            return scanAttribute(attributes,empty,'/');
+        } // scanAttribute(XMLAttributesImpl,boolean[]):boolean
+
+        /** 
+         * Scans a pseudo attribute. 
+         *
+         * @param attributes The list of attributes.
+         */
+        protected boolean scanPseudoAttribute(XMLAttributesImpl attributes)
+            throws IOException {
+            return scanAttribute(attributes,fSingleBoolean,'?');
+        } // scanPseudoAttribute(XMLAttributesImpl):boolean
+
+        /** 
+         * Scans an attribute, pseudo or real. 
+         *
+         * @param attributes The list of attributes.
+         * @param empty      Is used for a second return value to indicate 
+         *                   whether the start element tag is empty 
+         *                   (e.g. "/&gt;").
+         * @param endc       The end character that appears before the
+         *                   closing angle bracket ('>').
+         */
+        protected boolean scanAttribute(XMLAttributesImpl attributes,
+                                        boolean[] empty, char endc)
+            throws IOException {
+            // Scans one attribute and appends it to 'attributes'. Returns
+            // true when the caller may try to scan another attribute and
+            // false when scanning of the current tag should stop.
+            // NOTE(review): 'endc' is not referenced anywhere in this body.
+
+            // Remember whether whitespace preceded the attribute; a missing
+            // separator is reported as HTML1013 once the name is known.
+            boolean skippedSpaces = skipSpaces();
+            // Start position used later by addLocationItem().
+            fBeginLineNumber = fCurrentEntity.lineNumber;
+            fBeginColumnNumber = fCurrentEntity.columnNumber;
+            int c = read();
+            if (c == -1) {
+                // input ended inside the tag
+                if (fReportErrors) {
+                    fErrorReporter.reportError("HTML1007", null);
+                }
+                throw new EOFException();
+            }
+            if (c == '>') {
+                // end of tag: nothing to scan
+                return false;
+            }
+            // push the character back so that scanName() sees it
+            fCurrentEntity.offset--;
+            fCurrentEntity.columnNumber--;
+            String aname = scanName();
+            if (aname == null) {
+                // no attribute name here: report it and skip the rest of
+                // the markup; skipMarkup's result becomes the "empty" flag
+                if (fReportErrors) {
+                    fErrorReporter.reportError("HTML1011", null);
+                }
+                empty[0] = skipMarkup(false);
+                return false;
+            }
+            if (!skippedSpaces && fReportErrors) {
+                fErrorReporter.reportError("HTML1013", new Object[] { aname });
+            }
+            // presumably applies the configured attribute-name case
+            // handling (fNamesAttrs) -- confirm against modifyName()
+            aname = modifyName(aname, fNamesAttrs);
+            skipSpaces();
+            c = read();
+            if (c == -1) {
+                if (fReportErrors) {
+                    fErrorReporter.reportError("HTML1007", null);
+                }
+                throw new EOFException();
+            }
+            // Name directly followed by "/" or ">": attribute without a
+            // value, recorded with an empty string.
+            if (c == '/' || c == '>') {
+                fQName.setValues(null, aname, aname, null);
+                attributes.addAttribute(fQName, "CDATA", "");
+                attributes.setSpecified(attributes.getLength()-1, true);
+                if (fAugmentations) {
+                    addLocationItem(attributes, attributes.getLength() - 1);
+                }
+                if (c == '/') {
+                    // push '/' back and let skipMarkup() consume the rest
+                    // of the tag; its result is stored as the "empty" flag
+                    fCurrentEntity.offset--;
+                    fCurrentEntity.columnNumber--;
+                    empty[0] = skipMarkup(false);
+                }
+                return false;
+            }
+            /***
+            // REVISIT: [Q] Why is this still here? -Ac
+            if (c == '/' || c == '>') {
+                if (c == '/') {
+                    fCurrentEntity.offset--;
+                    fCurrentEntity.columnNumber--;
+                    empty[0] = skipMarkup(false);
+                }
+                fQName.setValues(null, aname, aname, null);
+                attributes.addAttribute(fQName, "CDATA", "");
+                attributes.setSpecified(attributes.getLength()-1, true);
+                if (fAugmentations) {
+                    addLocationItem(attributes, attributes.getLength() - 1);
+                }
+                return false;
+            }
+            /***/
+            if (c == '=') {
+                // an attribute value follows
+                skipSpaces();
+                c = read();
+                if (c == -1) {
+                    if (fReportErrors) {
+                        fErrorReporter.reportError("HTML1007", null);
+                    }
+                    throw new EOFException();
+                }
+                // Xiaowei/Ac: Fix for <a href=/cgi-bin/myscript>...</a>
+                // "name=>" : the tag ends right after '=', so the attribute
+                // is recorded with an empty value
+                if (c == '>') {
+                    fQName.setValues(null, aname, aname, null);
+                    attributes.addAttribute(fQName, "CDATA", "");
+                    attributes.setSpecified(attributes.getLength()-1, true);
+                    if (fAugmentations) {
+                        addLocationItem(attributes, attributes.getLength() - 1);
+                    }
+                    return false;
+                }
+                // fStringBuffer collects the value that is stored on the
+                // attribute; fNonNormAttr collects the text passed to
+                // setNonNormalizedValue()
+                fStringBuffer.clear();
+                fNonNormAttr.clear();
+                if (c != '\'' && c != '"') {
+                    // Unquoted value: push the first character back and
+                    // read up to the next whitespace character or '>'.
+                    fCurrentEntity.offset--;
+                    fCurrentEntity.columnNumber--;
+                    while (true) {
+                        c = read();
+                        // Xiaowei/Ac: Fix for <a href=/broken/>...</a>
+                        // NOTE(review): Character.isSpace is deprecated in
+                        // the JDK in favour of Character.isWhitespace.
+                        if (Character.isSpace((char)c) || c == '>') {
+                            //fCharOffset--;
+                            // leave the terminator for the next scan step
+                            fCurrentEntity.offset--;
+                            fCurrentEntity.columnNumber--;
+                            break;
+                        }
+                        if (c == -1) {
+                            if (fReportErrors) {
+                                fErrorReporter.reportError("HTML1007", null);
+                            }
+                            throw new EOFException();
+                        }
+                        if (c == '&') {
+                            // -1 from scanEntityRef means no single
+                            // character was produced; the contents of
+                            // fStringBuffer2 are then used as the value text
+                            int ce = scanEntityRef(fStringBuffer2, false);
+                            if (ce != -1) {
+                                fStringBuffer.append((char)ce);
+                            }
+                            else {
+                                fStringBuffer.append(fStringBuffer2);
+                            }
+                            fNonNormAttr.append(fStringBuffer2);
+                        }
+                        else {
+                            fStringBuffer.append((char)c);
+                            fNonNormAttr.append((char)c);
+                        }
+                    }
+                    fQName.setValues(null, aname, aname, null);
+                    String avalue = fStringBuffer.toString();
+                    attributes.addAttribute(fQName, "CDATA", avalue);
+
+                    int lastattr = attributes.getLength()-1;
+                    attributes.setSpecified(lastattr, true);
+                    attributes.setNonNormalizedValue(lastattr, fNonNormAttr.toString());
+                    if (fAugmentations) {
+                        addLocationItem(attributes, attributes.getLength() - 1);
+                    }
+                    return true;
+                }
+                // Quoted value: read up to the matching quote character.
+                char quote = (char)c;
+                do {
+                    c = read();
+                    if (c == -1) {
+                        if (fReportErrors) {
+                            fErrorReporter.reportError("HTML1007", null);
+                        }
+                        throw new EOFException();
+                    }
+                    if (c == '&') {
+                        int ce = scanEntityRef(fStringBuffer2, false);
+                        if (ce != -1) {
+                            fStringBuffer.append((char)ce);
+                        }
+                        else {
+                            fStringBuffer.append(fStringBuffer2);
+                        }
+                        fNonNormAttr.append(fStringBuffer2);
+                    }
+                    else if (c == '\t') {
+                        // a tab becomes a space in the value; the
+                        // non-normalized text keeps the tab
+                        fStringBuffer.append(' ');
+                        fNonNormAttr.append('\t');
+                    }
+                    else if (c == '\r' || c == '\n') {
+                        // a line break becomes one space in the value;
+                        // "\r\n" counts as a single line break
+                        fCurrentEntity.lineNumber++;
+                        fCurrentEntity.columnNumber = 0;
+                        if (c == '\r') {
+                            int c2 = read();
+                            if (c2 != '\n') {
+                                // lone '\r': push the look-ahead back
+                                fCurrentEntity.offset--;
+                                fCurrentEntity.columnNumber--;
+                            }
+                            else {
+                                fNonNormAttr.append('\r');
+                                c = c2;
+                            }
+                        }
+                        fStringBuffer.append(' ');
+                        fNonNormAttr.append((char)c);
+                    }
+                    else if (c != quote) {
+                        fStringBuffer.append((char)c);
+                        fNonNormAttr.append((char)c);
+                    }
+                } while (c != quote);
+                fQName.setValues(null, aname, aname, null);
+                String avalue = fStringBuffer.toString();
+                attributes.addAttribute(fQName, "CDATA", avalue);
+
+                int lastattr = attributes.getLength()-1;
+                attributes.setSpecified(lastattr, true);
+                attributes.setNonNormalizedValue(lastattr, fNonNormAttr.toString());
+                if (fAugmentations) {
+                    addLocationItem(attributes, attributes.getLength() - 1);
+                }
+            }
+            else {
+                // Any other character after the name: record the attribute
+                // with an empty value and push the character back so that
+                // the next call starts with it.
+                fQName.setValues(null, aname, aname, null);
+                attributes.addAttribute(fQName, "CDATA", "");
+                attributes.setSpecified(attributes.getLength()-1, true);
+                fCurrentEntity.offset--;
+                fCurrentEntity.columnNumber--;
+                if (fAugmentations) {
+                    addLocationItem(attributes, attributes.getLength() - 1);
+                }
+            }
+            return true;
+        } // scanAttribute(XMLAttributesImpl):boolean
+
+        /** Adds location augmentations to the specified attribute. */
+        protected void addLocationItem(XMLAttributes attributes, int index) {
+            fEndLineNumber = fCurrentEntity.lineNumber;
+            fEndColumnNumber = fCurrentEntity.columnNumber;
+            LocationItem locationItem = new LocationItem();
+            locationItem.setValues(fBeginLineNumber, fBeginColumnNumber,
+                                   fEndLineNumber, fEndColumnNumber);
+            Augmentations augs = attributes.getAugmentations(index);
+            augs.putItem(AUGMENTATIONS, locationItem);
+        } // addLocationItem(XMLAttributes,int)
+
+        /** Scans an end element. */
+        protected void scanEndElement() throws IOException {
+            String ename = scanName();
+            if (fReportErrors && ename == null) {
+                fErrorReporter.reportError("HTML1012", null);
+            }
+            skipMarkup(false);
+            if (ename != null) {
+                ename = modifyName(ename, fNamesElems);
+                if (fDocumentHandler != null && fElementCount >= fElementDepth) {
+                    fQName.setValues(null, ename, ename, null);
+                    if (DEBUG_CALLBACKS) {
+                        System.out.println("endElement("+fQName+")");
+                    }
+                    fEndLineNumber = fCurrentEntity.lineNumber;
+                    fEndColumnNumber = fCurrentEntity.columnNumber;
+                    fDocumentHandler.endElement(fQName, locationAugs());
+                }
+            }
+        } // scanEndElement()
+
+    } // class ContentScanner
+
+    /**
+     * Special scanner used for elements whose content needs to be scanned 
+     * as plain text, ignoring markup such as elements and entity references.
+     * For example: &lt;SCRIPT&gt; and &lt;COMMENT&gt;.
+     *
+     * @author Andy Clark
+     */
+    public class SpecialScanner
+        implements Scanner {
+
+        //
+        // Data
+        //
+
+        /** Name of element whose content needs to be scanned as text. */
+        protected String fElementName;
+
+        /** True if &lt;script&gt; element. */
+        protected boolean fScript;
+
+        /** True if &lt;style&gt; element. */
+        protected boolean fStyle;
+
+        /** True if &lt;textarea&gt; element. */
+        protected boolean fTextarea;
+
+        // temp vars
+
+        /** A qualified name. */
+        private final QName fQName = new QName();
+
+        /** A string buffer. */
+        private final XMLStringBuffer fStringBuffer = new XMLStringBuffer();
+
+        //
+        // Public methods
+        //
+
+        /** Sets the element name. */
+        public Scanner setElementName(String ename) {
+            fElementName = ename;
+            fScript = fElementName.equalsIgnoreCase("SCRIPT");
+            fStyle = fElementName.equalsIgnoreCase("STYLE");
+            fTextarea = fElementName.equalsIgnoreCase("TEXTAREA");
+            return this;
+        } // setElementName(String):Scanner
+
+        //
+        // Scanner methods
+        //
+
+        /** Scan. */
+        public boolean scan(boolean complete) throws IOException {
+            boolean next;
+            do {
+                try {
+                    next = false;
+                    int delimiter = -1;
+                    switch (fScannerState) {
+                        case STATE_CONTENT: {
+                            fBeginLineNumber = fCurrentEntity.lineNumber;
+                            fBeginColumnNumber = fCurrentEntity.columnNumber;
+                            int c = read();
+                            if (c == '<') {
+                                setScannerState(STATE_MARKUP_BRACKET);
+                                continue;
+                            }
+                            if (c == '&') {
+                                if (fTextarea) {
+                                    scanEntityRef(fStringBuffer, true);
+                                    continue;
+                                }
+                                fStringBuffer.clear();
+                                fStringBuffer.append('&');
+                            }
+                            else if (c == -1) {
+                                if (fReportErrors) {
+                                    fErrorReporter.reportError("HTML1007", null);
+                                }
+                                throw new EOFException();
+                            }
+                            else {
+                                fCurrentEntity.offset--;
+                                fCurrentEntity.columnNumber--;
+                                fStringBuffer.clear();
+                            }
+                            scanCharacters(fStringBuffer, -1);
+                            break;
+                        } // case STATE_CONTENT
+                        case STATE_MARKUP_BRACKET: {
+                            int c = read();
+                            if (c == '!') {
+                                if (skip("--", false)) {
+                                    fStringBuffer.clear();
+                                    boolean strip = (fScript && fScriptStripCommentDelims) ||
+                                                    (fStyle && fStyleStripCommentDelims);
+                                    if (strip) {
+                                        do {
+                                            c = read();
+                                            if (c == '\r' || c == '\n') {
+                                                fCurrentEntity.columnNumber--;
+                                                fCurrentEntity.offset--;
+                                                break;
+                                            }
+                                        } while (c != -1);
+                                        skipNewlines(1);
+                                        delimiter = '-';
+                                    }
+                                    else {
+                                        fStringBuffer.append("<!--");
+                                    }
+                                }
+                                else if (skip("[CDATA[", false)) {
+                                    fStringBuffer.clear();
+                                    boolean strip = (fScript && fScriptStripCDATADelims) ||
+                                                    (fStyle  && fStyleStripCDATADelims);
+                                    if (strip) {
+                                        do {
+                                            c = read();
+                                            if (c == '\r' || c == '\n') {
+                                                fCurrentEntity.columnNumber--;
+                                                fCurrentEntity.offset--;
+                                                break;
+                                            }
+                                        } while (c != -1);
+                                        skipNewlines(1);
+                                        delimiter = ']';
+                                    }
+                                    else {
+                                        fStringBuffer.append("<![CDATA[");
+                                    }
+                                }
+                            }
+                            else if (c == '/') {
+                                String ename = scanName();
+                                if (ename != null) {
+                                    if (ename.equalsIgnoreCase(fElementName)) {
+                                        if (read() == '>') {
+                                            ename = modifyName(ename, fNamesElems);
+                                            if (fDocumentHandler != null && fElementCount >= fElementDepth) {
+                                                fQName.setValues(null, ename, ename, null);
+                                                if (DEBUG_CALLBACKS) {
+                                                    System.out.println("endElement("+fQName+")");
+                                                }
+                                                fEndLineNumber = fCurrentEntity.lineNumber;
+                                                fEndColumnNumber = fCurrentEntity.columnNumber;
+                                                fDocumentHandler.endElement(fQName, locationAugs());
+                                            }
+                                            setScanner(fContentScanner);
+                                            setScannerState(STATE_CONTENT);
+                                            return true;
+                                        }
+                                        else {
+                                            fCurrentEntity.offset--;
+                                            fCurrentEntity.columnNumber--;
+                                        }
+                                    }
+                                    fStringBuffer.clear();
+                                    fStringBuffer.append("</");
+                                    fStringBuffer.append(ename);
+                                }
+                                else {
+                                    fStringBuffer.clear();
+                                    fStringBuffer.append("</");
+                                }
+                            }
+                            else {
+                                fStringBuffer.clear();
+                                fStringBuffer.append('<');
+                                fStringBuffer.append((char)c);
+                            }
+                            scanCharacters(fStringBuffer, delimiter);
+                            setScannerState(STATE_CONTENT);
+                            break;
+                        } // case STATE_MARKUP_BRACKET
+                    } // switch
+                } // try
+                catch (EOFException e) {
+                    setScanner(fContentScanner);
+                    if (fCurrentEntityStack.empty()) {
+                        setScannerState(STATE_END_DOCUMENT);
+                    }
+                    else {
+                        fCurrentEntity = (CurrentEntity)fCurrentEntityStack.pop();
+                        setScannerState(STATE_CONTENT);
+                    }
+                    return true;
+                }
+            } // do
+            while (next || complete);
+            return true;
+        } // scan(boolean):boolean
+
+        //
+        // Protected methods
+        //
+
+        /** Scan characters. */
+        protected void scanCharacters(XMLStringBuffer buffer,
+                                      int delimiter) throws IOException {
+            if (DEBUG_BUFFER) {
+                System.out.print("(scanCharacters, delimiter="+delimiter+": ");
+                printBuffer();
+                System.out.println();
+            }
+            boolean strip = (fScript && fScriptStripCommentDelims) ||
+                            (fScript && fScriptStripCDATADelims) ||
+                            (fStyle  && fStyleStripCommentDelims) ||
+                            (fStyle  && fStyleStripCDATADelims);
+            while (true) {
+                int c = read();
+                if (c == -1 || (delimiter == -1 && (c == '<' || c == '&'))) {
+                    if (c != -1) {
+                        fCurrentEntity.offset--;
+                        fCurrentEntity.columnNumber--;
+                    }
+                    break;
+                }
+                // Patch supplied by Jonathan Baxter
+                else if (c == '\r' || c == '\n') {
+                    fCurrentEntity.offset--;
+                    fCurrentEntity.columnNumber--;
+                    int newlines = skipNewlines();
+                    for (int i = 0; i < newlines; i++) {
+                        buffer.append('\n');
+                    }
+                }
+                else if (delimiter != -1 && c == (char)delimiter) {
+                    int count = 0;
+                    do {
+                        count++;
+                        c = read();
+                    } while (c == (char)delimiter);
+                    for (int i = strip && c == '>' ? 2 : 0; i < count; i++) {
+                        buffer.append((char)delimiter);
+                    }
+                    if (c == -1 || (count >= 2 && c == '>')) {
+                        if (!strip) {
+                            buffer.append((char)c);
+                        }
+                        break;
+                    }
+                    fCurrentEntity.offset--;
+                    fCurrentEntity.columnNumber--;
+                }
+                else {
+                    buffer.append((char)c);
+                    if (c == '\n') {
+                        fCurrentEntity.columnNumber = 1;
+                        fCurrentEntity.lineNumber++;
+                    }
+                }
+            }
+            if (buffer.length > 0 && fDocumentHandler != null && fElementCount >= fElementDepth) {
+                if (DEBUG_CALLBACKS) {
+                    System.out.println("characters("+buffer+")");
+                }
+                fEndLineNumber = fCurrentEntity.lineNumber;
+                fEndColumnNumber = fCurrentEntity.columnNumber;
+                fDocumentHandler.characters(buffer, locationAugs());
+            }
+            if (DEBUG_BUFFER) {
+                System.out.print(")scanCharacters: ");
+                printBuffer();
+                System.out.println();
+            }
+        } // scanCharacters(StringBuffer)
+
+    } // class SpecialScanner
+
+    /**
+     * A playback input stream. This class has the ability to save the bytes
+     * read from the underlying input stream and play the bytes back later.
+     * This class is used by the HTML scanner to switch encodings when a 
+     * &lt;meta&gt; tag is detected that specifies a different encoding. 
+     * <p>
+     * If the encoding is changed, then the scanner calls the 
+     * <code>playback</code> method and re-scans the beginning of the HTML
+     * document again. This should not be too much of a performance problem
+     * because the &lt;meta&gt; tag appears at the beginning of the document.
+     * <p>
+     * If the &lt;body&gt; tag is reached without playing back the bytes,
+     * then the buffer can be cleared by calling the <code>clear</code>
+     * method. This stops the buffering of bytes and allows the memory used
+     * by the buffer to be reclaimed. 
+     * <p>
+     * <strong>Note:</strong> 
+     * If the buffer is never played back or cleared, this input stream
+     * will continue to buffer the entire stream. Therefore, it is very
+     * important to use this stream correctly.
+     *
+     * @author Andy Clark
+     */
+    public static class PlaybackInputStream
+        extends FilterInputStream {
+
+        //
+        // Constants
+        //
+
+        /** Set to true to debug playback. */
+        private static final boolean DEBUG_PLAYBACK = false;
+
+        //
+        // Data
+        //
+
+        // state
+
+        /** Playback mode. */
+        protected boolean fPlayback = false;
+
+        /** Buffer cleared. */
+        protected boolean fCleared = false;
+
+        /** Encoding detected. */
+        protected boolean fDetected = false;
+
+        // buffer info
+
+        /** Byte buffer. */
+        protected byte[] fByteBuffer = new byte[1024];
+
+        /** Offset into byte buffer during playback. */
+        protected int fByteOffset = 0;
+
+        /** Length of bytes read into byte buffer. */
+        protected int fByteLength = 0;
+
+        /** Pushback offset. */
+        public int fPushbackOffset = 0;
+
+        /** Pushback length. */
+        public int fPushbackLength = 0;
+
+        //
+        // Constructors
+        //
+
+        /** Constructor. */
+        public PlaybackInputStream(InputStream in) {
+            super(in);
+        } // <init>(InputStream)
+
+        //
+        // Public methods
+        //
+
+        /** Detect encoding. */
+        public void detectEncoding(String[] encodings) throws IOException {
+            if (fDetected) {
+                throw new IOException("Should not detect encoding twice.");
+            }
+            fDetected = true;
+            int b1 = read();
+            if (b1 == -1) {
+                return;
+            }
+            int b2 = read();
+            if (b2 == -1) {
+                fPushbackLength = 1;
+                return;
+            }
+            // UTF-8 BOM: 0xEFBBBF
+            if (b1 == 0xEF && b2 == 0xBB) {
+                int b3 = read();
+                if (b3 == 0xBF) {
+                    fPushbackOffset = 3;
+                    encodings[0] = "UTF-8";
+                    encodings[1] = "UTF8";
+                    return;
+                }
+                fPushbackLength = 3;
+            }
+            // UTF-16 LE BOM: 0xFFFE
+            if (b1 == 0xFF && b2 == 0xFE) {
+                encodings[0] = "UTF-16";
+                encodings[1] = "UnicodeLittleUnmarked";
+                return;
+            }
+            // UTF-16 BE BOM: 0xFEFF
+            else if (b1 == 0xFE && b2 == 0xFF) {
+                encodings[0] = "UTF-16";
+                encodings[1] = "UnicodeBigUnmarked";
+                return;
+            }
+            // unknown
+            fPushbackLength = 2;
+        } // detectEncoding()
+
+        /** Playback buffer contents. */
+        public void playback() {
+            fPlayback = true;
+        } // playback()
+
+        /** 
+         * Clears the buffer.
+         * <p>
+         * <strong>Note:</strong>
+         * The buffer cannot be cleared during playback. Therefore, calling
+         * this method during playback will not do anything. However, the
+         * buffer will be cleared automatically at the end of playback.
+         */
+        public void clear() {
+            if (!fPlayback) {
+                fCleared = true;
+                fByteBuffer = null;
+            }
+        } // clear()
+
+        //
+        // InputStream methods
+        //
+
+        /** Read a byte. */
+        public int read() throws IOException {
+            if (DEBUG_PLAYBACK) {
+                System.out.println("(read");
+            }
+            if (fPushbackOffset < fPushbackLength) {
+                return fByteBuffer[fPushbackOffset++];
+            }
+            if (fCleared) {
+                return in.read();
+            }
+            if (fPlayback) {
+                int c = fByteBuffer[fByteOffset++];
+                if (fByteOffset == fByteLength) {
+                    fCleared = true;
+                    fByteBuffer = null;
+                }
+                if (DEBUG_PLAYBACK) {
+                    System.out.println(")read -> "+(char)c);
+                }
+                return c;
+            }
+            int c = in.read();
+            if (c != -1) {
+                if (fByteLength == fByteBuffer.length) {
+                    byte[] newarray = new byte[fByteLength + 1024];
+                    System.arraycopy(fByteBuffer, 0, newarray, 0, fByteLength);
+                    fByteBuffer = newarray;
+                }
+                fByteBuffer[fByteLength++] = (byte)c;
+            }
+            if (DEBUG_PLAYBACK) {
+                System.out.println(")read -> "+(char)c);
+            }
+            return c;
+        } // read():int
+
+        /** Read an array of bytes. */
+        public int read(byte[] array) throws IOException {
+            return read(array, 0, array.length);
+        } // read(byte[]):int
+
+        /** Read an array of bytes. */
+        public int read(byte[] array, int offset, int length) throws IOException {
+            if (DEBUG_PLAYBACK) {
+                System.out.println(")read("+offset+','+length+')');
+            }
+            if (fPushbackOffset < fPushbackLength) {
+                int count = fPushbackLength - fPushbackOffset;
+                if (count > length) {
+                    count = length;
+                }
+                System.arraycopy(fByteBuffer, fPushbackOffset, array, offset, count);
+                fPushbackOffset += count;
+                return count;
+            }
+            if (fCleared) {
+                return in.read(array, offset, length);
+            }
+            if (fPlayback) {
+                if (fByteOffset + length > fByteLength) {
+                    length = fByteLength - fByteOffset;
+                }
+                System.arraycopy(fByteBuffer, fByteOffset, array, offset, length);
+                fByteOffset += length;
+                if (fByteOffset == fByteLength) {
+                    fCleared = true;
+                    fByteBuffer = null;
+                }
+                return length;
+            }
+            int count = in.read(array, offset, length);
+            if (count != -1) {
+                if (fByteLength + count > fByteBuffer.length) {
+                    byte[] newarray = new byte[fByteLength + count + 512];
+                    System.arraycopy(fByteBuffer, 0, newarray, 0, fByteLength);
+                    fByteBuffer = newarray;
+                }
+                System.arraycopy(array, offset, fByteBuffer, fByteLength, count);
+                fByteLength += count;
+            }
+            if (DEBUG_PLAYBACK) {
+                System.out.println(")read("+offset+','+length+") -> "+count);
+            }
+            return count;
+        } // read(byte[]):int
+
+    } // class PlaybackInputStream
+
+    /**
+     * Location infoset item. 
+     *
+     * @author Andy Clark
+     */
+    protected static class LocationItem 
+        implements HTMLEventInfo {
+
+        //
+        // Data
+        //
+
+        /** Line on which the event begins. */
+        protected int fBeginLineNumber;
+
+        /** Column at which the event begins. */
+        protected int fBeginColumnNumber;
+
+        /** Line on which the event ends. */
+        protected int fEndLineNumber;
+
+        /** Column at which the event ends. */
+        protected int fEndColumnNumber;
+
+        //
+        // Public methods
+        //
+
+        /** Stores the begin and end position of the event in one call. */
+        public void setValues(int beginLine, int beginColumn,
+                              int endLine, int endColumn) {
+            this.fBeginLineNumber = beginLine;
+            this.fBeginColumnNumber = beginColumn;
+            this.fEndLineNumber = endLine;
+            this.fEndColumnNumber = endColumn;
+        } // setValues(int,int,int,int)
+
+        //
+        // HTMLEventInfo methods
+        //
+
+        // location information
+
+        /** Returns the line on which this event begins. */
+        public int getBeginLineNumber() {
+            return this.fBeginLineNumber;
+        } // getBeginLineNumber():int
+
+        /** Returns the column at which this event begins. */
+        public int getBeginColumnNumber() {
+            return this.fBeginColumnNumber;
+        } // getBeginColumnNumber():int
+
+        /** Returns the line on which this event ends. */
+        public int getEndLineNumber() {
+            return this.fEndLineNumber;
+        } // getEndLineNumber():int
+
+        /** Returns the column at which this event ends. */
+        public int getEndColumnNumber() {
+            return this.fEndColumnNumber;
+        } // getEndColumnNumber():int
+
+        // other information
+
+        /** Always returns false. */
+        public boolean isSynthesized() {
+            return false;
+        } // isSynthesize():boolean
+
+        //
+        // Object methods
+        //
+
+        /**
+         * Returns the four position values separated by colons, in the
+         * order begin line, begin column, end line, end column.
+         */
+        public String toString() {
+            final StringBuffer text = new StringBuffer();
+            text.append(this.fBeginLineNumber).append(':');
+            text.append(this.fBeginColumnNumber).append(':');
+            text.append(this.fEndLineNumber).append(':');
+            text.append(this.fEndColumnNumber);
+            return text.toString();
+        } // toString():String
+
+    } // class LocationItem
+
+} // class HTMLScanner

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLTagBalancer.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLTagBalancer.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/HTMLTagBalancer.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,1234 @@
+/* 
+ * (C) Copyright 2002-2005, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ */
+
+package org.cyberneko.html;
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+import org.apache.xerces.util.XMLAttributesImpl;
+import org.apache.xerces.xni.Augmentations;
+import org.apache.xerces.xni.NamespaceContext;
+import org.apache.xerces.xni.QName;
+import org.apache.xerces.xni.XMLAttributes;
+import org.apache.xerces.xni.XMLDocumentHandler;
+import org.apache.xerces.xni.XMLLocator;
+import org.apache.xerces.xni.XMLResourceIdentifier;
+import org.apache.xerces.xni.XMLString;
+import org.apache.xerces.xni.XNIException;
+import org.apache.xerces.xni.parser.XMLComponentManager;
+import org.apache.xerces.xni.parser.XMLConfigurationException;
+import org.apache.xerces.xni.parser.XMLDocumentFilter;
+import org.apache.xerces.xni.parser.XMLDocumentSource;
+                      
+/**
+ * Balances tags in an HTML document. This component receives document events
+ * and tries to correct many common mistakes that human (and computer) HTML
+ * document authors make. This tag balancer can:
+ * <ul>
+ * <li>add missing parent elements;
+ * <li>automatically close elements with optional end tags; and
+ * <li>handle mis-matched inline element tags.
+ * </ul>
+ * <p>
+ * This component recognizes the following features:
+ * <ul>
+ * <li>http://cyberneko.org/html/features/augmentations
+ * <li>http://cyberneko.org/html/features/report-errors
+ * <li>http://cyberneko.org/html/features/balance-tags/document-fragment
+ * <li>http://cyberneko.org/html/features/balance-tags/ignore-outside-content
+ * </ul>
+ * <p>
+ * This component recognizes the following properties:
+ * <ul>
+ * <li>http://cyberneko.org/html/properties/names/elems
+ * <li>http://cyberneko.org/html/properties/names/attrs
+ * <li>http://cyberneko.org/html/properties/error-reporter
+ * </ul>
+ *
+ * @see HTMLElements
+ *
+ * @author Andy Clark
+ *
+ * @version $Id: HTMLTagBalancer.java,v 1.20 2005/02/14 04:06:22 andyc Exp $
+ */
+public class HTMLTagBalancer
+    implements XMLDocumentFilter, HTMLComponent {
+
+    //
+    // Constants
+    //
+
+    // features
+
+    /** Namespaces. */
+    protected static final String NAMESPACES = "http://xml.org/sax/features/namespaces";
+
+    /** Include infoset augmentations. */
+    protected static final String AUGMENTATIONS = "http://cyberneko.org/html/features/augmentations";
+
+    /** Report errors. */
+    protected static final String REPORT_ERRORS = "http://cyberneko.org/html/features/report-errors";
+
+    /** Document fragment balancing only (deprecated). */
+    protected static final String DOCUMENT_FRAGMENT_DEPRECATED = "http://cyberneko.org/html/features/document-fragment";
+
+    /** Document fragment balancing only. */
+    protected static final String DOCUMENT_FRAGMENT = "http://cyberneko.org/html/features/balance-tags/document-fragment";
+
+    /** Ignore outside content. */
+    protected static final String IGNORE_OUTSIDE_CONTENT = "http://cyberneko.org/html/features/balance-tags/ignore-outside-content";
+
+    /** Recognized features. */
+    private static final String[] RECOGNIZED_FEATURES = {
+        NAMESPACES,
+        AUGMENTATIONS,
+        REPORT_ERRORS,
+        DOCUMENT_FRAGMENT_DEPRECATED,
+        DOCUMENT_FRAGMENT,
+        IGNORE_OUTSIDE_CONTENT,
+    };
+
+    /** Recognized features defaults. */
+    private static final Boolean[] RECOGNIZED_FEATURES_DEFAULTS = {
+        null,
+        null,
+        null,
+        null,
+        Boolean.FALSE,
+        Boolean.FALSE,
+    };
+
+    // properties
+
+    /** Modify HTML element names: { "upper", "lower", "default" }. */
+    protected static final String NAMES_ELEMS = "http://cyberneko.org/html/properties/names/elems";
+
+    /** Modify HTML attribute names: { "upper", "lower", "default" }. */
+    protected static final String NAMES_ATTRS = "http://cyberneko.org/html/properties/names/attrs";
+    
+    /** Error reporter. */
+    protected static final String ERROR_REPORTER = "http://cyberneko.org/html/properties/error-reporter";
+
+    /** Recognized properties. */
+    private static final String[] RECOGNIZED_PROPERTIES = {
+        NAMES_ELEMS,
+        NAMES_ATTRS,
+        ERROR_REPORTER,
+    };
+
+    /** Recognized properties defaults. */
+    private static final Object[] RECOGNIZED_PROPERTIES_DEFAULTS = {
+        null,
+        null,
+        null,
+    };
+
+    // modify HTML names
+
+    /** Don't modify HTML names. */
+    protected static final short NAMES_NO_CHANGE = 0;
+
+    /** Match HTML element names. */
+    protected static final short NAMES_MATCH = 0;
+
+    /** Uppercase HTML names. */
+    protected static final short NAMES_UPPERCASE = 1;
+
+    /** Lowercase HTML names. */
+    protected static final short NAMES_LOWERCASE = 2;
+
+    // static vars
+
+    /** Synthesized event info item. */
+    protected static final HTMLEventInfo SYNTHESIZED_ITEM = 
+        new HTMLEventInfo.SynthesizedItem();
+
+    //
+    // Data
+    //
+
+    // features
+
+    /** Namespaces. */
+    protected boolean fNamespaces;
+
+    /** Include infoset augmentations. */
+    protected boolean fAugmentations;
+    
+    /** Report errors. */
+    protected boolean fReportErrors;
+
+    /** Document fragment balancing only. */
+    protected boolean fDocumentFragment;
+
+    /** Ignore outside content. */
+    protected boolean fIgnoreOutsideContent;
+
+    // properties
+
+    /** Modify HTML element names. */
+    protected short fNamesElems;
+
+    /** Modify HTML attribute names. */
+    protected short fNamesAttrs;
+
+    /** Error reporter. */
+    protected HTMLErrorReporter fErrorReporter;
+
+    // connections
+
+    /** The document source. */
+    protected XMLDocumentSource fDocumentSource;
+
+    /** The document handler. */
+    protected XMLDocumentHandler fDocumentHandler;
+
+    // state
+
+    /** The element stack. */
+    protected final InfoStack fElementStack = new InfoStack();
+
+    /** The inline stack. */
+    protected final InfoStack fInlineStack = new InfoStack();
+
+    /** True if seen anything. Important for xml declaration. */
+    protected boolean fSeenAnything;
+
+    /** True if doctype declaration has been seen. */
+    protected boolean fSeenDoctype;
+
+    /** True if root element has been seen. */
+    protected boolean fSeenRootElement;
+
+    /** 
+     * True if seen the end of the document element. In other words, 
+     * this variable is set to false <em>until</em> the end &lt;/HTML&gt; 
+     * tag is seen (or synthesized). This is used to ensure that 
+     * extraneous events after the end of the document element do not 
+     * make the document stream ill-formed.
+     */
+    protected boolean fSeenRootElementEnd;
+
+    /** True if seen &lt;head&gt; element. */
+    protected boolean fSeenHeadElement;
+
+    /** True if seen &lt;body&gt; element. */
+    protected boolean fSeenBodyElement;
+
+    // temp vars
+
+    /** A qualified name. */
+    private final QName fQName = new QName();
+
+    /** Empty attributes. */
+    private final XMLAttributes fEmptyAttrs = new XMLAttributesImpl();
+
+    /** Augmentations. */
+    private final HTMLAugmentations fInfosetAugs = new HTMLAugmentations();
+
+    //
+    // HTMLComponent methods
+    //
+
+    /**
+     * Looks up the default state of a feature.
+     *
+     * @param featureId the feature identifier to look up
+     * @return the RECOGNIZED_FEATURES_DEFAULTS entry at the position of the
+     *         first matching RECOGNIZED_FEATURES entry (the entry itself
+     *         may be null), or null if the identifier is not recognized
+     */
+    public Boolean getFeatureDefault(String featureId) {
+        final String[] ids = RECOGNIZED_FEATURES;
+        final int count = (ids == null) ? 0 : ids.length;
+        int index = 0;
+        while (index < count) {
+            if (ids[index].equals(featureId)) {
+                return RECOGNIZED_FEATURES_DEFAULTS[index];
+            }
+            index++;
+        }
+        return null;
+    } // getFeatureDefault(String):Boolean
+
+    /**
+     * Looks up the default value of a property.
+     *
+     * @param propertyId the property identifier to look up
+     * @return the RECOGNIZED_PROPERTIES_DEFAULTS entry at the position of
+     *         the first matching RECOGNIZED_PROPERTIES entry (the entry
+     *         itself may be null), or null if the identifier is not
+     *         recognized
+     */
+    public Object getPropertyDefault(String propertyId) {
+        final String[] ids = RECOGNIZED_PROPERTIES;
+        final int count = (ids == null) ? 0 : ids.length;
+        int index = 0;
+        while (index < count) {
+            if (ids[index].equals(propertyId)) {
+                return RECOGNIZED_PROPERTIES_DEFAULTS[index];
+            }
+            index++;
+        }
+        return null;
+    } // getPropertyDefault(String):Object
+
+    //
+    // XMLComponent methods
+    //
+
+    /**
+     * Returns the identifiers of the features recognized by this component.
+     * A copy is handed out so that callers cannot modify the shared
+     * RECOGNIZED_FEATURES array, whose positions must stay aligned with
+     * RECOGNIZED_FEATURES_DEFAULTS.
+     *
+     * @return a new array holding the recognized feature identifiers
+     */
+    public String[] getRecognizedFeatures() {
+        return (String[])RECOGNIZED_FEATURES.clone();
+    } // getRecognizedFeatures():String[]
+
+    /**
+     * Returns the identifiers of the properties recognized by this
+     * component. A copy is handed out so that callers cannot modify the
+     * shared RECOGNIZED_PROPERTIES array, whose positions must stay aligned
+     * with RECOGNIZED_PROPERTIES_DEFAULTS.
+     *
+     * @return a new array holding the recognized property identifiers
+     */
+    public String[] getRecognizedProperties() {
+        return (String[])RECOGNIZED_PROPERTIES.clone();
+    } // getRecognizedProperties():String[]
+
+    /** Resets the component. */
+    public void reset(XMLComponentManager manager)
+        throws XMLConfigurationException {
+
+        // get features
+        fNamespaces = manager.getFeature(NAMESPACES);
+        fAugmentations = manager.getFeature(AUGMENTATIONS);
+        fReportErrors = manager.getFeature(REPORT_ERRORS);
+        fDocumentFragment = manager.getFeature(DOCUMENT_FRAGMENT) ||
+                            manager.getFeature(DOCUMENT_FRAGMENT_DEPRECATED);
+        fIgnoreOutsideContent = manager.getFeature(IGNORE_OUTSIDE_CONTENT);
+
+        // get properties
+        fNamesElems = getNamesValue(String.valueOf(manager.getProperty(NAMES_ELEMS)));
+        fNamesAttrs = getNamesValue(String.valueOf(manager.getProperty(NAMES_ATTRS)));
+        fErrorReporter = (HTMLErrorReporter)manager.getProperty(ERROR_REPORTER);
+
+    } // reset(XMLComponentManager)
+
+    /**
+     * Updates the cached state of a feature. Only the augmentations,
+     * report-errors and ignore-outside-content features are handled here;
+     * any other identifier is silently ignored.
+     *
+     * @param featureId the feature identifier
+     * @param state     the new state of the feature
+     *
+     * @throws XMLConfigurationException declared by the interface; not
+     *                                   thrown by this implementation
+     */
+    public void setFeature(String featureId, boolean state)
+        throws XMLConfigurationException {
+
+        if (featureId.equals(AUGMENTATIONS)) {
+            fAugmentations = state;
+        }
+        else if (featureId.equals(REPORT_ERRORS)) {
+            fReportErrors = state;
+        }
+        else if (featureId.equals(IGNORE_OUTSIDE_CONTENT)) {
+            fIgnoreOutsideContent = state;
+        }
+
+    } // setFeature(String,boolean)
+
+    /**
+     * Updates the cached value of a property. Only the element-name and
+     * attribute-name modification properties are handled here; any other
+     * identifier is silently ignored.
+     *
+     * @param propertyId the property identifier
+     * @param value      the new value; it is converted with
+     *                   String.valueOf before being interpreted
+     *
+     * @throws XMLConfigurationException declared by the interface; not
+     *                                   thrown directly by this method
+     */
+    public void setProperty(String propertyId, Object value)
+        throws XMLConfigurationException {
+    
+        if (propertyId.equals(NAMES_ELEMS)) {
+            fNamesElems = getNamesValue(String.valueOf(value));
+        }
+        else if (propertyId.equals(NAMES_ATTRS)) {
+            fNamesAttrs = getNamesValue(String.valueOf(value));
+        }
+
+    } // setProperty(String,Object)
+
+    //
+    // XMLDocumentSource methods
+    //
+
+    /**
+     * Sets the document handler that the events of this filter are
+     * forwarded to.
+     *
+     * @param handler the handler to store; the forwarding methods of this
+     *                class skip the call when it is null
+     */
+    public void setDocumentHandler(XMLDocumentHandler handler) {
+        fDocumentHandler = handler;
+    } // setDocumentHandler(XMLDocumentHandler)
+
+    // @since Xerces 2.1.0
+
+    /**
+     * Returns the document handler.
+     *
+     * @return the handler last stored by setDocumentHandler, possibly null
+     */
+    public XMLDocumentHandler getDocumentHandler() {
+        return fDocumentHandler;
+    } // getDocumentHandler():XMLDocumentHandler
+
+    //
+    // XMLDocumentHandler methods
+    //
+
+    // since Xerces-J 2.2.0
+
+    /**
+     * Start document. Resets the balancing state and forwards the event to
+     * the registered document handler, if any.
+     * <p>
+     * The handler is invoked reflectively so that this filter works with
+     * handlers declaring either the four-argument form (with a
+     * NamespaceContext) or the older three-argument form of
+     * <code>startDocument</code>.
+     *
+     * @param locator   the document locator, passed through to the handler
+     * @param encoding  the document encoding, passed through to the handler
+     * @param nscontext the namespace context; only passed to handlers that
+     *                  declare the four-argument method
+     * @param augs      the augmentations, passed through to the handler
+     *
+     * @throws XNIException the handler's own XNIException if it threw one;
+     *                      otherwise an XNIException wrapping the
+     *                      reflection failure
+     */
+    public void startDocument(XMLLocator locator, String encoding, 
+                              NamespaceContext nscontext, Augmentations augs) 
+        throws XNIException {
+
+        // reset state
+        fElementStack.top = 0;
+        fSeenAnything = false;
+        fSeenDoctype = false;
+        fSeenRootElement = false;
+        fSeenRootElementEnd = false;
+        fSeenHeadElement = false;
+        fSeenBodyElement = false;
+
+        // pass on event
+        if (fDocumentHandler != null) {
+            try {
+                // NOTE: Hack to allow the default filter to work with
+                //       old and new versions of the XNI document handler
+                //       interface. -Ac
+                Class cls = fDocumentHandler.getClass();
+                Class[] types = {
+                    XMLLocator.class, String.class,
+                    NamespaceContext.class, Augmentations.class
+                };
+                Method method = cls.getMethod("startDocument", types);
+                Object[] params = {
+                    locator, encoding, 
+                    nscontext, augs
+                };
+                method.invoke(fDocumentHandler, params);
+            }
+            catch (IllegalAccessException e) {
+                throw new XNIException(e);
+            } 
+            catch (InvocationTargetException e) {
+                // the handler itself failed: surface its exception
+                throw unwrapHandlerException(e);
+            } 
+            catch (NoSuchMethodException e) {
+                try {
+                    // NOTE: Fall back to the three-argument method of
+                    //       older versions of the XNI document handler
+                    //       interface. -Ac
+                    Class cls = fDocumentHandler.getClass();
+                    Class[] types = {
+                        XMLLocator.class, String.class, Augmentations.class
+                    };
+                    Method method = cls.getMethod("startDocument", types);
+                    Object[] params = {
+                        locator, encoding, augs
+                    };
+                    method.invoke(fDocumentHandler, params);
+                }
+                catch (IllegalAccessException ex) {
+                    throw new XNIException(ex);
+                } 
+                catch (InvocationTargetException ex) {
+                    // the handler itself failed: surface its exception
+                    throw unwrapHandlerException(ex);
+                } 
+                catch (NoSuchMethodException ex) {
+                    // neither form of the method is declared
+                    throw new XNIException(ex);
+                }
+            }
+        }
+    
+    } // startDocument(XMLLocator,String,NamespaceContext,Augmentations)
+
+    /**
+     * Chooses the exception to throw for a failed reflective handler call.
+     *
+     * @param e the wrapper raised by Method.invoke
+     * @return the handler's own exception if it is an XNIException,
+     *         otherwise a new XNIException wrapping <code>e</code>
+     */
+    private static XNIException unwrapHandlerException(InvocationTargetException e) {
+        Throwable target = e.getTargetException();
+        if (target instanceof XNIException) {
+            return (XNIException)target;
+        }
+        return new XNIException(e);
+    } // unwrapHandlerException(InvocationTargetException):XNIException
+
+    // old methods
+
+    /**
+     * XML declaration. The event is forwarded only while nothing else has
+     * been seen in the document and a handler is registered; otherwise it
+     * is dropped.
+     *
+     * @param version    passed through to the handler
+     * @param encoding   passed through to the handler
+     * @param standalone passed through to the handler
+     * @param augs       passed through to the handler
+     */
+    public void xmlDecl(String version, String encoding, String standalone,
+                        Augmentations augs) throws XNIException {
+        if (fSeenAnything || fDocumentHandler == null) {
+            return;
+        }
+        fDocumentHandler.xmlDecl(version, encoding, standalone, augs);
+    } // xmlDecl(String,String,String,Augmentations)
+
+    /** Doctype declaration. */
+    public void doctypeDecl(String rootElementName, String publicId, String systemId,
+                            Augmentations augs) throws XNIException {
+        fSeenAnything = true;
+        if (fReportErrors) {
+            if (fSeenRootElement) {
+                fErrorReporter.reportError("HTML2010", null);
+            }
+            else if (fSeenDoctype) {
+                fErrorReporter.reportError("HTML2011", null);
+            }
+        }
+        if (!fSeenRootElement && !fSeenDoctype) {
+            fSeenDoctype = true;
+            if (fDocumentHandler != null) {
+                fDocumentHandler.doctypeDecl(rootElementName, publicId, systemId, augs);
+            }
+        }
+    } // doctypeDecl(String,String,String,Augmentations)
+
+    /**
+     * End document. If no root element was seen (and this is not a
+     * document fragment), an empty html element is synthesized; otherwise
+     * every entry left on the element stack is popped and closed. The
+     * event is then forwarded to the handler.
+     *
+     * @param augs passed through to the handler's endDocument call
+     */
+    public void endDocument(Augmentations augs) throws XNIException {
+
+        final boolean emptyDocument = !fSeenRootElement && !fDocumentFragment;
+        if (emptyDocument) {
+            // handle empty document
+            if (fReportErrors) {
+                fErrorReporter.reportError("HTML2000", null);
+            }
+            final String htmlName = modifyName("html", fNamesElems);
+            fQName.setValues(null, htmlName, htmlName, null);
+            if (fDocumentHandler != null) {
+                callStartElement(fQName, emptyAttributes(), synthesizedAugs());
+                callEndElement(fQName, synthesizedAugs());
+            }
+        }
+        else {
+            // pop all remaining elements
+            for (int remaining = fElementStack.top; remaining > 0; remaining--) {
+                final Info open = fElementStack.pop();
+                if (fReportErrors) {
+                    fErrorReporter.reportWarning("HTML2001", new Object[]{open.qname.rawname});
+                }
+                if (fDocumentHandler != null) {
+                    callEndElement(open.qname, synthesizedAugs());
+                }
+            }
+        }
+
+        // call handler
+        if (fDocumentHandler != null) {
+            fDocumentHandler.endDocument(augs);
+        }
+
+    } // endDocument(Augmentations)
+
+    /**
+     * Comment. Records that content has been seen and forwards the event
+     * to the handler, if one is registered.
+     *
+     * @param text passed through to the handler
+     * @param augs passed through to the handler
+     */
+    public void comment(XMLString text, Augmentations augs) throws XNIException {
+        fSeenAnything = true;
+        if (fDocumentHandler == null) {
+            return;
+        }
+        fDocumentHandler.comment(text, augs);
+    } // comment(XMLString,Augmentations)
+
+    /**
+     * Processing instruction. Records that content has been seen and
+     * forwards the event to the handler, if one is registered.
+     *
+     * @param target passed through to the handler
+     * @param data   passed through to the handler
+     * @param augs   passed through to the handler
+     */
+    public void processingInstruction(String target, XMLString data,
+                                      Augmentations augs) throws XNIException {
+        fSeenAnything = true;
+        if (fDocumentHandler == null) {
+            return;
+        }
+        fDocumentHandler.processingInstruction(target, data, augs);
+    } // processingInstruction(String,XMLString,Augmentations)
+
+    /**
+     * Start element. Balances the element against the current element
+     * stack before forwarding it:
+     * <ul>
+     * <li>events after the end of the root element, and repeated html,
+     *     head and body elements, are dropped;
+     * <li>a missing parent element is synthesized by a recursive call;
+     * <li>when <code>element.flags</code> is 0 (a block element, per the
+     *     original comment), the inline elements on top of the stack are
+     *     closed first and re-opened after this element has been started;
+     * <li>open elements that this element closes are ended;
+     * <li>empty elements are forwarded as emptyElement events, all others
+     *     are pushed on the element stack and forwarded as start events.
+     * </ul>
+     *
+     * @param elem  the element name
+     * @param attrs the attributes; null is replaced by empty attributes
+     *              before the handler is called
+     * @param augs  the augmentations passed to the handler for this element
+     */
+    public void startElement(QName elem, XMLAttributes attrs, Augmentations augs)
+        throws XNIException {
+        fSeenAnything = true;
+        
+        // check for end of document
+        if (fSeenRootElementEnd) {
+            return;
+        }
+
+        // get element information
+        HTMLElements.Element element = getElement(elem.rawname);
+
+        // ignore multiple html, head, body elements
+        if (fSeenRootElement && element.code == HTMLElements.HTML) {
+            return;
+        }
+        if (element.code == HTMLElements.HEAD) {
+            if (fSeenHeadElement) {
+                return;
+            }
+            fSeenHeadElement = true;
+        }
+        if (element.code == HTMLElements.BODY) {
+            if (fSeenBodyElement) {
+                return;
+            }
+            fSeenBodyElement = true;
+        }
+
+        // check proper parent
+        if (element.parent != null) {
+            if (!fSeenRootElement && !fDocumentFragment) {
+                // nothing is open yet: synthesize the first declared parent
+                String pname = element.parent[0].name;
+                pname = modifyName(pname, fNamesElems);
+                if (fReportErrors) {
+                    String ename = elem.rawname;
+                    fErrorReporter.reportWarning("HTML2002", new Object[]{ename,pname});
+                }
+                QName qname = new QName(null, pname, pname, null);
+                startElement(qname, null, synthesizedAugs());
+            }
+            else {
+                HTMLElements.Element pelement = element.parent[0];
+                if (pelement.code != HTMLElements.HEAD || (!fSeenBodyElement && !fDocumentFragment)) {
+                    int depth = getParentDepth(element.parent, element.bounds);
+                    if (depth == -1) {
+                        // no suitable parent is open; synthesize one only
+                        // if the parent's own parent can be found
+                        String pname = pelement.name;
+                        pname = modifyName(pname, fNamesElems);
+                        int pdepth = getParentDepth(pelement.parent, pelement.bounds);
+                        if (pdepth != -1) {
+                            QName qname = new QName(null, pname, pname, null);
+                            if (fReportErrors) {
+                                String ename = elem.rawname;
+                                fErrorReporter.reportWarning("HTML2004", new Object[]{ename,pname});
+                            }
+                            startElement(qname, null, synthesizedAugs());
+                        }
+                    }
+                }
+            }
+        }
+
+        // if block element, save immediate parent inline elements
+        int depth = 0;
+        if (element.flags == 0) {
+            int length = fElementStack.top;
+            fInlineStack.top = 0;
+            for (int i = length - 1; i >= 0; i--) {
+                Info info = fElementStack.data[i];
+                if (!info.element.isInline()) {
+                    break;
+                }
+                fInlineStack.push(info);
+                endElement(info.qname, synthesizedAugs());
+            }
+            depth = fInlineStack.top;
+        }
+
+        // close previous elements
+        if (element.closes != null) {
+            int length = fElementStack.top;
+            for (int i = length - 1; i >= 0; i--) {
+                Info info = fElementStack.data[i];
+
+                // does it close the element we're looking at?
+                if (element.closes(info.element.code)) {
+                    if (fReportErrors) {
+                        String ename = elem.rawname;
+                        String iname = info.qname.rawname;
+                        fErrorReporter.reportWarning("HTML2005", new Object[]{ename,iname});
+                    }
+                    for (int j = length - 1; j >= i; j--) {
+                        info = fElementStack.pop();
+                        if (fDocumentHandler != null) {
+                            // PATCH: Marc-André Morissette
+                            callEndElement(info.qname, synthesizedAugs());
+                        }
+                    }
+                    length = i;
+                    continue;
+                }
+                
+                // should we stop searching?
+                boolean container = info.element.isContainer();
+                boolean parent = false;
+                // element.parent is treated as nullable above, so it must
+                // not be dereferenced unguarded here either
+                if (!container && element.parent != null) {
+                    for (int j = 0; j < element.parent.length; j++) {
+                        parent = parent || info.element.code == element.parent[j].code;
+                    }
+                }
+                if (container || parent) {
+                    break;
+                }
+            }
+        }
+
+        // call handler
+        fSeenRootElement = true;
+        if (element != null && element.isEmpty()) {
+            if (attrs == null) {
+                attrs = emptyAttributes();
+            }
+            if (fDocumentHandler != null) {
+                fDocumentHandler.emptyElement(elem, attrs, augs);
+            }
+        }
+        else {
+            // attributes are only remembered for inline elements, which
+            // are the ones that get re-opened later
+            boolean inline = element != null && element.isInline();
+            fElementStack.push(new Info(element, elem, inline ? attrs : null));
+            if (attrs == null) {
+                attrs = emptyAttributes();
+            }
+            if (fDocumentHandler != null) {
+                callStartElement(elem, attrs, augs);
+            }
+        }
+
+        // re-open inline elements
+        for (int i = 0; i < depth; i++) {
+            Info info = fInlineStack.pop();
+            startElement(info.qname, info.attributes, synthesizedAugs());
+        }
+
+    } // startElement(QName,XMLAttributes,Augmentations)
+
+    /**
+     * Empty element. Handled as a start element event immediately followed
+     * by an end element event for the same name.
+     *
+     * @param elem  the element name
+     * @param attrs the attributes, passed to startElement
+     * @param augs  the augmentations, passed to both calls
+     */
+    public void emptyElement(QName elem, XMLAttributes attrs, Augmentations augs)
+        throws XNIException {
+        startElement(elem, attrs, augs);
+        endElement(elem, augs);
+    } // emptyElement(QName,XMLAttributes,Augmentations)
+
+    /**
+     * Start entity. Dropped once the end of the root element has been
+     * seen. Otherwise, unless balancing a document fragment, a body
+     * element is synthesized first when no root element has been seen or
+     * when the element on top of the stack is head or html; the event is
+     * then forwarded to the handler.
+     *
+     * @param name     passed through to the handler
+     * @param id       passed through to the handler
+     * @param encoding passed through to the handler
+     * @param augs     passed through to the handler
+     */
+    public void startGeneralEntity(String name, 
+                                   XMLResourceIdentifier id,
+                                   String encoding,
+                                   Augmentations augs) throws XNIException {
+        fSeenAnything = true;
+
+        // check for end of document
+        if (fSeenRootElementEnd) {
+            return;
+        }
+
+        // insert body, if needed
+        if (!fDocumentFragment) {
+            boolean insertBody = !fSeenRootElement; // no root yet: body is needed
+            if (!insertBody) {
+                Info info = fElementStack.peek();
+                if (info.element.code == HTMLElements.HEAD ||
+                    info.element.code == HTMLElements.HTML) {
+                    String hname = modifyName("head", fNamesElems);
+                    String bname = modifyName("body", fNamesElems);
+                    if (fReportErrors) {
+                        fErrorReporter.reportWarning("HTML2009", new Object[]{hname,bname});
+                    }
+                    fQName.setValues(null, hname, hname, null); // shared temp QName, reused below
+                    endElement(fQName, synthesizedAugs());
+                    insertBody = true;
+                }
+            }
+            if (insertBody) {
+                String ename = modifyName("body", fNamesElems);
+                fQName.setValues(null, ename, ename, null);
+                if (fReportErrors) {
+                    fErrorReporter.reportWarning("HTML2006", new Object[]{ename});
+                }
+                startElement(fQName, null, synthesizedAugs());
+            }
+        }
+        
+        // call handler
+        if (fDocumentHandler != null) {
+            fDocumentHandler.startGeneralEntity(name, id, encoding, augs);
+        }
+
+    } // startGeneralEntity(String,XMLResourceIdentifier,String,Augmentations)
+
+    /**
+     * Text declaration. Records that content has been seen; the event is
+     * forwarded unless the end of the root element has already been seen
+     * or no handler is registered.
+     *
+     * @param version  passed through to the handler
+     * @param encoding passed through to the handler
+     * @param augs     passed through to the handler
+     */
+    public void textDecl(String version, String encoding, Augmentations augs)
+        throws XNIException {
+        fSeenAnything = true;
+
+        // forward only inside the document and when somebody listens
+        if (!fSeenRootElementEnd && fDocumentHandler != null) {
+            fDocumentHandler.textDecl(version, encoding, augs);
+        }
+
+    } // textDecl(String,String,Augmentations)
+
+    /**
+     * End entity. The event is forwarded unless the end of the root
+     * element has already been seen or no handler is registered.
+     *
+     * @param name passed through to the handler
+     * @param augs passed through to the handler
+     */
+    public void endGeneralEntity(String name, Augmentations augs) throws XNIException {
+
+        // forward only inside the document and when somebody listens
+        if (!fSeenRootElementEnd && fDocumentHandler != null) {
+            fDocumentHandler.endGeneralEntity(name, augs);
+        }
+
+    } // endGeneralEntity(String,Augmentations)
+
+    /**
+     * Start CDATA section. Records that content has been seen; the event
+     * is forwarded unless the end of the root element has already been
+     * seen or no handler is registered.
+     *
+     * @param augs passed through to the handler
+     */
+    public void startCDATA(Augmentations augs) throws XNIException {
+        fSeenAnything = true;
+
+        // forward only inside the document and when somebody listens
+        if (!fSeenRootElementEnd && fDocumentHandler != null) {
+            fDocumentHandler.startCDATA(augs);
+        }
+
+    } // startCDATA(Augmentations)
+
+    /**
+     * End CDATA section. The event is forwarded unless the end of the
+     * root element has already been seen or no handler is registered.
+     *
+     * @param augs passed through to the handler
+     */
+    public void endCDATA(Augmentations augs) throws XNIException {
+
+        // forward only inside the document and when somebody listens
+        if (!fSeenRootElementEnd && fDocumentHandler != null) {
+            fDocumentHandler.endCDATA(augs);
+        }
+
+    } // endCDATA(Augmentations)
+
+    /**
+     * Characters. Dropped once the end of the root element has been seen.
+     * Unless balancing a document fragment, whitespace-only text before
+     * the root element is discarded, other text before the root element
+     * causes a body element to be synthesized, and non-whitespace text
+     * while head or html is on top of the element stack ends head and
+     * starts body. The text is then forwarded to the handler.
+     *
+     * @param text the character data
+     * @param augs passed through to the handler
+     */
+    public void characters(XMLString text, Augmentations augs) throws XNIException {
+
+        // check for end of document
+        if (fSeenRootElementEnd) {
+            return;
+        }
+
+        // is this text whitespace?
+        boolean whitespace = true;
+        for (int i = 0; i < text.length; i++) {
+            if (!Character.isWhitespace(text.ch[text.offset + i])) {
+                whitespace = false;
+                break;
+            }
+        }
+
+        if (!fDocumentFragment) {
+            // handle bare characters
+            if (!fSeenRootElement) {
+                if (whitespace) {
+                    return;
+                }
+                String ename = modifyName("body", fNamesElems);
+                fQName.setValues(null, ename, ename, null);
+                if (fReportErrors) {
+                    fErrorReporter.reportWarning("HTML2006", new Object[]{ename});
+                }
+                startElement(fQName, null, synthesizedAugs());
+            }
+
+            // handle character content in head
+            // NOTE: This frequently happens when the document looks like:
+            //       <title>Title</title>
+            //       And here's some text.
+            else if (!whitespace) {
+                Info info = fElementStack.peek();
+                if (info.element.code == HTMLElements.HEAD ||
+                    info.element.code == HTMLElements.HTML) {
+                    String hname = modifyName("head", fNamesElems);
+                    String bname = modifyName("body", fNamesElems);
+                    if (fReportErrors) {
+                        fErrorReporter.reportWarning("HTML2009", new Object[]{hname,bname});
+                    }
+                    fQName.setValues(null, hname, hname, null); // shared temp QName: head first
+                    endElement(fQName, synthesizedAugs());
+                    fQName.setValues(null, bname, bname, null); // then reused for body
+                    startElement(fQName, null, synthesizedAugs());
+                }
+            }
+        }
+
+        // call handler
+        if (fDocumentHandler != null) {
+            fDocumentHandler.characters(text, augs);
+        }
+
+    } // characters(XMLString,Augmentations)
+
+    /**
+     * Ignorable whitespace. Handled exactly like character content by
+     * delegating to characters.
+     *
+     * @param text the whitespace characters
+     * @param augs passed through to characters
+     */
+    public void ignorableWhitespace(XMLString text, Augmentations augs)
+        throws XNIException {
+        characters(text, augs);
+    } // ignorableWhitespace(XMLString,Augmentations)
+
+    /**
+     * End element. Pops the element stack down to the matching open
+     * element, forwarding an end event for every popped entry, and then
+     * re-opens the inline elements that were closed on the way. An end
+     * tag for an unopened P element is turned into an empty P element.
+     * Events after the end of the root element are dropped.
+     *
+     * @param element the name of the element being ended
+     * @param augs    the augmentations passed with the end event of the
+     *                matching element; entries closed on the way get
+     *                synthesized augmentations
+     */
+    public void endElement(QName element, Augmentations augs) throws XNIException {
+        
+        // is there anything to do?
+        if (fSeenRootElementEnd) {
+            return;
+        }
+        
+        // get element information
+        HTMLElements.Element elem = getElement(element.rawname);
+
+        // unless outside content is ignored, body and html end tags are
+        // dropped here; endDocument pops whatever is still open
+        if (!fIgnoreOutsideContent &&
+            (elem.code == HTMLElements.BODY || elem.code == HTMLElements.HTML)) {
+            return;
+        }
+
+        // check for end of document
+        if (elem.code == HTMLElements.HTML) {
+            fSeenRootElementEnd = true;
+        }
+
+        // end tag of an unopened P: emit an empty P element instead
+        int depth = getElementDepth(elem);
+        if (depth == -1 && elem.code == HTMLElements.P) {
+            startElement(element, emptyAttributes(), synthesizedAugs());
+            endElement(element, augs); // recursion: the element is open now
+            return;
+        }
+
+        // find unbalanced inline elements
+        if (depth > 1 && elem.isInline()) {
+            int size = fElementStack.top;
+            fInlineStack.top = 0;
+            for (int i = 0; i < depth - 1; i++) {
+                Info info = fElementStack.data[size - i - 1];
+                HTMLElements.Element pelem = info.element;
+                if (pelem.isInline()) {
+                    // NOTE: I don't have to make a copy of the info because
+                    //       it will just be popped off of the element stack
+                    //       as soon as we close it, anyway.
+                    fInlineStack.push(info);
+                }
+            }
+        }
+
+        // close children up to appropriate element
+        for (int i = 0; i < depth; i++) {
+            Info info = fElementStack.pop();
+            if (fReportErrors && i < depth - 1) {
+                String ename = modifyName(element.rawname, fNamesElems);
+                String iname = info.qname.rawname;
+                fErrorReporter.reportWarning("HTML2007", new Object[]{ename,iname});
+            }
+            if (fDocumentHandler != null) {
+                // PATCH: Marc-André Morissette
+                callEndElement(info.qname, i < depth - 1 ? synthesizedAugs() : augs);
+            }
+        }
+
+        // re-open inline elements
+        if (depth > 1) {
+            int size = fInlineStack.top;
+            for (int i = 0; i < size; i++) {
+                Info info = (Info)fInlineStack.pop();
+                XMLAttributes attributes = info.attributes;
+                if (fReportErrors) {
+                    String iname = info.qname.rawname;
+                    fErrorReporter.reportWarning("HTML2008", new Object[]{iname});
+                }
+                startElement(info.qname, attributes, synthesizedAugs());
+            }
+        }
+
+    } // endElement(QName,Augmentations)
+
+    // @since Xerces 2.1.0
+
+    /** Sets the document source. The reference is stored as given; null is not rejected. */
+    public void setDocumentSource(XMLDocumentSource source) {
+        fDocumentSource = source;
+    } // setDocumentSource(XMLDocumentSource)
+
+    /** Returns the document source, i.e. the current value of the fDocumentSource field. */
+    public XMLDocumentSource getDocumentSource() {
+        return fDocumentSource;
+    } // getDocumentSource():XMLDocumentSource
+
+    // removed since Xerces-J 2.3.0
+
+    /** Start document. Delegates to the four-argument startDocument, passing null as the third argument. */
+    public void startDocument(XMLLocator locator, String encoding, Augmentations augs) 
+        throws XNIException {
+        startDocument(locator, encoding, null, augs);
+    } // startDocument(XMLLocator,String,Augmentations)
+
+    /**
+     * Start prefix mapping. Passes the event on to the document handler by
+     * reflection, unless the root element has already ended or no handler
+     * is set.
+     */
+    public void startPrefixMapping(String prefix, String uri, Augmentations augs)
+        throws XNIException {
+
+        // nothing to forward after the end of the document or without a handler
+        if (fSeenRootElementEnd || fDocumentHandler == null) {
+            return;
+        }
+
+        final Object[] arguments = { prefix, uri, augs };
+        final Class[] signature = { String.class, String.class, Augmentations.class };
+        try {
+            Method target = fDocumentHandler.getClass().getMethod("startPrefixMapping", signature);
+            target.invoke(fDocumentHandler, arguments);
+        }
+        catch (NoSuchMethodException e) {
+            // the handler has no such public method: skip the call
+        }
+        catch (IllegalAccessException e) {
+            // the method cannot be invoked from here: skip the call
+        }
+        catch (InvocationTargetException e) {
+            // NOTE(review): whatever the handler threw is discarded here
+        }
+
+    } // startPrefixMapping(String,String,Augmentations)
+
+    /**
+     * End prefix mapping. Passes the event on to the document handler by
+     * reflection, unless the root element has already ended or no handler
+     * is set.
+     */
+    public void endPrefixMapping(String prefix, Augmentations augs)
+        throws XNIException {
+
+        // nothing to forward after the end of the document or without a handler
+        if (fSeenRootElementEnd || fDocumentHandler == null) {
+            return;
+        }
+
+        final Object[] arguments = { prefix, augs };
+        final Class[] signature = { String.class, Augmentations.class };
+        try {
+            Method target = fDocumentHandler.getClass().getMethod("endPrefixMapping", signature);
+            target.invoke(fDocumentHandler, arguments);
+        }
+        catch (NoSuchMethodException e) {
+            // the handler has no such public method: skip the call
+        }
+        catch (IllegalAccessException e) {
+            // the method cannot be invoked from here: skip the call
+        }
+        catch (InvocationTargetException e) {
+            // NOTE(review): whatever the handler threw is discarded here
+        }
+
+    } // endPrefixMapping(String,Augmentations)
+
+    //
+    // Protected methods
+    //
+
+    /** Returns an HTML element. */
+    protected HTMLElements.Element getElement(String name) {
+        if (fNamespaces) {
+            int index = name.indexOf(':');
+            if (index != -1) {
+                name = name.substring(index+1);
+            }
+        }
+        return HTMLElements.getElement(name);
+    } // getElement(String):HTMLElements.Element
+
+    /** Call document handler start element (fDocumentHandler is used without a null check). */
+    protected final void callStartElement(QName element, XMLAttributes attrs,
+                                          Augmentations augs) 
+        throws XNIException {
+        fDocumentHandler.startElement(element, attrs, augs);
+    } // callStartElement(QName,XMLAttributes,Augmentations)
+
+    /** Call document handler end element (fDocumentHandler is used without a null check). */
+    protected final void callEndElement(QName element, Augmentations augs) 
+        throws XNIException {
+        fDocumentHandler.endElement(element, augs);
+    } // callEndElement(QName,Augmentations)
+
+    /**
+     * Returns the depth of the open tag associated with the specified
+     * element, counted from the top of the element stack (top entry is 1),
+     * or -1 if no matching element is found.
+     * @param element The element.
+     */
+    protected final int getElementDepth(HTMLElements.Element element) {
+        final int top = fElementStack.top;
+        // for a non-container element the search ends at the first open block element
+        final boolean stopAtBlock = !element.isContainer();
+        for (int distance = 1; distance <= top; distance++) {
+            final HTMLElements.Element open = fElementStack.data[top - distance].element;
+            if (open.code == element.code) {
+                return distance;
+            }
+            if (stopAtBlock && open.isBlock()) {
+                return -1;
+            }
+        }
+        return -1;
+    } // getElementDepth(HTMLElements.Element)
+
+    /**
+     * Returns the depth of the open tag associated with the specified
+     * element parent names or -1 if no matching element is found.
+     *
+     * @param parents The parent elements.
+     */
+    protected int getParentDepth(HTMLElements.Element[] parents, short bounds) {
+        if (parents != null) {
+            for (int i = fElementStack.top - 1; i >= 0; i--) {
+                Info info = fElementStack.data[i];
+                if (info.element.code == bounds) {
+                    break;
+                }
+                for (int j = 0; j < parents.length; j++) {
+                    if (info.element.code == parents[j].code) {
+                        return fElementStack.top - i;
+                    }
+                }
+            }
+        }
+        return -1;
+    } // getParentDepth(HTMLElements.Element[],short):int
+
+    /** Returns a set of empty attributes: the shared fEmptyAttrs instance, cleared on every call. */
+    protected final XMLAttributes emptyAttributes() {
+        fEmptyAttrs.removeAllAttributes();
+        return fEmptyAttrs;
+    } // emptyAttributes():XMLAttributes
+
+    /** Returns the shared augmentations object holding only the synthesized item, or null when augmentations are off. */
+    protected final Augmentations synthesizedAugs() {
+        if (!fAugmentations) {
+            return null;
+        }
+        final HTMLAugmentations synthesized = fInfosetAugs;
+        synthesized.removeAllItems();
+        synthesized.putItem(AUGMENTATIONS, SYNTHESIZED_ITEM);
+        return synthesized;
+    } // synthesizedAugs():Augmentations
+
+    //
+    // Protected static methods
+    //
+
+    /** Modifies the given name based on the specified mode; any other mode returns the name unchanged. */
+    protected static final String modifyName(String name, short mode) {
+        switch (mode) { // fixed locale: default-locale case rules (e.g. Turkish dotless i) must not apply
+            case NAMES_UPPERCASE: return name.toUpperCase(java.util.Locale.ENGLISH);
+            case NAMES_LOWERCASE: return name.toLowerCase(java.util.Locale.ENGLISH);
+        }
+        return name;
+    } // modifyName(String,short):String
+
+    /**
+     * Converts HTML names string value to constant value. 
+     *
+     * @see #NAMES_NO_CHANGE
+     * @see #NAMES_LOWERCASE
+     * @see #NAMES_UPPERCASE
+     */
+    protected static final short getNamesValue(String value) {
+        if (value.equals("lower")) {
+            return NAMES_LOWERCASE;
+        }
+        if (value.equals("upper")) {
+            return NAMES_UPPERCASE;
+        }
+        return NAMES_NO_CHANGE;
+    } // getNamesValue(String):short
+
+    //
+    // Classes
+    //
+
+    /**
+     * Element info for each start element. This information is used when
+     * closing unbalanced inline elements. For example:
+     * <pre>
+     * &lt;i>unbalanced &lt;b>HTML&lt;/i> content&lt;/b>
+     * </pre>
+     * <p>
+     * It seems that it is a waste of processing and memory to copy the 
+     * attributes for every start element even if there are no unbalanced 
+     * inline elements in the document. However, if the attributes are
+     * <em>not</em> saved, then important attributes such as style
+     * information would be lost.
+     *
+     * @author Andy Clark
+     */
+    public static class Info {
+
+        //
+        // Data
+        //
+
+        /** The element. */
+        public HTMLElements.Element element;
+
+        /** The element qualified name (a private copy made by the constructor). */
+        public QName qname;
+
+        /** The element attributes (a private copy); null when none were given. */
+        public XMLAttributes attributes;
+
+        //
+        // Constructors
+        //
+
+        /**
+         * Creates an element information object without attributes.
+         * <p>
+         * <strong>Note:</strong>
+         * This constructor makes a copy of the qualified name.
+         * @param element The element.
+         * @param qname The element qualified name.
+         */
+        public Info(HTMLElements.Element element, QName qname) {
+            this(element, qname, null);
+        } // <init>(HTMLElements.Element,QName)
+
+        /**
+         * Creates an element information object.
+         * <p>
+         * <strong>Note:</strong>
+         * This constructor copies the qualified name and the attributes.
+         * @param element The element.
+         * @param qname The element qualified name.
+         * @param attributes The element attributes; may be null or empty.
+         */
+        public Info(HTMLElements.Element element,
+                    QName qname, XMLAttributes attributes) {
+            this.element = element;
+            this.qname = new QName(qname);
+            if (attributes != null) {
+                int length = attributes.getLength();
+                if (length > 0) {
+                    QName aqname = new QName();
+                    XMLAttributes newattrs = new XMLAttributesImpl();
+                    for (int i = 0; i < length; i++) {
+                        attributes.getName(i, aqname);
+                        String type = attributes.getType(i);
+                        String value = attributes.getValue(i);
+                        String nonNormalizedValue = attributes.getNonNormalizedValue(i);
+                        // addAttribute may update an existing entry, so use the index it returns, not i
+                        int index = newattrs.addAttribute(aqname, type, value);
+                        newattrs.setNonNormalizedValue(index, nonNormalizedValue);
+                        newattrs.setSpecified(index, attributes.isSpecified(i));
+                    }
+                    this.attributes = newattrs;
+                }
+            }
+        } // <init>(HTMLElements.Element,QName,XMLAttributes)
+
+    } // class Info
+
+    /** Unsynchronized stack of element information. */
+    public static class InfoStack {
+        //
+        // Data
+        //
+
+        /** Number of entries on the stack, which is also the index of the next free slot. */
+        public int top;
+
+        /** Backing array; the entries below index top are the stack contents. */
+        public Info[] data = new Info[10];
+
+        //
+        // Public methods
+        //
+
+        /** Pushes element information onto the stack, growing the array by 10 slots when it is full. */
+        public void push(Info info) {
+            if (data.length == top) {
+                final Info[] grown = new Info[top + 10];
+                System.arraycopy(data, 0, grown, 0, top);
+                data = grown;
+            }
+            data[top++] = info;
+        } // push(Info)
+
+        /** Peeks at the top of the stack (an empty stack is not checked for). */
+        public Info peek() {
+            final int last = top - 1;
+            return data[last];
+        } // peek():Info
+
+        /** Pops the top item off of the stack (an empty stack is not checked for). */
+        public Info pop() {
+            top -= 1;
+            return data[top];
+        } // pop():Info
+    } // class InfoStack
+
+} // class HTMLTagBalancer

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/ObjectFactory.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/ObjectFactory.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/ObjectFactory.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,510 @@
+/*
+ * Copyright 2001-2004 The Apache Software Foundation.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.cyberneko.html;
+
+import java.io.InputStream;
+import java.io.IOException;
+import java.io.File;
+import java.io.FileInputStream;
+
+import java.util.Properties;
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+
+/**
+ * This class is duplicated for each JAXP subpackage so keep it in sync.
+ * It is package private and therefore is not exposed as part of the JAXP
+ * API.
+ * <p>
+ * This code is designed to implement the JAXP 1.1 spec pluggability
+ * feature and is designed to run on JDK version 1.1 and
+ * later, and to compile on JDK 1.2 and onward.
+ * The code also runs both as part of an unbundled jar file and
+ * when bundled as part of the JDK.
+ * <p>
+ *
+ * @version $Id: ObjectFactory.java,v 1.1 2004/03/31 20:00:21 andyc Exp $
+ */
+class ObjectFactory {
+
+    //
+    // Constants
+    //
+
+    // Name of the default properties file; createObject looks for it in ${java.home}/lib.
+    private static final String DEFAULT_PROPERTIES_FILENAME = "xerces.properties";
+
+    /** Set to true for debugging (enables the debugPrintln output). */
+    private static final boolean DEBUG = false;
+    
+    /**
+     * Buffer size passed to the BufferedReader that reads a service file.
+     */
+    private static final int DEFAULT_LINE_LENGTH = 80;
+
+    /** Cache of the contents of the xerces.properties file.
+     *  This is null until the file has been loaded; it is reset to null
+     *  when the file is missing or cannot be read, and reloaded when the
+     *  file's time stamp increases (see createObject).
+     */
+    private static Properties fXercesProperties = null;
+
+    /**
+     * Cache the time stamp of the xerces.properties file so
+     * that we know if it's been modified and can invalidate
+     * the cache when necessary; -1 means no time stamp is cached.
+     */
+    private static long fLastModified = -1;
+
+    //
+    // static methods
+    //
+
+    /**
+     * Creates an instance of the implementation class, which is looked up in
+     * the following order (this overload passes a null properties filename):
+     * <ol>
+     *  <li>query the system property using <code>System.getProperty</code>
+     *  <li>read the default <code>$java.home/lib/xerces.properties</code> file
+     *  <li>read <code>META-INF/services/<i>factoryId</i></code> file
+     *  <li>use fallback classname
+     * </ol>
+     *
+     * @return instance of the factory class, never null
+     *
+     * @param factoryId             Name of the factory to find, same as
+     *                              a property name
+     * @param fallbackClassName     Implementation class name, if nothing else
+     *                              is found.  Use null to mean no fallback.
+     * @exception ObjectFactory.ConfigurationError if no provider is found or it cannot be instantiated
+     */
+    static Object createObject(String factoryId, String fallbackClassName)
+        throws ConfigurationError {
+        return createObject(factoryId, null, fallbackClassName);
+    } // createObject(String,String):Object
+
+    /**
+     * Creates an instance of the implementation class, which is looked up
+     * in the following order:
+     * <ol>
+     *  <li>query the system property using <code>System.getProperty</code>
+     *  <li>read the properties file (see <code>propertiesFilename</code>)
+     *  <li>read <code>META-INF/services/<i>factoryId</i></code> file
+     *  <li>use fallback classname
+     * </ol>
+     *
+     * @return instance of the factory class, never null
+     *
+     * @param factoryId             Name of the factory to find, same as
+     *                              a property name
+     * @param propertiesFilename Path of the properties file, used exactly as
+     *                           given.  If null, the cached copy of
+     *                           ${java.home}/lib/xerces.properties will be used.
+     * @param fallbackClassName     Implementation class name, if nothing else
+     *                              is found.  Use null to mean no fallback.
+     *
+     * @exception ObjectFactory.ConfigurationError if no provider is found or it cannot be instantiated
+     */
+    static Object createObject(String factoryId,
+                                      String propertiesFilename,
+                                      String fallbackClassName)
+        throws ConfigurationError
+    {
+        if (DEBUG) debugPrintln("debug is on");
+
+        SecuritySupport ss = SecuritySupport.getInstance();
+        ClassLoader cl = findClassLoader();
+
+        // Use the system property first
+        try {
+            String systemProp = ss.getSystemProperty(factoryId);
+            if (systemProp != null) {
+                if (DEBUG) debugPrintln("found system property, value=" + systemProp);
+                return newInstance(systemProp, cl, true);
+            }
+        } catch (SecurityException se) {
+            // Ignore and continue w/ next location
+        }
+
+        // Try to read from propertiesFilename, or $java.home/lib/xerces.properties
+        String factoryClassName = null;
+        // no properties file name specified; use $JAVA_HOME/lib/xerces.properties:
+        if (propertiesFilename == null) {
+            File propertiesFile = null;
+            boolean propertiesFileExists = false;
+            try {
+                String javah = ss.getSystemProperty("java.home");
+                propertiesFilename = javah + File.separator +
+                    "lib" + File.separator + DEFAULT_PROPERTIES_FILENAME;
+                propertiesFile = new File(propertiesFilename);
+                propertiesFileExists = ss.getFileExists(propertiesFile);
+            } catch (SecurityException e) {
+                // not allowed to look at the file: forget any cached state
+                fLastModified = -1;
+                fXercesProperties = null;
+            }
+
+            synchronized (ObjectFactory.class) {
+                boolean loadProperties = false;
+                try {
+                    // file existed last time
+                    if(fLastModified >= 0) {
+                        if(propertiesFileExists && // NOTE: the comparison below also stores the fresh time stamp
+                                (fLastModified < (fLastModified = ss.getLastModified(propertiesFile)))) {
+                            loadProperties = true;
+                        } else {
+                            // file has stopped existing...
+                            if(!propertiesFileExists) {
+                                fLastModified = -1;
+                                fXercesProperties = null;
+                            } // else, file wasn't modified!
+                        }
+                    } else {
+                        // file has started to exist:
+                        if(propertiesFileExists) {
+                            loadProperties = true;
+                            fLastModified = ss.getLastModified(propertiesFile);
+                        } // else, nothing's changed
+                    }
+                    if(loadProperties) {
+                        // must never have attempted to read xerces.properties before (or it's outdated)
+                        fXercesProperties = new Properties();
+                        FileInputStream fis = ss.getFileInputStream(propertiesFile);
+                        fXercesProperties.load(fis);
+                        fis.close();
+                    }
+	            } catch (Exception x) {
+	                fXercesProperties = null;
+	                fLastModified = -1;
+                    // assert(x instanceof FileNotFoundException
+	                //        || x instanceof SecurityException)
+	                // In both cases, ignore and continue w/ next location
+	            }
+            }
+            if(fXercesProperties != null) {
+                factoryClassName = fXercesProperties.getProperty(factoryId);
+            }
+        } else {
+            try {
+                FileInputStream fis = ss.getFileInputStream(new File(propertiesFilename));
+                Properties props = new Properties();
+                props.load(fis);
+                fis.close();
+                factoryClassName = props.getProperty(factoryId);
+            } catch (Exception x) {
+                // assert(x instanceof FileNotFoundException
+                //        || x instanceof SecurityException)
+                // In both cases, ignore and continue w/ next location
+            }
+        }
+        if (factoryClassName != null) {
+            if (DEBUG) debugPrintln("found in " + propertiesFilename + ", value=" + factoryClassName);
+            return newInstance(factoryClassName, cl, true);
+        }
+
+        // Try Jar Service Provider Mechanism
+        Object provider = findJarServiceProvider(factoryId);
+        if (provider != null) {
+            return provider;
+        }
+
+        if (fallbackClassName == null) {
+            throw new ConfigurationError(
+                "Provider for " + factoryId + " cannot be found", null);
+        }
+
+        if (DEBUG) debugPrintln("using fallback, value=" + fallbackClassName);
+        return newInstance(fallbackClassName, cl, true);
+    } // createObject(String,String,String):Object
+
+    //
+    // Private static methods
+    //
+
+    /** Prints a message, prefixed with "JAXP: ", to standard error if debugging is enabled. */
+    private static void debugPrintln(String msg) {
+        if (DEBUG) {
+            System.err.println("JAXP: " + msg);
+        }
+    } // debugPrintln(String)
+
+    /**
+     * Figure out which ClassLoader to use: the context ClassLoader, unless it
+     * is null or on the system ClassLoader's parent chain (see comments below).
+     */
+    static ClassLoader findClassLoader()
+        throws ConfigurationError
+    {
+        SecuritySupport ss = SecuritySupport.getInstance();
+
+        // Figure out which ClassLoader to use for loading the provider
+        // class.  If there is a Context ClassLoader then use it.
+        ClassLoader context = ss.getContextClassLoader();
+        ClassLoader system = ss.getSystemClassLoader();
+
+        ClassLoader chain = system;
+        while (true) {
+            if (context == chain) {
+                // Assert: we are on JDK 1.1 or we have no Context ClassLoader
+                // or any Context ClassLoader in chain of system classloader
+                // (including extension ClassLoader) so extend to widest
+                // ClassLoader (always look in system ClassLoader if Xerces
+                // is in boot/extension/system classpath and in current
+                // ClassLoader otherwise); normal classloaders delegate
+                // back to system ClassLoader first so this widening doesn't
+                // change the fact that context ClassLoader will be consulted
+                ClassLoader current = ObjectFactory.class.getClassLoader();
+
+                chain = system;
+                while (true) {
+                    if (current == chain) {
+                        // Assert: Current ClassLoader in chain of
+                        // boot/extension/system ClassLoaders
+                        return system;
+                    }
+                    if (chain == null) {
+                        break;
+                    }
+                    chain = ss.getParentClassLoader(chain);
+                }
+
+                // Assert: Current ClassLoader not in chain of
+                // boot/extension/system ClassLoaders
+                return current;
+            }
+
+            if (chain == null) {
+                // boot ClassLoader reached
+                break;
+            }
+
+            // Check for any extension ClassLoaders in chain up to
+            // boot ClassLoader
+            chain = ss.getParentClassLoader(chain);
+        };
+
+        // Assert: Context ClassLoader not in chain of
+        // boot/extension/system ClassLoaders
+        return context;
+    } // findClassLoader():ClassLoader
+
+    /**
+     * Create an instance of a class using the specified ClassLoader
+     */
+    static Object newInstance(String className, ClassLoader cl,
+                                      boolean doFallback)
+        throws ConfigurationError
+    {
+        // assert(className != null);
+        try{
+            Class providerClass = findProviderClass(className, cl, doFallback);
+            Object instance = providerClass.newInstance();
+            if (DEBUG) debugPrintln("created new instance of " + providerClass +
+                   " using ClassLoader: " + cl);
+            return instance;
+        } catch (ClassNotFoundException x) {
+            throw new ConfigurationError(
+                "Provider " + className + " not found", x);
+        } catch (Exception x) {
+            throw new ConfigurationError(
+                "Provider " + className + " could not be instantiated: " + x,
+                x);
+        }
+    }
+
+    /**
+     * Find a Class using the specified ClassLoader
+     *
+     * @param className  fully qualified name of the class to load
+     * @param cl         loader to try first; null means use Class.forName
+     * @param doFallback whether to retry with this class's own loader when
+     *                   cl cannot find the class
+     */
+    static Class findProviderClass(String className, ClassLoader cl,
+                                      boolean doFallback)
+        throws ClassNotFoundException, ConfigurationError
+    {
+        // Let the security manager (when one is installed) veto access to
+        // the package of the requested class, as restricted by the
+        // java.security policy; a SecurityException simply propagates.
+        final SecurityManager manager = System.getSecurityManager();
+        if (manager != null) {
+            final int dot = className.lastIndexOf(".");
+            // a name without a dot is passed on unchanged
+            final String pkg = (dot == -1) ? className : className.substring(0, dot);
+            manager.checkPackageAccess(pkg);
+        }
+
+        if (cl == null) {
+            // XXX Use the bootstrap ClassLoader.  There is no way to
+            // load a class using the bootstrap ClassLoader that works
+            // in both JDK 1.1 and Java 2.  However, this should still
+            // work b/c the following should be true:
+            //
+            // (cl == null) iff current ClassLoader == null
+            //
+            // Thus Class.forName(String) will use the current
+            // ClassLoader which will be the bootstrap ClassLoader.
+            return Class.forName(className);
+        }
+
+        // first choice: the loader we were given
+        try {
+            return cl.loadClass(className);
+        } catch (ClassNotFoundException notFound) {
+            if (!doFallback) {
+                throw notFound;
+            }
+            // Fall back to current classloader
+            final ClassLoader own = ObjectFactory.class.getClassLoader();
+            if (own == null) {
+                return Class.forName(className);
+            }
+            if (own == cl) {
+                // that loader was already tried: nothing left to fall back to
+                throw notFound;
+            }
+            return own.loadClass(className);
+        }
+    }
+
+    /**
+     * Try to find provider using Jar Service Provider Mechanism: the first
+     * line of META-INF/services/factoryId names the class to instantiate.
+     * @return instance of provider class if found or null
+     */
+    private static Object findJarServiceProvider(String factoryId)
+        throws ConfigurationError
+    {
+        SecuritySupport ss = SecuritySupport.getInstance();
+        String serviceId = "META-INF/services/" + factoryId;
+        InputStream is = null;
+
+        // First try the Context ClassLoader
+        ClassLoader cl = findClassLoader();
+
+        is = ss.getResourceAsStream(cl, serviceId);
+
+        // If no provider found then try the current ClassLoader
+        if (is == null) {
+            ClassLoader current = ObjectFactory.class.getClassLoader();
+            if (cl != current) {
+                cl = current;
+                is = ss.getResourceAsStream(cl, serviceId);
+            }
+        }
+
+        if (is == null) {
+            // No provider found
+            return null;
+        }
+
+        if (DEBUG) debugPrintln("found jar resource=" + serviceId +
+               " using ClassLoader: " + cl);
+
+        // Read the service provider name in UTF-8 as specified in
+        // the jar spec.  Unfortunately this fails in Microsoft
+        // VJ++, which does not implement the UTF-8
+        // encoding. Theoretically, we should simply let it fail in
+        // that case, since the JVM is obviously broken if it
+        // doesn't support such a basic standard.  But since there
+        // are still some users attempting to use VJ++ for
+        // development, we have dropped in a fallback which makes a
+        // second attempt using the platform's default encoding. In
+        // VJ++ this is apparently ASCII, which is a subset of
+        // UTF-8... and since the strings we'll be reading here are
+        // also primarily limited to the 7-bit ASCII range (at
+        // least, in English versions), this should work well
+        // enough to keep us on the air until we're ready to
+        // officially decommit from VJ++. [Edited comment from
+        // jkesselm]
+        BufferedReader rd;
+        try {
+            rd = new BufferedReader(new InputStreamReader(is, "UTF-8"), DEFAULT_LINE_LENGTH);
+        } catch (java.io.UnsupportedEncodingException e) {
+            rd = new BufferedReader(new InputStreamReader(is), DEFAULT_LINE_LENGTH);
+        }
+
+        String factoryClassName = null;
+        try {
+            // XXX Does not handle all possible input as specified by the
+            // Jar Service Provider specification
+            factoryClassName = rd.readLine();
+        } catch (IOException x) {
+            return null; // No provider found
+        } finally {
+            try { rd.close(); } catch (IOException ignored) { } // release the stream even when the read fails
+        }
+
+        if (factoryClassName != null &&
+            ! "".equals(factoryClassName)) {
+            if (DEBUG) debugPrintln("found in resource, value="
+                   + factoryClassName);
+
+            // Note: here we do not want to fall back to the current
+            // ClassLoader because we want to avoid the case where the
+            // resource file was found using one ClassLoader and the
+            // provider class was instantiated using a different one.
+            return newInstance(factoryClassName, cl, false);
+        }
+
+        // No provider found
+        return null;
+    }
+
+    //
+    // Classes
+    //
+
+    /**
+     * A configuration error: an Error that also carries the exception (if
+     * any) that caused the configuration problem.
+     */
+    static class ConfigurationError extends Error {
+
+        //
+        // Data
+        //
+
+        /** Underlying exception; may be null. */
+        private final Exception nested;
+
+        //
+        // Constructors
+        //
+
+        /**
+         * Construct a new instance with the specified detail string and
+         * exception.
+         */
+        ConfigurationError(String msg, Exception x) {
+            super(msg);
+            nested = x;
+        } // <init>(String,Exception)
+
+        //
+        // methods
+        //
+
+        /** Returns the exception associated to this error. */
+        Exception getException() {
+            return nested;
+        } // getException():Exception
+
+    } // class ConfigurationError
+
+} // class ObjectFactory

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/SecuritySupport.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/SecuritySupport.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/SecuritySupport.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,115 @@
+/*
+ * Copyright 2002,2004 The Apache Software Foundation.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.cyberneko.html;
+
+import java.io.*;
+
+/**
+ * Base class with security related methods that work on JDK 1.1.
+ * <p>
+ * This class is duplicated for each JAXP subpackage so keep it in sync.
+ * It is package private and therefore is not exposed as part of the JAXP
+ * API.
+ */
+class SecuritySupport {
+
+    /*
+     * Declared as Object rather than SecuritySupport so that the verifier
+     * won't try to prove its type, thus possibly trying to load the
+     * SecuritySupport12 class.
+     */
+    private static final Object securitySupport;
+
+    static {
+        SecuritySupport support = null;
+        try {
+            // if this lookup works, we're on 1.2.
+            Class c = Class.forName("java.security.AccessController");
+            /*
+             * Unfortunately, we can't load the class using reflection
+             * because the class is package private.  And the class has
+             * to be package private so the APIs aren't exposed to other
+             * code that could use them to circumvent security.  Thus,
+             * we accept the risk that the direct reference might fail
+             * on some JDK 1.1 JVMs, even though we would never execute
+             * this code in such a case.  Sigh...
+             */
+            support = new SecuritySupport12();
+        } catch (Exception ex) {
+            // ignore it; the plain implementation is used instead
+        } finally {
+            if (support == null) {
+                support = new SecuritySupport();
+            }
+            securitySupport = support;
+        }
+    }
+
+    /**
+     * Return an appropriate instance of this class, depending on whether
+     * we're on a JDK 1.1 or J2SE 1.2 (or later) system.
+     */
+    static SecuritySupport getInstance() {
+        return (SecuritySupport) securitySupport;
+    }
+
+    /** Returns null in this base implementation. */
+    ClassLoader getContextClassLoader() {
+        return null;
+    }
+
+    /** Returns null in this base implementation. */
+    ClassLoader getSystemClassLoader() {
+        return null;
+    }
+
+    /** Returns null in this base implementation, whatever loader is given. */
+    ClassLoader getParentClassLoader(ClassLoader cl) {
+        return null;
+    }
+
+    /** Reads the named system property directly from {@link System}. */
+    String getSystemProperty(String propName) {
+        return System.getProperty(propName);
+    }
+
+    /** Opens the given file for reading. */
+    FileInputStream getFileInputStream(File file)
+        throws FileNotFoundException
+    {
+        return new FileInputStream(file);
+    }
+
+    /**
+     * Opens the named resource through the given class loader, or through
+     * the system resources when no class loader is given.
+     */
+    InputStream getResourceAsStream(ClassLoader cl, String name) {
+        if (cl == null) {
+            return ClassLoader.getSystemResourceAsStream(name);
+        }
+        return cl.getResourceAsStream(name);
+    }
+
+    /** Tells whether the given file exists. */
+    boolean getFileExists(File f) {
+        return f.exists();
+    }
+
+    /** Returns the last-modified time stamp of the given file. */
+    long getLastModified(File f) {
+        return f.lastModified();
+    }
+}

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/SecuritySupport12.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/SecuritySupport12.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/SecuritySupport12.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2002,2004 The Apache Software Foundation.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.cyberneko.html;
+
+import java.security.*;
+import java.io.*;
+
+/**
+ * Security related methods that only work on J2SE 1.2 and newer.  Every
+ * operation is executed inside <code>AccessController.doPrivileged</code>.
+ * <p>
+ * This class is duplicated for each JAXP subpackage so keep it in sync.
+ * It is package private and therefore is not exposed as part of the JAXP
+ * API.
+ */
+class SecuritySupport12 extends SecuritySupport {
+
+    /** Returns the current thread's context class loader, or null if access is denied. */
+    ClassLoader getContextClassLoader() {
+        PrivilegedAction action = new PrivilegedAction() {
+            public Object run() {
+                try {
+                    return Thread.currentThread().getContextClassLoader();
+                } catch (SecurityException ex) {
+                    return null;
+                }
+            }
+        };
+        return (ClassLoader) AccessController.doPrivileged(action);
+    }
+
+    /** Returns the system class loader, or null if access is denied. */
+    ClassLoader getSystemClassLoader() {
+        PrivilegedAction action = new PrivilegedAction() {
+            public Object run() {
+                try {
+                    return ClassLoader.getSystemClassLoader();
+                } catch (SecurityException ex) {
+                    return null;
+                }
+            }
+        };
+        return (ClassLoader) AccessController.doPrivileged(action);
+    }
+
+    /**
+     * Returns the parent of the given class loader, or null if access is
+     * denied or the loader reports itself as its own parent.
+     */
+    ClassLoader getParentClassLoader(final ClassLoader cl) {
+        PrivilegedAction action = new PrivilegedAction() {
+            public Object run() {
+                ClassLoader parent;
+                try {
+                    parent = cl.getParent();
+                } catch (SecurityException ex) {
+                    return null;
+                }
+
+                // eliminate loops in case of the boot
+                // ClassLoader returning itself as a parent
+                if (parent == cl) {
+                    return null;
+                }
+                return parent;
+            }
+        };
+        return (ClassLoader) AccessController.doPrivileged(action);
+    }
+
+    /** Reads the named system property. */
+    String getSystemProperty(final String propName) {
+        PrivilegedAction action = new PrivilegedAction() {
+            public Object run() {
+                return System.getProperty(propName);
+            }
+        };
+        return (String) AccessController.doPrivileged(action);
+    }
+
+    /** Opens the given file for reading. */
+    FileInputStream getFileInputStream(final File file)
+        throws FileNotFoundException
+    {
+        PrivilegedExceptionAction action = new PrivilegedExceptionAction() {
+            public Object run() throws FileNotFoundException {
+                return new FileInputStream(file);
+            }
+        };
+        try {
+            return (FileInputStream) AccessController.doPrivileged(action);
+        } catch (PrivilegedActionException e) {
+            // the action can only fail with a FileNotFoundException
+            throw (FileNotFoundException) e.getException();
+        }
+    }
+
+    /**
+     * Opens the named resource through the given class loader, or through
+     * the system resources when no class loader is given.
+     */
+    InputStream getResourceAsStream(final ClassLoader cl,
+                                    final String name)
+    {
+        PrivilegedAction action = new PrivilegedAction() {
+            public Object run() {
+                if (cl == null) {
+                    return ClassLoader.getSystemResourceAsStream(name);
+                }
+                return cl.getResourceAsStream(name);
+            }
+        };
+        return (InputStream) AccessController.doPrivileged(action);
+    }
+
+    /** Tells whether the given file exists. */
+    boolean getFileExists(final File f) {
+        PrivilegedAction action = new PrivilegedAction() {
+            public Object run() {
+                return f.exists() ? Boolean.TRUE : Boolean.FALSE;
+            }
+        };
+        Boolean exists = (Boolean) AccessController.doPrivileged(action);
+        return exists.booleanValue();
+    }
+
+    /** Returns the last-modified time stamp of the given file. */
+    long getLastModified(final File f) {
+        PrivilegedAction action = new PrivilegedAction() {
+            public Object run() {
+                return new Long(f.lastModified());
+            }
+        };
+        Long stamp = (Long) AccessController.doPrivileged(action);
+        return stamp.longValue();
+    }
+
+}

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/DefaultFilter.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/DefaultFilter.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/DefaultFilter.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,437 @@
+/* 
+ * (C) Copyright 2002-2005, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ */
+
+package org.cyberneko.html.filters;
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+import org.cyberneko.html.HTMLComponent;
+
+import org.apache.xerces.xni.Augmentations;
+import org.apache.xerces.xni.NamespaceContext;
+import org.apache.xerces.xni.QName;
+import org.apache.xerces.xni.XMLAttributes;
+import org.apache.xerces.xni.XMLDocumentHandler;
+import org.apache.xerces.xni.XMLLocator;
+import org.apache.xerces.xni.XMLResourceIdentifier;
+import org.apache.xerces.xni.XMLString;
+import org.apache.xerces.xni.XNIException;
+import org.apache.xerces.xni.parser.XMLComponentManager;
+import org.apache.xerces.xni.parser.XMLConfigurationException;
+import org.apache.xerces.xni.parser.XMLDocumentFilter;
+import org.apache.xerces.xni.parser.XMLDocumentSource;
+
+/**
+ * This class implements a filter that simply passes document
+ * events to the next handler. It can be used as a base class to
+ * simplify the development of new document filters.
+ *
+ * @author Andy Clark
+ *
+ * @version $Id: DefaultFilter.java,v 1.7 2005/02/14 03:56:54 andyc Exp $
+ */
+public class DefaultFilter 
+    implements XMLDocumentFilter, HTMLComponent {
+
+    //
+    // Data
+    //
+
+    /** Document handler. */
+    protected XMLDocumentHandler fDocumentHandler;
+
+    /** Document source. */
+    protected XMLDocumentSource fDocumentSource;
+
+    //
+    // XMLDocumentSource methods
+    //
+
+    /**
+     * Sets the document handler that receives the events forwarded by
+     * this filter.
+     */
+    public void setDocumentHandler(XMLDocumentHandler handler) {
+        this.fDocumentHandler = handler;
+    } // setDocumentHandler(XMLDocumentHandler)
+
+    // @since Xerces 2.1.0
+
+    /** Returns the handler last given to setDocumentHandler, if any. */
+    public XMLDocumentHandler getDocumentHandler() {
+        return this.fDocumentHandler;
+    } // getDocumentHandler():XMLDocumentHandler
+
+    /** Remembers the document source that feeds this filter. */
+    public void setDocumentSource(XMLDocumentSource source) {
+        this.fDocumentSource = source;
+    } // setDocumentSource(XMLDocumentSource)
+
+    /** Returns the source last given to setDocumentSource, if any. */
+    public XMLDocumentSource getDocumentSource() {
+        return this.fDocumentSource;
+    } // getDocumentSource():XMLDocumentSource
+
+    //
+    // XMLDocumentHandler methods
+    //
+
+    // since Xerces-J 2.2.0
+
+    /**
+     * Start document. The event is forwarded to the document handler, if
+     * one is set.
+     * <p>
+     * The handler is invoked reflectively: the four-argument form of
+     * <code>startDocument</code> is tried first and, if the handler class
+     * does not have it, the older three-argument form is used instead.
+     *
+     * @throws XNIException Thrown if no suitable method can be found or
+     *                      accessed. Anything thrown by the handler itself
+     *                      is rethrown as is, just as it would be for a
+     *                      direct call.
+     */
+    public void startDocument(XMLLocator locator, String encoding, 
+                              NamespaceContext nscontext, Augmentations augs) 
+        throws XNIException {
+        if (fDocumentHandler == null) {
+            return;
+        }
+        try {
+            // NOTE: Hack to allow the default filter to work with
+            //       old and new versions of the XNI document handler
+            //       interface. -Ac
+            Class cls = fDocumentHandler.getClass();
+            Method method;
+            Object[] params;
+            try {
+                Class[] types = {
+                    XMLLocator.class, String.class,
+                    NamespaceContext.class, Augmentations.class
+                };
+                method = cls.getMethod("startDocument", types);
+                params = new Object[] {
+                    locator, encoding, 
+                    nscontext, augs
+                };
+            } 
+            catch (NoSuchMethodException e) {
+                // fall back to the old signature without namespace context
+                Class[] types = {
+                    XMLLocator.class, String.class, Augmentations.class
+                };
+                method = cls.getMethod("startDocument", types);
+                params = new Object[] {
+                    locator, encoding, augs
+                };
+            }
+            method.invoke(fDocumentHandler, params);
+        } 
+        catch (NoSuchMethodException e) {
+            // NOTE: Should not happen!
+            throw new XNIException(e);
+        } 
+        catch (IllegalAccessException e) {
+            throw new XNIException(e);
+        } 
+        catch (InvocationTargetException e) {
+            // The handler itself failed: propagate its own exception
+            // instead of burying it inside the reflection wrapper, so
+            // that this event behaves like the directly-invoked ones.
+            Throwable target = e.getTargetException();
+            if (target instanceof XNIException) {
+                throw (XNIException)target;
+            }
+            if (target instanceof RuntimeException) {
+                throw (RuntimeException)target;
+            }
+            if (target instanceof Error) {
+                throw (Error)target;
+            }
+            throw new XNIException(e);
+        }
+    } // startDocument(XMLLocator,String,NamespaceContext,Augmentations)
+
+    // old methods
+
+    /** Forwards the XML declaration to the document handler, if one is set. */
+    public void xmlDecl(String version, String encoding, String standalone, Augmentations augs)
+        throws XNIException {
+        if (fDocumentHandler == null) {
+            return;
+        }
+        fDocumentHandler.xmlDecl(version, encoding, standalone, augs);
+    } // xmlDecl(String,String,String,Augmentations)
+
+    /** Forwards the doctype declaration to the document handler, if one is set. */
+    public void doctypeDecl(String root, String publicId, String systemId, Augmentations augs)
+        throws XNIException {
+        if (fDocumentHandler == null) {
+            return;
+        }
+        fDocumentHandler.doctypeDecl(root, publicId, systemId, augs);
+    } // doctypeDecl(String,String,String,Augmentations)
+
+    /** Forwards a comment to the document handler, if one is set. */
+    public void comment(XMLString text, Augmentations augs)
+        throws XNIException {
+        if (fDocumentHandler == null) {
+            return;
+        }
+        fDocumentHandler.comment(text, augs);
+    } // comment(XMLString,Augmentations)
+
+    /** Forwards a processing instruction to the document handler, if one is set. */
+    public void processingInstruction(String target, XMLString data, Augmentations augs)
+        throws XNIException {
+        if (fDocumentHandler == null) {
+            return;
+        }
+        fDocumentHandler.processingInstruction(target, data, augs);
+    } // processingInstruction(String,XMLString,Augmentations)
+
+    /** Forwards the start of an element to the document handler, if one is set. */
+    public void startElement(QName element, XMLAttributes attributes, Augmentations augs)
+        throws XNIException {
+        if (fDocumentHandler == null) {
+            return;
+        }
+        fDocumentHandler.startElement(element, attributes, augs);
+    } // startElement(QName,XMLAttributes,Augmentations)
+
+    /** Forwards an empty element to the document handler, if one is set. */
+    public void emptyElement(QName element, XMLAttributes attributes, Augmentations augs)
+        throws XNIException {
+        if (fDocumentHandler == null) {
+            return;
+        }
+        fDocumentHandler.emptyElement(element, attributes, augs);
+    } // emptyElement(QName,XMLAttributes,Augmentations)
+
+    /** Forwards character content to the document handler, if one is set. */
+    public void characters(XMLString text, Augmentations augs) 
+        throws XNIException {
+        if (fDocumentHandler == null) {
+            return;
+        }
+        fDocumentHandler.characters(text, augs);
+    } // characters(XMLString,Augmentations)
+
+    /** Forwards ignorable whitespace to the document handler, if one is set. */
+    public void ignorableWhitespace(XMLString text, Augmentations augs) 
+        throws XNIException {
+        if (fDocumentHandler == null) {
+            return;
+        }
+        fDocumentHandler.ignorableWhitespace(text, augs);
+    } // ignorableWhitespace(XMLString,Augmentations)
+
+    /** Forwards the start of a general entity to the document handler, if one is set. */
+    public void startGeneralEntity(String name, XMLResourceIdentifier id, String encoding, Augmentations augs)
+        throws XNIException {
+        if (fDocumentHandler == null) {
+            return;
+        }
+        fDocumentHandler.startGeneralEntity(name, id, encoding, augs);
+    } // startGeneralEntity(String,XMLResourceIdentifier,String,Augmentations)
+
+    /** Forwards a text declaration to the document handler, if one is set. */
+    public void textDecl(String version, String encoding, Augmentations augs)
+        throws XNIException {
+        if (fDocumentHandler == null) {
+            return;
+        }
+        fDocumentHandler.textDecl(version, encoding, augs);
+    } // textDecl(String,String,Augmentations)
+
+    /** Forwards the end of a general entity to the document handler, if one is set. */
+    public void endGeneralEntity(String name, Augmentations augs)
+        throws XNIException {
+        if (fDocumentHandler == null) {
+            return;
+        }
+        fDocumentHandler.endGeneralEntity(name, augs);
+    } // endGeneralEntity(String,Augmentations)
+
+    /** Forwards the start of a CDATA section to the document handler, if one is set. */
+    public void startCDATA(Augmentations augs) throws XNIException {
+        if (fDocumentHandler == null) {
+            return;
+        }
+        fDocumentHandler.startCDATA(augs);
+    } // startCDATA(Augmentations)
+
+    /** Forwards the end of a CDATA section to the document handler, if one is set. */
+    public void endCDATA(Augmentations augs) throws XNIException {
+        if (fDocumentHandler == null) {
+            return;
+        }
+        fDocumentHandler.endCDATA(augs);
+    } // endCDATA(Augmentations)
+
+    /** Forwards the end of an element to the document handler, if one is set. */
+    public void endElement(QName element, Augmentations augs)
+        throws XNIException {
+        if (fDocumentHandler == null) {
+            return;
+        }
+        fDocumentHandler.endElement(element, augs);
+    } // endElement(QName,Augmentations)
+
+    /** Forwards the end of the document to the document handler, if one is set. */
+    public void endDocument(Augmentations augs) throws XNIException {
+        if (fDocumentHandler == null) {
+            return;
+        }
+        fDocumentHandler.endDocument(augs);
+    } // endDocument(Augmentations)
+
+    // removed since Xerces-J 2.3.0
+
+    /**
+     * Start document (old signature without a namespace context).
+     * Delegates to the four-argument form, passing null as the
+     * namespace context.
+     */
+    public void startDocument(XMLLocator locator, String encoding, Augmentations augs) 
+        throws XNIException {
+        this.startDocument(locator, encoding, null, augs);
+    } // startDocument(XMLLocator,String,Augmentations)
+
+    /**
+     * Start prefix mapping. The document handler's
+     * <code>startPrefixMapping</code> method is looked up and invoked
+     * reflectively; if the handler has no such method the event is
+     * silently dropped.
+     */
+    public void startPrefixMapping(String prefix, String uri, Augmentations augs)
+        throws XNIException {
+        if (fDocumentHandler == null) {
+            return;
+        }
+        Class[] types = { String.class, String.class, Augmentations.class };
+        Object[] args = { prefix, uri, augs };
+        try {
+            Method method = fDocumentHandler.getClass().getMethod("startPrefixMapping", types);
+            method.invoke(fDocumentHandler, args);
+        }
+        catch (NoSuchMethodException e) {
+            // ignore
+        }
+        catch (IllegalAccessException e) {
+            // ignore
+        }
+        catch (InvocationTargetException e) {
+            // ignore
+            // NOTE(review): this also drops exceptions thrown by the
+            // handler itself -- confirm that this is intended.
+        }
+    } // startPrefixMapping(String,String,Augmentations)
+
+    /**
+     * End prefix mapping. The document handler's
+     * <code>endPrefixMapping</code> method is looked up and invoked
+     * reflectively; if the handler has no such method the event is
+     * silently dropped.
+     */
+    public void endPrefixMapping(String prefix, Augmentations augs)
+        throws XNIException {
+        if (fDocumentHandler == null) {
+            return;
+        }
+        Class[] types = { String.class, Augmentations.class };
+        Object[] args = { prefix, augs };
+        try {
+            Method method = fDocumentHandler.getClass().getMethod("endPrefixMapping", types);
+            method.invoke(fDocumentHandler, args);
+        }
+        catch (NoSuchMethodException e) {
+            // ignore
+        }
+        catch (IllegalAccessException e) {
+            // ignore
+        }
+        catch (InvocationTargetException e) {
+            // ignore
+            // NOTE(review): this also drops exceptions thrown by the
+            // handler itself -- confirm that this is intended.
+        }
+    } // endPrefixMapping(String,Augmentations)
+
+    //
+    // HTMLComponent methods
+    //
+
+    /**
+     * Returns a list of feature identifiers that are recognized by
+     * this component. This method may return null if no features
+     * are recognized by this component.
+     */
+    public String[] getRecognizedFeatures() {
+        return null;
+    } // getRecognizedFeatures():String[]
+
+    /**
+     * Returns the default state for a feature, or null if this
+     * component does not want to report a default value for this
+     * feature.
+     */
+    public Boolean getFeatureDefault(String featureId) {
+        return null;
+    } // getFeatureDefault(String):Boolean
+
+    /**
+     * Returns a list of property identifiers that are recognized by
+     * this component. This method may return null if no properties
+     * are recognized by this component.
+     */
+    public String[] getRecognizedProperties() {
+        return null;
+    } // getRecognizedProperties():String[]
+
+    /**
+     * Returns the default state for a property, or null if this
+     * component does not want to report a default value for this
+     * property.
+     */
+    public Object getPropertyDefault(String propertyId) {
+        return null;
+    } // getPropertyDefault(String):Object
+
+    /**
+     * Resets the component. The component can query the component manager
+     * about any features and properties that affect the operation of the
+     * component.
+     * <p>
+     * This default implementation does nothing.
+     *
+     * @param componentManager The component manager.
+     *
+     * @throws XMLConfigurationException Thrown by component on
+     *                                   initialization error.
+     */
+    public void reset(XMLComponentManager componentManager) 
+        throws XMLConfigurationException {
+    } // reset(XMLComponentManager)
+
+    /**
+     * Sets the state of a feature. This method is called by the component
+     * manager any time after reset when a feature changes state.
+     * <p>
+     * <strong>Note:</strong> Components should silently ignore features
+     * that do not affect the operation of the component.
+     *
+     * @param featureId The feature identifier.
+     * @param state     The state of the feature.
+     *
+     * @throws XMLConfigurationException Thrown for configuration error.
+     *                                   In general, components should
+     *                                   only throw this exception if
+     *                                   it is <strong>really</strong>
+     *                                   a critical error.
+     */
+    public void setFeature(String featureId, boolean state) 
+        throws XMLConfigurationException {
+    } // setFeature(String,boolean)
+
+    /**
+     * Sets the value of a property. This method is called by the component
+     * manager any time after reset when a property changes value.
+     * <p>
+     * <strong>Note:</strong> Components should silently ignore properties
+     * that do not affect the operation of the component.
+     *
+     * @param propertyId The property identifier.
+     * @param value      The value of the property.
+     *
+     * @throws XMLConfigurationException Thrown for configuration error.
+     *                                   In general, components should
+     *                                   only throw this exception if
+     *                                   it is <strong>really</strong>
+     *                                   a critical error.
+     */
+    public void setProperty(String propertyId, Object value) 
+        throws XMLConfigurationException {
+    } // setProperty(String,Object)
+
+    //
+    // Protected static methods
+    //
+
+    /** 
+     * Utility method for merging string arrays for recognized features
+     * and recognized properties.
+     * <p>
+     * If both arguments are the same array, or one of them is null, the
+     * other argument is returned unchanged (no copy is made). Otherwise a
+     * new array holding the entries of <code>array1</code> followed by
+     * those of <code>array2</code> is returned.
+     */
+    protected static String[] merge(String[] array1, String[] array2) {
+
+        // nothing to combine: hand back one of the inputs as is
+        if (array1 == array2 || array2 == null) {
+            return array1;
+        }
+        if (array1 == null) {
+            return array2;
+        }
+
+        // concatenate into a fresh array
+        int length1 = array1.length;
+        int length2 = array2.length;
+        String[] merged = new String[length1 + length2];
+        System.arraycopy(array1, 0, merged, 0, length1);
+        System.arraycopy(array2, 0, merged, length1, length2);
+        return merged;
+
+    } // merge(String[],String[]):String[]
+
+} // class DefaultFilter

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/ElementRemover.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/ElementRemover.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/ElementRemover.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,340 @@
+/* 
+ * (C) Copyright 2002-2005, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ */
+
+package org.cyberneko.html.filters;
+
+import java.util.Hashtable;
+
+import org.apache.xerces.xni.Augmentations;
+import org.apache.xerces.xni.NamespaceContext;
+import org.apache.xerces.xni.QName;
+import org.apache.xerces.xni.XMLAttributes;
+import org.apache.xerces.xni.XMLLocator;
+import org.apache.xerces.xni.XMLResourceIdentifier;
+import org.apache.xerces.xni.XMLString;
+import org.apache.xerces.xni.XNIException;
+
+/**
+ * This class is a document filter capable of removing specified
+ * elements from the processing stream. There are two options for
+ * processing document elements:
+ * <ul>
+ * <li>specifying those elements which should be accepted and,
+ *     optionally, which attributes of that element should be
+ *     kept; and
+ * <li>specifying those elements whose tags and content should be
+ *     completely removed from the event stream.
+ * </ul>
+ * <p>
+ * The first option allows the application to specify which elements
+ * appearing in the event stream should be accepted and, therefore,
+ * passed on to the next stage in the pipeline. All elements 
+ * <em>not</em> in the list of acceptable elements have their start 
+ * and end tags stripped from the event stream <em>unless</em> those
+ * elements appear in the list of elements to be removed. 
+ * <p>
+ * The second option allows the application to specify which elements
+ * should be completely removed from the event stream. When an element
+ * appears that is to be removed, the element's start and end tag as
+ * well as all of that element's content is removed from the event
+ * stream.
+ * <p>
+ * A common use of this filter would be to only allow rich-text
+ * and linking elements as well as the character content to pass 
+ * through the filter &mdash; all other elements would be stripped.
+ * The following code shows how to configure this filter to perform
+ * this task:
+ * <pre>
+ *  ElementRemover remover = new ElementRemover();
+ *  remover.acceptElement("b", null);
+ *  remover.acceptElement("i", null);
+ *  remover.acceptElement("u", null);
+ *  remover.acceptElement("a", new String[] { "href" });
+ * </pre>
+ * <p>
+ * However, this would still allow the text content of other
+ * elements to pass through, which may not be desirable. In order
+ * to further "clean" the input, the <code>removeElement</code>
+ * option can be used. The following piece of code adds the ability
+ * to completely remove any &lt;SCRIPT&gt; tags and content 
+ * from the stream.
+ * <pre>
+ *  remover.removeElement("script");
+ * </pre>
+ * <p>
+ * <strong>Note:</strong> 
+ * All text and accepted element children of a stripped element are 
+ * retained. To completely remove an element's content, use the
+ * <code>removeElement</code> method.
+ * <p>
+ * <strong>Note:</strong>
+ * Care should be taken when using this filter because the output
+ * may not be a well-balanced tree. Specifically, if the application
+ * removes the &lt;HTML&gt; element (with or without retaining its
+ * children), the resulting document event stream will no longer be
+ * well-formed.
+ *
+ * @author Andy Clark
+ *
+ * @version $Id: ElementRemover.java,v 1.5 2005/02/14 03:56:54 andyc Exp $
+ */
+public class ElementRemover
+    extends DefaultFilter {
+
+    //
+    // Constants
+    //
+
+    /** A "null" object. */
+    protected static final Object NULL = new Object();
+
+    //
+    // Data
+    //
+
+    // information
+
+    /** Accepted elements. */
+    protected Hashtable fAcceptedElements = new Hashtable();
+
+    /** Removed elements. */
+    protected Hashtable fRemovedElements = new Hashtable();
+
+    // state
+
+    /** The element depth. */
+    protected int fElementDepth;
+
+    /** The element depth at element removal. */
+    protected int fRemovalElementDepth;
+
+    //
+    // Public methods
+    //
+
+    /** 
+     * Specifies that the given element should be accepted and, optionally,
+     * which attributes of that element should be kept. The element name
+     * and the attribute names are stored in lower case.
+     *
+     * @param element The element to accept.
+     * @param attributes The list of attributes to be kept or null if no
+     *                   attributes should be kept for this element.
+     *
+     * @see #removeElement
+     */
+    public void acceptElement(String element, String[] attributes) {
+        Object key = element.toLowerCase();
+        Object value = NULL;
+        if (attributes != null) {
+            // Store a lower-cased copy: the caller's array is left
+            // untouched and later changes to it do not affect the filter.
+            String[] newarray = new String[attributes.length];
+            for (int i = 0; i < attributes.length; i++) {
+                newarray[i] = attributes[i].toLowerCase();
+            }
+            value = newarray;
+        }
+        fAcceptedElements.put(key, value);
+    } // acceptElement(String,String[])
+
+    /** 
+     * Specifies that the given element should be completely removed. If an
+     * element is encountered during processing that is on the remove list, 
+     * the element's start and end tags as well as all of the content
+     * contained within the element will be removed from the processing
+     * stream. The element name is stored in lower case.
+     *
+     * @param element The element to completely remove.
+     */
+    public void removeElement(String element) {
+        // only the key matters; NULL is a placeholder value
+        fRemovedElements.put(element.toLowerCase(), NULL);
+    } // removeElement(String)
+
+    //
+    // XMLDocumentHandler methods
+    //
+
+    // since Xerces-J 2.2.0
+
+    /**
+     * Start document. Resets the element depth and the removal depth
+     * before the event is passed on to the superclass.
+     */
+    public void startDocument(XMLLocator locator, String encoding, 
+                              NamespaceContext nscontext, Augmentations augs) 
+        throws XNIException {
+        // Integer.MAX_VALUE means that no removal is in progress
+        fRemovalElementDepth = Integer.MAX_VALUE;
+        fElementDepth = 0;
+        super.startDocument(locator, encoding, nscontext, augs);
+    } // startDocument(XMLLocator,String,NamespaceContext,Augmentations)
+
+    // old methods
+
+    /**
+     * Start document (old signature without a namespace context).
+     * Delegates to the four-argument form, passing null as the
+     * namespace context.
+     */
+    public void startDocument(XMLLocator locator, String encoding, Augmentations augs)
+        throws XNIException {
+        this.startDocument(locator, encoding, null, augs);
+    } // startDocument(XMLLocator,String,Augmentations)
+
+    /**
+     * Start prefix mapping. The event is passed on only while the element
+     * depth does not exceed the removal depth.
+     */
+    public void startPrefixMapping(String prefix, String uri, Augmentations augs)
+        throws XNIException {
+        if (fElementDepth > fRemovalElementDepth) {
+            return;
+        }
+        super.startPrefixMapping(prefix, uri, augs);
+    } // startPrefixMapping(String,String,Augmentations)
+
+    /**
+     * Start element. The event is forwarded only when the filter is not
+     * inside a removed element and <code>handleOpenTag</code> accepts the
+     * tag. The element depth is incremented in either case.
+     */
+    public void startElement(QName element, XMLAttributes attributes, Augmentations augs)
+        throws XNIException {
+        if (fElementDepth <= fRemovalElementDepth) {
+            if (handleOpenTag(element, attributes)) {
+                super.startElement(element, attributes, augs);
+            }
+        }
+        fElementDepth++;
+    } // startElement(QName,XMLAttributes,Augmentations)
+
+    /**
+     * Empty element. The event is forwarded only when the filter is not
+     * inside a removed element and <code>handleOpenTag</code> accepts the
+     * tag. The element depth is left unchanged because an empty element
+     * has no matching end element event.
+     */
+    public void emptyElement(QName element, XMLAttributes attributes, Augmentations augs)
+        throws XNIException {
+        if (fElementDepth > fRemovalElementDepth) {
+            return;
+        }
+        // handleOpenTag() records the current depth as the removal depth
+        // when the element is on the remove list. An empty element has no
+        // content and no endElement() call that would clear that mark, so
+        // the previous value is restored here; otherwise the content and
+        // end tag of the next element opened at this depth would be
+        // dropped by mistake.
+        final int removalDepth = fRemovalElementDepth;
+        final boolean accepted = handleOpenTag(element, attributes);
+        fRemovalElementDepth = removalDepth;
+        if (accepted) {
+            super.emptyElement(element, attributes, augs);
+        }
+    } // emptyElement(QName,XMLAttributes,Augmentations)
+
+    /** Comment. Dropped while inside a removed element. */
+    public void comment(XMLString text, Augmentations augs)
+        throws XNIException {
+        if (fElementDepth > fRemovalElementDepth) {
+            return;
+        }
+        super.comment(text, augs);
+    } // comment(XMLString,Augmentations)
+
+    /** Processing instruction. Dropped while inside a removed element. */
+    public void processingInstruction(String target, XMLString data, Augmentations augs)
+        throws XNIException {
+        if (fElementDepth > fRemovalElementDepth) {
+            return;
+        }
+        super.processingInstruction(target, data, augs);
+    } // processingInstruction(String,XMLString,Augmentations)
+
+    /** Characters. Dropped while inside a removed element. */
+    public void characters(XMLString text, Augmentations augs)
+        throws XNIException {
+        if (fElementDepth > fRemovalElementDepth) {
+            return;
+        }
+        super.characters(text, augs);
+    } // characters(XMLString,Augmentations)
+
+    /** Ignorable whitespace. Dropped while inside a removed element. */
+    public void ignorableWhitespace(XMLString text, Augmentations augs)
+        throws XNIException {
+        if (fElementDepth > fRemovalElementDepth) {
+            return;
+        }
+        super.ignorableWhitespace(text, augs);
+    } // ignorableWhitespace(XMLString,Augmentations)
+
+    /** Start general entity. Dropped while inside a removed element. */
+    public void startGeneralEntity(String name, XMLResourceIdentifier id, String encoding, Augmentations augs)
+        throws XNIException {
+        if (fElementDepth > fRemovalElementDepth) {
+            return;
+        }
+        super.startGeneralEntity(name, id, encoding, augs);
+    } // startGeneralEntity(String,XMLResourceIdentifier,String,Augmentations)
+
+    /** Text declaration. Dropped while inside a removed element. */
+    public void textDecl(String version, String encoding, Augmentations augs)
+        throws XNIException {
+        if (fElementDepth > fRemovalElementDepth) {
+            return;
+        }
+        super.textDecl(version, encoding, augs);
+    } // textDecl(String,String,Augmentations)
+
+    /** End general entity. Dropped while inside a removed element. */
+    public void endGeneralEntity(String name, Augmentations augs)
+        throws XNIException {
+        if (fElementDepth > fRemovalElementDepth) {
+            return;
+        }
+        super.endGeneralEntity(name, augs);
+    } // endGeneralEntity(String,Augmentations)
+
+    /** Start CDATA section. Dropped while inside a removed element. */
+    public void startCDATA(Augmentations augs) throws XNIException {
+        if (fElementDepth > fRemovalElementDepth) {
+            return;
+        }
+        super.startCDATA(augs);
+    } // startCDATA(Augmentations)
+
+    /** End CDATA section. Dropped while inside a removed element. */
+    public void endCDATA(Augmentations augs) throws XNIException {
+        if (fElementDepth > fRemovalElementDepth) {
+            return;
+        }
+        super.endCDATA(augs);
+    } // endCDATA(Augmentations)
+
+    /**
+     * End element. The event is forwarded only when the filter is not
+     * inside a removed element and the element is on the accept list.
+     * The element depth is decremented in either case, and removal mode
+     * ends once the depth is back at the level where removal started.
+     */
+    public void endElement(QName element, Augmentations augs)
+        throws XNIException {
+        final boolean outsideRemoved = fElementDepth <= fRemovalElementDepth;
+        if (outsideRemoved && elementAccepted(element.rawname)) {
+            super.endElement(element, augs);
+        }
+        // closing the element that triggered the removal ends the removal
+        if (--fElementDepth == fRemovalElementDepth) {
+            fRemovalElementDepth = Integer.MAX_VALUE;
+        }
+    } // endElement(QName,Augmentations)
+
+    /** End prefix mapping. Dropped while inside a removed element. */
+    public void endPrefixMapping(String prefix, Augmentations augs)
+        throws XNIException {
+        if (fElementDepth > fRemovalElementDepth) {
+            return;
+        }
+        super.endPrefixMapping(prefix, augs);
+    } // endPrefixMapping(String,Augmentations)
+
+    //
+    // Protected methods
+    //
+
+    /**
+     * Returns true if the specified element is accepted. The name is
+     * lower-cased before it is looked up in the accepted elements table.
+     */
+    protected boolean elementAccepted(String element) {
+        return fAcceptedElements.containsKey(element.toLowerCase());
+    } // elementAccepted(String):boolean
+
+    /**
+     * Returns true if the specified element should be removed. The name
+     * is lower-cased before it is looked up in the removed elements table.
+     */
+    protected boolean elementRemoved(String element) {
+        return fRemovedElements.containsKey(element.toLowerCase());
+    } // elementRemoved(String):boolean
+
+    /**
+     * Handles an open tag.
+     * <p>
+     * For an accepted element, every attribute whose lower-cased name is
+     * not in the array stored for that element is removed; if the stored
+     * value is the <code>NULL</code> marker, all attributes are removed.
+     * For an element that is not accepted but is on the remove list, the
+     * current element depth is recorded as the removal depth.
+     *
+     * @param element    The name of the element being opened.
+     * @param attributes The attributes of the element; modified in place.
+     *
+     * @return True if the element is accepted and its tag should be
+     *         passed on, false otherwise.
+     */
+    protected boolean handleOpenTag(QName element, XMLAttributes attributes) {
+        if (!elementAccepted(element.rawname)) {
+            if (elementRemoved(element.rawname)) {
+                // everything deeper than this level is dropped from now on
+                fRemovalElementDepth = fElementDepth;
+            }
+            return false;
+        }
+
+        Object allowed = fAcceptedElements.get(element.rawname.toLowerCase());
+        if (allowed == NULL) {
+            attributes.removeAllAttributes();
+            return true;
+        }
+
+        String[] allowedNames = (String[])allowed;
+        int remaining = attributes.getLength();
+        int index = 0;
+        while (index < remaining) {
+            String candidate = attributes.getQName(index).toLowerCase();
+            boolean keep = false;
+            for (int k = 0; k < allowedNames.length && !keep; k++) {
+                keep = allowedNames[k].equals(candidate);
+            }
+            if (keep) {
+                index++;
+            }
+            else {
+                // the following attributes shift down, so stay on this index
+                attributes.removeAttributeAt(index);
+                remaining--;
+            }
+        }
+        return true;
+    } // handleOpenTag(QName,XMLAttributes):boolean
+
+} // class DefaultFilter

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/Identity.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/Identity.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/Identity.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,99 @@
+/* 
+ * (C) Copyright 2002-2005, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ */
+
+package org.cyberneko.html.filters;
+
+import org.cyberneko.html.HTMLConfiguration;
+import org.cyberneko.html.HTMLEventInfo;
+
+import org.apache.xerces.xni.Augmentations;
+import org.apache.xerces.xni.QName;
+import org.apache.xerces.xni.XMLAttributes;
+import org.apache.xerces.xni.XNIException;
+import org.apache.xerces.xni.parser.XMLDocumentFilter;
+import org.apache.xerces.xni.parser.XMLInputSource;
+import org.apache.xerces.xni.parser.XMLParserConfiguration;
+
+/**
+ * This filter performs the identity operation on the original
+ * document event stream generated by the HTML scanner by removing
+ * element events that are synthesized by the tag balancer. This
+ * operation is essentially the same as turning off tag-balancing in
+ * the parser. However, this filter is useful when you want the tag
+ * balancer to report "errors" but do not want the synthesized
+ * events in the output.
+ * <p>
+ * <strong>Note:</strong>
+ * This filter requires the augmentations feature to be turned on.
+ * For example:
+ * <pre>
+ *  XMLParserConfiguration parser = new HTMLConfiguration();
+ *  parser.setFeature("http://cyberneko.org/html/features/augmentations", true);
+ * </pre>
+ * <p>
+ * <strong>Note:</strong>
+ * This isn't <em>exactly</em> the identity transform because the
+ * element and attribute names may have been modified from the
+ * original document. For example, by default, NekoHTML converts
+ * element names to upper-case and attribute names to lower-case.
+ *
+ * @author Andy Clark
+ *
+ * @version $Id: Identity.java,v 1.4 2005/02/14 03:56:54 andyc Exp $
+ */
+public class Identity
+    extends DefaultFilter {
+
+    //
+    // Constants
+    //
+
+    /** Augmentations feature identifier. */
+    protected static final String AUGMENTATIONS = "http://cyberneko.org/html/features/augmentations";
+
+    /** Filters property identifier. */
+    protected static final String FILTERS = "http://cyberneko.org/html/properties/filters";
+
+    //
+    // XMLDocumentHandler methods
+    //
+
+    /** Start element. Synthesized start tags are not passed on. */
+    public void startElement(QName element, XMLAttributes attributes,
+                             Augmentations augs) throws XNIException {
+        if (augs != null && synthesized(augs)) {
+            return;
+        }
+        super.startElement(element, attributes, augs);
+    } // startElement(QName,XMLAttributes,Augmentations)
+
+    /** Empty element. Synthesized empty elements are not passed on. */
+    public void emptyElement(QName element, XMLAttributes attributes,
+                             Augmentations augs) throws XNIException {
+        if (augs != null && synthesized(augs)) {
+            return;
+        }
+        super.emptyElement(element, attributes, augs);
+    } // emptyElement(QName,XMLAttributes,Augmentations)
+
+    /** End element. Synthesized end tags are not passed on. */
+    public void endElement(QName element, Augmentations augs)
+        throws XNIException {
+        if (augs != null && synthesized(augs)) {
+            return;
+        }
+        super.endElement(element, augs);
+    } // endElement(QName,Augmentations)
+
+    //
+    // Protected static methods
+    //
+
+    /**
+     * Returns true if the information provided is synthesized. An event
+     * without an event info item under the augmentations key is treated
+     * as not synthesized.
+     *
+     * @param augs The augmentations of the event; must not be null.
+     */
+    protected static boolean synthesized(Augmentations augs) {
+        HTMLEventInfo info = (HTMLEventInfo)augs.getItem(AUGMENTATIONS);
+        return info != null && info.isSynthesized();
+    } // synthesized(Augmentations):boolean
+
+} // class Identity

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/NamespaceBinder.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/NamespaceBinder.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/NamespaceBinder.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,693 @@
+/* 
+ * (C) Copyright 2004-2005, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ */
+
+package org.cyberneko.html.filters;
+
+import org.cyberneko.html.HTMLElements;
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.Enumeration;
+import java.util.Vector;
+
+import org.apache.xerces.xni.Augmentations;
+import org.apache.xerces.xni.NamespaceContext;
+import org.apache.xerces.xni.QName;
+import org.apache.xerces.xni.XMLAttributes;
+import org.apache.xerces.xni.XMLLocator;
+import org.apache.xerces.xni.XNIException;
+import org.apache.xerces.xni.parser.XMLComponentManager;
+import org.apache.xerces.xni.parser.XMLConfigurationException;
+
+/**
+ * This filter binds namespaces if namespace processing is turned on
+ * by setting the feature "http://xml.org/sax/features/namespaces"
+ * to <code>true</code>.
+ * <p>
+ * This filter recognizes the following features:
+ * <ul>
+ * <li>http://xml.org/sax/features/namespaces
+ * <li>http://cyberneko.org/html/features/override-namespaces
+ * <li>http://cyberneko.org/html/features/insert-namespaces
+ * </ul>
+ * 
+ * @author Andy Clark
+ * 
+ * @version $Id: NamespaceBinder.java,v 1.8 2005/05/30 00:19:28 andyc Exp $
+ */
+public class NamespaceBinder
+    extends DefaultFilter {
+
+    //
+    // Constants
+    //
+
+    // namespace uris
+
+    /** XHTML 1.0 namespace URI (http://www.w3.org/1999/xhtml). */
+    public static final String XHTML_1_0_URI = "http://www.w3.org/1999/xhtml";
+
+    /** XML namespace URI (http://www.w3.org/XML/1998/namespace). */
+    public static final String XML_URI = "http://www.w3.org/XML/1998/namespace";
+
+    /** XMLNS namespace URI (http://www.w3.org/2000/xmlns/). */
+    public static final String XMLNS_URI = "http://www.w3.org/2000/xmlns/";
+
+    // features
+
+    /** Namespaces. */
+    protected static final String NAMESPACES = "http://xml.org/sax/features/namespaces";
+
+    /** Override namespace binding URI. */
+    protected static final String OVERRIDE_NAMESPACES = "http://cyberneko.org/html/features/override-namespaces";
+
+    /** Insert namespace binding URIs. */
+    protected static final String INSERT_NAMESPACES = "http://cyberneko.org/html/features/insert-namespaces";
+
+    /** Recognized features. */
+    private static final String[] RECOGNIZED_FEATURES = { 
+        NAMESPACES, 
+        OVERRIDE_NAMESPACES,
+        INSERT_NAMESPACES,
+    };
+
+    /** Feature defaults. */
+    private static final Boolean[] FEATURE_DEFAULTS = {
+        null,
+        Boolean.FALSE,
+        Boolean.FALSE,
+    };
+
+    // properties
+
+    /** Modify HTML element names: { "upper", "lower", "default" }. */
+    protected static final String NAMES_ELEMS = "http://cyberneko.org/html/properties/names/elems";
+
+    /** Modify HTML attribute names: { "upper", "lower", "default" }. */
+    protected static final String NAMES_ATTRS = "http://cyberneko.org/html/properties/names/attrs";
+
+    /** Namespaces URI. */
+    protected static final String NAMESPACES_URI = "http://cyberneko.org/html/properties/namespaces-uri";
+
+    /** Recognized properties. */
+    private static final String[] RECOGNIZED_PROPERTIES = new String[] {
+        NAMES_ELEMS,
+        NAMES_ATTRS,
+        NAMESPACES_URI,
+    };
+
+    /** Property defaults. */
+    private static final Object[] PROPERTY_DEFAULTS = {
+        null,
+        null,
+        XHTML_1_0_URI,
+    };
+
+    // modify HTML names
+
+    /** Don't modify HTML names. */
+    protected static final short NAMES_NO_CHANGE = 0;
+
+    /** Uppercase HTML names. */
+    protected static final short NAMES_UPPERCASE = 1;
+
+    /** Lowercase HTML names. */
+    protected static final short NAMES_LOWERCASE = 2;
+
+    //
+    // Data
+    //
+
+    // features
+
+    /** Namespaces. */
+    protected boolean fNamespaces;
+
+    /** Namespace prefixes. */
+    protected boolean fNamespacePrefixes;
+
+    /** Override namespaces. */
+    protected boolean fOverrideNamespaces;
+
+    /** Insert namespaces. */
+    protected boolean fInsertNamespaces;
+
+    // properties
+
+    /** Modify HTML element names. */
+    protected short fNamesElems;
+
+    /** Modify HTML attribute names. */
+    protected short fNamesAttrs;
+
+    /** Namespaces URI. */
+    protected String fNamespacesURI;
+
+    // state
+
+    /** Namespace context. */
+    protected final NamespaceSupport fNamespaceContext = new NamespaceSupport();
+
+    // temp vars
+
+    /** QName. */
+    private final QName fQName = new QName();
+
+    //
+    // HTMLComponent methods
+    //
+
+    /**
+     * Returns a list of feature identifiers that are recognized by
+     * this component. This method may return null if no features
+     * are recognized by this component.
+     */
+    public String[] getRecognizedFeatures() {
+        return merge(super.getRecognizedFeatures(), RECOGNIZED_FEATURES);
+    } // getRecognizedFeatures():String[]
+
+    /**
+     * Returns the default state for a feature, or null if this
+     * component does not want to report a default value for this
+     * feature.
+     */
+    public Boolean getFeatureDefault(String featureId) {
+        for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) {
+            if (RECOGNIZED_FEATURES[i].equals(featureId)) {
+                return FEATURE_DEFAULTS[i];
+            }
+        }
+        return super.getFeatureDefault(featureId);
+    } // getFeatureDefault(String):Boolean
+
+    /**
+     * Returns a list of property identifiers that are recognized by
+     * this component. This method may return null if no properties
+     * are recognized by this component.
+     */
+    public String[] getRecognizedProperties() {
+        return merge(super.getRecognizedProperties(), RECOGNIZED_PROPERTIES);
+    } // getRecognizedProperties():String[]
+
+    /**
+     * Returns the default value for a property, or null if this
+     * component does not want to report a default value for this
+     * property.
+     */
+    public Object getPropertyDefault(String propertyId) {
+        for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) {
+            if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) {
+                return PROPERTY_DEFAULTS[i];
+            }
+        }
+        return super.getPropertyDefault(propertyId);
+    } // getPropertyDefault(String):Object
+
+    /**
+     * Resets the component. The feature and property settings that
+     * affect this filter are read from the component manager and the
+     * namespace context is reset.
+     *
+     * @param manager The component manager.
+     *
+     * @throws XMLConfigurationException Thrown by component on
+     *                                   initialization error.
+     */
+    public void reset(XMLComponentManager manager)
+        throws XMLConfigurationException {
+        super.reset(manager);
+
+        // features
+        fNamespaces = manager.getFeature(NAMESPACES);
+        fOverrideNamespaces = manager.getFeature(OVERRIDE_NAMESPACES);
+        fInsertNamespaces = manager.getFeature(INSERT_NAMESPACES);
+
+        // properties; String.valueOf() maps a null value to "null"
+        Object elemsMode = manager.getProperty(NAMES_ELEMS);
+        fNamesElems = getNamesValue(String.valueOf(elemsMode));
+        Object attrsMode = manager.getProperty(NAMES_ATTRS);
+        fNamesAttrs = getNamesValue(String.valueOf(attrsMode));
+        Object namespacesURI = manager.getProperty(NAMESPACES_URI);
+        fNamespacesURI = String.valueOf(namespacesURI);
+
+        // initialize state
+        fNamespaceContext.reset();
+
+    } // reset(XMLComponentManager)
+
+    //
+    // XMLDocumentHandler methods
+    //
+
+    /**
+     * Start document. The namespace context received from upstream is
+     * ignored; the context maintained by this filter is passed on.
+     */
+    public void startDocument(XMLLocator locator, String encoding,
+                              NamespaceContext nscontext, Augmentations augs)
+        throws XNIException {
+
+        // NOTE: using own namespace context
+        final NamespaceContext ownContext = fNamespaceContext;
+        super.startDocument(locator, encoding, ownContext, augs);
+
+    } // startDocument(XMLLocator,String,NamespaceContext,Augmentations)
+
+    /**
+     * Start element. When namespace processing is on, a new namespace
+     * context is pushed, the element and attribute names are bound and
+     * every prefix declared on this element is reported to the document
+     * handler before the event itself is passed on.
+     */
+    public void startElement(QName element, XMLAttributes attrs,
+                             Augmentations augs) throws XNIException {
+
+        if (fNamespaces) {
+            // open a scope for this element and bind its names
+            fNamespaceContext.pushContext();
+            bindNamespaces(element, attrs);
+
+            // startPrefixMapping is looked up reflectively; presumably
+            // because not every supported handler version declares it
+            // (TODO confirm). Any reflection failure is ignored.
+            final int declared = fNamespaceContext.getDeclaredPrefixCount();
+            if (fDocumentHandler != null && declared > 0) {
+                try {
+                    Method startMapping = fDocumentHandler.getClass().getMethod(
+                        "startPrefixMapping",
+                        new Class[] { String.class, String.class });
+                    int index = 0;
+                    while (index < declared) {
+                        String prefix = fNamespaceContext.getDeclaredPrefixAt(index);
+                        String uri = fNamespaceContext.getURI(prefix);
+                        startMapping.invoke(fDocumentHandler,
+                                            new Object[] { prefix, uri });
+                        index++;
+                    }
+                }
+                catch (NoSuchMethodException e) {
+                    // ignore
+                }
+                catch (InvocationTargetException e) {
+                    // ignore
+                }
+                catch (IllegalAccessException e) {
+                    // ignore
+                }
+            }
+        }
+
+        // perform default handling
+        super.startElement(element, attrs, augs);
+
+    } // startElement(QName,XMLAttributes,Augmentations)
+
+    /**
+     * Empty element. When namespace processing is on, the element gets
+     * its own namespace context: the context is pushed and the declared
+     * prefixes are reported before the event is passed on, and the
+     * prefixes are ended (in reverse order) and the context is popped
+     * right afterwards.
+     */
+    public void emptyElement(QName element, XMLAttributes attrs,
+                             Augmentations augs) throws XNIException {
+
+        if (fNamespaces) {
+            // open a scope for this element and bind its names
+            fNamespaceContext.pushContext();
+            bindNamespaces(element, attrs);
+
+            // startPrefixMapping is looked up reflectively; any
+            // reflection failure is ignored
+            final int declared = fNamespaceContext.getDeclaredPrefixCount();
+            if (fDocumentHandler != null && declared > 0) {
+                try {
+                    Method startMapping = fDocumentHandler.getClass().getMethod(
+                        "startPrefixMapping",
+                        new Class[] { String.class, String.class });
+                    int index = 0;
+                    while (index < declared) {
+                        String prefix = fNamespaceContext.getDeclaredPrefixAt(index);
+                        String uri = fNamespaceContext.getURI(prefix);
+                        startMapping.invoke(fDocumentHandler,
+                                            new Object[] { prefix, uri });
+                        index++;
+                    }
+                }
+                catch (NoSuchMethodException e) {
+                    // ignore
+                }
+                catch (InvocationTargetException e) {
+                    // ignore
+                }
+                catch (IllegalAccessException e) {
+                    // ignore
+                }
+            }
+        }
+
+        // perform default handling
+        super.emptyElement(element, attrs, augs);
+
+        if (fNamespaces) {
+            // end the prefix mappings, last declared first
+            final int declared = fNamespaceContext.getDeclaredPrefixCount();
+            if (fDocumentHandler != null && declared > 0) {
+                try {
+                    Method endMapping = fDocumentHandler.getClass().getMethod(
+                        "endPrefixMapping", new Class[] { String.class });
+                    int index = declared - 1;
+                    while (index >= 0) {
+                        String prefix = fNamespaceContext.getDeclaredPrefixAt(index);
+                        endMapping.invoke(fDocumentHandler,
+                                          new Object[] { prefix });
+                        index--;
+                    }
+                }
+                catch (NoSuchMethodException e) {
+                    // ignore
+                }
+                catch (InvocationTargetException e) {
+                    // ignore
+                }
+                catch (IllegalAccessException e) {
+                    // ignore
+                }
+            }
+
+            // pop context
+            fNamespaceContext.popContext();
+        }
+
+    } // emptyElement(QName,XMLAttributes,Augmentations)
+
+    /**
+     * End element. When namespace processing is on, the element name is
+     * bound before the event is passed on; afterwards the prefixes
+     * declared in the current context are ended (in reverse order) and
+     * the context is popped.
+     */
+    public void endElement(QName element, Augmentations augs)
+        throws XNIException {
+
+        // bind the element name only; end tags carry no attributes
+        if (fNamespaces) {
+            bindNamespaces(element, null);
+        }
+
+        // perform default handling
+        super.endElement(element, augs);
+
+        if (fNamespaces) {
+            // endPrefixMapping is looked up reflectively; any reflection
+            // failure is ignored
+            final int declared = fNamespaceContext.getDeclaredPrefixCount();
+            if (fDocumentHandler != null && declared > 0) {
+                try {
+                    Method endMapping = fDocumentHandler.getClass().getMethod(
+                        "endPrefixMapping", new Class[] { String.class });
+                    int index = declared - 1;
+                    while (index >= 0) {
+                        String prefix = fNamespaceContext.getDeclaredPrefixAt(index);
+                        endMapping.invoke(fDocumentHandler,
+                                          new Object[] { prefix });
+                        index--;
+                    }
+                }
+                catch (NoSuchMethodException e) {
+                    // ignore
+                }
+                catch (InvocationTargetException e) {
+                    // ignore
+                }
+                catch (IllegalAccessException e) {
+                    // ignore
+                }
+            }
+
+            // pop context
+            fNamespaceContext.popContext();
+        }
+
+    } // endElement(QName,Augmentations)
+
+    //
+    // Protected static methods
+    //
+
+    /** Splits a qualified name. */
+    protected static void splitQName(QName qname) {
+        int index = qname.rawname.indexOf(':');
+        if (index != -1) {
+            qname.prefix = qname.rawname.substring(0,index);
+            qname.localpart  = qname.rawname.substring(index+1);
+        }
+    } // splitQName(QName)
+
+    /**
+     * Converts HTML names string value to constant value. The values
+     * "lower" and "upper" select the matching mode; any other value
+     * means that names are not changed.
+     *
+     * @see #NAMES_NO_CHANGE
+     * @see #NAMES_LOWERCASE
+     * @see #NAMES_UPPERCASE
+     */
+    protected static final short getNamesValue(String value) {
+        short mode = NAMES_NO_CHANGE;
+        if (value.equals("lower")) {
+            mode = NAMES_LOWERCASE;
+        }
+        else if (value.equals("upper")) {
+            mode = NAMES_UPPERCASE;
+        }
+        return mode;
+    } // getNamesValue(String):short
+
+    /**
+     * Modifies the given name based on the specified mode: upper-cased
+     * for NAMES_UPPERCASE, lower-cased for NAMES_LOWERCASE and returned
+     * unchanged for every other mode.
+     */
+    protected static final String modifyName(String name, short mode) {
+        if (mode == NAMES_UPPERCASE) {
+            return name.toUpperCase();
+        }
+        if (mode == NAMES_LOWERCASE) {
+            return name.toLowerCase();
+        }
+        return name;
+    } // modifyName(String,short):String
+
+    //
+    // Protected methods
+    //
+
+    /**
+     * Binds namespaces. The method
+     * <ol>
+     * <li>splits the element name into prefix and local part,
+     * <li>declares every <code>xmlns</code> / <code>xmlns:*</code>
+     *     attribute in the current namespace context (re-casing the
+     *     attribute name on the way),
+     * <li>binds the element to the URI of its prefix,
+     * <li>optionally inserts a missing namespace declaration for known
+     *     HTML elements and starts over, and
+     * <li>binds the attribute names.
+     * </ol>
+     *
+     * @param element The element name; its prefix, localpart and uri
+     *                fields are updated in place.
+     * @param attrs   The attributes of the element, or null when there
+     *                are none (end elements are bound this way).
+     */
+    protected void bindNamespaces(QName element, XMLAttributes attrs) {
+
+        // split element qname
+        splitQName(element);
+
+        // declare namespace prefixes
+        int attrCount = attrs != null ? attrs.getLength() : 0;
+        for (int i = attrCount - 1; i >= 0; i--) {
+            attrs.getName(i, fQName);
+            String aname = fQName.rawname;
+            String ANAME = aname.toUpperCase();
+            if (ANAME.startsWith("XMLNS:") || ANAME.equals("XMLNS")) {
+                // "xmlns:foo" declares the prefix "foo" whereas a plain
+                // "xmlns" declares the default namespace
+                boolean prefixed = aname.length() > 5;
+
+                // get parts
+                String aprefix = prefixed ? aname.substring(0,5) : null;
+                String alocal = prefixed ? aname.substring(6) : aname;
+                String avalue = attrs.getValue(i);
+                
+                // re-case parts and set them back into attributes
+                if (prefixed) {
+                    aprefix = modifyName(aprefix, NAMES_LOWERCASE);
+                    alocal = modifyName(alocal, fNamesElems);
+                    aname = aprefix + ':' + alocal;
+                }
+                else {
+                    alocal = modifyName(alocal, NAMES_LOWERCASE);
+                    aname = alocal;
+                }
+                fQName.setValues(aprefix, alocal, aname, null);
+                attrs.setName(i, fQName);
+
+                // declare prefix; decided by the explicit flag instead of
+                // comparing the String references of alocal and aname
+                String prefix = prefixed ? alocal : "";
+                String uri = avalue.length() > 0 ? avalue : null;
+                if (fOverrideNamespaces && 
+                    prefix.equals(element.prefix) &&
+                    HTMLElements.getElement(element.localpart, null) != null) {
+                    uri = fNamespacesURI;
+                }
+                fNamespaceContext.declarePrefix(prefix, uri);
+            }
+        }
+
+        // bind element
+        String prefix = element.prefix != null ? element.prefix : "";
+        element.uri = fNamespaceContext.getURI(prefix);
+        // REVISIT: The prefix of a qualified element name that is
+        //          bound to a namespace is passed (as recent as
+        //          Xerces 2.4.0) as "" for start elements and null
+        //          for end elements. Why? One of them is a bug,
+        //          clearly. -Ac
+        if (element.uri != null && element.prefix == null) {
+            element.prefix = "";
+        }
+
+        // do we need to insert namespace bindings?
+        // NOTE: A binding can only be inserted when there is an attribute
+        //       list to add it to. End elements are bound with null
+        //       attributes, which used to cause a NullPointerException
+        //       here when the element prefix was unbound.
+        if (fInsertNamespaces && attrs != null &&
+            HTMLElements.getElement(element.localpart,null) != null) {
+            if (element.prefix == null || 
+                fNamespaceContext.getURI(element.prefix) == null) {
+                String xmlns = "xmlns" + ((element.prefix != null)
+                             ? ":"+element.prefix : "");
+                fQName.setValues(null, xmlns, xmlns, null);
+                attrs.addAttribute(fQName, "CDATA", fNamespacesURI);
+                // start over so that the new declaration is processed
+                bindNamespaces(element, attrs);
+                return;
+            }
+        }
+
+        // bind attributes
+        attrCount = attrs != null ? attrs.getLength() : 0;
+        for (int i = 0; i < attrCount; i++) {
+            attrs.getName(i, fQName);
+            splitQName(fQName);
+            prefix = !fQName.rawname.equals("xmlns")
+                   ? (fQName.prefix != null ? fQName.prefix : "") : "xmlns";
+            // PATCH: Joseph Walton
+            if (!prefix.equals("")) {
+                fQName.uri = prefix.equals("xml") ? XML_URI : fNamespaceContext.getURI(prefix);
+            }
+            // NOTE: You would think the xmlns namespace would be handled
+            //       by NamespaceSupport but it's not. -Ac 
+            if (prefix.equals("xmlns") && fQName.uri == null) {
+                fQName.uri = XMLNS_URI;
+            }
+            attrs.setName(i, fQName);
+        }
+
+    } // bindNamespaces(QName,XMLAttributes)
+
+    //
+    // Classes
+    //
+
+    /**
+     * This namespace context object implements the old and new XNI 
+     * <code>NamespaceContext</code> interface methods so that it can
+     * be used across all versions of Xerces2.
+     * <p>
+     * The bindings are kept in one flat array of entries.
+     * <code>fLevels[n]</code> is the number of entries in use while
+     * context <code>n</code> is the current one, so the bindings declared
+     * in the current context are the entries from index
+     * <code>fLevels[fTop-1]</code> (inclusive) up to index
+     * <code>fLevels[fTop]</code> (exclusive).
+     */
+    public static class NamespaceSupport
+        implements NamespaceContext {
+
+        //
+        // Data
+        //
+
+        /** Top of the levels list. */
+        protected int fTop = 0;
+
+        /** The levels of the entries. */
+        protected int[] fLevels = new int[10];
+
+        /** The entries. */
+        protected Entry[] fEntries = new Entry[10];
+
+        //
+        // Constructors
+        //
+
+        /**
+         * Default constructor. Opens the first context and declares the
+         * <code>xml</code> and <code>xmlns</code> prefixes in it.
+         */
+        public NamespaceSupport() {
+            pushContext();
+            declarePrefix("xml", NamespaceContext.XML_URI);
+            declarePrefix("xmlns", NamespaceContext.XMLNS_URI);
+        } // <init>()
+
+        //
+        // NamespaceContext methods
+        //
+
+        // since Xerces 2.0.0-beta2 (old XNI namespaces)
+
+        /**
+         * Get URI. The most recently declared binding of the prefix
+         * wins.
+         *
+         * @return The URI bound to the prefix (which may be null), or
+         *         null if the prefix is not declared.
+         */
+        public String getURI(String prefix) {
+            for (int i = fLevels[fTop]-1; i >= 0; i--) {
+                Entry entry = fEntries[i];
+                if (entry.prefix.equals(prefix)) {
+                    return entry.uri;
+                }
+            }
+            return null;
+        } // getURI(String):String
+
+        /** Get the number of prefixes declared in the current context. */
+        public int getDeclaredPrefixCount() {
+            return fLevels[fTop] - fLevels[fTop-1];
+        } // getDeclaredPrefixCount():int
+
+        /**
+         * Get declared prefix at.
+         *
+         * @param index The index relative to the first declaration of
+         *              the current context.
+         */
+        public String getDeclaredPrefixAt(int index) {
+            return fEntries[fLevels[fTop-1] + index].prefix;
+        } // getDeclaredPrefixAt(int):String
+
+        /** Get parent context. This implementation returns itself. */
+        public NamespaceContext getParentContext() {
+            return this;
+        } // getParentContext():NamespaceContext
+
+        // since Xerces #.#.# (new XNI namespaces)
+
+        /**
+         * Reset. Makes context 1 the current context and sets its entry
+         * count to that of level 0.
+         */
+        public void reset() {
+            // NOTE(review): nothing visible here ever stores into
+            //               fLevels[0], so this presumably also discards
+            //               the "xml" and "xmlns" bindings declared by
+            //               the constructor -- confirm this is intended.
+            fTop = 1;
+            fLevels[fTop] = fLevels[fTop-1];
+        } // reset()
+
+        /**
+         * Push context. The new context starts out without declarations
+         * of its own; the levels array is grown when it is full.
+         */
+        public void pushContext() {
+            if (++fTop == fLevels.length) {
+                int[] iarray = new int[fLevels.length + 10];
+                System.arraycopy(fLevels, 0, iarray, 0, fLevels.length);
+                fLevels = iarray;
+            }
+            fLevels[fTop] = fLevels[fTop-1];
+        } // pushContext()
+
+        /** Pop context. */
+        public void popContext() {
+            fTop--;
+        } // popContext()
+
+        /**
+         * Declare prefix.
+         *
+         * @param prefix The prefix to declare.
+         * @param uri    The URI to bind to the prefix; may be null.
+         *
+         * @return False if the prefix is already declared in the current
+         *         context (the existing binding is kept), true if the
+         *         binding was added.
+         */
+        public boolean declarePrefix(String prefix, String uri) {
+            int count = getDeclaredPrefixCount();
+            for (int i = 0; i < count; i++) {
+                String dprefix = getDeclaredPrefixAt(i);
+                if (dprefix.equals(prefix)) {
+                    return false;
+                }
+            }
+            Entry entry = new Entry(prefix, uri);
+            if (fLevels[fTop] == fEntries.length) {
+                Entry[] earray = new Entry[fEntries.length + 10];
+                System.arraycopy(fEntries, 0, earray, 0, fEntries.length);
+                fEntries = earray;
+            }
+            fEntries[fLevels[fTop]++] = entry;
+            return true;
+        } // declarePrefix(String,String):boolean
+
+        /**
+         * Get prefix. The most recently declared binding of the URI
+         * wins.
+         *
+         * @return A prefix bound to the URI, or null if there is none.
+         */
+        public String getPrefix(String uri) {
+            for (int i = fLevels[fTop]-1; i >= 0; i--) {
+                Entry entry = fEntries[i];
+                // prefixes can be declared with a null URI; skip those
+                // entries instead of failing on them
+                if (entry.uri != null && entry.uri.equals(uri)) {
+                    return entry.prefix;
+                }
+            }
+            return null;
+        } // getPrefix(String):String
+
+        /**
+         * Get all prefixes. Only the entries from index
+         * <code>fLevels[1]</code> up to the current top are reported,
+         * each prefix once.
+         */
+        public Enumeration getAllPrefixes() {
+            Vector prefixes = new Vector();
+            for (int i = fLevels[1]; i < fLevels[fTop]; i++) {
+                String prefix = fEntries[i].prefix;
+                if (!prefixes.contains(prefix)) {
+                    prefixes.addElement(prefix);
+                }
+            }
+            return prefixes.elements();
+        } // getAllPrefixes():Enumeration
+
+        //
+        // Classes
+        //
+
+        /** A namespace binding entry. */
+        static class Entry {
+
+            //
+            // Data
+            //
+
+            /** Prefix. */
+            public String prefix;
+
+            /** URI; may be null. */
+            public String uri;
+
+            //
+            // Constructors
+            //
+
+            /** Constructs an entry. */
+            public Entry(String prefix, String uri) {
+                this.prefix = prefix;
+                this.uri = uri;
+            } // <init>(String,String)
+
+        } // class Entry
+
+    } // class NamespaceSupport
+
+} // class NamespaceBinder

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/Purifier.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/Purifier.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/Purifier.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,470 @@
+/* 
+ * (C) Copyright 2004-2005, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ */
+
+package org.cyberneko.html.filters;
+
+import org.cyberneko.html.HTMLAugmentations;
+import org.cyberneko.html.HTMLEventInfo;
+
+import java.lang.reflect.Method;
+import java.lang.reflect.InvocationTargetException;
+
+import org.apache.xerces.util.XMLChar;
+import org.apache.xerces.util.XMLStringBuffer;
+import org.apache.xerces.xni.Augmentations;
+import org.apache.xerces.xni.NamespaceContext;
+import org.apache.xerces.xni.QName;
+import org.apache.xerces.xni.XMLAttributes;
+import org.apache.xerces.xni.XMLLocator;
+import org.apache.xerces.xni.XMLString;
+import org.apache.xerces.xni.XNIException;
+import org.apache.xerces.xni.parser.XMLComponentManager;
+import org.apache.xerces.xni.parser.XMLConfigurationException;
+
+/**
+ * This filter purifies the HTML input to ensure XML well-formedness.
+ * The purification process includes:
+ * <ul>
+ * <li>fixing illegal characters in the document, including
+ *  <ul>
+ *  <li>element and attribute names,
+ *  <li>processing instruction target and data,
+ *  <li>document text;
+ *  </ul>
+ * <li>ensuring the string "--" does not appear in the content of
+ *     a comment;
+ * <li>ensuring the string "]]>" does not appear in the content of
+ *     a CDATA section; 
+ * <li>ensuring that the XML declaration has required pseudo-attributes
+ *     and that the values are correct;
+ * and
+ * <li>synthesizing missing namespace bindings.
+ * </ul>
+ * <p>
+ * Illegal characters in XML names are converted to the character 
+ * sequence "_u####_" where "####" is the value of the Unicode 
+ * character represented in hexadecimal. Illegal characters
+ * appearing in document content, on the other hand, are converted
+ * to the character sequence "\\u####".
+ * <p>
+ * In comments, the character '-' is replaced by the character
+ * sequence "- " to prevent "--" from ever appearing in the comment
+ * content. For CDATA sections, the character ']' is replaced by
+ * the character sequence "] " to prevent "]]" from appearing.
+ * <p>
+ * The URI used for synthesized namespace bindings is
+ * "http://cyberneko.org/html/ns/synthesized/<i>number</i>" where
+ * <i>number</i> is generated to ensure uniqueness.
+ * 
+ * @author Andy Clark
+ * 
+ * @version $Id: Purifier.java,v 1.5 2005/02/14 03:56:54 andyc Exp $
+ */
+public class Purifier
+    extends DefaultFilter {
+
+    //
+    // Constants
+    //
+
+    /**
+     * URI prefix of synthesized namespace bindings. A running number
+     * is appended to it to form the URI of each synthesized binding.
+     * NOTE: The name is misspelled ("PREFX") but it is public, so it
+     *       is kept as is.
+     */
+    public static final String SYNTHESIZED_NAMESPACE_PREFX =
+        "http://cyberneko.org/html/ns/synthesized/";
+
+    /** Namespaces feature identifier. */
+    protected static final String NAMESPACES = "http://xml.org/sax/features/namespaces";
+
+    /** Include infoset augmentations feature identifier. */
+    protected static final String AUGMENTATIONS = "http://cyberneko.org/html/features/augmentations";
+
+    /** Recognized features. */
+    private static final String[] RECOGNIZED_FEATURES = {
+        NAMESPACES,
+        AUGMENTATIONS,
+    };
+
+    /** Recognized features defaults; no default is given for either feature. */
+    private static final Boolean[] RECOGNIZED_FEATURES_DEFAULTS = {
+        null,
+        null,
+    };
+
+    // static vars
+
+    /** Event info item attached to the events synthesized by this filter. */
+    protected static final HTMLEventInfo SYNTHESIZED_ITEM = 
+        new HTMLEventInfo.SynthesizedItem();
+
+    //
+    // Data
+    //
+
+    // features
+
+    /** True if the namespaces feature is on; read in reset. */
+    protected boolean fNamespaces;
+
+    /** True if the augmentations feature is on; read in reset. */
+    protected boolean fAugmentations;
+
+    // state
+
+    /** True if the doctype declaration was seen. */
+    protected boolean fSeenDoctype;
+
+    /** True if root element was seen. */
+    protected boolean fSeenRootElement;
+
+    /** True if inside a CDATA section. */
+    protected boolean fInCDATASection;
+
+    // doctype declaration info
+
+    /** Public identifier of doctype declaration. */
+    protected String fPublicId;
+
+    /** System identifier of doctype declaration. */
+    protected String fSystemId;
+
+    // namespace info
+
+    /** Namespace context; null when the namespaces feature is off. */
+    protected NamespaceContext fNamespaceContext;
+
+    /** Number of namespace bindings synthesized so far in this document. */
+    protected int fSynthesizedNamespaceCount;
+
+    // temp vars
+
+    /** Scratch qualified name, shared by the attribute handling code. */
+    private QName fQName = new QName();
+
+    /** Scratch augmentations returned by synthesizedAugs. */
+    private final HTMLAugmentations fInfosetAugs = new HTMLAugmentations();
+
+    /** Scratch string buffer that holds the purified text of an event. */
+    private final XMLStringBuffer fStringBuffer = new XMLStringBuffer();
+
+    //
+    // XMLComponent methods
+    //
+
+    /**
+     * Resets the filter: clears the CDATA section state and reads the
+     * namespaces and augmentations features from the component manager.
+     *
+     * @param manager The component manager that supplies the features.
+     *
+     * @throws XMLConfigurationException Declared for feature lookups
+     *                                   that fail.
+     */
+    public void reset(XMLComponentManager manager) 
+        throws XMLConfigurationException {
+
+        // state
+        fInCDATASection = false;
+
+        // features
+        fNamespaces = manager.getFeature(NAMESPACES);
+        fAugmentations = manager.getFeature(AUGMENTATIONS);
+
+    } // reset(XMLComponentManager)
+
+    //
+    // XMLDocumentHandler methods
+    //
+
+    /**
+     * Starts a document for which the caller supplies no namespace
+     * context. A private context is created when the namespaces
+     * feature is on; otherwise no context is kept.
+     */
+    public void startDocument(XMLLocator locator, String encoding,
+                              Augmentations augs) throws XNIException {
+        if (fNamespaces) {
+            fNamespaceContext = new NamespaceBinder.NamespaceSupport();
+        }
+        else {
+            fNamespaceContext = null;
+        }
+        fSynthesizedNamespaceCount = 0;
+        handleStartDocument();
+        super.startDocument(locator, encoding, augs);
+    } // startDocument(XMLLocator,String,Augmentations)
+
+    /**
+     * Starts a document using the namespace context supplied by the
+     * caller. The synthesized binding counter and the per-document
+     * state are reset before the event is passed on.
+     */
+    public void startDocument(XMLLocator locator, String encoding,
+                              NamespaceContext nscontext, Augmentations augs)
+        throws XNIException {
+        fNamespaceContext = nscontext;
+        fSynthesizedNamespaceCount = 0;
+        handleStartDocument();
+        super.startDocument(locator, encoding, nscontext, augs);
+    } // startDocument(XMLLocator,String,NamespaceContext,Augmentations)
+
+    /**
+     * Normalizes the pseudo-attributes of the XML declaration before
+     * passing the event on: the version is forced to "1.0", an empty
+     * encoding is dropped, and the standalone value is lower-cased or
+     * dropped when it is not recognized.
+     */
+    public void xmlDecl(String version, String encoding, String standalone,
+                        Augmentations augs) throws XNIException {
+        // anything but "1.0" (including a missing version) is replaced
+        if (!"1.0".equals(version)) {
+            version = "1.0";
+        }
+        // an empty encoding name is treated as absent
+        if (encoding != null && encoding.length() == 0) {
+            encoding = null;
+        }
+        // NOTE(review): The XML specification uses "yes" and "no" for
+        //       the standalone pseudo-attribute; confirm that the
+        //       values "true" and "false" are what is expected here.
+        if (standalone != null) {
+            boolean recognized = standalone.equalsIgnoreCase("true")
+                              || standalone.equalsIgnoreCase("false");
+            standalone = recognized ? standalone.toLowerCase() : null;
+        }
+        super.xmlDecl(version,encoding,standalone,augs);
+    } // xmlDecl(String,String,String,Augmentations)
+
+    /**
+     * Purifies the comment text and follows every '-' with a space so
+     * that the string "--" can never appear in the comment content.
+     */
+    public void comment(XMLString text, Augmentations augs)
+        throws XNIException {
+        // NOTE: purifyText hands back the shared string buffer, so the
+        //       content is copied out before the buffer is reused.
+        String purified = purifyText(text).toString();
+        StringBuffer padded = new StringBuffer(purified.length());
+        for (int i = 0; i < purified.length(); i++) {
+            char c = purified.charAt(i);
+            padded.append(c);
+            if (c == '-') {
+                padded.append(' ');
+            }
+        }
+        fStringBuffer.length = 0;
+        fStringBuffer.append(padded.toString());
+        super.comment(fStringBuffer, augs);
+    } // comment(XMLString,Augmentations)
+
+    /**
+     * Purifies the target as a name and the data as text, then passes
+     * the processing instruction on.
+     */
+    public void processingInstruction(String target, XMLString data,
+                                      Augmentations augs)
+        throws XNIException {
+        String cleanTarget = purifyName(target, true);
+        XMLString cleanData = purifyText(data);
+        super.processingInstruction(cleanTarget, cleanData, augs);
+    } // processingInstruction(String,XMLString,Augmentations)
+
+    /**
+     * Records the doctype declaration instead of passing it on. The
+     * declaration is emitted later, by handleStartElement, once the
+     * name of the root element is known.
+     */
+    public void doctypeDecl(String root, String pubid, String sysid,
+                            Augmentations augs) throws XNIException {
+        fSeenDoctype = true;
+        // NOTE: It doesn't matter what the root element name is because
+        //       it must match the root element. -Ac
+        // NOTE: If the public identifier is specified, then a system
+        //       identifier must also be specified. -Ac
+        boolean needsSystemId = pubid != null && sysid == null;
+        fPublicId = pubid;
+        fSystemId = needsSystemId ? "" : sysid;
+        // NOTE: Can't save the augmentations because the object state
+        //       is transient. -Ac
+    } // doctypeDecl(String,String,String,Augmentations)
+
+    /**
+     * Start element. The element and its attributes are purified by
+     * handleStartElement before the event is passed on.
+     */
+    public void startElement(QName element, XMLAttributes attrs,
+                             Augmentations augs) throws XNIException {
+        handleStartElement(element, attrs);
+        super.startElement(element, attrs, augs);
+    } // startElement(QName,XMLAttributes,Augmentations)
+
+    /**
+     * Empty element. The element and its attributes are purified by
+     * handleStartElement before the event is passed on.
+     */
+    public void emptyElement(QName element, XMLAttributes attrs,
+                             Augmentations augs) throws XNIException {
+        handleStartElement(element, attrs);
+        super.emptyElement(element, attrs, augs);
+    } // emptyElement(QName,XMLAttributes,Augmentations)
+
+    /**
+     * Start CDATA section. Sets the flag that makes characters()
+     * break up runs of ']' characters.
+     */
+    public void startCDATA(Augmentations augs) throws XNIException {
+        fInCDATASection = true;
+        super.startCDATA(augs);
+    } // startCDATA(Augmentations)
+
+    /** End CDATA section. Clears the flag set by startCDATA. */
+    public void endCDATA(Augmentations augs) throws XNIException {
+        fInCDATASection = false;
+        super.endCDATA(augs);
+    } // endCDATA(Augmentations)
+
+    /**
+     * Purifies character content. Inside a CDATA section every ']' is
+     * additionally followed by a space so that the string "]]" can
+     * never appear in the section.
+     */
+    public void characters(XMLString text, Augmentations augs)
+        throws XNIException {
+        text = purifyText(text);
+        if (!fInCDATASection) {
+            super.characters(text,augs);
+            return;
+        }
+        // NOTE: The purified text lives in the shared string buffer,
+        //       so it is copied out before the buffer is refilled.
+        String purified = text.toString();
+        StringBuffer padded = new StringBuffer(purified.length());
+        for (int i = 0; i < purified.length(); i++) {
+            char c = purified.charAt(i);
+            padded.append(c);
+            if (c == ']') {
+                padded.append(' ');
+            }
+        }
+        fStringBuffer.length = 0;
+        fStringBuffer.append(padded.toString());
+        super.characters(fStringBuffer,augs);
+    } // characters(XMLString,Augmentations)
+
+    /**
+     * Purifies the element name and, when namespaces are on, resolves
+     * the URI of a prefixed name that has none before passing the
+     * event on.
+     */
+    public void endElement(QName element, Augmentations augs)
+        throws XNIException {
+        element = purifyQName(element);
+        boolean unresolved = fNamespaces
+                          && element.prefix != null
+                          && element.uri == null;
+        if (unresolved) {
+            element.uri = fNamespaceContext.getURI(element.prefix);
+        }
+        super.endElement(element, augs);
+    } // endElement(QName,Augmentations)
+
+    //
+    // Protected methods
+    //
+
+    /**
+     * Handle start document. Clears the doctype and root element
+     * flags; called by both startDocument methods.
+     */
+    protected void handleStartDocument() {
+        fSeenDoctype = false;
+        fSeenRootElement = false;
+    } // handleStartDocument()
+
+    /**
+     * Handle start element. Purifies the element and attribute names,
+     * synthesizes namespace bindings for prefixed names that have no
+     * URI (when namespaces are on), and emits the recorded doctype
+     * declaration in front of the first element of the document.
+     *
+     * @param element The element name; purified in place.
+     * @param attrs   The attributes; names are purified in place and
+     *                synthesized bindings are added to it.
+     */
+    protected void handleStartElement(QName element, XMLAttributes attrs) {
+
+        // handle element and attributes
+        element = purifyQName(element);
+        int attrCount = attrs != null ? attrs.getLength() : 0;
+        // NOTE: The count is taken once and the loop runs backwards, so
+        //       attributes added by synthesizeBinding are not visited.
+        for (int i = attrCount-1; i >= 0; i--) {
+            // purify attribute name
+            attrs.getName(i, fQName);
+            attrs.setName(i, purifyQName(fQName));
+
+            // synthesize namespace bindings
+            if (fNamespaces) {
+                if (!fQName.rawname.equals("xmlns") &&
+                    !fQName.rawname.startsWith("xmlns:")) {
+                    // NOTE: Must get attribute name again because the
+                    //       purifyQName method does not guarantee that
+                    //       the same QName object is returned. -Ac
+                    attrs.getName(i, fQName);
+                    if (fQName.prefix != null && fQName.uri == null) {
+                        // overwrites fQName with the xmlns attribute name
+                        synthesizeBinding(attrs, fQName.prefix);
+                    }
+                }
+            }
+        }
+
+        // synthesize namespace bindings
+        // NOTE(review): attrs is treated as possibly null above but is
+        //       handed to synthesizeBinding here, which calls
+        //       attrs.addAttribute; confirm callers never pass null.
+        if (fNamespaces) {
+            if (element.prefix != null && element.uri == null) {
+                synthesizeBinding(attrs, element.prefix);
+            }
+        }
+
+        // synthesize doctype declaration
+        if (!fSeenRootElement && fSeenDoctype) {
+            Augmentations augs = synthesizedAugs();
+            super.doctypeDecl(element.rawname, fPublicId, fSystemId, augs);
+        }
+
+        // mark start element as seen
+        fSeenRootElement = true;
+
+    } // handleStartElement(QName,XMLAttributes)
+
+    /**
+     * Binds the given prefix to a freshly synthesized namespace URI:
+     * an <code>xmlns:<i>prefix</i></code> attribute is added to the
+     * attributes and the prefix is declared in the namespace context.
+     * NOTE: The shared fQName object is overwritten by this method.
+     *
+     * @param attrs The attributes that receive the xmlns attribute.
+     * @param ns    The prefix to bind.
+     */
+    protected void synthesizeBinding(XMLAttributes attrs, String ns) {
+        // each synthesized binding consumes one counter value
+        String syntheticURI = SYNTHESIZED_NAMESPACE_PREFX+fSynthesizedNamespaceCount++;
+
+        // add attribute
+        fQName.setValues("xmlns", ns, "xmlns"+':'+ns,
+                         NamespaceBinder.NAMESPACES_URI);
+        attrs.addAttribute(fQName, "CDATA", syntheticURI);
+
+        // bind namespace
+        fNamespaceContext.declarePrefix(ns, syntheticURI);
+
+    } // synthesizeBinding(XMLAttributes,String)
+
+    /**
+     * Returns the shared augmentations object holding only the
+     * synthesized item, or null when the augmentations feature is off.
+     */
+    protected final Augmentations synthesizedAugs() {
+        if (!fAugmentations) {
+            return null;
+        }
+        // NOTE: The same object is reused for every call.
+        fInfosetAugs.removeAllItems();
+        fInfosetAugs.putItem(AUGMENTATIONS, SYNTHESIZED_ITEM);
+        return fInfosetAugs;
+    } // synthesizedAugs():Augmentations
+
+    //
+    // Protected methods
+    //
+
+    /**
+     * Purify qualified name. The prefix, local part and raw name are
+     * purified in place; the raw name is the only part in which a
+     * first colon is accepted as the prefix separator.
+     *
+     * @param qname The qualified name to purify.
+     *
+     * @return The same object that was passed in.
+     */
+    protected QName purifyQName(QName qname) {
+        qname.prefix = purifyName(qname.prefix, true);
+        qname.localpart = purifyName(qname.localpart, true);
+        qname.rawname = purifyName(qname.rawname, false);
+        return qname;
+    } // purifyQName(QName):QName
+
+    /**
+     * Purify name. Every character that is not legal at its position
+     * in an XML name is replaced by the sequence "_u####_", where
+     * "####" is the character value in upper-case hexadecimal.
+     *
+     * @param name      The name to purify; may be null.
+     * @param localpart True if the name must not contain a colon at
+     *                  all; false if a first colon is accepted. Colons
+     *                  are only restricted when namespaces are on.
+     *
+     * @return The purified name, or null if the name was null.
+     */
+    protected String purifyName(String name, boolean localpart) {
+        if (name == null) {
+            return name;
+        }
+        final int count = name.length();
+        StringBuffer out = new StringBuffer();
+        // a local part behaves as if its one colon was already used up
+        boolean colonUsed = localpart;
+        for (int pos = 0; pos < count; pos++) {
+            char ch = name.charAt(pos);
+            boolean legal;
+            if (pos == 0) {
+                legal = XMLChar.isNameStart(ch);
+            }
+            else {
+                boolean extraColon = fNamespaces && ch == ':' && colonUsed;
+                legal = !extraColon && XMLChar.isName(ch);
+                colonUsed = colonUsed || ch == ':';
+            }
+            if (legal) {
+                out.append(ch);
+            }
+            else {
+                out.append("_u").append(toHexString(ch,4)).append("_");
+            }
+        }
+        return out.toString();
+    } // purifyName(String,boolean):String
+
+    /**
+     * Purify content. Copies the text into the shared string buffer,
+     * replacing every character that XMLChar reports as invalid by
+     * the sequence "\\u####", where "####" is the character value in
+     * upper-case hexadecimal.
+     *
+     * @param text The text to purify.
+     *
+     * @return The shared string buffer; its content is only valid
+     *         until the buffer is next reused.
+     */
+    protected XMLString purifyText(XMLString text) {
+        fStringBuffer.length = 0;
+        final char[] chars = text.ch;
+        final int start = text.offset;
+        final int end = start + text.length;
+        for (int pos = start; pos < end; pos++) {
+            char c = chars[pos];
+            if (!XMLChar.isInvalid(c)) {
+                fStringBuffer.append(c);
+                continue;
+            }
+            fStringBuffer.append("\\u"+toHexString(c,4));
+        }
+        return fStringBuffer;
+    } // purifyText(XMLString):XMLString
+
+    //
+    // Protected static methods
+    //
+
+    /**
+     * Returns the upper-case hexadecimal form of the given value,
+     * left-padded with zeros to the requested length. A value with
+     * more digits than that is returned unshortened.
+     *
+     * @param c      The value to convert.
+     * @param padlen The minimum length of the result.
+     *
+     * @return The padded hexadecimal string.
+     */
+    protected static String toHexString(int c, int padlen) {
+        String digits = Integer.toHexString(c);
+        StringBuffer padded = new StringBuffer(padlen);
+        for (int missing = padlen - digits.length(); missing > 0; missing--) {
+            padded.append('0');
+        }
+        padded.append(digits);
+        return padded.toString().toUpperCase();
+    } // toHexString(int,int):String
+
+} // class Purifier

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/Writer.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/Writer.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/filters/Writer.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,474 @@
+/* 
+ * (C) Copyright 2002-2005, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ */
+
+package org.cyberneko.html.filters;
+
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.io.UnsupportedEncodingException;
+
+import org.cyberneko.html.HTMLConfiguration;
+import org.cyberneko.html.HTMLElements;
+import org.cyberneko.html.HTMLEntities;
+import org.cyberneko.html.filters.DefaultFilter;
+
+import org.apache.xerces.xni.Augmentations;
+import org.apache.xerces.xni.NamespaceContext;
+import org.apache.xerces.xni.QName;
+import org.apache.xerces.xni.XMLAttributes;
+import org.apache.xerces.xni.XMLLocator;
+import org.apache.xerces.xni.XMLResourceIdentifier;
+import org.apache.xerces.xni.XMLString;
+import org.apache.xerces.xni.XNIException;
+import org.apache.xerces.xni.parser.XMLDocumentFilter;
+import org.apache.xerces.xni.parser.XMLInputSource;
+import org.apache.xerces.xni.parser.XMLParserConfiguration;
+
+/**
+ * An HTML writer written as a filter. Besides serializing the HTML
+ * event stream, the writer also passes the document events to the next
+ * stage in the pipeline. This allows applications to insert writer
+ * filters between other custom filters for debugging purposes.
+ * <p>
+ * Since an HTML document may have specified its encoding using the
+ * &lt;META&gt; tag and http-equiv/content attributes, the writer will
+ * automatically change any character set specified in this tag to
+ * match the encoding of the output stream. Therefore, the character
+ * encoding name used to construct the writer should be an official
+ * <a href='http://www.iana.org/assignments/character-sets'>IANA</a>
+ * encoding name and not a Java encoding name.
+ * <p>
+ * <strong>Note:</strong>
+ * The modified character set in the &lt;META&gt; tag is <em>not</em>
+ * propagated to the next stage in the pipeline. The changed value is
+ * only output to the stream; the original value is sent to the next
+ * stage in the pipeline.
+ *
+ * @author Andy Clark
+ *
+ * @version $Id: Writer.java,v 1.7 2005/02/14 04:01:33 andyc Exp $
+ */
+public class Writer 
+    extends DefaultFilter {
+
+    //
+    // Constants
+    //
+
+    /** Notify character entity references feature identifier. */
+    public static final String NOTIFY_CHAR_REFS = "http://apache.org/xml/features/scanner/notify-char-refs";
+
+    /** Notify built-in entity references feature identifier. */
+    public static final String NOTIFY_HTML_BUILTIN_REFS = "http://cyberneko.org/html/features/scanner/notify-builtin-refs";
+
+    /** Augmentations feature identifier. */
+    protected static final String AUGMENTATIONS = "http://cyberneko.org/html/features/augmentations";
+
+    /** Filters property identifier. */
+    protected static final String FILTERS = "http://cyberneko.org/html/properties/filters";
+
+    //
+    // Data
+    //
+
+    /** The encoding name written into the META content-type charset. */
+    protected String fEncoding;
+
+    /** 
+     * The print writer used for serializing the document with the
+     * appropriate character encoding. 
+     */
+    protected PrintWriter fPrinter;
+
+    // state
+
+    /** True once the first element of the document was started. */
+    protected boolean fSeenRootElement;
+
+    /** True once a META http-equiv content-type directive was printed. */
+    protected boolean fSeenHttpEquiv;
+
+    /** Element depth; counts start elements minus end elements. */
+    protected int fElementDepth;
+
+    /** True if character content is printed in normalized form. */
+    protected boolean fNormalize;
+
+    /** True if character content is printed; off inside a general entity. */
+    protected boolean fPrintChars;
+
+    //
+    // Constructors
+    //
+
+    /**
+     * Constructs a writer filter that prints to standard out using
+     * the UTF-8 encoding.
+     */
+    public Writer() {
+        // Note: UTF-8 should *always* be a supported encoding. Although,
+        //       I've heard of the old M$ JVM not supporting it! Amazing. -Ac
+        fEncoding = "UTF-8";
+        try {
+            OutputStreamWriter out = new OutputStreamWriter(System.out, fEncoding);
+            fPrinter = new PrintWriter(out);
+        }
+        catch (UnsupportedEncodingException e) {
+            throw new RuntimeException(e.getMessage());
+        }
+    } // <init>()
+
+    /**
+     * Constructs a writer filter using the specified output stream and
+     * encoding. The stream is wrapped in an output stream writer for
+     * the given encoding.
+     *
+     * @param outputStream The output stream to write to.
+     * @param encoding The encoding to be used for the output. The encoding name
+     *                 should be an official IANA encoding name.
+     *
+     * @throws UnsupportedEncodingException If the encoding is not
+     *                                      supported.
+     */
+    public Writer(OutputStream outputStream, String encoding) 
+        throws UnsupportedEncodingException {
+        this(new OutputStreamWriter(outputStream, encoding), encoding);
+    } // <init>(OutputStream,String)
+
+    /**
+     * Constructs a writer filter using the specified Java writer and
+     * encoding. A writer that already is a print writer is used as
+     * is; any other writer is wrapped in a new print writer.
+     *
+     * @param writer The Java writer to write to.
+     * @param encoding The encoding to be used for the output. The encoding name
+     *                 should be an official IANA encoding name.
+     */
+    public Writer(java.io.Writer writer, String encoding) {
+        fEncoding = encoding;
+        fPrinter = writer instanceof PrintWriter
+                 ? (PrintWriter)writer
+                 : new PrintWriter(writer);
+    } // <init>(java.io.Writer,String)
+
+    //
+    // XMLDocumentHandler methods
+    //
+
+    // since Xerces-J 2.2.0
+
+    /**
+     * Start document. Resets the per-document state of the writer
+     * before passing the event on.
+     */
+    public void startDocument(XMLLocator locator, String encoding, 
+                              NamespaceContext nscontext, Augmentations augs)
+        throws XNIException {
+        fSeenRootElement = false;
+        fSeenHttpEquiv = false;
+        fElementDepth = 0;
+        fNormalize = true;
+        fPrintChars = true;
+        super.startDocument(locator, encoding, nscontext, augs);
+    } // startDocument(XMLLocator,String,NamespaceContext,Augmentations)
+
+    // old methods
+
+    /**
+     * Start document. Delegates to the four-argument method with a
+     * null namespace context.
+     */
+    public void startDocument(XMLLocator locator, String encoding, Augmentations augs)
+        throws XNIException {
+        startDocument(locator, encoding, null, augs);
+    } // startDocument(XMLLocator,String,Augmentations)
+
+    /**
+     * Prints the comment, without normalizing its text, and passes
+     * the event on to the next stage in the pipeline.
+     * <p>
+     * A comment after the root element was closed is preceded by a
+     * line break; a comment before the root element is followed by
+     * one.
+     *
+     * @param text The content of the comment.
+     * @param augs Additional information that may include infoset
+     *             augmentations.
+     *
+     * @throws XNIException Thrown by the next stage to signal an error.
+     */
+    public void comment(XMLString text, Augmentations augs) 
+        throws XNIException {
+        if (fSeenRootElement && fElementDepth <= 0) {
+            fPrinter.println();
+        }
+        fPrinter.print("<!--");
+        printCharacters(text, false);
+        fPrinter.print("-->");
+        if (!fSeenRootElement) {
+            fPrinter.println();
+        }
+        fPrinter.flush();
+        // NOTE: Like every other handler of this writer, the event
+        //       must reach the next stage; otherwise comments vanish
+        //       from the pipeline behind the writer.
+        super.comment(text, augs);
+    } // comment(XMLString,Augmentations)
+
+    /**
+     * Start element. Prints the start tag and passes the event on.
+     * Normalization of the following character content is turned off
+     * when the element is reported as special by HTMLElements.
+     */
+    public void startElement(QName element, XMLAttributes attributes, Augmentations augs)
+        throws XNIException {
+        fSeenRootElement = true;
+        fElementDepth++;
+        fNormalize = !HTMLElements.getElement(element.rawname).isSpecial();
+        printStartElement(element, attributes);
+        super.startElement(element, attributes, augs);
+    } // startElement(QName,XMLAttributes,Augmentations)
+
+    /**
+     * Empty element. Prints the start tag only and passes the event
+     * on; the element depth is left unchanged.
+     */
+    public void emptyElement(QName element, XMLAttributes attributes, Augmentations augs)
+        throws XNIException {
+        fSeenRootElement = true;
+        printStartElement(element, attributes);
+        super.emptyElement(element, attributes, augs);
+    } // emptyElement(QName,XMLAttributes,Augmentations)
+
+    /**
+     * Characters. Prints the text unless printing is turned off (as
+     * it is between startGeneralEntity and endGeneralEntity) and
+     * passes the event on in either case.
+     */
+    public void characters(XMLString text, Augmentations augs) 
+        throws XNIException {
+        if (fPrintChars) {
+            printCharacters(text, fNormalize);
+        }
+        super.characters(text, augs);
+    } // characters(XMLString,Augmentations)
+
+    /**
+     * End element. Prints the end tag, turns normalization of
+     * character content back on and passes the event on.
+     */
+    public void endElement(QName element, Augmentations augs)
+        throws XNIException {
+        fElementDepth--;
+        fNormalize = true;
+        /***
+        // NOTE: Not sure if this is what should be done in the case where
+        //       the encoding is not explicitly declared within the HEAD. So
+        //       I'm leaving it commented out for now. -Ac
+        if (element.rawname.equalsIgnoreCase("head") && !fSeenHttpEquiv) {
+            boolean capitalize = Character.isUpperCase(element.rawname.charAt(0));
+            String ename = capitalize ? "META" : "meta";
+            QName qname = new QName(null, ename, ename, null);
+            XMLAttributes attrs = new XMLAttributesImpl();
+            QName aname = new QName(null, "http-equiv", "http-equiv", null);
+            attrs.addAttribute(aname, "CDATA", "Content-Type");
+            aname.setValues(null, "content", "content", null);
+            attrs.addAttribute(aname, "CDATA", "text/html; charset="+fEncoding);
+            super.emptyElement(qname, attrs, null);
+        }
+        /***/
+        printEndElement(element);
+        super.endElement(element, augs);
+    } // endElement(QName,Augmentations)
+
+    /**
+     * Start general entity. Prints the entity reference and turns
+     * off the printing of character content until the entity ends.
+     * A numeric character reference ("#..." or "#x...") is replaced
+     * by the entity name that HTMLEntities knows for its value, if
+     * there is one; the event is passed on with that name.
+     */
+    public void startGeneralEntity(String name, XMLResourceIdentifier id, String encoding, Augmentations augs)
+        throws XNIException {
+        fPrintChars = false;
+        if (name.startsWith("#")) {
+            final boolean isHex = name.startsWith("#x");
+            final String digits = name.substring(isHex ? 2 : 1);
+            try {
+                int value = Integer.parseInt(digits, isHex ? 16 : 10);
+                String entity = HTMLEntities.get(value);
+                if (entity != null) {
+                    name = entity;
+                }
+            }
+            catch (NumberFormatException e) {
+                // ignore
+            }
+        }
+        printEntity(name);
+        super.startGeneralEntity(name, id, encoding, augs);
+    } // startGeneralEntity(String,XMLResourceIdentifier,String,Augmentations)
+
+    /**
+     * End general entity. Turns the printing of character content
+     * back on and passes the event on.
+     */
+    public void endGeneralEntity(String name, Augmentations augs)
+        throws XNIException {
+        fPrintChars = true;
+        super.endGeneralEntity(name, augs);
+    } // endGeneralEntity(String,Augmentations)
+
+    //
+    // Protected methods
+    //
+
+    /**
+     * Print attribute value. Double quotes are printed as
+     * <code>&amp;quot;</code>; every other character is printed as is.
+     *
+     * @param text The attribute value to print.
+     */
+    protected void printAttributeValue(String text) {
+        for (int i = 0, count = text.length(); i < count; i++) {
+            char ch = text.charAt(i);
+            switch (ch) {
+                case '"':
+                    fPrinter.print("&quot;");
+                    break;
+                default:
+                    fPrinter.print(ch);
+                    break;
+            }
+        }
+        fPrinter.flush();
+    } // printAttributeValue(String)
+
+    /**
+     * Print characters.
+     *
+     * @param text      The characters to print.
+     * @param normalize True to print a newline character as a line
+     *                  break of the print writer and a character
+     *                  known to HTMLEntities as an entity reference;
+     *                  false to print every character as is.
+     */
+    protected void printCharacters(XMLString text, boolean normalize) {
+        for (int i = 0; i < text.length; i++) {
+            char c = text.ch[text.offset + i];
+            if (!normalize) {
+                fPrinter.print(c);
+                continue;
+            }
+            if (c == '\n') {
+                fPrinter.println();
+                continue;
+            }
+            String entity = HTMLEntities.get(c);
+            if (entity == null) {
+                fPrinter.print(c);
+            }
+            else {
+                printEntity(entity);
+            }
+        }
+        fPrinter.flush();
+    } // printCharacters(XMLString,boolean)
+
+    /**
+     * Print start element.
+     * <p>
+     * For a META element whose http-equiv attribute is "content-type",
+     * the content attribute is printed in lower case with its charset
+     * replaced by (or extended with) the encoding of this writer. The
+     * attribute value is only changed while the tag is printed; the
+     * original value is put back afterwards so that the next stage in
+     * the pipeline sees the unmodified attributes.
+     *
+     * @param element    The name of the element.
+     * @param attributes The attributes of the element; may be null.
+     */
+    protected void printStartElement(QName element, XMLAttributes attributes) {
+
+        // modify META[@http-equiv='content-type']/@content value
+        // NOTE: Names are compared with equalsIgnoreCase because the
+        //       result of toLowerCase() depends on the default locale
+        //       (e.g. "HTTP-EQUIV" in a Turkish locale).
+        int contentIndex = -1;
+        String originalContent = null;
+        if (attributes != null && element.rawname.equalsIgnoreCase("meta")) {
+            String httpEquiv = null;
+            int length = attributes.getLength();
+            for (int i = 0; i < length; i++) {
+                String aname = attributes.getQName(i);
+                if (aname.equalsIgnoreCase("http-equiv")) {
+                    httpEquiv = attributes.getValue(i);
+                }
+                else if (aname.equalsIgnoreCase("content")) {
+                    contentIndex = i;
+                }
+            }
+            if (httpEquiv != null && httpEquiv.equalsIgnoreCase("content-type")) {
+                fSeenHttpEquiv = true;
+                if (contentIndex != -1) {
+                    originalContent = attributes.getValue(contentIndex);
+                }
+                if (originalContent != null) {
+                    String content = originalContent.toLowerCase(java.util.Locale.ENGLISH);
+                    int charsetIndex = content.indexOf("charset=");
+                    if (charsetIndex != -1) {
+                        content = content.substring(0, charsetIndex + 8);
+                    }
+                    else {
+                        content += ";charset=";
+                    }
+                    content += fEncoding;
+                    attributes.setValue(contentIndex, content);
+                }
+            }
+        }
+
+        // print element
+        fPrinter.print('<');
+        fPrinter.print(element.rawname);
+        int attrCount = attributes != null ? attributes.getLength() : 0;
+        for (int i = 0; i < attrCount; i++) {
+            String aname = attributes.getQName(i);
+            String avalue = attributes.getValue(i);
+            fPrinter.print(' ');
+            fPrinter.print(aname);
+            fPrinter.print("=\"");
+            printAttributeValue(avalue);
+            fPrinter.print('"');
+        }
+        fPrinter.print('>');
+        fPrinter.flush();
+
+        // return original META[@http-equiv]/@content value
+        // NOTE: Only restore a value that was actually replaced above.
+        //       The original value is only captured for a content-type
+        //       directive; restoring unconditionally would set the
+        //       content attribute of every other META element to null.
+        if (originalContent != null) {
+            attributes.setValue(contentIndex, originalContent);
+        }
+
+    } // printStartElement(QName,XMLAttributes)
+
+    /**
+     * Print end element. Writes the end tag for the raw name of the
+     * element and flushes the print writer.
+     */
+    protected void printEndElement(QName element) {
+        fPrinter.print("</");
+        fPrinter.print(element.rawname);
+        fPrinter.print('>');
+        fPrinter.flush();
+    } // printEndElement(QName)
+
+    /**
+     * Print entity. Writes the name between '&amp;' and ';' and
+     * flushes the print writer.
+     */
+    protected void printEntity(String name) {
+        fPrinter.print('&');
+        fPrinter.print(name);
+        fPrinter.print(';');
+        fPrinter.flush();
+    } // printEntity(String)
+
+    //
+    // MAIN
+    //
+
+    /**
+     * Main. Parses every file named on the command line and writes
+     * the result to standard out.
+     * <p>
+     * The arguments are processed from left to right, so an option
+     * only affects the files that follow it. An option that requires
+     * an encoding name but is the last argument prints the usage and
+     * exits instead of failing with an exception.
+     *
+     * @param argv The command line arguments; see printUsage.
+     *
+     * @throws Exception Any error raised while parsing a file.
+     */
+    public static void main(String[] argv) throws Exception {
+        if (argv.length == 0) {
+            printUsage();
+            System.exit(1);
+        }
+        XMLParserConfiguration parser = new HTMLConfiguration();
+        parser.setFeature(NOTIFY_CHAR_REFS, true);
+        parser.setFeature(NOTIFY_HTML_BUILTIN_REFS, true);
+        String iencoding = null;
+        String oencoding = "Windows-1252";
+        boolean identity = false;
+        boolean purify = false;
+        for (int i = 0; i < argv.length; i++) {
+            String arg = argv[i];
+            boolean inputEncoding = arg.equals("-ie");
+            if (inputEncoding || arg.equals("-e") || arg.equals("-oe")) {
+                // the encoding name is the next argument
+                if (i + 1 >= argv.length) {
+                    System.err.println("error: missing encoding name after "+arg);
+                    printUsage();
+                    System.exit(1);
+                }
+                String encodingName = argv[++i];
+                if (inputEncoding) {
+                    iencoding = encodingName;
+                }
+                else {
+                    oencoding = encodingName;
+                }
+                continue;
+            }
+            if (arg.equals("-i")) {
+                identity = true;
+                continue;
+            }
+            if (arg.equals("-p")) {
+                purify = true;
+                continue;
+            }
+            if (arg.equals("-h")) {
+                printUsage();
+                System.exit(1);
+            }
+            // anything else is a file: build the filter chain and parse
+            java.util.Vector filtersVector = new java.util.Vector(2);
+            if (identity) {
+                filtersVector.addElement(new Identity());
+            }
+            else if (purify) {
+                filtersVector.addElement(new Purifier());
+            }
+            filtersVector.addElement(new Writer(System.out, oencoding));
+            XMLDocumentFilter[] filters = 
+                new XMLDocumentFilter[filtersVector.size()];
+            filtersVector.copyInto(filters);
+            parser.setProperty(FILTERS, filters);
+            XMLInputSource source = new XMLInputSource(null, arg, null);
+            source.setEncoding(iencoding);
+            parser.parse(source);
+        }
+    } // main(String[])
+
+    /** Print usage. Writes the command line help to standard error. */
+    private static void printUsage() {
+        System.err.println("usage: java "+Writer.class.getName()+" (options) file ...");
+        System.err.println();
+        System.err.println("options:");
+        System.err.println("  -ie name  Specify IANA name of input encoding.");
+        System.err.println("  -oe name  Specify IANA name of output encoding.");
+        System.err.println("  -i        Perform identity transform.");
+        System.err.println("  -p        Purify output to ensure XML well-formedness.");
+        System.err.println("  -h        Display help screen.");
+        System.err.println();
+        System.err.println("notes:");
+        System.err.println("  The -i and -p options are mutually exclusive.");
+        System.err.println("  The -e option has been replaced with -oe.");
+    } // printUsage()
+
+} // class Writer

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/parsers/DOMFragmentParser.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/parsers/DOMFragmentParser.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/parsers/DOMFragmentParser.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,577 @@
+/* 
+ * (C) Copyright 2002-2005, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ * ==============================================================
+ * This file contains some code from Apache Xerces-J which is
+ * used in accordance with the Apache license. Please refer to
+ * the LICENSE_apache file for specific details.
+ */
+
+package org.cyberneko.html.parsers;
+
+import org.cyberneko.html.HTMLConfiguration;
+
+import org.apache.xerces.impl.Constants;
+import org.apache.xerces.util.ErrorHandlerWrapper;
+
+import org.apache.xerces.xni.Augmentations;
+import org.apache.xerces.xni.NamespaceContext;
+import org.apache.xerces.xni.QName;
+import org.apache.xerces.xni.XMLAttributes;
+import org.apache.xerces.xni.XMLDocumentHandler;
+import org.apache.xerces.xni.XMLLocator;
+import org.apache.xerces.xni.XMLResourceIdentifier;
+import org.apache.xerces.xni.XMLString;
+import org.apache.xerces.xni.XNIException;
+
+import org.apache.xerces.xni.parser.XMLConfigurationException;
+import org.apache.xerces.xni.parser.XMLDocumentSource;
+import org.apache.xerces.xni.parser.XMLErrorHandler;
+import org.apache.xerces.xni.parser.XMLInputSource;
+import org.apache.xerces.xni.parser.XMLParseException;
+import org.apache.xerces.xni.parser.XMLParserConfiguration;
+
+import java.io.InputStream;
+import java.io.IOException;
+import java.io.Reader;
+
+import org.w3c.dom.Attr;
+import org.w3c.dom.CDATASection;
+import org.w3c.dom.Comment;
+import org.w3c.dom.Document;
+import org.w3c.dom.DocumentFragment;
+import org.w3c.dom.Element;
+import org.w3c.dom.EntityReference;
+import org.w3c.dom.Node;
+import org.w3c.dom.ProcessingInstruction;
+import org.w3c.dom.Text;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+import org.xml.sax.SAXNotRecognizedException;
+import org.xml.sax.SAXNotSupportedException;
+
+/**
+ * A DOM parser for HTML fragments.
+ *
+ * @author Andy Clark
+ *
+ * @version $Id: DOMFragmentParser.java,v 1.8 2005/02/14 03:56:54 andyc Exp $
+ */
+public class DOMFragmentParser
+    implements XMLDocumentHandler {
+
+    //
+    // Constants
+    //
+
+    // features
+
+    /** Document fragment balancing only. */
+    protected static final String DOCUMENT_FRAGMENT = 
+        "http://cyberneko.org/html/features/document-fragment";
+
+    /** Recognized features. */
+    protected static final String[] RECOGNIZED_FEATURES = {
+        DOCUMENT_FRAGMENT,
+    };
+
+    // properties
+
+    /** Property identifier: error handler. */
+    protected static final String ERROR_HANDLER =
+        Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_HANDLER_PROPERTY;
+
+    /** Current element node. */
+    protected static final String CURRENT_ELEMENT_NODE =
+        Constants.XERCES_PROPERTY_PREFIX + Constants.CURRENT_ELEMENT_NODE_PROPERTY;
+
+    /** Recognized properties. */
+    protected static final String[] RECOGNIZED_PROPERTIES = {
+        ERROR_HANDLER,
+        CURRENT_ELEMENT_NODE,
+    };
+
+    //
+    // Data
+    //
+
+    /** Parser configuration. */
+    protected XMLParserConfiguration fParserConfiguration;
+
+    /** Document source. */
+    protected XMLDocumentSource fDocumentSource;
+
+    /** DOM document fragment. */
+    protected DocumentFragment fDocumentFragment;
+
+    /** Document. */
+    protected Document fDocument;
+
+    /** Current node. */
+    protected Node fCurrentNode;
+
+    /** True if within a CDATA section. */
+    protected boolean fInCDATASection;
+
+    //
+    // Constructors
+    //
+
+    /** Default constructor: creates an HTMLConfiguration and registers this parser as its document handler. */
+    public DOMFragmentParser() {
+        fParserConfiguration = new HTMLConfiguration();
+        fParserConfiguration.addRecognizedFeatures(RECOGNIZED_FEATURES);
+        fParserConfiguration.addRecognizedProperties(RECOGNIZED_PROPERTIES);
+        fParserConfiguration.setFeature(DOCUMENT_FRAGMENT, true); // document fragment balancing only
+        fParserConfiguration.setDocumentHandler(this); // this object implements XMLDocumentHandler
+    } // <init>()
+
+    //
+    // Public methods
+    //
+
+    /** Parses the resource named by a system identifier into the given fragment. */
+    public void parse(String systemId, DocumentFragment fragment) throws SAXException, IOException {
+        InputSource source = new InputSource(systemId);
+        parse(source, fragment);
+    } // parse(String,DocumentFragment)
+
+    /**
+     * Parses the given input and appends the resulting nodes to the fragment.
+     *
+     * @param source   The SAX input source to read from.
+     * @param fragment The fragment that receives the parsed nodes.
+     * @exception SAXParseException If the configuration reports a parse
+     *            error; the public id, system id, line and column of the
+     *            XNI exception are carried over.
+     */
+    public void parse(InputSource source, DocumentFragment fragment)
+        throws SAXException, IOException {
+        fCurrentNode = fDocumentFragment = fragment;
+        fDocument = fDocumentFragment.getOwnerDocument();
+        try {
+            String sysid = source.getSystemId();
+            XMLInputSource inputSource =
+                new XMLInputSource(source.getPublicId(), sysid, sysid);
+            inputSource.setEncoding(source.getEncoding());
+            inputSource.setByteStream(source.getByteStream());
+            inputSource.setCharacterStream(source.getCharacterStream());
+            fParserConfiguration.parse(inputSource);
+        }
+        catch (XMLParseException e) {
+            // NOTE: Keep the location; a null locator would discard it.
+            //       A null wrapped exception is accepted by SAX.
+            throw new SAXParseException(e.getMessage(),
+                e.getPublicId(), e.getExpandedSystemId(),
+                e.getLineNumber(), e.getColumnNumber(),
+                e.getException());
+        }
+    } // parse(InputSource,DocumentFragment)
+
+    /**
+     * Allow an application to register an error event handler.
+     *
+     * <p>If the application does not register an error handler, all
+     * error events reported by the SAX parser will be silently
+     * ignored; however, normal processing may not continue.  It is
+     * highly recommended that all SAX applications implement an
+     * error handler to avoid unexpected bugs.</p>
+     *
+     * <p>Applications may register a new or different handler in the
+     * middle of a parse, and the SAX parser must begin using the new
+     * handler immediately.</p>
+     *
+     * @param errorHandler The error handler.
+     * @exception java.lang.NullPointerException If the handler 
+     *            argument is null.
+     * @see #getErrorHandler
+     */
+    public void setErrorHandler(ErrorHandler errorHandler) {
+        fParserConfiguration.setErrorHandler(new ErrorHandlerWrapper(errorHandler));
+    } // setErrorHandler(ErrorHandler)
+
+    /**
+     * Returns the SAX error handler currently in effect.
+     *
+     * @return The handler unwrapped from the configuration's error
+     *         handler property; null when that property is unset, is
+     *         not an <code>ErrorHandlerWrapper</code>, or cannot be
+     *         read from the configuration.
+     * @see #setErrorHandler
+     */
+    public ErrorHandler getErrorHandler() {
+
+        try {
+            XMLErrorHandler registered =
+                (XMLErrorHandler)fParserConfiguration.getProperty(ERROR_HANDLER);
+            // instanceof is false for null, so no separate null test
+            if (registered instanceof ErrorHandlerWrapper) {
+                return ((ErrorHandlerWrapper)registered).getErrorHandler();
+            }
+        }
+        catch (XMLConfigurationException e) {
+            // property lookup failed: fall through and report no handler
+        }
+        return null;
+    } // getErrorHandler():ErrorHandler
+
+    /**
+     * Sets the state of a feature on the underlying parser
+     * configuration. A configuration failure is translated into the
+     * matching SAX exception, which carries the original message.
+     *
+     * <p>Only <code>XMLConfigurationException</code> is translated;
+     * any other runtime exception propagates unchanged.</p>
+     *
+     * @param featureId The unique identifier (URI) of the feature.
+     * @param state The requested state of the feature (true or false).
+     *
+     * @exception SAXNotRecognizedException If the configuration
+     *            reports the feature as not recognized.
+     * @exception SAXNotSupportedException If the configuration
+     *            rejects the request for any other reason.
+     */
+    public void setFeature(String featureId, boolean state)
+        throws SAXNotRecognizedException, SAXNotSupportedException {
+
+        try {
+            fParserConfiguration.setFeature(featureId, state);
+        }
+        catch (XMLConfigurationException e) {
+            String reason = e.getMessage();
+            if (e.getType() != XMLConfigurationException.NOT_RECOGNIZED) {
+                throw new SAXNotSupportedException(reason);
+            }
+            throw new SAXNotRecognizedException(reason);
+        }
+
+    } // setFeature(String,boolean)
+
+    /**
+     * Returns the current state of a feature as reported by the
+     * underlying parser configuration. A configuration failure is
+     * translated into the matching SAX exception, which carries the
+     * original message.
+     *
+     * @param featureId The unique identifier (URI) of the feature
+     *                  being queried.
+     *
+     * @return The current state of the feature.
+     * @exception SAXNotRecognizedException If the configuration
+     *            reports the feature as not recognized.
+     * @exception SAXNotSupportedException If the configuration
+     *            fails the query for any other reason.
+     */
+    public boolean getFeature(String featureId)
+        throws SAXNotRecognizedException, SAXNotSupportedException {
+
+        try {
+            return fParserConfiguration.getFeature(featureId);
+        }
+        catch (XMLConfigurationException e) {
+            // anything other than "not recognized" maps to "not supported"
+            String reason = e.getMessage();
+            if (e.getType() != XMLConfigurationException.NOT_RECOGNIZED) {
+                throw new SAXNotSupportedException(reason);
+            }
+            throw new SAXNotRecognizedException(reason);
+        }
+
+    } // getFeature(String):boolean
+
+    /**
+     * Sets the value of a property on the underlying parser
+     * configuration. A configuration failure is translated into the
+     * matching SAX exception, which carries the original message.
+     *
+     * <p>Only <code>XMLConfigurationException</code> is translated;
+     * any other runtime exception propagates unchanged.</p>
+     *
+     * @param propertyId The unique identifier (URI) of the property
+     *                   being set.
+     * @param value      The value to which the property is being set.
+     *
+     * @exception SAXNotRecognizedException If the configuration
+     *            reports the property as not recognized.
+     * @exception SAXNotSupportedException If the configuration
+     *            rejects the request for any other reason.
+     */
+    public void setProperty(String propertyId, Object value)
+        throws SAXNotRecognizedException, SAXNotSupportedException {
+
+        try {
+            fParserConfiguration.setProperty(propertyId, value);
+        }
+        catch (XMLConfigurationException e) {
+            String reason = e.getMessage();
+            if (e.getType() != XMLConfigurationException.NOT_RECOGNIZED) {
+                throw new SAXNotSupportedException(reason);
+            }
+            throw new SAXNotRecognizedException(reason);
+        }
+
+    } // setProperty(String,Object)
+
+    /**
+     * Returns the current value of a property.
+     *
+     * <p>The current-element-node property is answered by this parser
+     * itself: it yields the node being built when that node is an
+     * element, and null otherwise. Every other identifier is passed to
+     * the underlying parser configuration.</p>
+     *
+     * @param propertyId The unique identifier (URI) of the property
+     *                   being queried.
+     * @return The current value of the property.
+     * @exception SAXNotRecognizedException If the configuration
+     *            reports the property as not recognized.
+     * @exception SAXNotSupportedException If the configuration
+     *            fails the query for any other reason.
+     */
+    public Object getProperty(String propertyId)
+        throws SAXNotRecognizedException, SAXNotSupportedException {
+
+        if (propertyId.equals(CURRENT_ELEMENT_NODE)) {
+            boolean atElement = fCurrentNode != null &&
+                fCurrentNode.getNodeType() == Node.ELEMENT_NODE;
+            return atElement ? fCurrentNode : null;
+        }
+        try {
+            return fParserConfiguration.getProperty(propertyId);
+        }
+        catch (XMLConfigurationException e) {
+            String reason = e.getMessage();
+            if (e.getType() != XMLConfigurationException.NOT_RECOGNIZED) {
+                throw new SAXNotSupportedException(reason);
+            }
+            throw new SAXNotRecognizedException(reason);
+        }
+
+    } // getProperty(String):Object
+
+    //
+    // XMLDocumentHandler methods
+    //
+
+    /** Stores the given document source in this handler. */
+    public void setDocumentSource(XMLDocumentSource source) {
+        this.fDocumentSource = source;
+    } // setDocumentSource(XMLDocumentSource)
+
+    /** Returns the document source last stored by setDocumentSource, or null if none was set. */
+    public XMLDocumentSource getDocumentSource() {
+        return this.fDocumentSource;
+    } // getDocumentSource():XMLDocumentSource
+
+    /** Start document without a namespace context; forwards to the four-argument form with null. */
+    public void startDocument(XMLLocator locator, String encoding, Augmentations augs) throws XNIException {
+        final NamespaceContext nscontext = null;
+        startDocument(locator, encoding, nscontext, augs);
+    } // startDocument(XMLLocator,String,Augmentations)
+
+    // since Xerces 2.2.0
+
+    /** Start document: clears the CDATA-section flag; the arguments are not used. */
+    public void startDocument(XMLLocator locator, String encoding,
+                              NamespaceContext nscontext, Augmentations augs)
+        throws XNIException {
+        this.fInCDATASection = false;
+    } // startDocument(XMLLocator,String,NamespaceContext,Augmentations)
+
+    /** XML declaration. Ignored: the body is empty, so nothing is added to the fragment. */
+    public void xmlDecl(String version, String encoding,
+                        String standalone, Augmentations augs)
+        throws XNIException {
+    } // xmlDecl(String,String,String,Augmentations)
+
+    /** Document type declaration. Ignored: the body is empty, so no doctype node is created. */
+    public void doctypeDecl(String root, String pubid, String sysid,
+                            Augmentations augs) throws XNIException {
+    } // doctypeDecl(String,String,String,Augmentations)
+
+    /** Processing instruction: creates a DOM processing instruction and appends it to the current node. */
+    public void processingInstruction(String target, XMLString data,
+                                      Augmentations augs)
+        throws XNIException {
+        String content = data.toString();
+        ProcessingInstruction node = fDocument.createProcessingInstruction(target, content);
+        fCurrentNode.appendChild(node);
+    } // processingInstruction(String,XMLString,Augmentations)
+
+    /** Comment: creates a DOM comment node and appends it to the current node. */
+    public void comment(XMLString text, Augmentations augs) throws XNIException {
+        String data = text.toString();
+        Comment node = fDocument.createComment(data);
+        fCurrentNode.appendChild(node);
+    } // comment(XMLString,Augmentations)
+
+    /** Start prefix mapping. Ignored: the body is empty. @deprecated Since Xerces 2.2.0. */
+    public void startPrefixMapping(String prefix, String uri,
+                                   Augmentations augs) throws XNIException {
+    } // startPrefixMapping(String,String,Augmentations)
+
+    /** End prefix mapping. Ignored: the body is empty. @deprecated Since Xerces 2.2.0. */
+    public void endPrefixMapping(String prefix, Augmentations augs)
+        throws XNIException {
+    } // endPrefixMapping(String,Augmentations)
+
+    /** Start element: creates the element, copies its attributes, appends it and makes it current. */
+    public void startElement(QName element, XMLAttributes attrs,
+                             Augmentations augs) throws XNIException {
+        Element created = fDocument.createElement(element.rawname);
+        if (attrs != null) {
+            // copy every attribute by qualified name
+            for (int index = 0, length = attrs.getLength(); index < length; index++) {
+                created.setAttribute(attrs.getQName(index), attrs.getValue(index));
+            }
+        }
+        fCurrentNode.appendChild(created);
+        fCurrentNode = created;
+    } // startElement(QName,XMLAttributes,Augmentations)
+
+    /** Empty element: handled as a start element immediately followed by its end element. */
+    public void emptyElement(QName element, XMLAttributes attrs, Augmentations augs) throws XNIException {
+        // open the element, then step straight back out of it
+        this.startElement(element, attrs, augs);
+        this.endElement(element, augs);
+    } // emptyElement(QName,XMLAttributes,Augmentations)
+
+    /** Characters: extends the last child when it has the wanted type, else appends a new node. */
+    public void characters(XMLString text, Augmentations augs)
+        throws XNIException {
+
+        // both branches need the last child and the text as a String
+        final Node last = fCurrentNode.getLastChild();
+        final String data = text.toString();
+
+        // inside a CDATA section only CDATA nodes are reused or created
+        if (fInCDATASection) {
+            if (last != null && last.getNodeType() == Node.CDATA_SECTION_NODE) {
+                ((CDATASection)last).appendData(data);
+            }
+            else {
+                fCurrentNode.appendChild(fDocument.createCDATASection(data));
+            }
+            return;
+        }
+
+        // plain character data: merge adjacent runs into one text node
+        if (last != null && last.getNodeType() == Node.TEXT_NODE) {
+            ((Text)last).appendData(data);
+        }
+        else {
+            fCurrentNode.appendChild(fDocument.createTextNode(data));
+        }
+
+    } // characters(XMLString,Augmentations)
+
+    /** Ignorable whitespace: kept, and handled exactly like character data. */
+    public void ignorableWhitespace(XMLString text, Augmentations augs) throws XNIException {
+        // no distinction is made between whitespace and other text
+        this.characters(text, augs);
+    } // ignorableWhitespace(XMLString,Augmentations)
+
+    /** Start general entity: appends an entity reference node and makes it the current node. */
+    public void startGeneralEntity(String name, XMLResourceIdentifier id,
+                                   String encoding, Augmentations augs) throws XNIException {
+        // nodes appended until endGeneralEntity become children of the reference
+        EntityReference reference = fDocument.createEntityReference(name);
+        fCurrentNode.appendChild(reference);
+        fCurrentNode = reference;
+    } // startGeneralEntity(String,XMLResourceIdentifier,String,Augmentations)
+
+    /** Text declaration. Ignored: the body is empty, so nothing is added to the fragment. */
+    public void textDecl(String version, String encoding,
+                         Augmentations augs) throws XNIException {
+    } // textDecl(String,String,Augmentations)
+
+    /** End general entity: moves the current node back up to its parent. */
+    public void endGeneralEntity(String name, Augmentations augs) throws XNIException {
+        final Node parent = fCurrentNode.getParentNode();
+        fCurrentNode = parent;
+    } // endGeneralEntity(String,Augmentations)
+
+    /** Start CDATA section: character data from here on is stored in CDATA section nodes. */
+    public void startCDATA(Augmentations augs) throws XNIException {
+        this.fInCDATASection = true;
+    } // startCDATA(Augmentations)
+
+    /** End CDATA section: character data from here on is stored in text nodes again. */
+    public void endCDATA(Augmentations augs) throws XNIException {
+        this.fInCDATASection = false;
+    } // endCDATA(Augmentations)
+
+    /** End element: moves the current node back up to its parent; the element name is not checked. */
+    public void endElement(QName element, Augmentations augs) throws XNIException {
+        final Node parent = fCurrentNode.getParentNode();
+        fCurrentNode = parent;
+    } // endElement(QName,Augmentations)
+
+    /** End document. Nothing to do: the body is empty and no state is reset here. */
+    public void endDocument(Augmentations augs) throws XNIException {
+    } // endDocument(Augmentations)
+
+    //
+    // DEBUG
+    //
+
+    /***
+    public static void print(Node node) {
+        short type = node.getNodeType();
+        switch (type) {
+            case Node.ELEMENT_NODE: {
+                System.out.print('<');
+                System.out.print(node.getNodeName());
+                org.w3c.dom.NamedNodeMap attrs = node.getAttributes();
+                int attrCount = attrs != null ? attrs.getLength() : 0;
+                for (int i = 0; i < attrCount; i++) {
+                    Node attr = attrs.item(i);
+                    System.out.print(' ');
+                    System.out.print(attr.getNodeName());
+                    System.out.print("='");
+                    System.out.print(attr.getNodeValue());
+                    System.out.print('\'');
+                }
+                System.out.print('>');
+                break;
+            }
+            case Node.TEXT_NODE: {
+                System.out.print(node.getNodeValue());
+                break;
+            }
+        }
+        Node child = node.getFirstChild();
+        while (child != null) {
+            print(child);
+            child = child.getNextSibling();
+        }
+        if (type == Node.ELEMENT_NODE) {
+            System.out.print("</");
+            System.out.print(node.getNodeName());
+            System.out.print('>');
+        }
+        else if (type == Node.DOCUMENT_NODE || type == Node.DOCUMENT_FRAGMENT_NODE) {
+            System.out.println();
+        }
+        System.out.flush();
+    }
+
+    public static void main(String[] argv) throws Exception {
+        DOMFragmentParser parser = new DOMFragmentParser();
+        HTMLDocument document = new org.apache.html.dom.HTMLDocumentImpl();
+        for (int i = 0; i < argv.length; i++) {
+            String sysid = argv[i];
+            System.err.println("# "+sysid);
+            DocumentFragment fragment = document.createDocumentFragment();
+            parser.parse(sysid, fragment);
+            print(fragment);
+        }
+    }
+    /***/
+
+} // class DOMFragmentParser

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/parsers/DOMParser.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/parsers/DOMParser.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/parsers/DOMParser.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,111 @@
+/* 
+ * (C) Copyright 2002-2005, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ */
+
+package org.cyberneko.html.parsers;
+
+import org.cyberneko.html.HTMLConfiguration;
+
+import org.apache.xerces.xni.Augmentations;
+import org.apache.xerces.xni.XNIException;
+
+import org.w3c.dom.DOMException;
+
+/**
+ * A DOM parser for HTML documents.
+ *
+ * @author Andy Clark
+ *
+ * @version $Id: DOMParser.java,v 1.5 2005/02/14 03:56:54 andyc Exp $
+ */
+public class DOMParser
+    /***/
+    extends org.apache.xerces.parsers.DOMParser {
+    /***
+    // NOTE: It would be better to extend from AbstractDOMParser but
+    //       most users will find it easier if the API is just like the
+    //       Xerces DOM parser. By extending directly from DOMParser,
+    //       users can register SAX error handlers, entity resolvers,
+    //       and the like. -Ac
+    extends org.apache.xerces.parsers.AbstractDOMParser {
+    /***/
+
+    //
+    // Constructors
+    //
+
+    /** Default constructor. */
+    public DOMParser() {
+        super(new HTMLConfiguration());
+        /*** extending DOMParser ***/
+        try {
+            setProperty("http://apache.org/xml/properties/dom/document-class-name",
+                                       "org.apache.html.dom.HTMLDocumentImpl");
+        }
+        catch (org.xml.sax.SAXNotRecognizedException e) {
+            throw new RuntimeException("http://apache.org/xml/properties/dom/document-class-name property not recognized");
+        }
+        catch (org.xml.sax.SAXNotSupportedException e) {
+            throw new RuntimeException("http://apache.org/xml/properties/dom/document-class-name property not supported");
+        }
+        /*** extending AbstractDOMParser ***
+        fConfiguration.setProperty("http://apache.org/xml/properties/dom/document-class-name",
+                                   "org.apache.html.dom.HTMLDocumentImpl");
+        /***/
+    } // <init>()
+
+    //
+    // XMLDocumentHandler methods
+    //
+
+    /** Doctype declaration: forwarded to the superclass only when the Xerces version check passes. */
+    public void doctypeDecl(String root, String pubid, String sysid,
+                            Augmentations augs) throws XNIException {
+
+        // NOTE: Xerces HTML DOM implementation (up to and including
+        //       2.5.0) throws a hierarchy request error exception
+        //       when a doctype node is appended to the tree. So,
+        //       don't insert this node into the tree for those
+        //       versions... -Ac
+
+        final String version = org.apache.xerces.impl.Version.fVersion;
+        final boolean okay;
+        if (version.startsWith("Xerces-J 2.")) {
+            okay = getParserSubVersion() > 5;
+        }
+        // REVISIT: As soon as XML4J is updated with the latest code
+        //          from Xerces, then this needs to be updated to
+        //          check XML4J's version. -Ac
+        else {
+            okay = !version.startsWith("XML4J");
+        }
+
+        // if okay, insert doctype; otherwise, don't risk it
+        if (okay) {
+            super.doctypeDecl(root, pubid, sysid, augs);
+        }
+
+    } // doctypeDecl(String,String,String,Augmentations)
+
+    //
+    // Private static methods
+    //
+
+    /** Returns the number after the first '.' of the Xerces version string, or -1 if it cannot be parsed. */
+    private static int getParserSubVersion() {
+        try {
+            String version = org.apache.xerces.impl.Version.fVersion;
+            int start = version.indexOf('.') + 1;
+            int end = version.indexOf('.', start);
+            String minor = end == -1 ? version.substring(start) : version.substring(start, end);
+            return Integer.parseInt(minor);
+        }
+        catch (Exception e) {
+            return -1;
+        }
+    } // getParserSubVersion():int
+
+} // class DOMParser

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/parsers/SAXParser.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/parsers/SAXParser.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/parsers/SAXParser.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,32 @@
+/* 
+ * (C) Copyright 2002-2005, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ */
+
+package org.cyberneko.html.parsers;
+
+import org.apache.xerces.parsers.AbstractSAXParser;
+import org.cyberneko.html.HTMLConfiguration;
+
+/**
+ * A SAX parser for HTML documents.
+ * It extends the Xerces AbstractSAXParser and passes it an HTMLConfiguration.
+ * @author Andy Clark
+ *
+ * @version $Id: SAXParser.java,v 1.4 2005/02/14 03:56:54 andyc Exp $
+ */
+public class SAXParser
+    extends AbstractSAXParser {
+
+    //
+    // Constructors
+    //
+
+    /** Default constructor: hands a new HTMLConfiguration to the superclass constructor. */
+    public SAXParser() {
+        super(new HTMLConfiguration());
+    } // <init>()
+
+} // class SAXParser

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/res/ErrorMessages.properties
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/res/ErrorMessages.properties	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/res/ErrorMessages.properties	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,39 @@
+##
+# NekoHTML error messages.
+#
+# @author Andy Clark
+#
+# @version $Id: ErrorMessages.properties,v 1.4 2004/11/01 00:28:35 andyc Exp $
+
+# internal messages
+HTML0000=General internal error.
+
+# scanner messages
+HTML1000=No character encoding indicator at beginning of document.
+HTML1001=No Java character encoding mapping for IANA character encoding "{0}".
+HTML1002=Unsupported syntax starting with "<!". Skipping to '>'.
+HTML1003=Bare markup character '<' found.
+HTML1004=Bare ampersand found.
+HTML1005=Invalid character entity "{0}".
+HTML1006=Unknown general entity "{0}".
+HTML1007=Premature end of file encountered.
+HTML1008=Skipping processing instruction.
+HTML1009=Missing start element name.
+HTML1010=Unsupported character encoding "{0}". Ignoring charset directive.
+HTML1011=Missing attribute name.
+HTML1012=Missing end element name.
+HTML1013=Missing whitespace before attribute "{0}".
+HTML1014=Missing root element name in DOCTYPE.
+
+# tag balancer messages
+HTML2000=Empty document.
+HTML2001=Element <{0}> not closed properly.
+HTML2002=Missing parent chain. Inserting proper parent <{1}> for element <{0}>.
+HTML2004=Inserting proper parent element <{1}> for element <{0}>.
+HTML2005=Start element <{0}> automatically closes element <{1}>.
+HTML2006=Bare character content found. Inserting parent element <{0}>.
+HTML2007=End element <{0}> automatically closes element <{1}>.
+HTML2008=Re-opening unbalanced inline element <{0}>.
+HTML2009=Character content found within element <{0}>. Inserting proper parent element <{1}>.
+HTML2010=DOCTYPE declaration found inside document content.
+HTML2011=Multiple DOCTYPE declaration.

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/res/HTMLlat1.properties
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/res/HTMLlat1.properties	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/res/HTMLlat1.properties	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,101 @@
+##
+# This file was generated from the HTMLlat1.ent file.
+#
+# @version $Id: HTMLlat1.properties,v 1.1 2004/07/28 09:11:52 andyc Exp $
+
+nbsp=\u00a0
+iexcl=\u00a1
+cent=\u00a2
+pound=\u00a3
+curren=\u00a4
+yen=\u00a5
+brvbar=\u00a6
+sect=\u00a7
+uml=\u00a8
+copy=\u00a9
+ordf=\u00aa
+laquo=\u00ab
+not=\u00ac
+shy=\u00ad
+reg=\u00ae
+macr=\u00af
+deg=\u00b0
+plusmn=\u00b1
+sup2=\u00b2
+sup3=\u00b3
+acute=\u00b4
+micro=\u00b5
+para=\u00b6
+middot=\u00b7
+cedil=\u00b8
+sup1=\u00b9
+ordm=\u00ba
+raquo=\u00bb
+frac14=\u00bc
+frac12=\u00bd
+frac34=\u00be
+iquest=\u00bf
+Agrave=\u00c0
+Aacute=\u00c1
+Acirc=\u00c2
+Atilde=\u00c3
+Auml=\u00c4
+Aring=\u00c5
+AElig=\u00c6
+Ccedil=\u00c7
+Egrave=\u00c8
+Eacute=\u00c9
+Ecirc=\u00ca
+Euml=\u00cb
+Igrave=\u00cc
+Iacute=\u00cd
+Icirc=\u00ce
+Iuml=\u00cf
+ETH=\u00d0
+Ntilde=\u00d1
+Ograve=\u00d2
+Oacute=\u00d3
+Ocirc=\u00d4
+Otilde=\u00d5
+Ouml=\u00d6
+times=\u00d7
+Oslash=\u00d8
+Ugrave=\u00d9
+Uacute=\u00da
+Ucirc=\u00db
+Uuml=\u00dc
+Yacute=\u00dd
+THORN=\u00de
+szlig=\u00df
+agrave=\u00e0
+aacute=\u00e1
+acirc=\u00e2
+atilde=\u00e3
+auml=\u00e4
+aring=\u00e5
+aelig=\u00e6
+ccedil=\u00e7
+egrave=\u00e8
+eacute=\u00e9
+ecirc=\u00ea
+euml=\u00eb
+igrave=\u00ec
+iacute=\u00ed
+icirc=\u00ee
+iuml=\u00ef
+eth=\u00f0
+ntilde=\u00f1
+ograve=\u00f2
+oacute=\u00f3
+ocirc=\u00f4
+otilde=\u00f5
+ouml=\u00f6
+divide=\u00f7
+oslash=\u00f8
+ugrave=\u00f9
+uacute=\u00fa
+ucirc=\u00fb
+uuml=\u00fc
+yacute=\u00fd
+thorn=\u00fe
+yuml=\u00ff

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/res/HTMLspecial.properties
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/res/HTMLspecial.properties	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/res/HTMLspecial.properties	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,37 @@
+##
+# This file was generated from the HTMLspecial.ent file.
+#
+# @version $Id: HTMLspecial.properties,v 1.1 2004/07/28 09:11:52 andyc Exp $
+
+quot=\u0022
+amp=\u0026
+lt=\u003c
+gt=\u003e
+OElig=\u0152
+oelig=\u0153
+Scaron=\u0160
+scaron=\u0161
+Yuml=\u0178
+circ=\u02c6
+tilde=\u02dc
+ensp=\u2002
+emsp=\u2003
+thinsp=\u2009
+zwnj=\u200c
+zwj=\u200d
+lrm=\u200e
+rlm=\u200f
+ndash=\u2013
+mdash=\u2014
+lsquo=\u2018
+rsquo=\u2019
+sbquo=\u201a
+ldquo=\u201c
+rdquo=\u201d
+bdquo=\u201e
+dagger=\u2020
+Dagger=\u2021
+permil=\u2030
+lsaquo=\u2039
+rsaquo=\u203a
+euro=\u20ac

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/res/HTMLsymbol.properties
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/res/HTMLsymbol.properties	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/res/HTMLsymbol.properties	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,129 @@
+##
+# This file was generated from the HTMLsymbol.ent file.
+#
+# @version $Id: HTMLsymbol.properties,v 1.1 2004/07/28 09:11:52 andyc Exp $
+
+fnof=\u0192
+Alpha=\u0391
+Beta=\u0392
+Gamma=\u0393
+Delta=\u0394
+Epsilon=\u0395
+Zeta=\u0396
+Eta=\u0397
+Theta=\u0398
+Iota=\u0399
+Kappa=\u039a
+Lambda=\u039b
+Mu=\u039c
+Nu=\u039d
+Xi=\u039e
+Omicron=\u039f
+Pi=\u03a0
+Rho=\u03a1
+Sigma=\u03a3
+Tau=\u03a4
+Upsilon=\u03a5
+Phi=\u03a6
+Chi=\u03a7
+Psi=\u03a8
+Omega=\u03a9
+alpha=\u03b1
+beta=\u03b2
+gamma=\u03b3
+delta=\u03b4
+epsilon=\u03b5
+zeta=\u03b6
+eta=\u03b7
+theta=\u03b8
+iota=\u03b9
+kappa=\u03ba
+lambda=\u03bb
+mu=\u03bc
+nu=\u03bd
+xi=\u03be
+omicron=\u03bf
+pi=\u03c0
+rho=\u03c1
+sigmaf=\u03c2
+sigma=\u03c3
+tau=\u03c4
+upsilon=\u03c5
+phi=\u03c6
+chi=\u03c7
+psi=\u03c8
+omega=\u03c9
+thetasym=\u03d1
+upsih=\u03d2
+piv=\u03d6
+bull=\u2022
+hellip=\u2026
+prime=\u2032
+Prime=\u2033
+oline=\u203e
+frasl=\u2044
+weierp=\u2118
+image=\u2111
+real=\u211c
+trade=\u2122
+alefsym=\u2135
+larr=\u2190
+uarr=\u2191
+rarr=\u2192
+darr=\u2193
+harr=\u2194
+crarr=\u21b5
+lArr=\u21d0
+uArr=\u21d1
+rArr=\u21d2
+dArr=\u21d3
+hArr=\u21d4
+forall=\u2200
+part=\u2202
+exist=\u2203
+empty=\u2205
+nabla=\u2207
+isin=\u2208
+notin=\u2209
+ni=\u220b
+prod=\u220f
+sum=\u2211
+minus=\u2212
+lowast=\u2217
+radic=\u221a
+prop=\u221d
+infin=\u221e
+ang=\u2220
+and=\u2227
+or=\u2228
+cap=\u2229
+cup=\u222a
+int=\u222b
+there4=\u2234
+sim=\u223c
+cong=\u2245
+asymp=\u2248
+ne=\u2260
+equiv=\u2261
+le=\u2264
+ge=\u2265
+sub=\u2282
+sup=\u2283
+nsub=\u2284
+sube=\u2286
+supe=\u2287
+oplus=\u2295
+otimes=\u2297
+perp=\u22a5
+sdot=\u22c5
+lceil=\u2308
+rceil=\u2309
+lfloor=\u230a
+rfloor=\u230b
+lang=\u2329
+rang=\u232a
+loz=\u25ca
+spades=\u2660
+clubs=\u2663
+hearts=\u2665
+diams=\u2666

Added: branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/res/XMLbuiltin.properties
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/res/XMLbuiltin.properties	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/org/cyberneko/html/res/XMLbuiltin.properties	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,7 @@
+##
+# This file exists because the list of HTML entities does not include
+# apostrophe ("apos") which should be recognized, IMHO. -Ac
+#
+# @version $Id: XMLbuiltin.properties,v 1.1 2004/07/28 09:11:52 andyc Exp $
+
+apos='
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/src/html/sample/HTMLSAXParser.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/sample/HTMLSAXParser.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/sample/HTMLSAXParser.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,33 @@
+/* 
+ * (C) Copyright 2002-2004, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ */
+
+package sample;
+
+import org.apache.xerces.parsers.AbstractSAXParser;
+import org.cyberneko.html.HTMLConfiguration;
+
+/**
+ * This sample shows how to extend a Xerces2 parser class, replacing
+ * the default parser configuration with the NekoHTML configuration.
+ *
+ * @author Andy Clark
+ *
+ * @version $Id: HTMLSAXParser.java,v 1.3 2004/02/19 20:00:17 andyc Exp $
+ */
+public class HTMLSAXParser 
+    extends AbstractSAXParser {
+
+    //
+    // Constructors
+    //
+
+    /** Creates a SAX parser whose parser configuration is a new <code>HTMLConfiguration</code>. */
+    public HTMLSAXParser() {
+        super(new HTMLConfiguration());
+    } // <init>()
+
+} // class HTMLSAXParser

Added: branches/nekohtml/upstream/0.9.5/src/html/sample/RemoveElements.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/sample/RemoveElements.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/sample/RemoveElements.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,69 @@
+/* 
+ * (C) Copyright 2002-2004, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ */
+
+package sample;
+
+import org.cyberneko.html.HTMLConfiguration;
+import org.cyberneko.html.filters.ElementRemover;
+
+import org.apache.xerces.xni.parser.XMLDocumentFilter;
+import org.apache.xerces.xni.parser.XMLInputSource;
+import org.apache.xerces.xni.parser.XMLParserConfiguration;
+
+/**
+ * This is a sample that illustrates how to use the 
+ * <code>ElementRemover</code> filter.
+ *
+ * @author Andy Clark
+ *
+ * @version $Id: RemoveElements.java,v 1.3 2004/02/19 20:00:17 andyc Exp $
+ */
+public class RemoveElements {
+
+    //
+    // MAIN
+    //
+
+    /** Parses every command-line argument through an ElementRemover filter followed by a Writer filter. */
+    public static void main(String[] argv) throws Exception {
+
+        // create element remover filter
+        ElementRemover remover = new ElementRemover();
+
+        // accept b, i, u and a; second argument lists attributes (NOTE(review): null presumably means none -- confirm)
+        remover.acceptElement("b", null);
+        remover.acceptElement("i", null);
+        remover.acceptElement("u", null);
+        remover.acceptElement("a", new String[] { "href" });
+
+        // completely remove script elements
+        remover.removeElement("script");
+
+        // create writer filter (fully qualified name; the class is not imported in this file)
+        org.cyberneko.html.filters.Writer writer =
+            new org.cyberneko.html.filters.Writer();
+
+        // setup filter chain: the remover comes first, then the writer
+        XMLDocumentFilter[] filters = {
+            remover,
+            writer,
+        };
+
+        // create HTML parser and register the filter chain on it
+        XMLParserConfiguration parser = new HTMLConfiguration();
+        parser.setProperty("http://cyberneko.org/html/properties/filters", filters);
+
+        // parse documents: each argument is used as the system identifier of an input source
+        for (int i = 0; i < argv.length; i++) {
+            String systemId = argv[i];
+            XMLInputSource source = new XMLInputSource(null, systemId, null);
+            parser.parse(source);
+        }
+
+    } // main(String[])
+
+} // class RemoveElements
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/src/html/sample/Script.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/sample/Script.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/sample/Script.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,227 @@
+/* 
+ * (C) Copyright 2002-2004, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ */
+
+package sample;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.StringReader;
+import java.io.StringWriter;
+
+import org.cyberneko.html.HTMLConfiguration;
+import org.cyberneko.html.filters.DefaultFilter;
+import org.cyberneko.html.filters.Identity;
+import org.cyberneko.html.filters.Writer;
+
+import org.apache.xerces.util.XMLAttributesImpl;
+import org.apache.xerces.xni.Augmentations;
+import org.apache.xerces.xni.QName;
+import org.apache.xerces.xni.XMLAttributes;
+import org.apache.xerces.xni.XMLLocator;
+import org.apache.xerces.xni.XMLString;
+import org.apache.xerces.xni.XNIException;
+import org.apache.xerces.xni.parser.XMLDocumentFilter;
+import org.apache.xerces.xni.parser.XMLInputSource;
+
+/**
+ * This sample demonstrates how to use the <code>pushInputSource</code>
+ * method of the HTMLConfiguration in order to dynamically insert content
+ * into the HTML stream. The typical use for this functionality is to
+ * insert the result of an embedded script into the HTML document in place
+ * of the script.
+ * <p>
+ * This particular example defines a new script language called "NekoHTML"
+ * script that is a tiny subset of the NSGMLS format. The following table 
+ * enumerates the NSGMLS features supported by this script language:
+ * <table border='1' cellspacing='0' cellpadding='3'>
+ * <tr><th>(<i>name</i><td>A start element with the specified <i>name</i>.
+ * <tr><th>"<i>text</i><td>Character content with the specified <i>text</i>.
+ * <tr><th>)<i>name</i><td>An end element with the specified <i>name</i>.
+ * </table>
+ * <p>
+ * In this format, every <i>command</i> is specified on a line by itself.
+ * For example, the following document:
+ * <pre>
+ * &lt;script type='text/x-nekoscript'&gt;
+ * (h1
+ * "Header
+ * )h1
+ * &lt;/script&gt;
+ * </pre>
+ * is equivalent to the following HTML document:
+ * <pre>
+ * &lt;H1&gt;Header&lt;/H1&gt;
+ * </pre>
+ * as seen by document handler registered with the parser, when processed 
+ * by this filter.
+ *
+ * @author Andy Clark
+ *
+ * @version $Id: Script.java,v 1.3 2004/02/19 20:00:17 andyc Exp $
+ */
+public class Script
+    extends DefaultFilter {
+
+    //
+    // Constants
+    //
+
+    /** Augmentations feature identifier. */
+    protected static final String AUGMENTATIONS = "http://cyberneko.org/html/features/augmentations";
+
+    /** Filters property identifier. */
+    protected static final String FILTERS = "http://cyberneko.org/html/properties/filters";
+
+    /** Script type ("text/x-nekoscript"). */
+    protected static final String SCRIPT_TYPE = "text/x-nekoscript";
+
+    //
+    // Data
+    //
+
+    /** The NekoHTML configuration. */
+    protected HTMLConfiguration fConfiguration;
+
+    /** A string buffer to collect the "script". */
+    protected StringBuffer fBuffer;
+    
+    /** The system identifier of the source document. */
+    protected String fSystemId;
+
+    /** The script count. */
+    protected int fScriptCount;
+
+    //
+    // Constructors
+    //
+
+    /** Constructs a script filter that keeps the specified configuration for pushing generated input sources. */
+    public Script(HTMLConfiguration config) {
+        fConfiguration = config;
+    } // <init>(HTMLConfiguration)
+
+    //
+    // XMLDocumentHandler methods
+    //
+
+    /** Resets per-document state (script buffer, system identifier, script count), then forwards the event. */
+    public void startDocument(XMLLocator locator, String encoding, Augmentations augs) 
+        throws XNIException {
+        fBuffer = null;
+        fSystemId = locator != null ? locator.getLiteralSystemId() : null;
+        fScriptCount = 0;
+        super.startDocument(locator, encoding, augs);
+    } // startDocument(XMLLocator,String,Augmentations)
+
+    /** Begins buffering when a script element of the NekoHTML script type opens; forwards any other start tag. */
+    public void startElement(QName element, XMLAttributes attrs, Augmentations augs)
+        throws XNIException {
+        final boolean scriptTag = element.rawname.equalsIgnoreCase("script") && attrs != null;
+        final String type = scriptTag ? attrs.getValue("type") : null;
+        if (type == null || !type.equalsIgnoreCase(SCRIPT_TYPE)) {
+            super.startElement(element, attrs, augs);
+            return;
+        }
+        // swallow the start tag and begin collecting the script text
+        fBuffer = new StringBuffer();
+    } // startElement(QName,XMLAttributes,Augmentations)
+
+    /** Drops an empty script element of the NekoHTML script type; forwards any other empty element. */
+    public void emptyElement(QName element, XMLAttributes attrs, Augmentations augs)
+        throws XNIException {
+        final boolean scriptTag = element.rawname.equalsIgnoreCase("script") && attrs != null;
+        final String type = scriptTag ? attrs.getValue("type") : null;
+        final boolean nekoScript = type != null && type.equalsIgnoreCase(SCRIPT_TYPE);
+        if (!nekoScript) {
+            // not ours: pass the event down the filter chain unchanged
+            super.emptyElement(element, attrs, augs);
+        }
+    } // emptyElement(QName,XMLAttributes,Augmentations)
+
+    /** Appends text to the script buffer while a script is being collected; otherwise forwards it. */
+    public void characters(XMLString text, Augmentations augs)
+        throws XNIException {
+        if (fBuffer == null) {
+            // no script being collected: ordinary character content
+            super.characters(text, augs);
+            return;
+        }
+        fBuffer.append(text.ch, text.offset, text.length);
+    } // characters(XMLString,Augmentations)
+
+    /** Runs the collected script, if any, and pushes its HTML output as a new input source; otherwise forwards the event. */
+    public void endElement(QName element, Augmentations augs) throws XNIException {
+        if (fBuffer != null) {
+            try {
+                // run "script" and generate HTML output, one command per line
+                BufferedReader in = new BufferedReader(new StringReader(fBuffer.toString()));
+                StringWriter sout = new StringWriter();
+                PrintWriter out = new PrintWriter(sout);
+                String line;
+                while ((line = in.readLine()) != null) {
+                    line = line.trim(); // strings are immutable: keep the trimmed copy so indented commands work
+                    if (line.length() == 0) {
+                        continue;
+                    }
+                    switch (line.charAt(0)) {
+                        case '(': {
+                            out.print('<');
+                            out.print(line.substring(1));
+                            out.print('>');
+                            break;
+                        }
+                        case '"': {
+                            out.print(line.substring(1));
+                            break;
+                        }
+                        case ')': {
+                            out.print("</");
+                            out.print(line.substring(1));
+                            out.print('>');
+                            break;
+                        }
+                    }
+                }
+
+                // push new input source, named "<document system id>_script<count>"
+                String systemId = fSystemId != null ? fSystemId+'_' : "";
+                fScriptCount++;
+                systemId += "script"+fScriptCount;
+                XMLInputSource source = new XMLInputSource(null, systemId, null,
+                                                           new StringReader(sout.toString()),
+                                                           "UTF-8");
+                fConfiguration.pushInputSource(source);
+            }
+            catch (IOException e) {
+                // not expected: the reader is backed by an in-memory string
+            }
+            finally {
+                fBuffer = null;
+            }
+        }
+        else {
+            super.endElement(element, augs);
+        }
+    } // endElement(QName,Augmentations)
+
+    //
+    // MAIN
+    //
+
+    /** Parses each command-line argument through a Script, Identity and Writer filter chain. */
+    public static void main(String[] argv) throws Exception {
+        HTMLConfiguration parser = new HTMLConfiguration();
+        parser.setFeature(AUGMENTATIONS, true);
+        XMLDocumentFilter[] filters = { new Script(parser), new Identity(), new Writer() };
+        parser.setProperty(FILTERS, filters);
+        for (int i = 0; i < argv.length; i++) {
+            parser.parse(new XMLInputSource(null, argv[i], null));
+        }
+    } // main(String[])
+
+} // class Script

Added: branches/nekohtml/upstream/0.9.5/src/html/sample/TestHTMLDOM.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/sample/TestHTMLDOM.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/sample/TestHTMLDOM.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,52 @@
+/* 
+ * (C) Copyright 2002-2004, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ */
+
+package sample;
+
+import org.cyberneko.html.parsers.DOMParser;
+import org.w3c.dom.Document;
+import org.w3c.dom.Node;
+
+/**
+ * This program tests the NekoHTML parser's use of the HTML DOM
+ * implementation by printing the class names of all the nodes in
+ * the parsed document.
+ *
+ * @author Andy Clark
+ *
+ * @version $Id: TestHTMLDOM.java,v 1.3 2004/02/19 20:00:17 andyc Exp $
+ */
+public class TestHTMLDOM {
+
+    //
+    // MAIN
+    //
+
+    /** Parses each document named on the command line and prints its tree of node class names. */
+    public static void main(String[] argv) throws Exception {
+        final DOMParser domParser = new DOMParser();
+        int index = 0;
+        while (index < argv.length) {
+            domParser.parse(argv[index++]);
+            print(domParser.getDocument(), "");
+        }
+    } // main(String[])
+
+    //
+    // Public static methods
+    //
+
+    /** Prints the node's class name after the indent, then each child with one more space of indent. */
+    public static void print(Node node, String indent) {
+        System.out.println(indent+node.getClass().getName());
+        final String deeper = indent+" ";
+        for (Node kid = node.getFirstChild(); kid != null; kid = kid.getNextSibling()) {
+            print(kid, deeper);
+        }
+    } // print(Node,String)
+
+} // class TestHTMLDOM

Added: branches/nekohtml/upstream/0.9.5/src/html/sample/TestHTMLDOMFragment.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/sample/TestHTMLDOMFragment.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/sample/TestHTMLDOMFragment.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,57 @@
+/* 
+ * (C) Copyright 2002-2004, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ */
+
+package sample;
+
+import org.cyberneko.html.parsers.DOMFragmentParser;
+import org.apache.html.dom.HTMLDocumentImpl;
+import org.w3c.dom.Document;
+import org.w3c.dom.DocumentFragment;
+import org.w3c.dom.Node;
+import org.w3c.dom.html.HTMLDocument;
+
+/**
+ * This program tests the NekoHTML parser's use of the HTML DOM
+ * implementation to parse document fragments by printing the
+ * class names of all the nodes in the parsed document.
+ *
+ * @author Andy Clark
+ *
+ * @version $Id: TestHTMLDOMFragment.java,v 1.3 2004/02/19 20:00:17 andyc Exp $
+ */
+public class TestHTMLDOMFragment {
+
+    //
+    // MAIN
+    //
+
+    /** Parses each argument into a fresh document fragment and prints its tree of node class names. */
+    public static void main(String[] argv) throws Exception {
+        final DOMFragmentParser fragmentParser = new DOMFragmentParser();
+        final HTMLDocument owner = new HTMLDocumentImpl();
+        int index = 0;
+        while (index < argv.length) {
+            final DocumentFragment target = owner.createDocumentFragment();
+            fragmentParser.parse(argv[index++], target);
+            print(target, "");
+        }
+    } // main(String[])
+
+    //
+    // Public static methods
+    //
+
+    /** Prints the node's class name after the indent, then each child with one more space of indent. */
+    public static void print(Node node, String indent) {
+        System.out.println(indent+node.getClass().getName());
+        final String deeper = indent+" ";
+        for (Node kid = node.getFirstChild(); kid != null; kid = kid.getNextSibling()) {
+            print(kid, deeper);
+        }
+    } // print(Node,String)
+
+} // class TestHTMLDOMFragment
\ No newline at end of file

Added: branches/nekohtml/upstream/0.9.5/src/html/test/Tester.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/test/Tester.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/test/Tester.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,256 @@
+/* 
+ * (C) Copyright 2002-2005, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ */
+ 
+package test;
+
+import org.cyberneko.html.HTMLConfiguration;
+
+import java.io.*;
+import java.util.*;
+
+import org.apache.tools.ant.BuildException;
+import org.apache.tools.ant.DirectoryScanner;
+import org.apache.tools.ant.Project;
+import org.apache.tools.ant.Task;
+import org.apache.tools.ant.types.FileSet;
+
+import org.apache.xerces.xni.parser.XMLDocumentFilter;
+import org.apache.xerces.xni.parser.XMLInputSource;
+import org.apache.xerces.xni.parser.XMLParserConfiguration;
+
+/**
+ * A simple regression tester written as an Ant task. This task
+ * generates canonical output using the <code>Writer</code> class
+ * and compares it against the expected canonical output. Simple
+ * as that.
+ *
+ * @author Andy Clark
+ */
+public class Tester
+    extends Task {
+
+    //
+    // Data
+    //
+
+    /** Canonical test directory. */
+    protected String fCanonicalDir;
+
+    /** Output directory for generated files. */
+    protected String fOutputDir;
+
+    /** List of test filesets. */
+    protected Vector fFileSets = new Vector();
+
+    //
+    // Public methods
+    //
+
+    /** Sets the directory of canonical (expected) files; execute() assumes "." when this is never set. */
+    public void setCanonDir(String canondir) {
+        fCanonicalDir = canondir;
+    } // setCanonDir(String)
+
+    /** Sets the directory for generated output files; execute() assumes "." when this is never set. */
+    public void setOutputDir(String outdir) {
+        fOutputDir = outdir;
+    } // setOutputDir(String)
+
+    /** Adds a fileset of test input files; execute() fails unless at least one has been added. */
+    public void addFileSet(FileSet fileset) {
+        fFileSets.addElement(fileset);
+    } // addFileSet(FileSet)
+
+    //
+    // Task methods
+    //
+
+    /**
+     * Parses every input file into the output directory, then compares each result with its canonical file.
+     * @throws BuildException if no fileset was added, a file cannot be parsed or written, or any comparison fails
+     */
+    public void execute() throws BuildException {
+
+        // check params
+        String canonicaldir = fCanonicalDir;
+        if (canonicaldir == null) {
+            canonicaldir = ".";
+            log("Canonical directory not specified. Assuming current directory.", Project.MSG_WARN);
+        }
+        String outputdir = fOutputDir;
+        if (outputdir == null) {
+            outputdir = ".";
+            log("Output directory not specified. Assuming current directory.", Project.MSG_WARN);
+        }
+        if (fFileSets.size() == 0) {
+            throw new BuildException("must specify at least one fileset");
+        }
+
+        // parse input files and produce output files
+        log("Parsing test files and generating output...");
+        File outdir = new File(outputdir);
+        int size = fFileSets.size();
+        for (int i = 0; i < size; i++) {
+            FileSet fileset = (FileSet)fFileSets.elementAt(i);
+            DirectoryScanner dirscanner = fileset.getDirectoryScanner(project);
+            File indir = dirscanner.getBasedir();
+            String[] files = dirscanner.getIncludedFiles();
+            for (int j = 0; j < files.length; j++) {
+                File infile = new File(indir, files[j]);
+                File outfile = new File(outdir, files[j]);
+                log("  "+outfile, Project.MSG_VERBOSE);
+                OutputStream out = null;
+                try {
+                    // create filters
+                    out = new FileOutputStream(outfile);
+                    XMLDocumentFilter[] filters = { new Writer(out) };
+                    
+                    // create parser
+                    XMLParserConfiguration parser = new HTMLConfiguration();
+
+                    // parser settings: optional "<infile>.settings" lines are "type id value"; type "feature" sets a boolean
+                    parser.setProperty("http://cyberneko.org/html/properties/filters", filters);
+                    String infilename = infile.toString();
+                    File insettings = new File(infilename+".settings");
+                    if (insettings.exists()) {
+                        BufferedReader settings = new BufferedReader(new FileReader(insettings));
+                        String settingline;
+                        while ((settingline = settings.readLine()) != null) {
+                            StringTokenizer tokenizer = new StringTokenizer(settingline);
+                            String type = tokenizer.nextToken();
+                            String id = tokenizer.nextToken();
+                            String value = tokenizer.nextToken();
+                            if (type.equals("feature")) {
+                                parser.setFeature(id, value.equals("true"));
+                            }
+                            else {
+                                parser.setProperty(id, value);
+                            }
+                        }
+                        settings.close();
+                    }
+
+                    // parse
+                    parser.parse(new XMLInputSource(null, infilename, null));
+                }
+                catch (Exception e) {
+                    log("  error parsing input file, "+infile);
+                    throw new BuildException(e);
+                }
+                finally {
+                    try {
+                        if (out != null) { out.close(); } // null when the output file could not be opened
+                    }
+                    catch (Exception e) {
+                        log("  error closing output file, "+outfile);
+                        throw new BuildException(e);
+                    }
+                }
+            }
+        }
+
+        // compare against canonical output; every missing file, mismatch or I/O failure counts as one error
+        log("Comparing parsed output against canonical output...");
+        File canondir = new File(canonicaldir);
+        int errors = 0;
+        for (int i = 0; i < size; i++) {
+            FileSet fileset = (FileSet)fFileSets.elementAt(i);
+            DirectoryScanner dirscanner = fileset.getDirectoryScanner(project);
+            String[] files = dirscanner.getIncludedFiles();
+            for (int j = 0; j < files.length; j++) {
+                File canonfile = new File(canondir, files[j]);
+                if (!canonfile.exists()) {
+                    errors++;
+                    log("  canonical file missing, "+canonfile);
+                    continue;
+                }
+                File outfile = new File(outdir, files[j]);
+                if (!outfile.exists()) {
+                    errors++;
+                    log("  output file missing, "+outfile);
+                    continue;
+                }
+                log("  comparing "+canonfile+" and "+outfile, Project.MSG_VERBOSE);
+                try {
+                    if (compare(canonfile, outfile)) {
+                        errors++;
+                    }
+                }
+                catch (IOException e) {
+                    errors++;
+                    log("i/o error");  
+                }
+            }
+        }
+
+        // finished
+        if (errors > 0) {
+            log("Finished with errors.");
+            throw new BuildException();
+        }
+        log("Done.");
+
+    } // execute()
+
+    //
+    // Protected methods
+    //
+
+    /** Compares two files line by line as UTF-8 (skipping a BOM in f1); true if they differ. Readers are always closed. */
+    protected boolean compare(File f1, File f2) throws IOException {
+        BufferedReader i1 = new BufferedReader(new InputStreamReader(new UTF8BOMSkipper(new FileInputStream(f1)), "UTF8"));
+        BufferedReader i2 = null;
+        try {
+            i2 = new BufferedReader(new InputStreamReader(new FileInputStream(f2), "UTF8"));
+            String l1;
+            for (long n = 1; (l1 = i1.readLine()) != null; n++) {
+                String l2 = i2.readLine();
+                if (l2 == null) {
+                    log("  file lengths don't match ("+f1+")");
+                    return true;
+                }
+                if (compare(f1.getName(), n, l1, l2)) {
+                    return true;
+                }
+            }
+            if (i2.readLine() != null) {
+                log("  file lengths don't match ("+f1+")");
+                return true;
+            }
+            return false;
+        }
+        finally {
+            try { i1.close(); }
+            finally { if (i2 != null) { i2.close(); } }
+        }
+    } // compare(File,File):boolean
+
+    /** Compares line n of file f: s1 (logged as "in") against s2 (logged as "out"); logs and returns true on mismatch. */
+    protected boolean compare(String f, long n, String s1, String s2) {
+        final int firstLen = s1.length();
+        final int secondLen = s2.length();
+        final String where = "  "+f+':'+n;
+        String problem = null;
+        if (firstLen < secondLen) {
+            problem = " output string too long";
+        }
+        else if (firstLen > secondLen) {
+            problem = " output string too short";
+        }
+        else if (!s1.equals(s2)) {
+            problem = " strings don't match";
+        }
+        if (problem == null) {
+            return false;
+        }
+        log(where+problem);
+        log("    [in:  "+s1+']');
+        log("    [out: "+s2+']');
+        return true;
+    } // compare(String,long,String,String):boolean
+
+} // class Tester

Added: branches/nekohtml/upstream/0.9.5/src/html/test/UTF8BOMSkipper.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/test/UTF8BOMSkipper.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/test/UTF8BOMSkipper.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,111 @@
+/* 
+ * (C) Copyright 2005, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ */
+ 
+package test;
+
+import java.io.*;
+
+/**
+ * This class is an input stream filter that skips the first
+ * three bytes read if they match the UTF-8 byte order mark,
+ * 0xEFBBBF. The UTF-8 BOM is most often generated by Windows&reg;
+ * tools.
+ * 
+ * @author Andy Clark
+ */
+public class UTF8BOMSkipper 
+    extends FilterInputStream {
+
+    //
+    // Data
+    //
+
+    /** True until the first call to read(); triggers the one-time check of the first three bytes. */
+    private boolean fStart = true;
+
+    /** Index into fFirst3Bytes of the next buffered value to return. */
+    private int fOffset;
+
+    /** The first three values read when they are not the BOM; null when a BOM was skipped or once drained. */
+    private int[] fFirst3Bytes;
+
+    //
+    // Constructors
+    //
+
+    /** Constructs a UTF-8 BOM skipper that wraps the specified stream. */
+    public UTF8BOMSkipper(InputStream stream) {
+        super(stream);
+    } // <init>(InputStream)
+
+    //
+    // InputStream methods
+    //
+
+    /** Returns the next byte, dropping the three leading bytes when they are exactly 0xEF 0xBB 0xBF. */
+    public int read() throws IOException {
+
+        // read first three bytes in order to skip UTF-8 BOM, if present
+        if (fStart) {
+            fStart = false;
+            int b1 = super.read();
+            int b2 = super.read();
+            int b3 = super.read();
+            if (b1 != 0xEF || b2 != 0xBB || b3 != 0xBF) {
+                fFirst3Bytes = new int[3];
+                fFirst3Bytes[0] = b1;
+                fFirst3Bytes[1] = b2;
+                fFirst3Bytes[2] = b3;
+            }
+        }
+
+        // replay the buffered values in order (they may include -1 if the stream held fewer than three bytes)
+        if (fFirst3Bytes != null) {
+            int b = fFirst3Bytes[fOffset++];
+            if (fOffset == fFirst3Bytes.length) {
+                fFirst3Bytes = null;
+            }
+            return b;
+        }
+
+        // nothing buffered any more: read straight from the underlying stream
+        return super.read();
+
+    } // read():int
+
+    /** Reads up to length bytes into buffer; goes byte by byte while the leading bytes are still being handled. */
+    public int read(byte[] buffer, int offset, int length) throws IOException {
+
+        if (fStart || fFirst3Bytes != null) {
+            for (int i = 0; i < length; i++) {
+                int b = this.read();
+                if (b == -1) {
+                    return i > 0 ? i : -1;
+                }
+                buffer[offset + i] = (byte)b;
+            }
+            return length;
+        }
+
+        return super.read(buffer, offset, length);
+
+    } // read(byte[],int,int):int
+
+    /** Mark is not supported for this input stream. */
+    public boolean markSupported() {
+        return false;
+    } // markSupported():boolean
+
+    /** Returns the count of buffered leading values not yet returned, or else the underlying stream's value. */
+    public int available() throws IOException {
+        if (fFirst3Bytes != null) {
+            return fFirst3Bytes.length - fOffset;
+        }
+        return super.available();
+    } // available():int
+
+} // class UTF8BOMSkipper

Added: branches/nekohtml/upstream/0.9.5/src/html/test/Writer.java
===================================================================
--- branches/nekohtml/upstream/0.9.5/src/html/test/Writer.java	2006-08-18 12:36:39 UTC (rev 2344)
+++ branches/nekohtml/upstream/0.9.5/src/html/test/Writer.java	2006-08-22 16:00:27 UTC (rev 2345)
@@ -0,0 +1,310 @@
+/* 
+ * (C) Copyright 2002-2005, Andy Clark.  All rights reserved.
+ *
+ * This file is distributed under an Apache style license. Please
+ * refer to the LICENSE file for specific details.
+ */
+ 
+package test;
+
+import org.cyberneko.html.filters.DefaultFilter;
+
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.io.UnsupportedEncodingException;
+
+import org.apache.xerces.util.XMLStringBuffer;
+import org.apache.xerces.xni.Augmentations;
+import org.apache.xerces.xni.NamespaceContext;
+import org.apache.xerces.xni.QName;
+import org.apache.xerces.xni.XMLAttributes;
+import org.apache.xerces.xni.XMLLocator;
+import org.apache.xerces.xni.XMLString;
+import org.apache.xerces.xni.XNIException;
+
+/**
+ * This class implements a filter to output "canonical" files for regression
+ * testing: every event is written as one line led by a marker character.
+ *
+ * @author Andy Clark
+ */
+public class Writer
+    extends DefaultFilter {
+
+    //
+    // Data
+    //
+
+    /** Destination of the canonical output; every constructor assigns it. */
+    protected PrintWriter out;
+
+    // temp vars
+
+    /** String buffer for collecting text content until the next event prints it. */
+    private final XMLStringBuffer fStringBuffer = new XMLStringBuffer();
+
+    //
+    // Constructors
+    //
+
+    /**
+     * Creates a writer to the standard output stream using UTF-8
+     * encoding.
+     */
+    public Writer() {
+        this(System.out);
+    } // <init>()
+
+    /**
+     * Creates a writer with the specified output stream using UTF-8
+     * encoding.
+     */
+    public Writer(OutputStream stream) {
+        this(stream, "UTF8");
+    } // <init>(OutputStream)
+
+    /** Creates an auto-flushing writer on the stream with the given encoding; an unsupported encoding raises a RuntimeException. */
+    public Writer(OutputStream stream, String encoding) {
+        try {
+            out = new PrintWriter(new OutputStreamWriter(stream, encoding), true);
+        }
+        catch (UnsupportedEncodingException e) {
+            throw new RuntimeException("JVM must have "+encoding+" decoder", e); // keep the cause for diagnosis
+        }
+    } // <init>(OutputStream,String)
+
+    /** Creates a writer with the specified Java Writer. */
+    public Writer(java.io.Writer writer) {
+        out = new PrintWriter(writer);
+    } // <init>(java.io.Writer)
+
+    //
+    // XMLDocumentHandler methods
+    //
+
+    // since Xerces-J 2.2.0
+
+    /** Start document: clears the text buffer; nothing is written. */
+    public void startDocument(XMLLocator locator, String encoding,
+                              NamespaceContext nscontext, Augmentations augs) throws XNIException {
+        fStringBuffer.clear();
+    } // startDocument(XMLLocator,String,NamespaceContext,Augmentations)
+
+    // old methods
+
+    /** Start document (older signature): delegates to the newer method with a null namespace context. */
+    public void startDocument(XMLLocator locator, String encoding, Augmentations augs) throws XNIException {
+        startDocument(locator, encoding, null, augs);
+    } // startDocument(XMLLocator,String,Augmentations)
+
+    /** XML declaration: writes an "xversion", "xencoding" and "xstandalone" line for each non-null value. */
+    public void xmlDecl(String version, String encoding, String standalone,
+                        Augmentations augs) throws XNIException {
+        if (version!=null) {
+            out.print("xversion ");
+            out.println(version);
+        }
+        if (encoding!=null) {
+            out.print("xencoding ");
+            out.println(encoding);
+        }
+        if (standalone!=null) {
+            out.print("xstandalone ");
+            out.println(standalone);
+        }
+        out.flush();
+    } // xmlDecl(String,String,String,Augmentations)
+
+    /** Doctype declaration: writes "!" plus the root name, then optional "p" (public id) and "s" (system id) lines. */
+    public void doctypeDecl(String root, String pubid, String sysid, Augmentations augs) throws XNIException {
+        chars();
+        out.print('!');
+        if (root != null) {
+            out.print(root);
+        }
+        out.println();
+        if (pubid != null) {
+            out.print('p');
+            out.print(pubid);
+            out.println();
+        }
+        if (sysid != null) {
+            out.print('s');
+            out.print(sysid);
+            out.println();
+        }
+        out.flush();
+    } // doctypeDecl(String,String,String,Augmentations)
+
+    /** Processing instruction: writes "?" plus the target and, when data is present, a space and the escaped data. */
+    public void processingInstruction(String target, XMLString data, Augmentations augs) throws XNIException {
+        chars();
+        out.print('?');
+        out.print(target);
+        if (data != null && data.length > 0) {
+            out.print(' ');
+            print(data.toString());
+        }
+        out.println();
+        out.flush();
+    } // processingInstruction(String,XMLString,Augmentations)
+
+    /** Comment: writes "#" followed by the escaped comment text. */
+    public void comment(XMLString text, Augmentations augs) throws XNIException {
+        chars();
+        out.print('#');
+        print(text.toString());
+        out.println();
+        out.flush();
+    } // comment(XMLString,Augmentations)
+
+    /** Start element: writes "(" plus the raw name, then one "A" line per attribute in sorted name order. */
+    public void startElement(QName element, XMLAttributes attrs, Augmentations augs) throws XNIException {
+        chars();
+        out.print('(');
+        out.print(element.rawname);
+        int acount = attrs != null ? attrs.getLength() : 0;
+        if (acount > 0) {
+            String[] anames = new String[acount];
+            String[] auris = new String[acount];
+            sortAttrNames(attrs, anames, auris);
+            for (int i = 0; i < acount; i++) {
+                String aname = anames[i];
+                out.println();
+                out.flush();
+                out.print('A');
+                if (auris[i] != null) {
+                    out.print('{');
+                    out.print(auris[i]);
+                    out.print('}');
+                }
+                out.print(aname);
+                out.print(' ');
+                print(attrs.getValue(aname));
+            }
+        }
+        out.println();
+        out.flush();
+    } // startElement(QName,XMLAttributes,Augmentations)
+
+    /** End element: writes ")" followed by the raw element name. */
+    public void endElement(QName element, Augmentations augs) throws XNIException {
+        chars();
+        out.print(')');
+        out.print(element.rawname);
+        out.println();
+        out.flush();
+    } // endElement(QName,Augmentations)
+
+    /** Empty element: written as a start element immediately followed by its end element. */
+    public void emptyElement(QName element, XMLAttributes attrs, Augmentations augs) throws XNIException {
+        startElement(element, attrs, augs);
+        endElement(element, augs);
+    } // emptyElement(QName,XMLAttributes,Augmentations)
+
+    /** Characters: appended to the text buffer and printed later by <code>chars()</code>. */
+    public void characters(XMLString text, Augmentations augs) throws XNIException {
+        fStringBuffer.append(text);
+    } // characters(XMLString,Augmentations)
+
+    /** Ignorable whitespace: handled exactly like character data. */
+    public void ignorableWhitespace(XMLString text, Augmentations augs) throws XNIException {
+        characters(text, augs);
+    } // ignorableWhitespace(XMLString,Augmentations)
+
+    //
+    // Protected methods
+    //
+
+    /** Prints collected characters as one line starting with '"' and clears the buffer; does nothing when it is empty. */
+    protected void chars() {
+        if (fStringBuffer.length == 0) {
+            return;
+        }
+        out.print('"');
+        print(fStringBuffer.toString());
+        out.println();
+        out.flush();
+        fStringBuffer.clear();
+    } // chars()
+
+    /** Prints the specified string, escaping newline, carriage return, tab and backslash; a null string prints nothing. */
+    protected void print(String s) {
+        int length = s != null ? s.length() : 0;
+        for (int i = 0; i < length; i++) {
+            char c = s.charAt(i);
+            switch (c) {
+                case '\n': {
+                    out.print("\\n");
+                    break;
+                }
+                case '\r': {
+                    out.print("\\r");
+                    break;
+                }
+                case '\t': {
+                    out.print("\\t");
+                    break;
+                }
+                case '\\': {
+                    out.print("\\\\");
+                    break;
+                }
+                default: {
+                    out.print(c);
+                }
+            }
+        }
+    } // print(String)
+
+    //
+    // Protected static methods
+    //
+
+    /** Fills the arrays with the attributes' qualified names and URIs, sorted by ascending name (URIs move with their names). */
+    protected static void sortAttrNames(XMLAttributes attrs,
+                                        String[] anames, String[] auris) {
+        for (int i = 0; i < anames.length; i++) {
+            anames[i] = attrs.getQName(i);
+            auris[i] = attrs.getURI(i);
+        }
+        // NOTE: This is super inefficient but it doesn't really matter. -Ac
+        for (int i = 0; i < anames.length - 1; i++) {
+            int index = i;
+            for (int j = i + 1; j < anames.length; j++) {
+                if (anames[j].compareTo(anames[index]) < 0) {
+                    index = j;
+                }
+            }
+            if (index != i) {
+                String tn = anames[i];
+                anames[i] = anames[index];
+                anames[index] = tn;
+                String tu = auris[i];
+                auris[i] = auris[index];
+                auris[index] = tu;
+            }
+        }
+    } // sortAttrNames(XMLAttributes,String[],String[])
+
+    //
+    // MAIN
+    //
+
+    /** Main program: parses each command-line argument with an HTML configuration that has this writer as its filter. */
+    public static void main(String[] argv) throws Exception {
+        org.apache.xerces.xni.parser.XMLDocumentFilter[] filters = {
+            new Writer(),
+        };
+        org.apache.xerces.xni.parser.XMLParserConfiguration parser =
+            new org.cyberneko.html.HTMLConfiguration();
+        parser.setProperty("http://cyberneko.org/html/properties/filters", filters);
+        for (int i = 0; i < argv.length; i++) {
+            org.apache.xerces.xni.parser.XMLInputSource source =
+                new org.apache.xerces.xni.parser.XMLInputSource(null, argv[i], null);
+            parser.parse(source);
+        }
+    } // main(String[])
+
+} // class Writer




More information about the pkg-java-commits mailing list