[Git][java-team/nekohtml][upstream] Import Upstream version 1.9.22.noko2

Emmanuel Bourg (@ebourg) gitlab at salsa.debian.org
Fri Sep 29 01:23:05 BST 2023



Emmanuel Bourg pushed to branch upstream at Debian Java Maintainers / nekohtml


Commits:
0c2a35f8 by Emmanuel Bourg at 2023-09-29T02:20:53+02:00
Import Upstream version 1.9.22.noko2
- - - - -


9 changed files:

- build.xml
- data/meta/test-meta-encoding3.html.canonical
- pom.xml
- src/org/cyberneko/html/HTMLScanner.java
- src/org/cyberneko/html/HTMLTagBalancingListener.java
- src/org/cyberneko/html/LostText.java
- test/java/org/cyberneko/html/HTMLScannerTest.java
- test/java/org/cyberneko/html/HTMLTagBalancingListenerTest.java
- test/java/org/cyberneko/html/LocatorEncodingTest.java


Changes:

=====================================
build.xml
=====================================
@@ -4,7 +4,7 @@
  <!-- PROPERTIES -->
  <property file='build-custom.properties' />
  
- <property name='version'   value='1.9.22'/>
+ <property name='version'   value='1.9.22.noko2'/>
  <property name='name'      value='nekohtml'/>
  <property name='fullname'  value='${name}-${version}'/>
  <property name='Title'     value='NekoHTML'/>
@@ -14,8 +14,8 @@
  <property name='copyright' value='(C) Copyright 2002-2014, ${author}. All rights reserved.'/>
  <property name='URL'       value='http://nekohtml.sourceforge.net/index.html'/>
  
- <property name='compile.source' value='1.3' />
- <property name='compile.target' value='1.3' />
+ <property name='compile.source' value='1.7' />
+ <property name='compile.target' value='1.7' />
   
  <property name='src.dir'           value='src' />
  <property name='src.test.dir'      value='test/java' />
@@ -51,7 +51,7 @@
  <!-- DEPENDENCIES -->
  <available property='bcel.available' file='${lib.dir}/bcel-5.2.jar' />
 
- <property name="xerces.version" value="2.9.1"/>
+ <property name="xerces.version" value="2.12.2"/>
 
  <!-- TARGETS --> 
  <target name='all' depends='zip,tgz'/>
@@ -103,13 +103,7 @@
 		unless="compile.skip">
 	<mkdir dir="${build.classes.dir}"/>
 	
-	<compileWith xercesVersion="2.0.2" bridge="2_0"/>
-	<compileWith xercesVersion="2.1.0" bridge="2_1"/>
-	<compileWith xercesVersion="2.2.1" bridge="2_2"/>
-	<compileWith xercesVersion="2.3.0" bridge="2_3"/>
-	<compileWith xercesVersion="2.8.1" bridge="2_3"/>
-	<compileWith xercesVersion="2.9.1" bridge="2_3"/>
-	<compileWith xercesVersion="2.10.0" bridge="2_3"/>
+	<compileWith xercesVersion="${xerces.version}" bridge="2_3"/>
 
 	<copy todir="${build.classes.dir}">
 		<fileset dir='${src.dir}' includes='**' excludes='META-INF/**, **/*.java*' />
@@ -178,8 +172,11 @@
            author='true' version='true' use='true'
            windowtitle="${Name} Implementation"
            doctitle="${Name}" encoding="ISO-8859-1"
-           bottom="${copyright}" 
-  />
+           bottom="${copyright}">
+			<classpath>
+				<fileset dir='${lib.dir}/xerces-${xerces.version}' includes='*.jar' />
+			</classpath>
+  </javadoc>
  </target>
  
  <target name='doc-init'>
@@ -268,7 +265,8 @@ public class Version {
 	  		 source='${compile.source}' target='${compile.target}'
 	  		 includeAntRuntime='true'>
 	  	<classpath>
-	  		<fileset dir='${lib.dir}' includes='xml-apis.jar,xerces*.jar, junit*.jar' />
+				<fileset dir='${lib.dir}' includes='junit*.jar' />
+				<fileset dir='${lib.dir}/xerces-${xerces.version}' includes='xml-apis*.jar,xerces*.jar' />
 		    <pathelement location='${build.classes.dir}' />
 	  	</classpath>
 	    <src path='${src.test.dir}' />
@@ -308,17 +306,7 @@ public class Version {
 		<delete dir="${build.dir}/junit"/>
 		<mkdir dir="${build.dir}/junit"/>
 
-		<testWith xercesVersion="2.11.0"/>
-		<testWith xercesVersion="2.10.0"/>
-		<testWith xercesVersion="2.9.1"/>
-		<testWith xercesVersion="2.8.1"/>
-		<testWith xercesVersion="2.3.0"/>
-		<testWith xercesVersion="2.2.1"/>
-		<testWith xercesVersion="minimal"/>
-		<!--
-		<testWith xercesVersion="2.1.0"/>
-		<testWith xercesVersion="2.0.2"/>
-		-->
+		<testWith xercesVersion="${xerces.version}"/>
 
 		<junitreport todir="${build.dir}/junit">
 			<fileset dir="${build.dir}/junit">


=====================================
data/meta/test-meta-encoding3.html.canonical
=====================================
@@ -8,6 +8,6 @@ Ahttp-equiv Content-Type
 "\n
 )HEAD
 (BODY
-"\n宋体\n\n新\n\n細明體\n\n宋体\n\n浠茇忘�\n\n?\n\n
+"\n宋体\n\n新\n\n細明體\n\n宋体\n\n浠茇忘�\n\n?\n
 )BODY
 )HTML


=====================================
pom.xml
=====================================
@@ -4,7 +4,7 @@
   <artifactId>nekohtml</artifactId>
   <name>Neko HTML</name>
   <description>An HTML parser and tag balancer.</description>
-  <version>1.9.22</version>
+  <version>1.9.22.noko2</version>
   <url>http://nekohtml.sourceforge.net/</url>
   <licenses>
     <license>
@@ -20,7 +20,7 @@
     <dependency>
       <groupId>xerces</groupId>
       <artifactId>xercesImpl</artifactId>
-      <version>2.11.0</version>
+      <version>2.12.2</version>
     </dependency>
   </dependencies>
     <developers>


=====================================
src/org/cyberneko/html/HTMLScanner.java
=====================================
@@ -2588,7 +2588,7 @@ public class HTMLScanner
                     if (c == '?' || c == '/') {
                         char c0 = (char)c;
                         c = fCurrentEntity.read();
-                        if (c == '>') {
+                        if (c == '>' || c == -1) {
                             break;
                         }
                         fStringBuffer.append(c0);
@@ -2888,18 +2888,18 @@ public class HTMLScanner
             else if (c == '>') {
                 return false;
             }
-            else if (c == '<') {
-            	fCurrentEntity.rewind();
-            	return false;
+            else if(c == '<') {
+              fCurrentEntity.rewind();
+              if(fReportErrors) {
+                fErrorReporter.reportError("HTML2001", null);
+              }
+              return false;
             }
             fCurrentEntity.rewind();
             String aname = scanName(false);
-            if (aname == null) {
-                if (fReportErrors) {
-                    fErrorReporter.reportError("HTML1011", null);
-                }
-                empty[0] = skipMarkup(false);
-                return false;
+            if(aname == null) {
+              empty[0] = skipMarkup(false);
+              return false;
             }
             if (!skippedSpaces && fReportErrors) {
                 fErrorReporter.reportError("HTML1013", new Object[] { aname });


=====================================
src/org/cyberneko/html/HTMLTagBalancingListener.java
=====================================
@@ -28,7 +28,7 @@ import org.apache.xerces.xni.XMLAttributes;
  * <li>activate the tag balancing feature</li>
  * </ul>
  * @author Marc Guillemot
- * @version $Id: HTMLTagBalancingListener.java 260 2009-09-02 08:26:01Z mguillem $
+ * @version $Id$
  */
 public interface HTMLTagBalancingListener 
 {


=====================================
src/org/cyberneko/html/LostText.java
=====================================
@@ -28,7 +28,7 @@ import org.apache.xerces.xni.XMLString;
  * in <body>
  * @author Marc Guillemot
  *
- * @version $Id: LostText.java 226 2009-02-09 20:48:44Z mguillem $
+ * @version $Id$
  */
 class LostText
 {


=====================================
test/java/org/cyberneko/html/HTMLScannerTest.java
=====================================
@@ -16,6 +16,7 @@ import org.apache.xerces.xni.XMLAttributes;
 import org.apache.xerces.xni.XNIException;
 import org.apache.xerces.xni.parser.XMLDocumentFilter;
 import org.apache.xerces.xni.parser.XMLInputSource;
+import org.apache.xerces.xni.parser.XMLParseException;
 import org.apache.xerces.xni.parser.XMLParserConfiguration;
 import org.cyberneko.html.filters.DefaultFilter;
 
@@ -40,6 +41,27 @@ public class HTMLScannerTest extends TestCase {
 		assertFalse(scanner.isEncodingCompatible("UTF-16","Cp1252"));
 	}
 
+  public void testInvalidInput() throws Exception {
+    HTMLConfiguration parser = new HTMLConfiguration();
+    parser.setFeature("http://cyberneko.org/html/features/report-errors", true);
+    final ArrayList errors = new ArrayList();
+    parser.setProperty("http://cyberneko.org/html/properties/error-reporter", new HTMLErrorReporter() {
+      public String formatMessage(String key, Object[] args) {
+          return null;
+      }
+
+      public void reportWarning(String key, Object[] args) throws XMLParseException {
+      }
+
+      public void reportError(String key, Object[] args) throws XMLParseException {
+          errors.add(key);
+      }
+    });
+    XMLInputSource source = new XMLInputSource(null, "myTest", null, new StringReader("<div </div>"), "UTF-8");
+    parser.parse(source);
+    assertEquals("Expected to receive an error", 1, errors.size());
+  }
+
 	public void testEvaluateInputSource() throws Exception {
 	    String string = "<html><head><title>foo</title></head>"
 	        + "<body>"


=====================================
test/java/org/cyberneko/html/HTMLTagBalancingListenerTest.java
=====================================
@@ -17,7 +17,7 @@ import org.apache.xerces.xni.parser.XMLInputSource;
 /**
  * Unit tests for {@link HTMLTagBalancingListener}.
  * @author Marc Guillemot
- * @version $Id: HTMLTagBalancingListenerTest.java 320 2013-01-29 07:46:36Z mguillem $
+ * @version $Id$
  */
 public class HTMLTagBalancingListenerTest extends TestCase {
 


=====================================
test/java/org/cyberneko/html/LocatorEncodingTest.java
=====================================
@@ -18,7 +18,7 @@ import org.xml.sax.ext.Locator2;
 /**
  * Regression test for <a href="http://sourceforge.net/tracker/?func=detail&atid=952178&aid=3381270&group_id=195122">Bug 3381270</a>.
  * @author Marc Guillemot
- * @version $Revision: 291 $
+ * @version $Revision$
  */
 public class LocatorEncodingTest extends TestCase  {
 



View it on GitLab: https://salsa.debian.org/java-team/nekohtml/-/commit/0c2a35f8a0025c92eb15c5a760cc29f91379c85f

-- 
View it on GitLab: https://salsa.debian.org/java-team/nekohtml/-/commit/0c2a35f8a0025c92eb15c5a760cc29f91379c85f
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/pkg-java-commits/attachments/20230929/a349be6e/attachment.htm>


More information about the pkg-java-commits mailing list