Bug#565992: libwebkit-1.0-2: invalid element name error for http XHTML pages declared as iso-8859-1
Vincent Lefevre
vincent at vinc17.net
Mon Aug 30 14:55:58 UTC 2010
tags 565992 fixed-upstream patch
thanks
On 2010-08-30 16:18:09 +0200, Vincent Lefevre wrote:
> Still occurs, but
>
> http://trac.webkit.org/changeset/66336/
>
> should fix the problem. This patch (for the trunk) doesn't apply
> cleanly to webkit 1.2.3 (still in Debian), but I've modified it
> slightly and I'm rebuilding the webkit packages. If everything is
> OK, I'll post it.
The attached patch, derived from
http://trac.webkit.org/changeset/66336/
fixes the bug.
--
Vincent Lefèvre <vincent at vinc17.net> - Web: <http://www.vinc17.net/>
100% accessible validated (X)HTML - Blog: <http://www.vinc17.net/blog/>
Work: CR INRIA - computer arithmetic / Arénaire project (LIP, ENS-Lyon)
-------------- next part --------------
--- a/WebCore/dom/XMLTokenizerLibxml2.cpp
+++ b/WebCore/dom/XMLTokenizerLibxml2.cpp
@@ -363,6 +363,17 @@
unsigned m_currentOffset;
};
+static void switchToUTF16(xmlParserCtxtPtr ctxt)
+{
+ // Force ctxt back to UTF-16 in the host's byte order. This is a hack around
+ // libxml2's lack of encoding override support: otherwise libxml will detect
+ // <?xml version="1.0" encoding="<encoding name>"?> blocks and switch
+ // encodings, causing the parse to fail.
+ const UChar BOM = 0xFEFF; // byte-order mark, used here only to probe host endianness
+ const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM); // first byte of BOM as stored in memory
+ xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE); // 0xFF first => low-order byte stored first (little-endian)
+}
+
static bool shouldAllowExternalLoad(const KURL& url)
{
String urlString = url.string();
@@ -478,9 +489,7 @@
xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
parser->_private = userData;
parser->replaceEntities = true;
- const UChar BOM = 0xFEFF;
- const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
- xmlSwitchEncoding(parser, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
+ switchToUTF16(parser);
return adoptRef(new XMLParserContext(parser));
}
@@ -637,14 +646,7 @@
// libXML throws an error if you try to switch the encoding for an empty string.
if (parseString.length()) {
- // Hack around libxml2's lack of encoding overide support by manually
- // resetting the encoding to UTF-16 before every chunk. Otherwise libxml
- // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks
- // and switch encodings, causing the parse to fail.
- const UChar BOM = 0xFEFF;
- const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
- xmlSwitchEncoding(context->context(), BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
-
+ switchToUTF16(context->context());
XMLTokenizerScope scope(m_doc->docLoader());
xmlParseChunk(context->context(), reinterpret_cast<const char*>(parseString.characters()), sizeof(UChar) * parseString.length(), 0);
}
@@ -1219,6 +1221,7 @@
static void startDocumentHandler(void* closure)
{
xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure);
+ switchToUTF16(ctxt);
getTokenizer(closure)->startDocument(ctxt->version, ctxt->encoding, ctxt->standalone);
xmlSAX2StartDocument(closure);
}
More information about the Pkg-webkit-maintainers
mailing list