Bug#565992: libwebkit-1.0-2: invalid element name error for http XHTML pages declared as iso-8859-1

Vincent Lefevre vincent at vinc17.net
Mon Aug 30 14:55:58 UTC 2010


tags 565992 fixed-upstream patch
thanks

On 2010-08-30 16:18:09 +0200, Vincent Lefevre wrote:
> Still occurs, but
> 
>   http://trac.webkit.org/changeset/66336/
> 
> should fix the problem. This patch (for the trunk) doesn't apply
> cleanly to webkit 1.2.3 (still in Debian), but I've modified it
> slightly and I'm rebuilding the webkit packages. If everything is
> OK, I'll post it.

The attached patch, derived from

  http://trac.webkit.org/changeset/66336/

fixes the bug.

-- 
Vincent Lefèvre <vincent at vinc17.net> - Web: <http://www.vinc17.net/>
100% accessible validated (X)HTML - Blog: <http://www.vinc17.net/blog/>
Work: CR INRIA - computer arithmetic / Arénaire project (LIP, ENS-Lyon)
-------------- next part --------------
--- a/WebCore/dom/XMLTokenizerLibxml2.cpp
+++ b/WebCore/dom/XMLTokenizerLibxml2.cpp
@@ -363,6 +363,17 @@
     unsigned m_currentOffset;
 };
 
+static void switchToUTF16(xmlParserCtxtPtr ctxt)
+{
+    // Hack around libxml2's lack of encoding overide support by manually
+    // resetting the encoding to UTF-16 before every chunk.  Otherwise libxml
+    // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks
+    // and switch encodings, causing the parse to fail.
+    const UChar BOM = 0xFEFF;
+    const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
+    xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
+}
+
 static bool shouldAllowExternalLoad(const KURL& url)
 {
     String urlString = url.string();
@@ -478,9 +489,7 @@
     xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
     parser->_private = userData;
     parser->replaceEntities = true;
-    const UChar BOM = 0xFEFF;
-    const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
-    xmlSwitchEncoding(parser, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
+    switchToUTF16(parser);
 
     return adoptRef(new XMLParserContext(parser));
 }
@@ -637,14 +646,7 @@
 
     // libXML throws an error if you try to switch the encoding for an empty string.
     if (parseString.length()) {
-        // Hack around libxml2's lack of encoding overide support by manually
-        // resetting the encoding to UTF-16 before every chunk.  Otherwise libxml
-        // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks 
-        // and switch encodings, causing the parse to fail.
-        const UChar BOM = 0xFEFF;
-        const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
-        xmlSwitchEncoding(context->context(), BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
-
+        switchToUTF16(context->context());
         XMLTokenizerScope scope(m_doc->docLoader());
         xmlParseChunk(context->context(), reinterpret_cast<const char*>(parseString.characters()), sizeof(UChar) * parseString.length(), 0);
     }
@@ -1219,6 +1221,7 @@
 static void startDocumentHandler(void* closure)
 {
     xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure);
+    switchToUTF16(ctxt);
     getTokenizer(closure)->startDocument(ctxt->version, ctxt->encoding, ctxt->standalone);
     xmlSAX2StartDocument(closure);
 }


More information about the Pkg-webkit-maintainers mailing list