[SCM] WebKit Debian packaging branch, debian/unstable, updated. debian/1.1.15-1-40151-g37bb677

darin darin at 268f45cc-cd09-0410-ab3c-d52691b4dbfc
Sat Sep 26 08:52:10 UTC 2009


The following commit has been merged in the debian/unstable branch:
commit a3cce7336bb7d23bffd0c199e1ad2a39e58f71f1
Author: darin <darin at 268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Date:   Thu Jul 22 20:50:10 2004 +0000

            Reviewed by Dave.
    
            - got rid of QXml classes and changed XML parsing to use libxml directly
    
            This is the first step toward more direct use of libxml and libxslt.
            Dave is planning to build on this to implement XSLT and to improve our handling
            of XML documents (faster and more feature complete, including DTDs).
    
            * khtml/html/html_elementimpl.cpp:
            (HTMLElementImpl::createContextualFragment):
            * khtml/html/htmltokenizer.cpp:
            (khtml::HTMLTokenizer::HTMLTokenizer):
            * khtml/html/htmltokenizer.h:
            * khtml/xml/dom_docimpl.cpp:
            (DocumentImpl::createTokenizer):
            (DocumentImpl::open):
            * khtml/xml/dom_docimpl.h:
            (DOM::DocumentImpl::tokenizer):
            * khtml/xml/dom_xmlimpl.cpp:
            (DOM::ProcessingInstructionImpl::checkStyleSheet):
            * khtml/xml/dom_xmlimpl.h:
            * khtml/xml/xml_tokenizer.h:
            * khtml/xml/xml_tokenizer.cpp: Redid this all to use libxml directly.
    
            * WebCore.pbproj/project.pbxproj: Removed a bunch of files.
    
    
    git-svn-id: http://svn.webkit.org/repository/webkit/trunk@7094 268f45cc-cd09-0410-ab3c-d52691b4dbfc

diff --git a/WebCore/ChangeLog-2005-08-23 b/WebCore/ChangeLog-2005-08-23
index 23c7f01..072cc0c 100644
--- a/WebCore/ChangeLog-2005-08-23
+++ b/WebCore/ChangeLog-2005-08-23
@@ -1,3 +1,31 @@
+2004-07-22  Darin Adler  <darin at apple.com>
+
+        Reviewed by Dave.
+
+        - got rid of QXml classes and changed XML parsing to use libxml directly
+
+        This is the first step toward more direct use of libxml and libxslt.
+        Dave is planning to build on this to implement XSLT and to improve our handling
+        of XML documents (faster and more feature complete, including DTDs).
+
+        * khtml/html/html_elementimpl.cpp:
+        (HTMLElementImpl::createContextualFragment):
+        * khtml/html/htmltokenizer.cpp:
+        (khtml::HTMLTokenizer::HTMLTokenizer):
+        * khtml/html/htmltokenizer.h:
+        * khtml/xml/dom_docimpl.cpp:
+        (DocumentImpl::createTokenizer):
+        (DocumentImpl::open):
+        * khtml/xml/dom_docimpl.h:
+        (DOM::DocumentImpl::tokenizer):
+        * khtml/xml/dom_xmlimpl.cpp:
+        (DOM::ProcessingInstructionImpl::checkStyleSheet):
+        * khtml/xml/dom_xmlimpl.h:
+        * khtml/xml/xml_tokenizer.h:
+        * khtml/xml/xml_tokenizer.cpp: Redid this all to use libxml directly.
+
+        * WebCore.pbproj/project.pbxproj: Removed a bunch of files.
+
 2004-07-21  Trey Matteson  <trey at apple.com>
 
 	DHTML dragging should use UTI for MIME-pboard type conversion
diff --git a/WebCore/WebCore.pbproj/project.pbxproj b/WebCore/WebCore.pbproj/project.pbxproj
index 42be4be..060e9f4 100644
--- a/WebCore/WebCore.pbproj/project.pbxproj
+++ b/WebCore/WebCore.pbproj/project.pbxproj
@@ -450,9 +450,6 @@
 				F58785D202DE375901EA4122,
 				F587871602DE3B8601EA4122,
 				F58785D502DE375901EA4122,
-				931BFCD403D4AEE5008635CE,
-				931BFCD803D4AEFD008635CE,
-				931BFCD003D4AEDA008635CE,
 				F58785D902DE375901EA4122,
 				F5B2A4FE02E2220F018635CB,
 				F58785DB02DE375901EA4122,
@@ -762,9 +759,6 @@
 				9394E0A703AA5BBE008635CE,
 				9394E0AC03AA60FB008635CE,
 				BCF0192603D3802200B2D04D,
-				931BFCD103D4AEDA008635CE,
-				931BFCD503D4AEE5008635CE,
-				931BFCD903D4AEFD008635CE,
 				93955A4403D72932008635CE,
 				BC7C965203E9EE7000A80004,
 				BC7C965403E9EE7000A80004,
@@ -1570,90 +1564,6 @@
 //932
 //933
 //934
-		931BFCCE03D4AEDA008635CE = {
-			fileEncoding = 4;
-			isa = PBXFileReference;
-			lastKnownFileType = sourcecode.c.h;
-			path = KWQXmlSimpleReader.h;
-			refType = 4;
-			sourceTree = "<group>";
-		};
-		931BFCCF03D4AEDA008635CE = {
-			fileEncoding = 4;
-			isa = PBXFileReference;
-			lastKnownFileType = sourcecode.cpp.objcpp;
-			path = KWQXmlSimpleReader.mm;
-			refType = 4;
-			sourceTree = "<group>";
-		};
-		931BFCD003D4AEDA008635CE = {
-			fileRef = 931BFCCE03D4AEDA008635CE;
-			isa = PBXBuildFile;
-			settings = {
-			};
-		};
-		931BFCD103D4AEDA008635CE = {
-			fileRef = 931BFCCF03D4AEDA008635CE;
-			isa = PBXBuildFile;
-			settings = {
-			};
-		};
-		931BFCD203D4AEE5008635CE = {
-			fileEncoding = 4;
-			isa = PBXFileReference;
-			lastKnownFileType = sourcecode.c.h;
-			path = KWQXmlAttributes.h;
-			refType = 4;
-			sourceTree = "<group>";
-		};
-		931BFCD303D4AEE5008635CE = {
-			fileEncoding = 4;
-			isa = PBXFileReference;
-			lastKnownFileType = sourcecode.cpp.objcpp;
-			path = KWQXmlAttributes.mm;
-			refType = 4;
-			sourceTree = "<group>";
-		};
-		931BFCD403D4AEE5008635CE = {
-			fileRef = 931BFCD203D4AEE5008635CE;
-			isa = PBXBuildFile;
-			settings = {
-			};
-		};
-		931BFCD503D4AEE5008635CE = {
-			fileRef = 931BFCD303D4AEE5008635CE;
-			isa = PBXBuildFile;
-			settings = {
-			};
-		};
-		931BFCD603D4AEFD008635CE = {
-			fileEncoding = 4;
-			isa = PBXFileReference;
-			lastKnownFileType = sourcecode.c.h;
-			path = KWQXmlDefaultHandler.h;
-			refType = 4;
-			sourceTree = "<group>";
-		};
-		931BFCD703D4AEFD008635CE = {
-			fileEncoding = 4;
-			isa = PBXFileReference;
-			lastKnownFileType = sourcecode.cpp.objcpp;
-			path = KWQXmlDefaultHandler.mm;
-			refType = 4;
-			sourceTree = "<group>";
-		};
-		931BFCD803D4AEFD008635CE = {
-			fileRef = 931BFCD603D4AEFD008635CE;
-			isa = PBXBuildFile;
-			settings = {
-			};
-		};
-		931BFCD903D4AEFD008635CE = {
-			fileRef = 931BFCD703D4AEFD008635CE;
-			isa = PBXBuildFile;
-			settings = {
-			};
-		};
 		931C8A160380288B008635CE = {
 			fileEncoding = 4;
 			isa = PBXFileReference;
@@ -10311,12 +10221,6 @@
 				F587853F02DE375901EA4122,
 				F58786C502DE3B8601EA4122,
 				F587854202DE375901EA4122,
-				931BFCD203D4AEE5008635CE,
-				931BFCD303D4AEE5008635CE,
-				931BFCD603D4AEFD008635CE,
-				931BFCD703D4AEFD008635CE,
-				931BFCCE03D4AEDA008635CE,
-				931BFCCF03D4AEDA008635CE,
 			);
 			isa = PBXGroup;
 			name = qt;
diff --git a/WebCore/khtml/html/html_elementimpl.cpp b/WebCore/khtml/html/html_elementimpl.cpp
index 9d6aaef..8da433f 100644
--- a/WebCore/khtml/html/html_elementimpl.cpp
+++ b/WebCore/khtml/html/html_elementimpl.cpp
@@ -758,9 +758,8 @@ DocumentFragmentImpl *HTMLElementImpl::createContextualFragment( const DOMString
     fragment->ref();
     {
         HTMLTokenizer tok( docPtr(), fragment );
-        tok.begin();
         tok.write( html.string(), true );
-        tok.end();
+        tok.finish();
     }
 
     // Exceptions are ignored because none ought to happen here.
diff --git a/WebCore/khtml/html/htmltokenizer.cpp b/WebCore/khtml/html/htmltokenizer.cpp
index 6b49573..6c0748d 100644
--- a/WebCore/khtml/html/htmltokenizer.cpp
+++ b/WebCore/khtml/html/htmltokenizer.cpp
@@ -7,7 +7,7 @@
               (C) 1999 Lars Knoll (knoll at kde.org)
               (C) 1999 Antti Koivisto (koivisto at kde.org)
               (C) 2001 Dirk Mueller (mueller at kde.org)
-    Copyright (C) 2003 Apple Computer, Inc.
+    Copyright (C) 2004 Apple Computer, Inc.
 
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Library General Public
@@ -240,7 +240,7 @@ HTMLTokenizer::HTMLTokenizer(DOM::DocumentPtr *_doc, KHTMLView *_view)
     onHold = false;
     attrNamePresent = false;
     
-    reset();
+    begin();
 }
 
 HTMLTokenizer::HTMLTokenizer(DOM::DocumentPtr *_doc, DOM::DocumentFragmentImpl *i)
@@ -258,7 +258,7 @@ HTMLTokenizer::HTMLTokenizer(DOM::DocumentPtr *_doc, DOM::DocumentFragmentImpl *
     loadingExtScript = false;
     onHold = false;
 
-    reset();
+    begin();
 }
 
 void HTMLTokenizer::reset()
diff --git a/WebCore/khtml/html/htmltokenizer.h b/WebCore/khtml/html/htmltokenizer.h
index c68479a..c427d9a 100644
--- a/WebCore/khtml/html/htmltokenizer.h
+++ b/WebCore/khtml/html/htmltokenizer.h
@@ -128,13 +128,14 @@ public:
     HTMLTokenizer(DOM::DocumentPtr *, DOM::DocumentFragmentImpl *frag);
     virtual ~HTMLTokenizer();
 
-    void begin();
-    void write(const TokenizerString &str, bool appendData);
-    void end();
-    void finish();
+    virtual void write(const TokenizerString &str, bool appendData);
+    virtual void finish();
     virtual void setOnHold(bool _onHold);
 
 protected:
+    void begin();
+    void end();
+
     void reset();
     void addPending();
     void processToken();
diff --git a/WebCore/khtml/xml/dom_docimpl.cpp b/WebCore/khtml/xml/dom_docimpl.cpp
index 7a54efb..9440b7f 100644
--- a/WebCore/khtml/xml/dom_docimpl.cpp
+++ b/WebCore/khtml/xml/dom_docimpl.cpp
@@ -1248,12 +1248,7 @@ void DocumentImpl::updateSelection()
 
 Tokenizer *DocumentImpl::createTokenizer()
 {
-    return new XMLTokenizer(docPtr(),m_view);
-}
-
-XMLHandler* DocumentImpl::createTokenHandler()
-{
-    return new XMLHandler(docPtr(), m_view);
+    return newXMLTokenizer(docPtr(), m_view);
 }
 
 void DocumentImpl::setPaintDevice( QPaintDevice *dev )
@@ -1276,7 +1271,6 @@ void DocumentImpl::open(  )
     clear();
     m_tokenizer = createTokenizer();
     connect(m_tokenizer,SIGNAL(finishedParsing()),this,SIGNAL(finishedParsing()));
-    m_tokenizer->begin();
 
     if (m_view && m_view->part()->jScript()) {
         m_view->part()->jScript()->setSourceFile(m_url,""); //fixme
diff --git a/WebCore/khtml/xml/dom_docimpl.h b/WebCore/khtml/xml/dom_docimpl.h
index 4bcadb1..7eebaea 100644
--- a/WebCore/khtml/xml/dom_docimpl.h
+++ b/WebCore/khtml/xml/dom_docimpl.h
@@ -317,7 +317,6 @@ public:
     CSSStyleSheetImpl* elementSheet();
     virtual khtml::Tokenizer *createTokenizer();
     khtml::Tokenizer *tokenizer() { return m_tokenizer; }
-    virtual khtml::XMLHandler* createTokenHandler();
     
     QPaintDeviceMetrics *paintDeviceMetrics() { return m_paintDeviceMetrics; }
     QPaintDevice *paintDevice() const { return m_paintDevice; }
diff --git a/WebCore/khtml/xml/dom_xmlimpl.cpp b/WebCore/khtml/xml/dom_xmlimpl.cpp
index 09e0e70..3a47642 100644
--- a/WebCore/khtml/xml/dom_xmlimpl.cpp
+++ b/WebCore/khtml/xml/dom_xmlimpl.cpp
@@ -26,8 +26,11 @@
 #include "xml/dom_stringimpl.h"
 #include "css/css_stylesheetimpl.h"
 #include "misc/loader.h"
+#include "xml/xml_tokenizer.h"
 
-using namespace DOM;
+using khtml::parseAttributes;
+
+namespace DOM {
 
 EntityImpl::EntityImpl(DocumentPtr *doc) : NodeBaseImpl(doc)
 {
@@ -396,23 +399,30 @@ void ProcessingInstructionImpl::checkStyleSheet()
         // ### check that this occurs only in the prolog
         // ### support stylesheet included in a fragment of this (or another) document
         // ### make sure this gets called when adding from javascript
-        XMLAttributeReader attrReader(DOMString(m_data).string());
         bool attrsOk;
-        QXmlAttributes attrs = attrReader.readAttrs(attrsOk);
+        const QMap<QString, QString> attrs = parseAttributes(m_data, attrsOk);
         if (!attrsOk)
             return;
-        if (attrs.value("type") != "text/css" && !attrs.value("type").isEmpty())
+        QMap<QString, QString>::ConstIterator i = attrs.find("type");
+        QString type;
+        if (i != attrs.end())
+            type = *i;
+        if (type != "text/css" && !type.isEmpty())
             return;
 
-        DOMString href = attrs.value("href");
+        i = attrs.find("href");
+        QString href;
+        if (i != attrs.end())
+            href = *i;
 
         if (href.length()>1)
         {
             if (href[0]=='#')
             {
+                DOMString newLocalHref = href.mid(1);
                 if (m_localHref)
                     m_localHref->deref();
-                m_localHref=href.implementation()->split(1);
+                m_localHref = newLocalHref.implementation();
                 if (m_localHref)
                     m_localHref->ref();
             }
@@ -424,7 +434,7 @@ void ProcessingInstructionImpl::checkStyleSheet()
 		    m_loading = true;
 		    getDocument()->addPendingSheet();
 		    if (m_cachedSheet) m_cachedSheet->deref(this);
-		    m_cachedSheet = getDocument()->docLoader()->requestStyleSheet(getDocument()->completeURL(href.string()), QString::null);
+		    m_cachedSheet = getDocument()->docLoader()->requestStyleSheet(getDocument()->completeURL(href), QString::null);
 		    if (m_cachedSheet)
 			m_cachedSheet->ref( this );
 		}
@@ -489,35 +499,4 @@ DOMString ProcessingInstructionImpl::toString() const
     return result;
 }
 
-// -------------------------------------------------------------------------
-
-XMLAttributeReader::XMLAttributeReader(QString _attrString)
-{
-    m_attrString = _attrString;
-}
-
-XMLAttributeReader::~XMLAttributeReader()
-{
-}
-
-QXmlAttributes XMLAttributeReader::readAttrs(bool &ok)
-{
-    // parse xml file
-    QXmlInputSource source;
-    source.setData("<?xml version=\"1.0\"?><attrs "+m_attrString+" />");
-    QXmlSimpleReader reader;
-    reader.setContentHandler( this );
-    ok = reader.parse( source );
-    return attrs;
-}
-
-bool XMLAttributeReader::startElement(const QString& /*namespaceURI*/, const QString& localName,
-                                      const QString& /*qName*/, const QXmlAttributes& atts)
-{
-    if (localName == "attrs") {
-        attrs = atts;
-        return true;
-    }
-    else
-        return false; // we shouldn't have any other elements
-}
+} // namespace
diff --git a/WebCore/khtml/xml/dom_xmlimpl.h b/WebCore/khtml/xml/dom_xmlimpl.h
index 8d08bff..9f37d5e 100644
--- a/WebCore/khtml/xml/dom_xmlimpl.h
+++ b/WebCore/khtml/xml/dom_xmlimpl.h
@@ -26,8 +26,6 @@
 #include "xml/dom_nodeimpl.h"
 #include "misc/loader_client.h"
 
-#include <qxml.h>
-
 namespace khtml {
 class CachedCSSStyleSheet;
 };
@@ -174,19 +172,6 @@ protected:
     bool m_loading;
 };
 
-class XMLAttributeReader : public QXmlDefaultHandler
-{
-public:
-    XMLAttributeReader(QString _attrString);
-    virtual ~XMLAttributeReader();
-    QXmlAttributes readAttrs(bool &ok);
-    bool startElement(const QString& namespaceURI, const QString& localName, const QString& qName, const QXmlAttributes& atts);
-
-protected:
-    QXmlAttributes attrs;
-    QString m_attrString;
-};
-
 }; //namespace
 
 #endif
diff --git a/WebCore/khtml/xml/xml_tokenizer.cpp b/WebCore/khtml/xml/xml_tokenizer.cpp
index fdf73da..8a4cbf2 100644
--- a/WebCore/khtml/xml/xml_tokenizer.cpp
+++ b/WebCore/khtml/xml/xml_tokenizer.cpp
@@ -2,7 +2,7 @@
  * This file is part of the DOM implementation for KDE.
  *
  * Copyright (C) 2000 Peter Kelly (pmk at post.com)
- * Copyright (C) 2003 Apple Computer, Inc.
+ * Copyright (C) 2004 Apple Computer, Inc.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
@@ -20,73 +20,255 @@
  * Boston, MA 02111-1307, USA.
  */
 
-
 #include "xml_tokenizer.h"
 #include "xml/dom_docimpl.h"
 #include "xml/dom_textimpl.h"
 #include "xml/dom_xmlimpl.h"
 #include "html/html_headimpl.h"
-#include "rendering/render_object.h"
 #include "misc/htmltags.h"
 #include "misc/htmlattrs.h"
 #include "misc/loader.h"
 
 #include "khtmlview.h"
 #include "khtml_part.h"
-#include <qvariant.h>
 #include <kdebug.h>
 #include <klocale.h>
 
-using namespace DOM;
+#include <libxml/parser.h>
+#include <libxml/parserInternals.h>
+
+#include <qptrstack.h>
+
+using DOM::DocumentImpl;
+using DOM::DocumentPtr;
+using DOM::DOMString;
+using DOM::ElementImpl;
+using DOM::HTMLScriptElementImpl;
+using DOM::HTMLTableSectionElementImpl;
+using DOM::Node;
+using DOM::NodeImpl;
+using DOM::ProcessingInstructionImpl;
+using DOM::TextImpl;
 
 namespace khtml {
 
 const int maxErrors = 25;
 
-XMLHandler::XMLHandler(DocumentPtr *_doc, KHTMLView *_view)
-    : errorLine(0), m_errorCount(0)
+// FIXME: Move to the newer libxml API that handles namespaces and dump XMLNamespace, XMLAttributes, and XMLNamespaceStack.
+
+struct XMLNamespace { // One prefix-to-URI binding; bindings chain to an enclosing one through m_parent.
+    QString m_prefix;
+    QString m_uri;
+    XMLNamespace* m_parent;
+    // Manual reference count: starts at 0; deref() deletes the object when the count drops back to 0.
+    int m_ref;
+    
+    XMLNamespace() :m_parent(0), m_ref(0) {}
+    // Binds prefix p to URI u and takes a reference on parent (given back in deref()).
+    XMLNamespace(const QString& p, const QString& u, XMLNamespace* parent) 
+        :m_prefix(p),
+         m_uri(u),
+         m_parent(parent), 
+         m_ref(0) 
+    { 
+        if (m_parent) m_parent->ref();
+    }
+    // Returns the URI bound to prefix here or in the nearest ancestor that binds it; "" when none does.
+    QString uriForPrefix(const QString& prefix) {
+        if (prefix == m_prefix)
+            return m_uri;
+        if (m_parent)
+            return m_parent->uriForPrefix(prefix);
+        return "";
+    }
+    // deref() releases the parent reference and then deletes this object once the count reaches 0.
+    void ref() { m_ref++; }
+    void deref() { if (--m_ref == 0) { if (m_parent) m_parent->deref(); delete this; } }
+};
+
+class XMLAttributes {
+public:
+    XMLAttributes() : _ref(0), _length(0), _names(0), _values(0), _uris(0) { }
+    XMLAttributes(const char **expatStyleAttributes);
+    ~XMLAttributes();
+    
+    XMLAttributes(const XMLAttributes &);
+    XMLAttributes &operator=(const XMLAttributes &);
+    
+    int length() const { return _length; }
+    QString qName(int index) const { return _names[index]; }
+    QString localName(int index) const;
+    QString uri(int index) const { if (!_uris) return QString::null; return _uris[index]; }
+    QString value(int index) const { return _values[index]; }
+
+    QString value(const QString &) const;
+
+    void split(XMLNamespace* ns);
+    
+private:
+    mutable int *_ref;
+    int _length;
+    QString *_names;
+    QString *_values;
+    QString *_uris;
+};
+
+class XMLNamespaceStack
 {
-    m_doc = _doc;
-    if ( m_doc ) m_doc->ref();
-    m_view = _view;
-    m_currentNode = _doc->document();
-}
+public:
+    ~XMLNamespaceStack();
+    XMLNamespace *pushNamespaces(XMLAttributes& attributes);
+    void popNamespaces();
+private:
+    QPtrStack<XMLNamespace> m_namespaceStack;
+};
+
+class XMLTokenizer : public Tokenizer, public CachedObjectClient
+{
+public:
+    XMLTokenizer(DocumentPtr *, KHTMLView * = 0);
+    ~XMLTokenizer();
+
+    enum ErrorType { warning, nonFatal, fatal };
+
+    // from Tokenizer
+    virtual void write(const TokenizerString &str, bool);
+    virtual void finish();
+    virtual void setOnHold(bool onHold);
+    virtual bool isWaitingForScripts();
 
+    // from CachedObjectClient
+    virtual void notifyFinished(CachedObject *finishedObj);
 
-XMLHandler::~XMLHandler()
+    // callbacks from parser SAX
+    void error(ErrorType, const char *message, va_list args);
+    void startElement(const xmlChar *name, const xmlChar **libxmlAttributes);
+    void endElement();
+    void characters(const xmlChar *s, int len);
+    void processingInstruction(const xmlChar *target, const xmlChar *data);
+    void cdataBlock(const xmlChar *s, int len);
+    void comment(const xmlChar *s);
+
+private:
+    void end();
+
+    int lineNumber() const;
+    int columnNumber() const;
+    void stopParsing();
+
+    void insertErrorMessageBlock();
+
+    void executeScripts();
+    void addScripts(NodeImpl *n);
+
+    XMLNamespace *pushNamespaces(XMLAttributes& attributes) { return m_namespaceStack.pushNamespaces(attributes); }
+    void popNamespaces() { m_namespaceStack.popNamespaces(); }
+
+    bool enterText();
+    void exitText();
+
+    DocumentPtr *m_doc;
+    KHTMLView *m_view;
+
+    QString m_xmlCode;
+
+    xmlParserCtxtPtr m_context;
+    DOM::NodeImpl *m_currentNode;
+    XMLNamespaceStack m_namespaceStack;
+
+    bool m_sawError;
+    bool m_parserStopped;
+
+    int m_errorCount;
+    int m_lastErrorLine;
+    int m_lastErrorColumn;
+    DOMString m_errorMessages;
+
+    QPtrList<HTMLScriptElementImpl> m_scripts;
+    QPtrListIterator<HTMLScriptElementImpl> *m_scriptsIt;
+    CachedScript *m_cachedScript;
+};
+
+// --------------------------------
+
+static xmlParserCtxtPtr createQStringParser(xmlSAXHandlerPtr handlers, void *userData) // Creates a libxml push parser set up to be fed QString data as UTF-16.
 {
-    if ( m_doc ) m_doc->deref();
-}
+    static bool didInit = false; // xmlInitParser() is only called on the first invocation
+    if (!didInit) {
+        xmlInitParser();
+        didInit = true;
+    }
 
+    xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, userData, NULL, 0, NULL); // no initial chunk, no filename
+    const QChar BOM(0xFEFF);
+    const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char *>(&BOM); // despite the name: the FIRST byte of the BOM as stored in memory
+    xmlSwitchEncoding(parser, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE); // first byte 0xFF => low byte first => little-endian
+    return parser;
+}
 
-QString XMLHandler::errorProtocol()
+static void parseQString(xmlParserCtxtPtr parser, const QString &string) // Hands the whole string to the parser in a single xmlParseChunk() call.
 {
-    return errorProt;
+    xmlParseChunk(parser,
+        reinterpret_cast<const char *>(string.unicode()), // raw QChar storage viewed as bytes
+        string.length() * sizeof(QChar), 1); // size is in bytes; terminate = 1 marks this as the last chunk
 }
 
+// --------------------------------
 
-bool XMLHandler::startDocument()
+XMLTokenizer::XMLTokenizer(DocumentPtr *_doc, KHTMLView *_view) // Starts with no parser context, no errors, and the document as current node.
+    : m_doc(_doc), m_view(_view),
+      m_context(NULL), m_currentNode(m_doc->document()),
+      m_sawError(false), m_parserStopped(false), m_errorCount(0),
+      m_lastErrorLine(0), m_lastErrorColumn(0), m_scriptsIt(0), m_cachedScript(0) // m_lastErrorColumn is read by error() before it is ever assigned
 {
-    // at the beginning of parsing: do some initialization
-    errorProt = "";
-    m_errorCount = 0;
-    state = StateInit;
+    if (m_doc) // hold a reference on the document; the destructor releases it
+        m_doc->ref();
+}
 
-    return true;
+XMLTokenizer::~XMLTokenizer() // Gives back what the tokenizer holds: document reference, script iterator, cached script.
+{
+    if (m_doc) // balances the ref() taken in the constructor
+        m_doc->deref();
+    delete m_scriptsIt;
+    if (m_cachedScript)
+        m_cachedScript->deref(this);
 }
 
+void XMLTokenizer::write(const TokenizerString &s, bool /*appendData*/ ) // Accumulates incoming source text; the second argument is ignored.
+{
+    m_xmlCode += s.toString(); // only buffers the text in m_xmlCode; nothing is parsed in this function
+}
 
-bool XMLHandler::startElement( const QString& namespaceURI, const QString& /*localName*/, const QString& qName, const QXmlAttributes& atts )
+void XMLTokenizer::setOnHold(bool onHold)
 {
-    if (m_errorCount) return true;
+    // Will we need to implement this when we do incremental XML parsing?
+}
+
+void XMLTokenizer::startElement(const xmlChar *name, const xmlChar **libxmlAttributes)
+{
+    if (m_parserStopped)
+        return;
+
+    XMLAttributes atts(reinterpret_cast<const char **>(libxmlAttributes));
+    XMLNamespace *ns = pushNamespaces(atts);
+    atts.split(ns);
+    
+    QString qName = QString::fromUtf8(reinterpret_cast<const char *>(name));
+    QString uri;
+    QString prefix;
+    int colonPos = qName.find(':');
+    if (colonPos != -1) {
+        prefix = qName.left(colonPos);
+    }
+    uri = ns->uriForPrefix(prefix);
     
     if (m_currentNode->nodeType() == Node::TEXT_NODE)
         exitText();
 
     int exceptioncode = 0;
-    ElementImpl *newElement = m_doc->document()->createElementNS(namespaceURI,qName,exceptioncode);
+    ElementImpl *newElement = m_doc->document()->createElementNS(uri, qName, exceptioncode);
     if (!newElement)
-        return false;
+        return;
 
     int i;
     for (i = 0; i < atts.length(); i++) {
@@ -99,7 +281,7 @@ bool XMLHandler::startElement( const QString& namespaceURI, const QString& /*loc
                                                     false /* allocate */);
         newElement->setAttribute(id, val.implementation(), exceptioncode);
         if (exceptioncode) // exception setting attributes
-            return false;
+            return;
     }
 
     // FIXME: This hack ensures implicit table bodies get constructed in XHTML and XML files.
@@ -120,12 +302,11 @@ bool XMLHandler::startElement( const QString& namespaceURI, const QString& /*loc
         if (m_view && !newElement->attached())
             newElement->attach();
         m_currentNode = newElement;
-        return true;
+        return;
     }
     else {
-        
         delete newElement;
-        return false;
+        return;
     }
 
     // ### DOM spec states: "if there is no markup inside an element's content, the text is contained in a
@@ -133,11 +314,12 @@ bool XMLHandler::startElement( const QString& namespaceURI, const QString& /*loc
     // need to ensure that empty elements always have an empty text child?
 }
 
-
-bool XMLHandler::endElement( const QString& /*namespaceURI*/, const QString& /*localName*/, const QString& /*qName*/ )
+void XMLTokenizer::endElement()
 {
-    if (m_errorCount) return true;
+    if (m_parserStopped) return;
     
+    popNamespaces();
+
     if (m_currentNode->nodeType() == Node::TEXT_NODE)
         exitText();
     if (m_currentNode->parentNode() != 0) {
@@ -147,22 +329,26 @@ bool XMLHandler::endElement( const QString& /*namespaceURI*/, const QString& /*l
         } while (m_currentNode && m_currentNode->implicitNode());
     }
 // ###  else error
-
-    return true;
 }
 
-
-bool XMLHandler::startCDATA()
+void XMLTokenizer::characters(const xmlChar *s, int len)
 {
-    if (m_errorCount) return true;
+    if (m_parserStopped) return;
     
-    if (m_currentNode->nodeType() == Node::TEXT_NODE)
-        exitText();
+    if (m_currentNode->nodeType() == Node::TEXT_NODE ||
+        m_currentNode->nodeType() == Node::CDATA_SECTION_NODE ||
+        enterText()) {
 
-    NodeImpl *newNode = m_doc->document()->createCDATASection("");
+        int exceptioncode = 0;
+        static_cast<TextImpl*>(m_currentNode)->appendData(QString::fromUtf8(reinterpret_cast<const char *>(s), len),
+            exceptioncode);
+    }
+}
+
+bool XMLTokenizer::enterText()
+{
+    NodeImpl *newNode = m_doc->document()->createTextNode("");
     if (m_currentNode->addChild(newNode)) {
-        if (m_view && !newNode->attached())
-            newNode->attach();
         m_currentNode = newNode;
         return true;
     }
@@ -170,295 +356,245 @@ bool XMLHandler::startCDATA()
         delete newNode;
         return false;
     }
-
-}
-
-bool XMLHandler::endCDATA()
-{
-    if (m_errorCount) return true;
-    
-    if (m_currentNode->parentNode() != 0)
-        m_currentNode = m_currentNode->parentNode();
-    return true;
-}
-
-bool XMLHandler::characters( const QString& ch )
-{
-    if (m_errorCount) return true;
-    
-    if (m_currentNode->nodeType() == Node::TEXT_NODE ||
-        m_currentNode->nodeType() == Node::CDATA_SECTION_NODE ||
-        enterText()) {
-
-        int exceptioncode = 0;
-        static_cast<TextImpl*>(m_currentNode)->appendData(ch,exceptioncode);
-        if (exceptioncode)
-            return false;
-        return true;
-    }
-    else
-        return false;
 }
 
-bool XMLHandler::comment(const QString & ch)
+void XMLTokenizer::exitText()
 {
-    if (m_errorCount) return true;
+    if (m_view && m_currentNode && !m_currentNode->attached())
+        m_currentNode->attach();
     
-    if (m_currentNode->nodeType() == Node::TEXT_NODE)
-        exitText();
-    // ### handle exceptions
-    m_currentNode->addChild(m_doc->document()->createComment(ch));
-    return true;
+    NodeImpl* par = m_currentNode->parentNode();
+    if (par != 0)
+        m_currentNode = par;
 }
 
-bool XMLHandler::processingInstruction(const QString &target, const QString &data)
+void XMLTokenizer::error(ErrorType type, const char *message, va_list args) // Parser error callback: records a formatted message; fatal errors also stop parsing.
 {
-    if (m_errorCount) return true;
-    
-    if (m_currentNode->nodeType() == Node::TEXT_NODE)
-        exitText();
-    // ### handle exceptions
-    ProcessingInstructionImpl *pi = m_doc->document()->createProcessingInstruction(target,data);
-    m_currentNode->addChild(pi);
-    // don't load stylesheets for standalone documents
-    if (m_doc->document()->part()) {
-	pi->checkStyleSheet();
+    if (m_parserStopped) {
+        return;
     }
-    return true;
-}
 
+    if (type == fatal || (m_errorCount < maxErrors && (m_lastErrorLine != lineNumber() || m_lastErrorColumn != columnNumber()))) { // fatal: always; others: once per line/column position, up to maxErrors
 
-QString XMLHandler::errorString()
-{
+        QString format;
+        switch (type) {
+            case warning:
 #if APPLE_CHANGES
-    // FIXME: Does the user ever see this?
-    return "error";
+                format = QString("warning on line %2 at column %3: %1");
 #else
-    return i18n("the document is not in the correct file format");
+                format = i18n( "warning: %1 in line %2, column %3\n" );
 #endif
-}
-
-bool XMLHandler::warning( const QXmlParseException& exception )
-{
+                break;
+            case fatal:
 #if APPLE_CHANGES
-    errorProt += QString("warning on line %2 at column %3: %1")
+                // fall through
 #else
-    errorProt += i18n( "warning: %1 in line %2, column %3\n" )
+                format = i18n( "fatal error: %1 in line %2, column %3\n" );
+                break;
 #endif
-        .arg( exception.message() )
-        .arg( exception.lineNumber() )
-        .arg( exception.columnNumber() );
-    
-    errorLine = exception.lineNumber();
-    errorCol = exception.columnNumber();
-    
-    return true;
-}
-
-bool XMLHandler::error( const QXmlParseException& exception )
-{
-    if (m_errorCount >= maxErrors) return true;
-    
-    if (errorLine == exception.lineNumber() && errorCol == exception.columnNumber())
-        return true; // Only report 1 error for any given line/col position to reduce noise.
-    
-    m_errorCount++;
-    
+            default:
 #if APPLE_CHANGES
-    errorProt += QString("error on line %2 at column %3: %1")
+                format = QString("error on line %2 at column %3: %1");
 #else
-    errorProt += i18n( "error: %1 in line %2, column %3\n" )
+                format = i18n( "error: %1 in line %2, column %3\n" );
 #endif
-        .arg( exception.message() )
-        .arg( exception.lineNumber() )
-        .arg( exception.columnNumber() );
-    
-    errorLine = exception.lineNumber();
-    errorCol = exception.columnNumber();
-    
-    return true;
+        }
+        // Expand the printf-style message; if vasprintf fails, fall back to an empty message text.
+        char *m = 0;
+        if (vasprintf(&m, message, args) < 0) m = 0; // on failure the pointer's value is undefined, so reset it
+        m_errorMessages += format.arg(m ? m : "").arg(lineNumber()).arg(columnNumber());
+        free(m);
+        // Remember where this report happened so a repeat at the same position is suppressed.
+        m_lastErrorLine = lineNumber();
+        m_lastErrorColumn = columnNumber();
+        ++m_errorCount;
+    }
+    // Warnings never mark the document as broken; every other kind does, even when not reported above.
+    if (type != warning)
+        m_sawError = true;
+    // A fatal error ends parsing.
+    if (type == fatal)
+        stopParsing();
 }
 
-bool XMLHandler::fatalError( const QXmlParseException& exception )
+void XMLTokenizer::processingInstruction(const xmlChar *target, const xmlChar *data)
 {
-#if APPLE_CHANGES
-    errorProt += QString("error on line %2 at column %3: %1")
-#else
-    errorProt += i18n( "fatal error: %1 in line %2, column %3\n" )
-#endif
-        .arg( exception.message() )
-        .arg( exception.lineNumber() )
-        .arg( exception.columnNumber() );
-
-    errorLine = exception.lineNumber();
-    errorCol = exception.columnNumber();
+    if (m_parserStopped) {
+        return;
+    }
 
-    return false;
+    if (m_currentNode->nodeType() == Node::TEXT_NODE)
+        exitText();
+    // ### handle exceptions
+    ProcessingInstructionImpl *pi = m_doc->document()->createProcessingInstruction(
+        QString::fromUtf8(reinterpret_cast<const char *>(target)),
+        QString::fromUtf8(reinterpret_cast<const char *>(data)));
+    m_currentNode->addChild(pi);
+    // don't load stylesheets for standalone documents
+    if (m_doc->document()->part()) {
+	pi->checkStyleSheet();
+    }
 }
 
-bool XMLHandler::enterText()
+void XMLTokenizer::cdataBlock(const xmlChar *s, int len) // Adds a CDATA section node holding the len bytes at s under the current node.
 {
-    NodeImpl *newNode = m_doc->document()->createTextNode("");
+    if (m_parserStopped) {
+        return;
+    }
+    // Leave any open text node first so the CDATA section is added to its parent, not to the text node.
+    if (m_currentNode->nodeType() == Node::TEXT_NODE)
+        exitText();
+    // Create an empty CDATA section and make it current; characters() below appends the data to it.
+    NodeImpl *newNode = m_doc->document()->createCDATASection("");
     if (m_currentNode->addChild(newNode)) {
+        if (m_view && !newNode->attached())
+            newNode->attach();
         m_currentNode = newNode;
-        return true;
     }
     else {
         delete newNode;
-        return false;
+        return;
     }
-}
 
-void XMLHandler::exitText()
-{
-    if (m_view && m_currentNode && !m_currentNode->attached())
-        m_currentNode->attach();
-    
-    NodeImpl* par = m_currentNode->parentNode();
-    if (par != 0)
-        m_currentNode = par;
+    characters(s, len);
+    // Step back out of the CDATA section to its parent.
+    if (m_currentNode->parentNode() != 0)
+        m_currentNode = m_currentNode->parentNode();
 }
 
-bool XMLHandler::attributeDecl(const QString &/*eName*/, const QString &/*aName*/, const QString &/*type*/,
-                               const QString &/*valueDefault*/, const QString &/*value*/)
+void XMLTokenizer::comment(const xmlChar *s)
 {
-    // qt's xml parser (as of 2.2.3) does not currently give us values for type, valueDefault and
-    // value. When it does, we can store these somewhere and have default attributes on elements
-    return true;
+    if (m_parserStopped) return;
+    
+    if (m_currentNode->nodeType() == Node::TEXT_NODE)
+        exitText();
+    // ### handle exceptions
+    m_currentNode->addChild(m_doc->document()->createComment(QString::fromUtf8(reinterpret_cast<const char *>(s))));
 }
 
-bool XMLHandler::externalEntityDecl(const QString &/*name*/, const QString &/*publicId*/, const QString &/*systemId*/)
+static void startElementHandler(void *userData, const xmlChar *name, const xmlChar **libxmlAttributes)
 {
-    // ### insert these too - is there anything special we have to do here?
-    return true;
+    static_cast<XMLTokenizer *>(userData)->startElement(name, libxmlAttributes);
 }
 
-bool XMLHandler::internalEntityDecl(const QString &name, const QString &value)
+static void endElementHandler(void *userData, const xmlChar *name)
 {
-    EntityImpl *e = new EntityImpl(m_doc,name);
-    // ### further parse entities inside the value and add them as separate nodes (or entityreferences)?
-    e->addChild(m_doc->document()->createTextNode(value));
-// ### FIXME
-//     if (m_doc->document()->doctype())
-//         static_cast<GenericRONamedNodeMapImpl*>(m_doc->document()->doctype()->entities())->addNode(e);
-    return true;
+    static_cast<XMLTokenizer *>(userData)->endElement();
 }
 
-bool XMLHandler::notationDecl(const QString &name, const QString &publicId, const QString &systemId)
+static void charactersHandler(void *userData, const xmlChar *s, int len)
 {
-// ### FIXME
-//     if (m_doc->document()->doctype()) {
-//         NotationImpl *n = new NotationImpl(m_doc,name,publicId,systemId);
-//         static_cast<GenericRONamedNodeMapImpl*>(m_doc->document()->doctype()->notations())->addNode(n);
-//     }
-    return true;
+    static_cast<XMLTokenizer *>(userData)->characters(s, len);
 }
 
-bool XMLHandler::unparsedEntityDecl(const QString &/*name*/, const QString &/*publicId*/,
-                                    const QString &/*systemId*/, const QString &/*notationName*/)
+static void processingInstructionHandler(void *userData, const xmlChar *target, const xmlChar *data)
 {
-    // ###
-    return true;
+    static_cast<XMLTokenizer *>(userData)->processingInstruction(target, data);
 }
 
-
-//------------------------------------------------------------------------------
-
-XMLTokenizer::XMLTokenizer(DOM::DocumentPtr *_doc, KHTMLView *_view)
+static void cdataBlockHandler(void *userData, const xmlChar *s, int len)
 {
-    m_doc = _doc;
-    if ( m_doc ) m_doc->ref();
-    m_view = _view;
-    m_xmlCode = "";
-    m_scriptsIt = 0;
-    m_cachedScript = 0;
+    static_cast<XMLTokenizer *>(userData)->cdataBlock(s, len);
 }
 
-XMLTokenizer::~XMLTokenizer()
+static void commentHandler(void *userData, const xmlChar *comment)
 {
-    if ( m_doc ) m_doc->deref();
-    if (m_scriptsIt)
-        delete m_scriptsIt;
-    if (m_cachedScript)
-        m_cachedScript->deref(this);
+    static_cast<XMLTokenizer *>(userData)->comment(comment);
 }
 
-
-void XMLTokenizer::begin()
+static void warningHandler(void *userData, const char *message, ...)
 {
+    va_list args;
+    va_start(args, message);
+    static_cast<XMLTokenizer *>(userData)->error(XMLTokenizer::warning, message, args);
+    va_end(args);
 }
 
-void XMLTokenizer::write(const TokenizerString &s, bool /*appendData*/ )
+static void fatalErrorHandler(void *userData, const char *message, ...)
 {
-    m_xmlCode += s.toString();
+    va_list args;
+    va_start(args, message);
+    static_cast<XMLTokenizer *>(userData)->error(XMLTokenizer::fatal, message, args);
+    va_end(args);
 }
 
-void XMLTokenizer::end()
+static void normalErrorHandler(void *userData, const char *message, ...)
 {
-    emit finishedParsing();
+    va_list args;
+    va_start(args, message);
+    static_cast<XMLTokenizer *>(userData)->error(XMLTokenizer::nonFatal, message, args);
+    va_end(args);
 }
 
 void XMLTokenizer::finish()
 {
-    // parse xml file
-    XMLHandler* handler = m_doc->document()->createTokenHandler();
-    QXmlInputSource source;
-    source.setData(m_xmlCode);
-    QXmlSimpleReader reader;
-    reader.setContentHandler( handler );
-    reader.setLexicalHandler( handler );
-    reader.setErrorHandler( handler );
-    reader.setDeclHandler( handler );
-    reader.setDTDHandler( handler );
-    bool ok = reader.parse( source );
-
-    if (!ok) {
-        // One or more errors occurred during parsing of the code. Display an error block to the user above
-        // the normal content (the DOM tree is created manually and includes line/col info regarding 
-        // where the errors are located)
-
-        // Create elements for display
-        int exceptioncode = 0;
-        DocumentImpl *doc = m_doc->document();
-        NodeImpl* root = doc->documentElement();
-        if (!root) {
-            root = doc->createElementNS(XHTML_NAMESPACE, "html", exceptioncode);
-            NodeImpl* body = doc->createElementNS(XHTML_NAMESPACE, "body", exceptioncode);
-            root->appendChild(body, exceptioncode);
-            doc->appendChild(root, exceptioncode);
-            root = body;
-        }
-
-        ElementImpl* reportDiv = doc->createElementNS(XHTML_NAMESPACE, "div", exceptioncode);
-        reportDiv->setAttribute(ATTR_STYLE, "white-space: pre; border: 2px solid #c77; padding: 0 1em 0 1em; margin: 1em; background-color: #fdd; color: black");
-        ElementImpl* h3 = doc->createElementNS(XHTML_NAMESPACE, "h3", exceptioncode);
-        h3->appendChild(doc->createTextNode("This page contains the following errors:"), exceptioncode);
-        reportDiv->appendChild(h3, exceptioncode);
-        ElementImpl* fixed = doc->createElementNS(XHTML_NAMESPACE, "div", exceptioncode);
-        fixed->setAttribute(ATTR_STYLE, "font-family:monospace;font-size:12px");
-        NodeImpl* textNode = doc->createTextNode(handler->errorProtocol());
-        fixed->appendChild(textNode, exceptioncode);
-        reportDiv->appendChild(fixed, exceptioncode);
-        h3 = doc->createElementNS(XHTML_NAMESPACE, "h3", exceptioncode);
-        h3->appendChild(doc->createTextNode("Below is a rendering of the page up to the first error."), exceptioncode);
-        reportDiv->appendChild(h3, exceptioncode);
-        
-        root->insertBefore(reportDiv, root->firstChild(), exceptioncode);
-
-        m_doc->document()->recalcStyle( NodeImpl::Inherit );
-        m_doc->document()->updateRendering();
-
-        end();
-    }
-    else {
+    xmlSAXHandler sax;
+    memset(&sax, 0, sizeof(sax));
+    sax.error = normalErrorHandler;
+    sax.fatalError = fatalErrorHandler;
+    sax.characters = charactersHandler;
+    sax.endElement = endElementHandler;
+    sax.processingInstruction = processingInstructionHandler;
+    sax.startElement = startElementHandler;
+    sax.cdataBlock = cdataBlockHandler;
+    sax.comment = commentHandler;
+    sax.warning = warningHandler;
+    m_parserStopped = false;
+    m_sawError = false;
+    m_context = createQStringParser(&sax, this);
+    parseQString(m_context, m_xmlCode);
+    xmlFreeParserCtxt(m_context);
+    m_context = NULL;
+
+    if (m_sawError) {
+        insertErrorMessageBlock();
+    } else {
         // Parsing was successful. Now locate all html <script> tags in the document and execute them
-        // one by one
+        // one by one.
         addScripts(m_doc->document());
         m_scriptsIt = new QPtrListIterator<HTMLScriptElementImpl>(m_scripts);
         executeScripts();
     }
 
-    delete handler;
+    emit finishedParsing();
+}
+
+void XMLTokenizer::insertErrorMessageBlock()
+{
+    // One or more errors occurred during parsing of the code. Display an error block to the user above
+    // the normal content (the DOM tree is created manually and includes line/col info regarding 
+    // where the errors are located)
+
+    // Create elements for display
+    int exceptioncode = 0;
+    DocumentImpl *doc = m_doc->document();
+    NodeImpl* root = doc->documentElement();
+    if (!root) {
+        root = doc->createElementNS(XHTML_NAMESPACE, "html", exceptioncode);
+        NodeImpl* body = doc->createElementNS(XHTML_NAMESPACE, "body", exceptioncode);
+        root->appendChild(body, exceptioncode);
+        doc->appendChild(root, exceptioncode);
+        root = body;
+    }
+
+    ElementImpl* reportDiv = doc->createElementNS(XHTML_NAMESPACE, "div", exceptioncode);
+    reportDiv->setAttribute(ATTR_STYLE, "white-space: pre; border: 2px solid #c77; padding: 0 1em 0 1em; margin: 1em; background-color: #fdd; color: black");
+    ElementImpl* h3 = doc->createElementNS(XHTML_NAMESPACE, "h3", exceptioncode);
+    h3->appendChild(doc->createTextNode("This page contains the following errors:"), exceptioncode);
+    reportDiv->appendChild(h3, exceptioncode);
+    ElementImpl* fixed = doc->createElementNS(XHTML_NAMESPACE, "div", exceptioncode);
+    fixed->setAttribute(ATTR_STYLE, "font-family:monospace;font-size:12px");
+    NodeImpl* textNode = doc->createTextNode(m_errorMessages);
+    fixed->appendChild(textNode, exceptioncode);
+    reportDiv->appendChild(fixed, exceptioncode);
+    h3 = doc->createElementNS(XHTML_NAMESPACE, "h3", exceptioncode);
+    h3->appendChild(doc->createTextNode("Below is a rendering of the page up to the first error."), exceptioncode);
+    reportDiv->appendChild(h3, exceptioncode);
+    
+    root->insertBefore(reportDiv, root->firstChild(), exceptioncode);
+
+    m_doc->document()->recalcStyle( NodeImpl::Inherit );
+    m_doc->document()->updateRendering();
 }
 
 void XMLTokenizer::addScripts(NodeImpl *n)
@@ -516,9 +652,6 @@ void XMLTokenizer::executeScripts()
     // All scripts have finished executing, so calculate the style for the document and close
     // the last element
     m_doc->document()->updateStyleSelector();
-
-    // We are now finished parsing
-    end();
 }
 
 void XMLTokenizer::notifyFinished(CachedObject *finishedObj)
@@ -540,6 +673,272 @@ bool XMLTokenizer::isWaitingForScripts()
     return m_cachedScript != 0;
 }
 
+Tokenizer *newXMLTokenizer(DocumentPtr *d, KHTMLView *v) // factory: callers only see the abstract Tokenizer type
+{
+    return new XMLTokenizer(d, v); // allocated with new; NOTE(review): presumably the caller is responsible for deleting it
+}
+
+int XMLTokenizer::lineNumber() const // line of the libxml parser's current input position
+{
+    return m_context->input->line; // NOTE(review): finish() resets m_context to NULL after parsing, so presumably only valid during a parse
+}
+
+int XMLTokenizer::columnNumber() const // column of the libxml parser's current input position
+{
+    return m_context->input->col; // NOTE(review): finish() resets m_context to NULL after parsing, so presumably only valid during a parse
+}
+
+void XMLTokenizer::stopParsing() // called from error() when a fatal error is reported
+{
+    xmlStopParser(m_context); // ask libxml to abandon the parse in progress
+    m_parserStopped = true; // the SAX callbacks (e.g. cdataBlock(), comment()) test this flag and return early
+}
+
+#if 0 // Disabled: declaration/DTD handlers of the old QXml-based XMLHandler; presumably parked here until ported to libxml
+
+bool XMLHandler::attributeDecl(const QString &/*eName*/, const QString &/*aName*/, const QString &/*type*/,
+                               const QString &/*valueDefault*/, const QString &/*value*/)
+{
+    // qt's xml parser (as of 2.2.3) does not currently give us values for type, valueDefault and
+    // value. When it does, we can store these somewhere and have default attributes on elements
+    return true;
+}
+
+bool XMLHandler::externalEntityDecl(const QString &/*name*/, const QString &/*publicId*/, const QString &/*systemId*/)
+{
+    // ### insert these too - is there anything special we have to do here?
+    return true;
+}
+
+bool XMLHandler::internalEntityDecl(const QString &name, const QString &value)
+{
+    EntityImpl *e = new EntityImpl(m_doc,name);
+    // ### further parse entities inside the value and add them as separate nodes (or entityreferences)?
+    e->addChild(m_doc->document()->createTextNode(value));
+// ### FIXME
+//     if (m_doc->document()->doctype())
+//         static_cast<GenericRONamedNodeMapImpl*>(m_doc->document()->doctype()->entities())->addNode(e);
+    return true;
+}
+
+bool XMLHandler::notationDecl(const QString &name, const QString &publicId, const QString &systemId)
+{
+// ### FIXME
+//     if (m_doc->document()->doctype()) {
+//         NotationImpl *n = new NotationImpl(m_doc,name,publicId,systemId);
+//         static_cast<GenericRONamedNodeMapImpl*>(m_doc->document()->doctype()->notations())->addNode(n);
+//     }
+    return true;
+}
+
+#endif // 0
+
+// --------------------------------
+
+XMLNamespaceStack::~XMLNamespaceStack()
+{   // Drain the stack, releasing the reference pushNamespaces() took for each entry.
+    for (XMLNamespace *top = m_namespaceStack.pop(); top; top = m_namespaceStack.pop())
+        top->deref();
+}
+
+void XMLNamespaceStack::popNamespaces()
+{
+    // Pop the top entry and, if there was one, drop the stack's reference to it.
+    if (XMLNamespace *popped = m_namespaceStack.pop())
+        popped->deref();
+}
+
+XMLNamespace *XMLNamespaceStack::pushNamespaces(XMLAttributes& attrs)
+{
+    // Start from the scope on top of the stack, or a fresh one if the stack has none.
+    XMLNamespace *scope = m_namespaceStack.current();
+    if (scope == 0)
+        scope = new XMLNamespace;
+    // Wrap the scope once per xmlns / xmlns:prefix attribute (6 == strlen("xmlns:")).
+    for (int index = 0; index < attrs.length(); ++index) {
+        QString attrName = attrs.qName(index);
+        if (attrName == "xmlns") {
+            scope = new XMLNamespace(QString::null, attrs.value(index), scope);
+        } else if (attrName.startsWith("xmlns:")) {
+            scope = new XMLNamespace(attrName.right(attrName.length()-6), attrs.value(index), scope);
+        }
+    }
+    m_namespaceStack.push(scope);
+    scope->ref();
+    return scope;
+}
+
+// --------------------------------
+
+struct AttributeParseState { // user data that parseAttributes() hands to attributesStartElementHandler()
+    QMap<QString, QString> attributes; // qName -> value, one entry per attribute of the <attrs> element
+    bool gotAttributes; // set to true once an <attrs> start tag has been seen
+};
+
+static void attributesStartElementHandler(void *userData, const xmlChar *name, const xmlChar **libxmlAttributes)
+{
+    // SAX startElement callback used by parseAttributes(): only the synthetic
+    // <attrs> wrapper element is of interest, everything else is ignored.
+    const bool isAttrsElement = strcmp(reinterpret_cast<const char *>(name), "attrs") == 0;
+    if (!isAttrsElement)
+        return;
+
+    AttributeParseState *parseState = static_cast<AttributeParseState *>(userData);
+    parseState->gotAttributes = true;
+
+    // Resolve namespace prefixes, then copy every qName/value pair into the result map.
+    XMLAttributes parsed(reinterpret_cast<const char **>(libxmlAttributes));
+    XMLNamespaceStack namespaces;
+    parsed.split(namespaces.pushNamespaces(parsed));
+    for (int index = 0, count = parsed.length(); index != count; ++index)
+        parseState->attributes.insert(parsed.qName(index), parsed.value(index));
+}
+
+QMap<QString, QString> parseAttributes(const DOMString &string, bool &attrsOK)
+{
+    // Parses `string` as the attribute list of a synthetic <attrs> element and returns
+    // the qName -> value map; attrsOK reports whether the <attrs> start tag was seen.
+    AttributeParseState result;
+    result.gotAttributes = false;
+    xmlSAXHandler handlers;
+    memset(&handlers, 0, sizeof(handlers));
+    handlers.startElement = attributesStartElementHandler;
+    xmlParserCtxtPtr context = createQStringParser(&handlers, &result);
+    parseQString(context, "<?xml version=\"1.0\"?><attrs " + string.string() + " />");
+    xmlFreeParserCtxt(context);
+    attrsOK = result.gotAttributes;
+    return result.attributes;
+}
+
+// --------------------------------
+
+XMLAttributes::XMLAttributes(const char **saxStyleAttributes)
+    : _ref(0), _uris(0)
+{
+    // saxStyleAttributes is a null-terminated array of alternating name/value
+    // C strings (or a null pointer for "no attributes"); copy it into two
+    // parallel QString arrays. _uris stays null until split() needs it.
+    int pairCount = 0;
+    if (saxStyleAttributes) {
+        while (saxStyleAttributes[2 * pairCount])
+            ++pairCount;
+    }
+    _length = pairCount;
+
+    if (pairCount == 0) {
+        _names = 0;
+        _values = 0;
+        _uris = 0;
+        return;
+    }
+
+    // Second pass: convert each UTF-8 name/value pair.
+    _names = new QString [pairCount];
+    _values = new QString [pairCount];
+    for (int slot = 0; slot != pairCount; ++slot) {
+        const char **pair = saxStyleAttributes + 2 * slot;
+        _names[slot] = QString::fromUtf8(pair[0]);
+        _values[slot] = QString::fromUtf8(pair[1]);
+    }
+}
+
+XMLAttributes::~XMLAttributes()
+{
+    if (_ref && !--*_ref) { // a shared counter exists and this object was the last sharer
+        delete _ref;
+        _ref = 0;
+    }
+    if (!_ref) { // never shared (no counter) or the counter just hit zero: this object frees the arrays
+        delete [] _names;
+        delete [] _values;
+        delete [] _uris;
+    }
+}
+
+XMLAttributes::XMLAttributes(const XMLAttributes &other)
+    : _ref(other._ref) // the copy shares other's arrays (pointer copies below) instead of duplicating them
+    , _length(other._length)
+    , _names(other._names)
+    , _values(other._values)
+    , _uris(other._uris)
+{
+    if (!_ref) { // other was not shared yet: create the counter, counting both objects
+        _ref = new int (2);
+        other._ref = _ref; // assigned through a const reference, so _ref is presumably declared mutable
+    } else {
+        ++*_ref;
+    }
+}
+
+XMLAttributes &XMLAttributes::operator=(const XMLAttributes &other)
+{
+    // Self-assignment: releasing our arrays first would make the copies below read freed pointers.
+    if (this == &other)
+        return *this;
+    if (_ref && !--*_ref) { // release our current share, exactly as the destructor does
+        delete _ref;
+        _ref = 0;
+    }
+    if (!_ref) {
+        delete [] _names;
+        delete [] _values;
+        delete [] _uris;
+    }
+    _ref = other._ref; // adopt other's arrays instead of duplicating them
+    _length = other._length;
+    _names = other._names;
+    _values = other._values;
+    _uris = other._uris;
+    if (!_ref) { // other was unshared so far: create the counter for the two of us
+        _ref = new int (2);
+        other._ref = _ref;
+    } else {
+        ++*_ref;
+    }
+    return *this;
+}
+
+QString XMLAttributes::localName(int index) const
+{
+    // The local name is whatever follows the first ':' of the qualified name;
+    // a name without a colon is already local.
+    const QString &qualified = _names[index];
+    const int colon = qualified.find(':');
+    return colon == -1 ? qualified : qualified.right(qualified.length() - colon - 1);
+}
+
+QString XMLAttributes::value(const QString &name) const
+{
+    // Linear search by qualified name: the first match wins, and a miss
+    // yields the null string.
+    for (int slot = 0; slot != _length; ++slot)
+        if (name == _names[slot])
+            return _values[slot];
+    return QString::null;
+}
+
+void XMLAttributes::split(XMLNamespace* ns)
+{
+    // For every prefixed attribute name, look the prefix up in ns and record
+    // the resulting namespace URI in _uris (allocated on first use).
+    for (int slot = 0; slot < _length; ++slot) {
+        int colon = _names[slot].find(':');
+        if (colon == -1)
+            continue; // unprefixed: no URI recorded
+
+        QString prefix = _names[slot].left(colon);
+        // FIXME: The URI is the xmlns namespace? I seem to recall DOM lvl 3 saying something about this.
+        if (prefix == "xmlns")
+            continue; // xmlns:* declarations get no URI for now (see FIXME)
+        QString uri = ns->uriForPrefix(prefix);
+        if (uri.isEmpty())
+            continue;
+        if (!_uris)
+            _uris = new QString[_length];
+        _uris[slot] = uri;
+    }
+}
+
 }
 
 #include "xml_tokenizer.moc"
diff --git a/WebCore/khtml/xml/xml_tokenizer.h b/WebCore/khtml/xml/xml_tokenizer.h
index 20e78ff..013afd8 100644
--- a/WebCore/khtml/xml/xml_tokenizer.h
+++ b/WebCore/khtml/xml/xml_tokenizer.h
@@ -2,7 +2,7 @@
  * This file is part of the DOM implementation for KDE.
  *
  * Copyright (C) 2000 Peter Kelly (pmk at post.com)
- * Copyright (C) 2003 Apple Computer, Inc.
+ * Copyright (C) 2004 Apple Computer, Inc.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
@@ -24,10 +24,8 @@
 #ifndef _XML_Tokenizer_h_
 #define _XML_Tokenizer_h_
 
-#include <qxml.h>
-#include <qptrlist.h>
 #include <qobject.h>
-#include "misc/loader_client.h"
+#include <qmap.h>
 #include "misc/stringit.h"
 
 #if APPLE_CHANGES
@@ -37,91 +35,26 @@
 class KHTMLView;
 
 namespace DOM {
-    class DocumentImpl;
-    class NodeImpl;
-    class HTMLScriptElementImpl;
     class DocumentPtr;
-    class HTMLScriptElementImpl;
+    class NodeImpl;
 };
 
 namespace khtml {
-    
-class CachedObject;
-class CachedScript;
-
-class XMLHandler : public QXmlDefaultHandler
-{
-public:
-    XMLHandler(DOM::DocumentPtr *_doc, KHTMLView *_view);
-    virtual ~XMLHandler();
-
-    // return the error protocol if parsing failed
-    QString errorProtocol();
-
-    // overloaded handler functions
-    bool startDocument();
-    bool startElement(const QString& namespaceURI, const QString& localName, const QString& qName, const QXmlAttributes& atts);
-    bool endElement(const QString& namespaceURI, const QString& localName, const QString& qName);
-    bool startCDATA();
-    bool endCDATA();
-    bool characters(const QString& ch);
-    bool comment(const QString & ch);
-    bool processingInstruction(const QString &target, const QString &data);
-
-
-    // from QXmlDeclHandler
-    bool attributeDecl(const QString &eName, const QString &aName, const QString &type, const QString &valueDefault, const QString &value);
-    bool externalEntityDecl(const QString &name, const QString &publicId, const QString &systemId);
-    bool internalEntityDecl(const QString &name, const QString &value);
-
-    // from QXmlDTDHandler
-    bool notationDecl(const QString &name, const QString &publicId, const QString &systemId);
-    bool unparsedEntityDecl(const QString &name, const QString &publicId, const QString &systemId, const QString &notationName);
-
-    bool enterText();
-    void exitText();
-
-    QString errorString();
 
-    bool error( const QXmlParseException& exception );
-    bool fatalError( const QXmlParseException& exception );
-    bool warning( const QXmlParseException& exception );
-    
-    int errorLine;
-    int errorCol;
-
-protected:
-    QString errorProt;
-    int m_errorCount;
-    DOM::DocumentPtr *m_doc;
-    KHTMLView *m_view;
-    DOM::NodeImpl *m_currentNode;
-    DOM::NodeImpl *m_rootNode;
-
-    enum State {
-        StateInit,
-        StateDocument,
-        StateQuote,
-        StateLine,
-        StateHeading,
-        StateP
-    };
-    State state;
-};
+class TokenizerString;
 
 class Tokenizer : public QObject
 {
     Q_OBJECT
+
 public:
-    virtual void begin() = 0;
     // script output must be prepended, while new data
     // received during executing a script must be appended, hence the
     // extra bool to be able to distinguish between both cases. document.write()
     // always uses false, while khtmlpart uses true
     virtual void write(const TokenizerString &str, bool appendData) = 0;
-    virtual void end() = 0;
     virtual void finish() = 0;
-    virtual void setOnHold(bool /*_onHold*/) {}
+    virtual void setOnHold(bool onHold) = 0;
     virtual bool isWaitingForScripts() = 0;
 
 signals:
@@ -135,32 +68,8 @@ private:
 #endif
 };
 
-class XMLTokenizer : public Tokenizer, public CachedObjectClient
-{
-public:
-    XMLTokenizer(DOM::DocumentPtr *, KHTMLView * = 0);
-    virtual ~XMLTokenizer();
-    virtual void begin();
-    virtual void write(const TokenizerString &str, bool);
-    virtual void end();
-    virtual void finish();
-
-    // from CachedObjectClient
-    void notifyFinished(CachedObject *finishedObj);
-
-    virtual bool isWaitingForScripts();
-protected:
-    DOM::DocumentPtr *m_doc;
-    KHTMLView *m_view;
-
-    void executeScripts();
-    void addScripts(DOM::NodeImpl *n);
-
-    QString m_xmlCode;
-    QPtrList<DOM::HTMLScriptElementImpl> m_scripts;
-    QPtrListIterator<DOM::HTMLScriptElementImpl> *m_scriptsIt;
-    CachedScript *m_cachedScript;
-};
+Tokenizer *newXMLTokenizer(DOM::DocumentPtr *, KHTMLView * = 0);
+QMap<QString, QString> parseAttributes(const DOM::DOMString &, bool &attrsOK);
 
 }
 

-- 
WebKit Debian packaging



More information about the Pkg-webkit-commits mailing list