[SCM] WebKit Debian packaging branch, debian/unstable, updated. debian/1.1.15-1-40151-g37bb677

darin darin at 268f45cc-cd09-0410-ab3c-d52691b4dbfc
Sat Sep 26 07:41:23 UTC 2009


The following commit has been merged in the debian/unstable branch:
commit 3ec8571a7700b24ef8154719b186d9065655c176
Author: darin <darin at 268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Date:   Fri May 9 17:17:14 2003 +0000

            Reviewed by John.
    
            - fixed 3191943 -- XML parsing error occurs when loading shift-JIS Japanese XML file
    
            * khtml/misc/decoder.h: Add a new type of encoding, EncodingFromXMLHeader.
            * khtml/misc/decoder.cpp:
            (findXMLEncoding): Added.
            (Decoder::decode): Call findXMLEncoding to extract the encoding from the XML header.
    
    
    git-svn-id: http://svn.webkit.org/repository/webkit/trunk@4331 268f45cc-cd09-0410-ab3c-d52691b4dbfc

diff --git a/WebCore/ChangeLog-2003-10-25 b/WebCore/ChangeLog-2003-10-25
index b6ac01f..9282c14 100644
--- a/WebCore/ChangeLog-2003-10-25
+++ b/WebCore/ChangeLog-2003-10-25
@@ -1,3 +1,14 @@
+2003-05-09  Darin Adler  <darin at apple.com>
+
+        Reviewed by John.
+
+        - fixed 3191943 -- XML parsing error occurs when loading shift-JIS Japanese XML file
+
+        * khtml/misc/decoder.h: Add a new type of encoding, EncodingFromXMLHeader.
+        * khtml/misc/decoder.cpp:
+        (findXMLEncoding): Added.
+        (Decoder::decode): Call findXMLEncoding to extract the encoding from the XML header.
+
 2003-05-08  Darin Adler  <darin at apple.com>
 
         Reviewed by John and Ken.
diff --git a/WebCore/ChangeLog-2005-08-23 b/WebCore/ChangeLog-2005-08-23
index b6ac01f..9282c14 100644
--- a/WebCore/ChangeLog-2005-08-23
+++ b/WebCore/ChangeLog-2005-08-23
@@ -1,3 +1,14 @@
+2003-05-09  Darin Adler  <darin at apple.com>
+
+        Reviewed by John.
+
+        - fixed 3191943 -- XML parsing error occurs when loading shift-JIS Japanese XML file
+
+        * khtml/misc/decoder.h: Add a new type of encoding, EncodingFromXMLHeader.
+        * khtml/misc/decoder.cpp:
+        (findXMLEncoding): Added.
+        (Decoder::decode): Call findXMLEncoding to extract the encoding from the XML header.
+
 2003-05-08  Darin Adler  <darin at apple.com>
 
         Reviewed by John and Ken.
diff --git a/WebCore/khtml/misc/decoder.cpp b/WebCore/khtml/misc/decoder.cpp
index 100e587..9c89817 100644
--- a/WebCore/khtml/misc/decoder.cpp
+++ b/WebCore/khtml/misc/decoder.cpp
@@ -364,6 +364,47 @@ static void skipComment(const char *&ptr, const char *pEnd)
     ptr = p;
 }
 
+// Finds the value of the "encoding" attribute in str (the text of an XML header).
+// Returns the value's offset and sets encodingLength, or returns -1 if absent or malformed.
+static int findXMLEncoding(const QCString &str, int &encodingLength)
+{
+    int len = str.length();
+    int pos = str.find("encoding");
+    if (pos == -1)
+        return -1;
+    pos += 8;
+    
+    // Skip spaces and stray control characters.
+    while (pos != len && str[pos] <= ' ')
+        ++pos;
+
+    // Skip equals sign.
+    if (pos == len || str[pos] != '=')
+        return -1;
+    ++pos;
+
+    // Skip spaces and stray control characters.
+    while (pos != len && str[pos] <= ' ')
+        ++pos;
+
+    // Skip quotation mark.
+    if (pos == len)
+        return -1;
+    char quoteMark = str[pos];
+    if (quoteMark != '"' && quoteMark != '\'')
+        return -1;
+    ++pos;
+
+    // Find the trailing quotation mark; stop at the end so an unterminated quote cannot overrun.
+    int end = pos;
+    while (end != len && str[end] != quoteMark)
+        ++end;
+    if (end == len)
+        return -1;
+    encodingLength = end - pos;
+    return pos;
+}
+
 QString Decoder::decode(const char *data, int len)
 {
     // Check for UTF-16 or UTF-8 BOM mark at the beginning, which is a sure sign of a Unicode encoding.
@@ -445,11 +486,30 @@ QString Decoder::decode(const char *data, int len)
                 if(*ptr == '<') {
                     bool end = false;
                     ptr++;
+
+                    // Handle comments.
                     if (ptr[0] == '!' && ptr[1] == '-' && ptr[2] == '-') {
                         ptr += 3;
                         skipComment(ptr, pEnd);
                         continue;
                     }
+                    
+                    // Handle XML header, which can have encoding in it.
+                    if (ptr[0] == '?' && ptr[1] == 'x' && ptr[2] == 'm' && ptr[3] == 'l') {
+                        const char *end = ptr;
+                        while (*end != '>' && *end != '\0') end++;
+                        if (*end == '\0')
+                            break;
+                        QCString str(ptr, end - ptr);
+                        int len;
+                        int pos = findXMLEncoding(str, len);
+                        if (pos != -1) {
+                            setEncoding(str.mid(pos, len), EncodingFromXMLHeader);
+                            if (m_type == EncodingFromXMLHeader)
+                                goto found;
+                        }
+                    }
+
                     if(*ptr == '/') ptr++, end=true;
                     char tmp[20];
                     int len = 0;
@@ -498,11 +558,10 @@ QString Decoder::decode(const char *data, int len)
                                    (str[endpos] != ' ' && str[endpos] != '"' && str[endpos] != '\''
                                     && str[endpos] != ';' && str[endpos] != '>') )
 				endpos++;
-			    enc = str.mid(pos, endpos-pos);
 #ifdef DECODE_DEBUG
-			    kdDebug( 6005 ) << "Decoder: found charset: " << enc.data() << endl;
+			    kdDebug( 6005 ) << "Decoder: found charset: " << str.mid(pos, endpos-pos) << endl;
 #endif
-			    setEncoding(enc, EncodingFromMetaTag);
+			    setEncoding(str.mid(pos, endpos-pos), EncodingFromMetaTag);
 			    if( m_type == EncodingFromMetaTag ) goto found;
 
                             if ( endpos >= str.length() || str[endpos] == '/' || str[endpos] == '>' ) break;
diff --git a/WebCore/khtml/misc/decoder.h b/WebCore/khtml/misc/decoder.h
index 96f8833..4672544 100644
--- a/WebCore/khtml/misc/decoder.h
+++ b/WebCore/khtml/misc/decoder.h
@@ -36,6 +36,7 @@ public:
     enum EncodingType {
         DefaultEncoding,
         AutoDetectedEncoding,
+        EncodingFromXMLHeader,
         EncodingFromMetaTag,
         EncodingFromHTTPHeader,
         UserChosenEncoding
@@ -58,11 +59,13 @@ public:
 protected:
     // codec used for decoding. default is Latin1.
     QTextCodec *m_codec;
-    QTextDecoder *m_decoder; // only used for utf16
+    QTextDecoder *m_decoder;
     QCString enc;
     EncodingType m_type;
 
 #if APPLE_CHANGES
+    // Our version of QString works well for all-8-bit characters, and allows null characters.
+    // This works better than QCString when there are null characters involved.
     QString buffer;
 #else
     QCString buffer;
@@ -73,5 +76,6 @@ protected:
     bool visualRTL;
 };
 
-};
+}
+
 #endif

-- 
WebKit Debian packaging



More information about the Pkg-webkit-commits mailing list