[SCM] WebKit Debian packaging branch, debian/unstable, updated. debian/1.1.15-1-40151-g37bb677

Sat Sep 26 07:40:48 UTC 2009

The following commit has been merged in the debian/unstable branch:
commit 45265223087ae0165e382f0b06bcac0b356a27b9
Author: darin <darin at 268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Date:   Wed May 7 16:01:49 2003 +0000

            Reviewed by Ken.
    
    	- fixed 3109258 -- Unicode code points above U+FFFF were not handled correctly when expressed as numeric entities
    
            * khtml/html/htmltokenizer.cpp: (HTMLTokenizer::parseEntity): Keep track of the entity in
            an unsigned integer that can hold 32 bits rather than a QChar. Convert to a pair of Unicode
            surrogate code points if necessary.
    
            * khtml/misc/stringit.h: Add a second pushed character, used for the case where we have a pair of
            surrogate characters for a single numeric entity.
    
            * khtml/html/htmltokenizer.h: Add missing DOM:: qualifiers; the code was compiling only because of
            the "using namespace DOM" in stringit.h. Replace EntityChar with EntityUnicodeValue.
    
    
    git-svn-id: http://svn.webkit.org/repository/webkit/trunk@4302 268f45cc-cd09-0410-ab3c-d52691b4dbfc

diff --git a/WebCore/ChangeLog-2003-10-25 b/WebCore/ChangeLog-2003-10-25
index c5e9ce7..504c336 100644
--- a/WebCore/ChangeLog-2003-10-25
+++ b/WebCore/ChangeLog-2003-10-25
@@ -1,3 +1,19 @@
+2003-05-07  Darin Adler  <darin at apple.com>
+
+        Reviewed by Ken.
+
+	- fixed 3109258 -- Unicode above U+FFFF not handled correctly when expressed as numeric entities
+
+        * khtml/html/htmltokenizer.cpp: (HTMLTokenizer::parseEntity): Keep track of the entity in
+        an unsigned integer that can hold 32 bits rather than a QChar. Convert to a pair of Unicode
+        surrogate code points if necessary.
+        
+        * khtml/misc/stringit.h: Add a second pushed character, used for the case where we have a pair of
+        surrogate characters for a single numeric entity.
+
+        * khtml/html/htmltokenizer.h: Add missing DOM:: that was compiling only due to "using namespace DOM"
+        in stringit.h. Replace EntityChar with EntityUnicodeValue.
+
 2003-05-06  Darin Adler  <darin at apple.com>
 
         Reviewed by Chris.
diff --git a/WebCore/ChangeLog-2005-08-23 b/WebCore/ChangeLog-2005-08-23
index c5e9ce7..504c336 100644
--- a/WebCore/ChangeLog-2005-08-23
+++ b/WebCore/ChangeLog-2005-08-23
@@ -1,3 +1,19 @@
+2003-05-07  Darin Adler  <darin at apple.com>
+
+        Reviewed by Ken.
+
+	- fixed 3109258 -- Unicode above U+FFFF not handled correctly when expressed as numeric entities
+
+        * khtml/html/htmltokenizer.cpp: (HTMLTokenizer::parseEntity): Keep track of the entity in
+        an unsigned integer that can hold 32 bits rather than a QChar. Convert to a pair of Unicode
+        surrogate code points if necessary.
+        
+        * khtml/misc/stringit.h: Add a second pushed character, used for the case where we have a pair of
+        surrogate characters for a single numeric entity.
+
+        * khtml/html/htmltokenizer.h: Add missing DOM:: that was compiling only due to "using namespace DOM"
+        in stringit.h. Replace EntityChar with EntityUnicodeValue.
+
 2003-05-06  Darin Adler  <darin at apple.com>
 
         Reviewed by Chris.
diff --git a/WebCore/khtml/html/htmltokenizer.cpp b/WebCore/khtml/html/htmltokenizer.cpp
index 7af628a..8b14257 100644
--- a/WebCore/khtml/html/htmltokenizer.cpp
+++ b/WebCore/khtml/html/htmltokenizer.cpp
@@ -694,6 +694,7 @@ void HTMLTokenizer::parseEntity(DOMStringIt &src, QChar *&dest, bool start)
     {
         cBufferPos = 0;
         Entity = SearchEntity;
+        EntityUnicodeValue = 0;
     }
 
     while( src.length() )
@@ -701,9 +702,9 @@ void HTMLTokenizer::parseEntity(DOMStringIt &src, QChar *&dest, bool start)
         ushort cc = src->unicode();
         switch(Entity) {
         case NoEntity:
+            assert(Entity != NoEntity);
             return;
-
-            break;
+        
         case SearchEntity:
             if(cc == '#') {
                 cBuffer[cBufferPos++] = cc;
@@ -730,7 +731,6 @@ void HTMLTokenizer::parseEntity(DOMStringIt &src, QChar *&dest, bool start)
 
         case Hexadecimal:
         {
-            int uc = EntityChar.unicode();
             int ll = kMin(src.length(), 9-cBufferPos);
             while(ll--) {
                 QChar csrc(src->lower());
@@ -740,17 +740,15 @@ void HTMLTokenizer::parseEntity(DOMStringIt &src, QChar *&dest, bool start)
                     Entity = SearchSemicolon;
                     break;
                 }
-                uc = uc*16 + (cc - ( cc < 'a' ? '0' : 'a' - 10));
+                EntityUnicodeValue = EntityUnicodeValue*16 + (cc - ( cc < 'a' ? '0' : 'a' - 10));
                 cBuffer[cBufferPos++] = cc;
                 ++src;
             }
-            EntityChar = QChar(uc);
             if(cBufferPos == 9) Entity = SearchSemicolon;
             break;
         }
         case Decimal:
         {
-            int uc = EntityChar.unicode();
             int ll = kMin(src.length(), 9-cBufferPos);
             while(ll--) {
                 cc = src->cell();
@@ -760,11 +758,10 @@ void HTMLTokenizer::parseEntity(DOMStringIt &src, QChar *&dest, bool start)
                     break;
                 }
 
-                uc = uc * 10 + (cc - '0');
+                EntityUnicodeValue = EntityUnicodeValue * 10 + (cc - '0');
                 cBuffer[cBufferPos++] = cc;
                 ++src;
             }
-            EntityChar = QChar(uc);
             if(cBufferPos == 9)  Entity = SearchSemicolon;
             break;
         }
@@ -789,11 +786,11 @@ void HTMLTokenizer::parseEntity(DOMStringIt &src, QChar *&dest, bool start)
                 if(cBufferPos > 1) {
                     const entity *e = findEntity(cBuffer, cBufferPos);
                     if(e)
-                        EntityChar = e->code;
+                        EntityUnicodeValue = e->code;
 
                     // be IE compatible
-                    if(tag && EntityChar.unicode() > 255 && *src != ';')
-                        EntityChar = QChar::null;
+                    if(tag && EntityUnicodeValue > 255 && *src != ';')
+                        EntityUnicodeValue = 0;
                 }
             }
             else
@@ -801,17 +798,29 @@ void HTMLTokenizer::parseEntity(DOMStringIt &src, QChar *&dest, bool start)
         }
         case SearchSemicolon:
 
-            //kdDebug( 6036 ) << "ENTITY " << EntityChar.unicode() << ", " << res << endl;
+            //kdDebug( 6036 ) << "ENTITY " << EntityUnicodeValue << ", " << res << endl;
 
-            fixUpChar(EntityChar);
-
-            if ( EntityChar != QChar::null ) {
-                checkBuffer();
-                // Just insert it
+            // Don't allow surrogate code points, or values that are more than 21 bits.
+            if ((EntityUnicodeValue > 0 && EntityUnicodeValue < 0xD800)
+                    || (EntityUnicodeValue >= 0xE000 && EntityUnicodeValue <= 0x1FFFFF)) {
+            
                 if (*src == ';')
                     ++src;
 
-                src.push( EntityChar );
+                if (EntityUnicodeValue <= 0xFFFF) {
+                    QChar c(EntityUnicodeValue);
+                    fixUpChar(c);
+                    checkBuffer();
+                    src.push(c);
+                } else {
+                    // Convert to UTF-16, using surrogate code points.
+                    QChar c1(0xD800 | (((EntityUnicodeValue >> 16) - 1) << 6) | ((EntityUnicodeValue >> 10) & 0x3F));
+                    QChar c2(0xDC00 | (EntityUnicodeValue & 0x3FF));
+                    checkBuffer(2);
+                    src.push(c1);
+                    src.push(c2);
+                }
+
             } else {
 #ifdef TOKEN_DEBUG
                 kdDebug( 6036 ) << "unknown entity!" << endl;
@@ -822,15 +831,13 @@ void HTMLTokenizer::parseEntity(DOMStringIt &src, QChar *&dest, bool start)
                 for(unsigned int i = 0; i < cBufferPos; i++)
                     dest[i] = cBuffer[i];
                 dest += cBufferPos;
-                Entity = NoEntity;
                 if (pre)
                     prePos += cBufferPos+1;
             }
 
             Entity = NoEntity;
-            EntityChar = QChar::null;
             return;
-        };
+        }
     }
 }
 
diff --git a/WebCore/khtml/html/htmltokenizer.h b/WebCore/khtml/html/htmltokenizer.h
index 86ff9b5..3396fcb 100644
--- a/WebCore/khtml/html/htmltokenizer.h
+++ b/WebCore/khtml/html/htmltokenizer.h
@@ -83,14 +83,14 @@ namespace khtml {
             if(attrs) attrs->deref();
             if(text) text->deref();
         }
-        void addAttribute(DocumentImpl* doc, QChar* buffer, const QString& attrName, const DOMString& v)
+        void addAttribute(DOM::DocumentImpl* doc, QChar* buffer, const QString& attrName, const DOM::DOMString& v)
         {
-            AttributeImpl* a = 0;
+            DOM::AttributeImpl* a = 0;
             if(buffer->unicode())
-                a = new AttributeImpl(buffer->unicode(), v.implementation());
+                a = new DOM::AttributeImpl(buffer->unicode(), v.implementation());
             else if ( !attrName.isEmpty() && attrName != "/" )
-                a = new AttributeImpl(doc->attrId(0, DOMString(attrName).implementation(), false),
-                                      v.implementation());
+                a = new DOM::AttributeImpl(doc->attrId(0, DOM::DOMString(attrName).implementation(), false),
+                                           v.implementation());
 
             if (a) {
                 if(!attrs) {
@@ -114,7 +114,7 @@ namespace khtml {
             flat = false;
         }
         DOM::NamedAttrMapImpl* attrs;
-        DOMStringImpl* text;
+        DOM::DOMStringImpl* text;
         ushort id;
         bool flat;
     };
@@ -244,12 +244,11 @@ protected:
         EntityName,
         SearchSemicolon
     } Entity;
+    unsigned EntityUnicodeValue;
 
     // are we in a <script> ... </script block
     bool script;
 
-    QChar EntityChar;
-
     // Are we in a <pre> ... </pre> block
     bool pre;
 
diff --git a/WebCore/khtml/misc/stringit.h b/WebCore/khtml/misc/stringit.h
index d3c4d8f..51f36fa 100644
--- a/WebCore/khtml/misc/stringit.h
+++ b/WebCore/khtml/misc/stringit.h
@@ -29,7 +29,7 @@
 
 #include <qstring.h>
 
-using namespace DOM;
+#include <assert.h>
 
 namespace khtml
 {
@@ -43,40 +43,55 @@ public:
 	{ s = str, l = len; lines = 0; }
     DOMStringIt(const QString &str)
 	{ s = str.unicode(); l = str.length(); lines = 0; }
-    DOMStringIt(const DOMString &str)
+    DOMStringIt(const DOM::DOMString &str)
 	{ s = str.unicode(); l = str.length(); lines = 0; }
 
     DOMStringIt *operator++()
     {
-        if(!pushedChar.isNull())
-            pushedChar=0;
-        else if(l > 0 ) {
+        if (!pushedChar1.isNull()) {
+            pushedChar1 = pushedChar2;
+            pushedChar2 = 0;
+        } else if (l > 0) {
             if (*s == '\n')
                 lines++;
 	    s++, l--;
         }
 	return this;
     }
-public:
-    void push(const QChar& c) { /* assert(pushedChar.isNull());*/  pushedChar = c; }
 
-    const QChar& operator*() const  { return pushedChar.isNull() ? *s : pushedChar; }
-    const QChar* operator->() const { return pushedChar.isNull() ? s : &pushedChar; }
+    void push(const QChar& c) {
+        if (pushedChar1.isNull())
+            pushedChar1 = c;
+        else {
+            assert(pushedChar2.isNull());
+            pushedChar2 = c;
+        }
+    }
 
-    bool escaped() const { return !pushedChar.isNull(); }
-    uint length() const { return l+(!pushedChar.isNull()); }
+    const QChar *current() const {
+        if (!pushedChar1.isNull())
+            return &pushedChar1;
+        if (!pushedChar2.isNull())
+            return &pushedChar2;
+        return s;
+    }
+    
+    const QChar& operator*() const { return *current(); }
+    const QChar* operator->() const { return current(); }
+
+    bool escaped() const { return !pushedChar1.isNull(); }
+    uint length() const { return l + !pushedChar1.isNull() + !pushedChar2.isNull(); }
 
-    const QChar *current() const { return pushedChar.isNull() ? s : &pushedChar; }
     int lineCount() const { return lines; }
 
 protected:
-    QChar pushedChar;
+    QChar pushedChar1;
+    QChar pushedChar2;
     const QChar *s;
     int l;
     int lines;
 };
 
-
-};
+}
 
 #endif

-- 
WebKit Debian packaging