[SCM] WebKit Debian packaging branch, debian/unstable, updated. debian/1.1.15-1-40151-g37bb677
darin
darin at 268f45cc-cd09-0410-ab3c-d52691b4dbfc
Sat Sep 26 07:40:48 UTC 2009
The following commit has been merged in the debian/unstable branch:
commit 45265223087ae0165e382f0b06bcac0b356a27b9
Author: darin <darin at 268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Date: Wed May 7 16:01:49 2003 +0000
Reviewed by Ken.
- fixed 3109258 -- Unicode above U+FFFF not handled correctly when expressed as numeric entities
* khtml/html/htmltokenizer.cpp: (HTMLTokenizer::parseEntity): Keep track of the entity in
an unsigned integer that can hold 32 bits rather than a QChar. Convert to a pair of Unicode
surrogate code points if necessary.
* khtml/misc/stringit.h: Add a second pushed character, used for the case where we have a pair of
surrogate characters for a single numeric entity.
* khtml/html/htmltokenizer.h: Add missing DOM:: that was compiling only due to "using namespace DOM"
in stringit.h. Replace EntityChar with EntityUnicodeValue.
git-svn-id: http://svn.webkit.org/repository/webkit/trunk@4302 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/WebCore/ChangeLog-2003-10-25 b/WebCore/ChangeLog-2003-10-25
index c5e9ce7..504c336 100644
--- a/WebCore/ChangeLog-2003-10-25
+++ b/WebCore/ChangeLog-2003-10-25
@@ -1,3 +1,19 @@
+2003-05-07 Darin Adler <darin at apple.com>
+
+ Reviewed by Ken.
+
+ - fixed 3109258 -- Unicode above U+FFFF not handled correctly when expressed as numeric entities
+
+ * khtml/html/htmltokenizer.cpp: (HTMLTokenizer::parseEntity): Keep track of the entity in
+ an unsigned integer that can hold 32 bits rather than a QChar. Convert to a pair of Unicode
+ surrogate code points if necessary.
+
+ * khtml/misc/stringit.h: Add a second pushed character, used for the case where we have a pair of
+ surrogate characters for a single numeric entity.
+
+ * khtml/html/htmltokenizer.h: Add missing DOM:: that was compiling only due to "using namespace DOM"
+ in stringit.h. Replace EntityChar with EntityUnicodeValue.
+
2003-05-06 Darin Adler <darin at apple.com>
Reviewed by Chris.
diff --git a/WebCore/ChangeLog-2005-08-23 b/WebCore/ChangeLog-2005-08-23
index c5e9ce7..504c336 100644
--- a/WebCore/ChangeLog-2005-08-23
+++ b/WebCore/ChangeLog-2005-08-23
@@ -1,3 +1,19 @@
+2003-05-07 Darin Adler <darin at apple.com>
+
+ Reviewed by Ken.
+
+ - fixed 3109258 -- Unicode above U+FFFF not handled correctly when expressed as numeric entities
+
+ * khtml/html/htmltokenizer.cpp: (HTMLTokenizer::parseEntity): Keep track of the entity in
+ an unsigned integer that can hold 32 bits rather than a QChar. Convert to a pair of Unicode
+ surrogate code points if necessary.
+
+ * khtml/misc/stringit.h: Add a second pushed character, used for the case where we have a pair of
+ surrogate characters for a single numeric entity.
+
+ * khtml/html/htmltokenizer.h: Add missing DOM:: that was compiling only due to "using namespace DOM"
+ in stringit.h. Replace EntityChar with EntityUnicodeValue.
+
2003-05-06 Darin Adler <darin at apple.com>
Reviewed by Chris.
diff --git a/WebCore/khtml/html/htmltokenizer.cpp b/WebCore/khtml/html/htmltokenizer.cpp
index 7af628a..8b14257 100644
--- a/WebCore/khtml/html/htmltokenizer.cpp
+++ b/WebCore/khtml/html/htmltokenizer.cpp
@@ -694,6 +694,7 @@ void HTMLTokenizer::parseEntity(DOMStringIt &src, QChar *&dest, bool start)
{
cBufferPos = 0;
Entity = SearchEntity;
+ EntityUnicodeValue = 0;
}
while( src.length() )
@@ -701,9 +702,9 @@ void HTMLTokenizer::parseEntity(DOMStringIt &src, QChar *&dest, bool start)
ushort cc = src->unicode();
switch(Entity) {
case NoEntity:
+ assert(Entity != NoEntity);
return;
-
- break;
+
case SearchEntity:
if(cc == '#') {
cBuffer[cBufferPos++] = cc;
@@ -730,7 +731,6 @@ void HTMLTokenizer::parseEntity(DOMStringIt &src, QChar *&dest, bool start)
case Hexadecimal:
{
- int uc = EntityChar.unicode();
int ll = kMin(src.length(), 9-cBufferPos);
while(ll--) {
QChar csrc(src->lower());
@@ -740,17 +740,15 @@ void HTMLTokenizer::parseEntity(DOMStringIt &src, QChar *&dest, bool start)
Entity = SearchSemicolon;
break;
}
- uc = uc*16 + (cc - ( cc < 'a' ? '0' : 'a' - 10));
+ EntityUnicodeValue = EntityUnicodeValue*16 + (cc - ( cc < 'a' ? '0' : 'a' - 10));
cBuffer[cBufferPos++] = cc;
++src;
}
- EntityChar = QChar(uc);
if(cBufferPos == 9) Entity = SearchSemicolon;
break;
}
case Decimal:
{
- int uc = EntityChar.unicode();
int ll = kMin(src.length(), 9-cBufferPos);
while(ll--) {
cc = src->cell();
@@ -760,11 +758,10 @@ void HTMLTokenizer::parseEntity(DOMStringIt &src, QChar *&dest, bool start)
break;
}
- uc = uc * 10 + (cc - '0');
+ EntityUnicodeValue = EntityUnicodeValue * 10 + (cc - '0');
cBuffer[cBufferPos++] = cc;
++src;
}
- EntityChar = QChar(uc);
if(cBufferPos == 9) Entity = SearchSemicolon;
break;
}
@@ -789,11 +786,11 @@ void HTMLTokenizer::parseEntity(DOMStringIt &src, QChar *&dest, bool start)
if(cBufferPos > 1) {
const entity *e = findEntity(cBuffer, cBufferPos);
if(e)
- EntityChar = e->code;
+ EntityUnicodeValue = e->code;
// be IE compatible
- if(tag && EntityChar.unicode() > 255 && *src != ';')
- EntityChar = QChar::null;
+ if(tag && EntityUnicodeValue > 255 && *src != ';')
+ EntityUnicodeValue = 0;
}
}
else
@@ -801,17 +798,29 @@ void HTMLTokenizer::parseEntity(DOMStringIt &src, QChar *&dest, bool start)
}
case SearchSemicolon:
- //kdDebug( 6036 ) << "ENTITY " << EntityChar.unicode() << ", " << res << endl;
+ //kdDebug( 6036 ) << "ENTITY " << EntityUnicodeValue << ", " << res << endl;
- fixUpChar(EntityChar);
-
- if ( EntityChar != QChar::null ) {
- checkBuffer();
- // Just insert it
+ // Don't allow surrogate code points, or values that are more than 21 bits.
+ if ((EntityUnicodeValue > 0 && EntityUnicodeValue < 0xD800)
+ || (EntityUnicodeValue >= 0xE000 && EntityUnicodeValue <= 0x1FFFFF)) {
+
if (*src == ';')
++src;
- src.push( EntityChar );
+ if (EntityUnicodeValue <= 0xFFFF) {
+ QChar c(EntityUnicodeValue);
+ fixUpChar(c);
+ checkBuffer();
+ src.push(c);
+ } else {
+ // Convert to UTF-16, using surrogate code points.
+ QChar c1(0xD800 | (((EntityUnicodeValue >> 16) - 1) << 6) | ((EntityUnicodeValue >> 10) & 0x3F));
+ QChar c2(0xDC00 | (EntityUnicodeValue & 0x3FF));
+ checkBuffer(2);
+ src.push(c1);
+ src.push(c2);
+ }
+
} else {
#ifdef TOKEN_DEBUG
kdDebug( 6036 ) << "unknown entity!" << endl;
@@ -822,15 +831,13 @@ void HTMLTokenizer::parseEntity(DOMStringIt &src, QChar *&dest, bool start)
for(unsigned int i = 0; i < cBufferPos; i++)
dest[i] = cBuffer[i];
dest += cBufferPos;
- Entity = NoEntity;
if (pre)
prePos += cBufferPos+1;
}
Entity = NoEntity;
- EntityChar = QChar::null;
return;
- };
+ }
}
}
diff --git a/WebCore/khtml/html/htmltokenizer.h b/WebCore/khtml/html/htmltokenizer.h
index 86ff9b5..3396fcb 100644
--- a/WebCore/khtml/html/htmltokenizer.h
+++ b/WebCore/khtml/html/htmltokenizer.h
@@ -83,14 +83,14 @@ namespace khtml {
if(attrs) attrs->deref();
if(text) text->deref();
}
- void addAttribute(DocumentImpl* doc, QChar* buffer, const QString& attrName, const DOMString& v)
+ void addAttribute(DOM::DocumentImpl* doc, QChar* buffer, const QString& attrName, const DOM::DOMString& v)
{
- AttributeImpl* a = 0;
+ DOM::AttributeImpl* a = 0;
if(buffer->unicode())
- a = new AttributeImpl(buffer->unicode(), v.implementation());
+ a = new DOM::AttributeImpl(buffer->unicode(), v.implementation());
else if ( !attrName.isEmpty() && attrName != "/" )
- a = new AttributeImpl(doc->attrId(0, DOMString(attrName).implementation(), false),
- v.implementation());
+ a = new DOM::AttributeImpl(doc->attrId(0, DOM::DOMString(attrName).implementation(), false),
+ v.implementation());
if (a) {
if(!attrs) {
@@ -114,7 +114,7 @@ namespace khtml {
flat = false;
}
DOM::NamedAttrMapImpl* attrs;
- DOMStringImpl* text;
+ DOM::DOMStringImpl* text;
ushort id;
bool flat;
};
@@ -244,12 +244,11 @@ protected:
EntityName,
SearchSemicolon
} Entity;
+ unsigned EntityUnicodeValue;
// are we in a <script> ... </script block
bool script;
- QChar EntityChar;
-
// Are we in a <pre> ... </pre> block
bool pre;
diff --git a/WebCore/khtml/misc/stringit.h b/WebCore/khtml/misc/stringit.h
index d3c4d8f..51f36fa 100644
--- a/WebCore/khtml/misc/stringit.h
+++ b/WebCore/khtml/misc/stringit.h
@@ -29,7 +29,7 @@
#include <qstring.h>
-using namespace DOM;
+#include <assert.h>
namespace khtml
{
@@ -43,40 +43,55 @@ public:
{ s = str, l = len; lines = 0; }
DOMStringIt(const QString &str)
{ s = str.unicode(); l = str.length(); lines = 0; }
- DOMStringIt(const DOMString &str)
+ DOMStringIt(const DOM::DOMString &str)
{ s = str.unicode(); l = str.length(); lines = 0; }
DOMStringIt *operator++()
{
- if(!pushedChar.isNull())
- pushedChar=0;
- else if(l > 0 ) {
+ if (!pushedChar1.isNull()) {
+ pushedChar1 = pushedChar2;
+ pushedChar2 = 0;
+ } else if (l > 0) {
if (*s == '\n')
lines++;
s++, l--;
}
return this;
}
-public:
- void push(const QChar& c) { /* assert(pushedChar.isNull());*/ pushedChar = c; }
- const QChar& operator*() const { return pushedChar.isNull() ? *s : pushedChar; }
- const QChar* operator->() const { return pushedChar.isNull() ? s : &pushedChar; }
+ void push(const QChar& c) {
+ if (pushedChar1.isNull())
+ pushedChar1 = c;
+ else {
+ assert(pushedChar2.isNull());
+ pushedChar2 = c;
+ }
+ }
- bool escaped() const { return !pushedChar.isNull(); }
- uint length() const { return l+(!pushedChar.isNull()); }
+ const QChar *current() const {
+ if (!pushedChar1.isNull())
+ return &pushedChar1;
+ if (!pushedChar2.isNull())
+ return &pushedChar2;
+ return s;
+ }
+
+ const QChar& operator*() const { return *current(); }
+ const QChar* operator->() const { return current(); }
+
+ bool escaped() const { return !pushedChar1.isNull(); }
+ uint length() const { return l + !pushedChar1.isNull() + !pushedChar2.isNull(); }
- const QChar *current() const { return pushedChar.isNull() ? s : &pushedChar; }
int lineCount() const { return lines; }
protected:
- QChar pushedChar;
+ QChar pushedChar1;
+ QChar pushedChar2;
const QChar *s;
int l;
int lines;
};
-
-};
+}
#endif
--
WebKit Debian packaging
More information about the Pkg-webkit-commits
mailing list