[SCM] WebKit Debian packaging branch, debian/unstable, updated. debian/1.1.15-1-40151-g37bb677
darin
darin at 268f45cc-cd09-0410-ab3c-d52691b4dbfc
Sat Sep 26 07:23:33 UTC 2009
The following commit has been merged in the debian/unstable branch:
commit e43bb6f2b63c42d5ed1e06bdb6aa8fa74238cfd0
Author: darin <darin at 268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Date: Tue Feb 4 21:34:46 2003 +0000
Reviewed by Maciej.
- fixed 3142813 -- Safari displays a blank page when trying to open html file encoded in little-endian UTF-16
* kwq/KWQString.h: Remove fromStringWithEncoding and convertToQCString,
add append that takes a QChar * and length.
* kwq/KWQString.mm:
(QString::fromUtf8): Use QTextCodec instead of fromStringWithEncoding.
(QString::append): Implement by calling insert.
* kwq/KWQTextCodec.h: Added an encoding() member function to QTextCodec
and made QTextDecoder an abstract base class to pave the way for putting
more state in it to handle multibyte encodings.
* kwq/KWQTextCodec.mm: Add KWQTextDecoder, a concrete subclass of QTextDecoder.
(codecForCFStringEncoding): Moved the static inside this function for clarity.
(QTextCodec::makeDecoder): Make a KWQTextDecoder.
(QTextCodec::toUnicode): Make a KWQTextDecoder and use it to do the decoding instead
of calling fromStringWithEncoding.
(KWQTextDecoder::convertUTF16): Added. Decodes Unicode, using the BOM to decide
whether it's little-endian or big-endian.
(KWQTextDecoder::convertUsingTEC): Added. Here's where the code from
QString::fromStringWithEncoding was moved. This needs to be changed to handle
state between multiple buffers passed in eventually, but not to fix this bug.
(KWQTextDecoder::toUnicode): Added. Calls convertUTF16 or convertUsingTEC.
* kwq/WebCoreBridge.mm: (+[WebCoreBridge stringWithData:textEncoding:]):
Use QTextCodec instead of fromStringWithEncoding.
* khtml/misc/decoder.cpp: (Decoder::decode): Rearranged the #if to make
things easier to read.
git-svn-id: http://svn.webkit.org/repository/webkit/trunk@3564 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/WebCore/ChangeLog-2003-10-25 b/WebCore/ChangeLog-2003-10-25
index 73edfbe..8d9d44b 100644
--- a/WebCore/ChangeLog-2003-10-25
+++ b/WebCore/ChangeLog-2003-10-25
@@ -1,5 +1,38 @@
2003-02-04 Darin Adler <darin at apple.com>
+ Reviewed by Maciej.
+
+ - fixed 3142813 -- Safari displays a blank page when trying to open html file encoded in little-endian UTF-16
+
+ * kwq/KWQString.h: Remove fromStringWithEncoding and convertToQCString,
+ add append that takes a QChar * and length.
+ * kwq/KWQString.mm:
+ (QString::fromUtf8): Use QTextCodec instead of fromStringWithEncoding.
+ (QString::append): Implement by calling insert.
+
+ * kwq/KWQTextCodec.h: Added an encoding() member function to QTextCodec
+ and made QTextDecoder an abstract base class to pave the way for putting
+ more state in it to handle multibyte encodings.
+ * kwq/KWQTextCodec.mm: Add KWQTextDecoder, a concrete subclass of QTextDecoder.
+ (codecForCFStringEncoding): Moved the static inside this function for clarity.
+ (QTextCodec::makeDecoder): Make a KWQTextDecoder.
+ (QTextCodec::toUnicode): Make a KWQTextDecoder and use it to do the decoding instead
+ of calling fromStringWithEncoding.
+ (KWQTextDecoder::convertUTF16): Added. Decodes Unicode, using the BOM to decide
+ whether it's little-endian or big-endian.
+ (KWQTextDecoder::convertUsingTEC): Added. Here's where the code from
+ QString::fromStringWithEncoding was moved. This needs to be changed to handle
+ state between multiple buffers passed in eventually, but not to fix this bug.
+ (KWQTextDecoder::toUnicode): Added. Calls convertUTF16 or convertUsingTEC.
+
+ * kwq/WebCoreBridge.mm: (+[WebCoreBridge stringWithData:textEncoding:]):
+ Use QTextCodec instead of fromStringWithEncoding.
+
+ * khtml/misc/decoder.cpp: (Decoder::decode): Rearranged the #if to make
+ things easier to read.
+
+2003-02-04 Darin Adler <darin at apple.com>
+
Reviewed by Trey.
- fixed 3150252 -- files with question marks in their names fail to upload as attachments
diff --git a/WebCore/ChangeLog-2005-08-23 b/WebCore/ChangeLog-2005-08-23
index 73edfbe..8d9d44b 100644
--- a/WebCore/ChangeLog-2005-08-23
+++ b/WebCore/ChangeLog-2005-08-23
@@ -1,5 +1,38 @@
2003-02-04 Darin Adler <darin at apple.com>
+ Reviewed by Maciej.
+
+ - fixed 3142813 -- Safari displays a blank page when trying to open html file encoded in little-endian UTF-16
+
+ * kwq/KWQString.h: Remove fromStringWithEncoding and convertToQCString,
+ add append that takes a QChar * and length.
+ * kwq/KWQString.mm:
+ (QString::fromUtf8): Use QTextCodec instead of fromStringWithEncoding.
+ (QString::append): Implement by calling insert.
+
+ * kwq/KWQTextCodec.h: Added an encoding() member function to QTextCodec
+ and made QTextDecoder an abstract base class to pave the way for putting
+ more state in it to handle multibyte encodings.
+ * kwq/KWQTextCodec.mm: Add KWQTextDecoder, a concrete subclass of QTextDecoder.
+ (codecForCFStringEncoding): Moved the static inside this function for clarity.
+ (QTextCodec::makeDecoder): Make a KWQTextDecoder.
+ (QTextCodec::toUnicode): Make a KWQTextDecoder and use it to do the decoding instead
+ of calling fromStringWithEncoding.
+ (KWQTextDecoder::convertUTF16): Added. Decodes Unicode, using the BOM to decide
+ whether it's little-endian or big-endian.
+ (KWQTextDecoder::convertUsingTEC): Added. Here's where the code from
+ QString::fromStringWithEncoding was moved. This needs to be changed to handle
+ state between multiple buffers passed in eventually, but not to fix this bug.
+ (KWQTextDecoder::toUnicode): Added. Calls convertUTF16 or convertUsingTEC.
+
+ * kwq/WebCoreBridge.mm: (+[WebCoreBridge stringWithData:textEncoding:]):
+ Use QTextCodec instead of fromStringWithEncoding.
+
+ * khtml/misc/decoder.cpp: (Decoder::decode): Rearranged the #if to make
+ things easier to read.
+
+2003-02-04 Darin Adler <darin at apple.com>
+
Reviewed by Trey.
- fixed 3150252 -- files with question marks in their names fail to upload as attachments
diff --git a/WebCore/khtml/misc/decoder.cpp b/WebCore/khtml/misc/decoder.cpp
index 17f2590..9bceebc 100644
--- a/WebCore/khtml/misc/decoder.cpp
+++ b/WebCore/khtml/misc/decoder.cpp
@@ -526,12 +526,14 @@ QString Decoder::decode(const char *data, int len)
QString out;
#if APPLE_CHANGES
- // We don't need to special-case unicode like this, and doing so
- // will drop the buffer which is really bad!
- if(!buffer.isEmpty()) {
+ if (!buffer.isEmpty()) {
+ out = m_decoder->toUnicode(buffer.latin1(), buffer.length());
+ buffer.truncate(0);
+ } else {
+ out = m_decoder->toUnicode(data, len);
+ }
#else
if(!buffer.isEmpty() && enc != "ISO-10646-UCS-2") {
-#endif
out = m_decoder->toUnicode(buffer.latin1(), buffer.length());
buffer = "";
} else {
@@ -547,9 +549,6 @@ QString Decoder::decode(const char *data, int len)
}
}
out = m_decoder->toUnicode(data, len);
-#if APPLE_CHANGES
- } // need to balance braces for benefit of ChangeLog script
-#else
}
#endif
diff --git a/WebCore/kwq/KWQString.h b/WebCore/kwq/KWQString.h
index 81062f4..a03af37 100644
--- a/WebCore/kwq/KWQString.h
+++ b/WebCore/kwq/KWQString.h
@@ -337,7 +337,6 @@ public:
static QString fromLatin1(const char *, int len);
static QString fromUtf8(const char *);
static QString fromUtf8(const char *, int len);
- static QString fromStringWithEncoding(const char *, int, CFStringEncoding);
static QString fromCFString(CFStringRef);
static QString fromNSString(NSString *);
@@ -464,6 +463,7 @@ public:
void setBufferFromCFString(CFStringRef);
+ QString &append(const QChar *, uint length);
QString &prepend(const QChar *, uint length);
QString &insert(uint position, const QChar *, uint length);
@@ -479,8 +479,6 @@ private:
QChar *forceUnicode();
void setLength(uint);
- QCString convertToQCString(CFStringEncoding) const;
-
KWQStringData **dataHandle;
KWQStringData internalData;
diff --git a/WebCore/kwq/KWQString.mm b/WebCore/kwq/KWQString.mm
index 393f79b..74e53fa 100644
--- a/WebCore/kwq/KWQString.mm
+++ b/WebCore/kwq/KWQString.mm
@@ -32,6 +32,7 @@
#import "KWQLogging.h"
#import "KWQString.h"
#import "KWQRegExp.h"
+#import "KWQTextCodec.h"
#define CHECK_FOR_HANDLE_LEAKS 0
@@ -653,93 +654,12 @@ void QString::setBufferFromCFString(CFStringRef cfs)
QString QString::fromUtf8(const char *chs)
{
- return fromStringWithEncoding(chs, strlen(chs), kCFStringEncodingUTF8);
+ return QTextCodec(kCFStringEncodingUTF8).toUnicode(chs, strlen(chs));
}
QString QString::fromUtf8(const char *chs, int len)
{
- return fromStringWithEncoding(chs, len, kCFStringEncodingUTF8);
-}
-
-// This function is used by the decoder.
-QString QString::fromStringWithEncoding(const char *chs, int len, CFStringEncoding encoding)
-{
- ASSERT_ARG(len, len >= 0);
- ASSERT_ARG(encoding, encoding != kCFStringEncodingInvalidId);
-
- if (len <= 0) {
- return QString::null;
- }
-
- // Get a converter for the passed-in encoding.
- static TECObjectRef converter;
- static CFStringEncoding converterEncoding = kCFStringEncodingInvalidId;
- OSStatus status;
- if (encoding != converterEncoding) {
- TECObjectRef newConverter;
- status = TECCreateConverter(&newConverter, encoding,
- CreateTextEncoding(kTextEncodingUnicodeDefault, kTextEncodingDefaultVariant, kUnicode16BitFormat));
- if (status) {
- ERROR("the Text Encoding Converter won't convert from text encoding 0x%X, error %d", encoding, status);
- return QString::null;
- }
- if (converter) {
- TECDisposeConverter(converter);
- }
- converter = newConverter;
- } else {
- TECClearConverterContextInfo(converter);
- }
-
- const UInt8 *sourcePointer = (UInt8 *)chs;
- unsigned long sourceLength = len;
-
- QString result;
- int resultLength = 0;
-
- for (;;) {
- UniChar buffer[4096];
- unsigned long bytesWritten = 0;
- bool doingFlush = sourceLength == 0;
- if (doingFlush) {
- status = TECFlushText(converter,
- (UInt8 *)buffer, sizeof(buffer), &bytesWritten);
- } else {
- unsigned long bytesRead = 0;
- status = TECConvertText(converter, sourcePointer, sourceLength, &bytesRead,
- (UInt8 *)buffer, sizeof(buffer), &bytesWritten);
- sourcePointer += bytesRead;
- sourceLength -= bytesRead;
- }
- if (bytesWritten) {
- ASSERT(bytesWritten % sizeof(UniChar) == 0);
- result.setLength(resultLength + bytesWritten / sizeof(UniChar));
- memcpy(result.forceUnicode() + resultLength, buffer, bytesWritten);
- resultLength += bytesWritten / sizeof(UniChar);
- }
- if (status == kTextMalformedInputErr || status == kTextUndefinedElementErr) {
- // FIXME: Put in FFFD character here?
- TECClearConverterContextInfo(converter);
- if (sourceLength) {
- sourcePointer += 1;
- sourceLength -= 1;
- }
- status = noErr;
- }
- if (status == kTECOutputBufferFullStatus) {
- continue;
- }
- if (status != noErr) {
- ERROR("text decoding failed with error %d", status);
- break;
- }
- if (doingFlush) {
- // Done.
- break;
- }
- }
-
- return result;
+ return QTextCodec(kCFStringEncodingUTF8).toUnicode(chs, len);
}
QString QString::fromCFString(CFStringRef cfs)
@@ -2072,6 +1992,10 @@ QString &QString::append(const QString &qs)
return insert(dataHandle[0]->_length, qs);
}
+QString &QString::append(const QChar *characters, uint length)
+{
+ return insert(dataHandle[0]->_length, characters, length);
+}
QString &QString::insert(uint index, const char *insertChars, uint insertLength)
{
diff --git a/WebCore/kwq/KWQTextCodec.h b/WebCore/kwq/KWQTextCodec.h
index 36e5211..452d1b1 100644
--- a/WebCore/kwq/KWQTextCodec.h
+++ b/WebCore/kwq/KWQTextCodec.h
@@ -37,7 +37,7 @@ public:
static QTextCodec *codecForName(const char *);
static QTextCodec *codecForLocale();
- QTextCodec(CFStringEncoding e) : encoding(e) { }
+ QTextCodec(CFStringEncoding e) : _encoding(e) { }
const char* name() const;
int mibEnum() const;
@@ -48,18 +48,16 @@ public:
QString toUnicode(const char *, int) const;
QString toUnicode(const QByteArray &, int) const;
- QString toUnicode(const char *) const;
+
+ CFStringEncoding encoding() const { return _encoding; }
private:
- CFStringEncoding encoding;
+ CFStringEncoding _encoding;
};
class QTextDecoder {
public:
- QTextDecoder(const QTextCodec *c) : textCodec(*c) { }
- QString toUnicode(const char *chs, int len) { return textCodec.toUnicode(chs, len); }
-private:
- QTextCodec textCodec;
+ virtual QString toUnicode(const char *, int) = 0;
};
#endif
diff --git a/WebCore/kwq/KWQTextCodec.mm b/WebCore/kwq/KWQTextCodec.mm
index e47131b..4d8127d 100644
--- a/WebCore/kwq/KWQTextCodec.mm
+++ b/WebCore/kwq/KWQTextCodec.mm
@@ -25,19 +25,38 @@
#import "KWQTextCodec.h"
+#import "KWQAssertions.h"
#import "KWQCharsets.h"
-static CFMutableDictionaryRef encodingToCodec = NULL;
+class KWQTextDecoder : public QTextDecoder {
+public:
+ KWQTextDecoder(const QTextCodec &c) : _codec(c), _state(atStart), _haveBufferedByte(false) { }
+ QString toUnicode(const char *chs, int len);
+
+private:
+ QString convertUTF16(const unsigned char *chs, int len);
+ QString convertUsingTEC(const UInt8 *chs, int len);
+
+ QTextCodec _codec;
+
+ // State for Unicode decoding.
+ enum UnicodeEndianState {
+ atStart,
+ littleEndian,
+ bigEndian
+ };
+ UnicodeEndianState _state;
+ bool _haveBufferedByte;
+ char _bufferedByte;
+};
static QTextCodec *codecForCFStringEncoding(CFStringEncoding encoding)
{
if (encoding == kCFStringEncodingInvalidId) {
- return NULL;
+ return 0;
}
- if (encodingToCodec == NULL) {
- encodingToCodec = CFDictionaryCreateMutable(NULL, 0, NULL, NULL);
- }
+ static CFMutableDictionaryRef encodingToCodec = CFDictionaryCreateMutable(NULL, 0, NULL, NULL);
const void *value;
if (CFDictionaryGetValueIfPresent(encodingToCodec, (void *)encoding, &value)) {
@@ -65,17 +84,17 @@ QTextCodec *QTextCodec::codecForLocale()
const char *QTextCodec::name() const
{
- return KWQCFStringEncodingToIANACharsetName(encoding);
+ return KWQCFStringEncodingToIANACharsetName(_encoding);
}
int QTextCodec::mibEnum() const
{
- return KWQCFStringEncodingToMIB(encoding);
+ return KWQCFStringEncodingToMIB(_encoding);
}
QTextDecoder *QTextCodec::makeDecoder() const
{
- return new QTextDecoder(this);
+ return new KWQTextDecoder(*this);
}
QCString QTextCodec::fromUnicode(const QString &qcs) const
@@ -83,24 +102,196 @@ QCString QTextCodec::fromUnicode(const QString &qcs) const
CFStringRef cfs = qcs.getCFString();
CFRange range = CFRangeMake(0, CFStringGetLength(cfs));
CFIndex bufferLength;
- CFStringGetBytes(cfs, range, encoding, '?', false, NULL, 0x7FFFFFFF, &bufferLength);
+ CFStringGetBytes(cfs, range, _encoding, '?', false, NULL, 0x7FFFFFFF, &bufferLength);
QCString result(bufferLength + 1);
- CFStringGetBytes(cfs, range, encoding, '?', false, (UInt8 *)result.data(), bufferLength, &bufferLength);
+ CFStringGetBytes(cfs, range, _encoding, '?', false, (UInt8 *)result.data(), bufferLength, &bufferLength);
result[bufferLength] = 0;
return result;
}
QString QTextCodec::toUnicode(const char *chs, int len) const
{
- return QString::fromStringWithEncoding(chs, len, encoding);
+ return KWQTextDecoder(*this).toUnicode(chs, len);
}
QString QTextCodec::toUnicode(const QByteArray &qba, int len) const
{
- return QString::fromStringWithEncoding(qba, len, encoding);
+ return KWQTextDecoder(*this).toUnicode(qba, len);
}
-QString QTextCodec::toUnicode(const char *chs) const
+QString KWQTextDecoder::convertUTF16(const unsigned char *s, int length)
{
- return QString::fromStringWithEncoding(chs, -1, encoding);
+ ASSERT(length > 0);
+
+ const unsigned char *p = s;
+ unsigned len = length;
+
+ // Check for the BOM.
+ if (_state == atStart) {
+ unsigned char bom0;
+ unsigned char bom1;
+ if (_haveBufferedByte) {
+ bom0 = _bufferedByte;
+ bom1 = p[0];
+ } else {
+ if (len == 1) {
+ _haveBufferedByte = true;
+ _bufferedByte = p[0];
+ return QString::null;
+ }
+ bom0 = p[0];
+ bom1 = p[1];
+ }
+ if (bom0 == 0xFF && bom1 == 0xFE) {
+ _state = littleEndian;
+ if (_haveBufferedByte) {
+ _haveBufferedByte = false;
+ p += 1;
+ len -= 1;
+ } else {
+ p += 2;
+ len -= 2;
+ }
+ } else if (bom0 == 0xFE && bom1 == 0xFF) {
+ _state = bigEndian;
+ if (_haveBufferedByte) {
+ _haveBufferedByte = false;
+ p += 1;
+ len -= 1;
+ } else {
+ p += 2;
+ len -= 2;
+ }
+ } else {
+ _state = bigEndian;
+ }
+ }
+
+ QString result;
+
+ if (_haveBufferedByte && len) {
+ UniChar c;
+ if (_state == littleEndian) {
+ c = _bufferedByte | (p[0] << 8);
+ } else {
+ c = (_bufferedByte << 8) | p[0];
+ }
+ result.append(reinterpret_cast<QChar *>(&c), 1);
+ _haveBufferedByte = false;
+ p += 1;
+ len -= 1;
+ }
+
+ while (len > 1) {
+ UniChar buffer[4096];
+ int runLength = MIN(len / 2, sizeof(buffer) / sizeof(buffer[0]));
+ if (_state == littleEndian) {
+ for (int i = 0; i < runLength; ++i) {
+ buffer[i] = p[0] | (p[1] << 8);
+ p += 2;
+ }
+ } else {
+ for (int i = 0; i < runLength; ++i) {
+ buffer[i] = (p[0] << 8) | p[1];
+ p += 2;
+ }
+ }
+ result.append(reinterpret_cast<QChar *>(buffer), runLength);
+ len -= runLength * 2;
+ }
+
+ if (len) {
+ _haveBufferedByte = true;
+ _bufferedByte = p[0];
+ }
+
+ return result;
+}
+
+QString KWQTextDecoder::convertUsingTEC(const UInt8 *chs, int len)
+{
+ // FIXME: This discards state between calls, which won't work for multibyte encodings.
+
+ // Get a converter for the passed-in encoding.
+ static TECObjectRef converter;
+ static CFStringEncoding converterEncoding = kCFStringEncodingInvalidId;
+ OSStatus status;
+ if (_codec.encoding() != converterEncoding) {
+ TECObjectRef newConverter;
+ status = TECCreateConverter(&newConverter, _codec.encoding(),
+ CreateTextEncoding(kTextEncodingUnicodeDefault, kTextEncodingDefaultVariant, kUnicode16BitFormat));
+ if (status) {
+ ERROR("the Text Encoding Converter won't convert from text encoding 0x%X, error %d", _codec.encoding(), status);
+ return QString::null;
+ }
+ if (converter) {
+ TECDisposeConverter(converter);
+ }
+ converter = newConverter;
+ } else {
+ TECClearConverterContextInfo(converter);
+ }
+
+ QString result;
+
+ const UInt8 *sourcePointer = chs;
+ unsigned long sourceLength = len;
+
+ for (;;) {
+ UniChar buffer[4096];
+ unsigned long bytesWritten = 0;
+ bool doingFlush = sourceLength == 0;
+ if (doingFlush) {
+ status = TECFlushText(converter,
+ reinterpret_cast<UInt8 *>(buffer), sizeof(buffer), &bytesWritten);
+ } else {
+ unsigned long bytesRead = 0;
+ status = TECConvertText(converter, sourcePointer, sourceLength, &bytesRead,
+ reinterpret_cast<UInt8 *>(buffer), sizeof(buffer), &bytesWritten);
+ sourcePointer += bytesRead;
+ sourceLength -= bytesRead;
+ }
+ if (bytesWritten) {
+ ASSERT(bytesWritten % sizeof(UniChar) == 0);
+ result.append(reinterpret_cast<QChar *>(buffer), bytesWritten / sizeof(UniChar));
+ }
+ if (status == kTextMalformedInputErr || status == kTextUndefinedElementErr) {
+ // FIXME: Put in FFFD character here?
+ TECClearConverterContextInfo(converter);
+ if (sourceLength) {
+ sourcePointer += 1;
+ sourceLength -= 1;
+ }
+ status = noErr;
+ }
+ if (status == kTECOutputBufferFullStatus) {
+ continue;
+ }
+ if (status != noErr) {
+ ERROR("text decoding failed with error %d", status);
+ break;
+ }
+ if (doingFlush) {
+ // Done.
+ break;
+ }
+ }
+
+ return result;
+}
+
+QString KWQTextDecoder::toUnicode(const char *chs, int len)
+{
+ ASSERT_ARG(chs, chs);
+ ASSERT_ARG(len, len >= 0);
+
+ if (len <= 0) {
+ return QString::null;
+ }
+
+ if (_codec.encoding() == kCFStringEncodingUnicode) {
+ return convertUTF16(reinterpret_cast<const unsigned char *>(chs), len);
+ }
+
+ return convertUsingTEC(reinterpret_cast<const UInt8 *>(chs), len);
}
diff --git a/WebCore/kwq/WebCoreBridge.mm b/WebCore/kwq/WebCoreBridge.mm
index d76eb5b..d86e556 100644
--- a/WebCore/kwq/WebCoreBridge.mm
+++ b/WebCore/kwq/WebCoreBridge.mm
@@ -690,8 +690,7 @@ static bool initializedObjectCacheSize = FALSE;
if (textEncoding == kCFStringEncodingInvalidId || textEncoding == kCFStringEncodingISOLatin1) {
textEncoding = kCFStringEncodingWindowsLatin1;
}
- QString string = QString::fromStringWithEncoding((const char*)[data bytes], [data length], textEncoding);
- return string.getNSString();
+ return QTextCodec(textEncoding).toUnicode((const char*)[data bytes], [data length]).getNSString();
}
+ (NSString *)stringWithData:(NSData *)data textEncodingName:(NSString *)textEncodingName
--
WebKit Debian packaging
More information about the Pkg-webkit-commits
mailing list