[SCM] WebKit Debian packaging branch, debian/unstable, updated. debian/1.1.15-1-40151-g37bb677
darin
darin at 268f45cc-cd09-0410-ab3c-d52691b4dbfc
Sat Sep 26 07:47:18 UTC 2009
The following commit has been merged in the debian/unstable branch:
commit d3fe47015f29129c9c409ded77210bd3fb748dc7
Author: darin <darin at 268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Date: Sun Jul 13 21:41:19 2003 +0000
Reviewed by Maciej.
- fixed 3132021 -- certain Japanese characters are misrendered because of incorrect partial-character handling
We didn't have code to handle kTECPartialCharErr.
* kwq/KWQTextCodec.mm:
(KWQTextDecoder::createTECConverter): Added. Broken out from convertUsingTEC.
(KWQTextDecoder::appendOmittingBOMs): Added. Broken out from convertUsingTEC.
(KWQTextDecoder::convertOneChunkUsingTEC): Added. Broken out from convertUsingTEC.
This also has much of the partially-decoded character handling.
(KWQTextDecoder::convertUsingTEC): Simplified by breaking into functions, and added
handling to use the same buffer we use for the other decoders to hold partially-decoded
characters.
(KWQTextDecoder::convert): Add a partial-character handling test mode where the decoder
is passed only a single byte at a time; controlled by an #if.
git-svn-id: http://svn.webkit.org/repository/webkit/trunk@4640 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/WebCore/ChangeLog-2003-10-25 b/WebCore/ChangeLog-2003-10-25
index d8edf0b..7262517 100644
--- a/WebCore/ChangeLog-2003-10-25
+++ b/WebCore/ChangeLog-2003-10-25
@@ -1,3 +1,22 @@
+2003-07-11 Darin Adler <darin at apple.com>
+
+ Reviewed by Maciej.
+
+ - fixed 3132021 -- certain Japanese characters are misrendered because of incorrect partial-character handling
+
+ We didn't have code to handle kTECPartialCharErr.
+
+ * kwq/KWQTextCodec.mm:
+ (KWQTextDecoder::createTECConverter): Added. Broken out from convertUsingTEC.
+ (KWQTextDecoder::appendOmittingBOMs): Added. Broken out from convertUsingTEC.
+ (KWQTextDecoder::convertOneChunkUsingTEC): Added. Broken out from convertUsingTEC.
+ This also has much of the partially-decoded character handling.
+ (KWQTextDecoder::convertUsingTEC): Simplified by breaking into functions, and added
+ handling to use the same buffer we use for the other decoders to hold partially-decoded
+ characters.
+ (KWQTextDecoder::convert): Add a partial-character handling test mode where the decoder
+ is passed only a single byte at a time; controlled by an #if.
+
2003-07-11 Dave Hyatt <hyatt at apple.com>
Fix for 3187101, before/after content not dynamic. This patch
diff --git a/WebCore/ChangeLog-2005-08-23 b/WebCore/ChangeLog-2005-08-23
index d8edf0b..7262517 100644
--- a/WebCore/ChangeLog-2005-08-23
+++ b/WebCore/ChangeLog-2005-08-23
@@ -1,3 +1,22 @@
+2003-07-11 Darin Adler <darin at apple.com>
+
+ Reviewed by Maciej.
+
+ - fixed 3132021 -- certain Japanese characters are misrendered because of incorrect partial-character handling
+
+ We didn't have code to handle kTECPartialCharErr.
+
+ * kwq/KWQTextCodec.mm:
+ (KWQTextDecoder::createTECConverter): Added. Broken out from convertUsingTEC.
+ (KWQTextDecoder::appendOmittingBOMs): Added. Broken out from convertUsingTEC.
+ (KWQTextDecoder::convertOneChunkUsingTEC): Added. Broken out from convertUsingTEC.
+ This also has much of the partially-decoded character handling.
+ (KWQTextDecoder::convertUsingTEC): Simplified by breaking into functions, and added
+ handling to use the same buffer we use for the other decoders to hold partially-decoded
+ characters.
+ (KWQTextDecoder::convert): Add a partial-character handling test mode where the decoder
+ is passed only a single byte at a time; controlled by an #if.
+
2003-07-11 Dave Hyatt <hyatt at apple.com>
Fix for 3187101, before/after content not dynamic. This patch
diff --git a/WebCore/kwq/KWQTextCodec.mm b/WebCore/kwq/KWQTextCodec.mm
index 0024f46..8bfc304 100644
--- a/WebCore/kwq/KWQTextCodec.mm
+++ b/WebCore/kwq/KWQTextCodec.mm
@@ -38,9 +38,16 @@ public:
QString toUnicode(const char *chs, int len, bool flush);
private:
- QString convert(const char *chs, int len, bool flush);
+ QString convert(const char *chs, int len, bool flush)
+ { return convert(reinterpret_cast<const unsigned char *>(chs), len, flush); }
+ QString convert(const unsigned char *chs, int len, bool flush);
QString convertUTF16(const unsigned char *chs, int len);
- QString convertUsingTEC(const UInt8 *chs, int len, bool flush);
+ QString convertUsingTEC(const unsigned char *chs, int len, bool flush);
+
+ OSStatus createTECConverter();
+ OSStatus convertOneChunkUsingTEC(const unsigned char *inputBuffer, int inputBufferLength, int &inputLength,
+ void *outputBuffer, int outputBufferLength, int &outputLength);
+ static void appendOmittingBOMs(QString &s, const UniChar *characters, int byteCount);
KWQTextDecoder(const KWQTextDecoder &);
KWQTextDecoder &operator=(const KWQTextDecoder &);
@@ -48,9 +55,11 @@ private:
CFStringEncoding _encoding;
bool _littleEndian;
bool _atStart;
- int _numBufferedBytes;
- char _bufferedBytes[2];
-
+ bool _error;
+
+ unsigned _numBufferedBytes;
+ unsigned char _bufferedBytes[16]; // bigger than any single multi-byte character
+
// State for TEC decoding.
TECObjectRef _converter;
static TECObjectRef _cachedConverter;
@@ -144,7 +153,7 @@ QCString QTextCodec::fromUnicode(const QString &qcs) const
CFIndex bufferLength;
CFStringGetBytes(cfs, range, encoding, '?', FALSE, NULL, 0x7FFFFFFF, &bufferLength);
QCString result(bufferLength + 1);
- CFStringGetBytes(cfs, range, encoding, '?', FALSE, reinterpret_cast<UInt8 *>(result.data()), bufferLength, &bufferLength);
+ CFStringGetBytes(cfs, range, encoding, '?', FALSE, reinterpret_cast<unsigned char *>(result.data()), bufferLength, &bufferLength);
result[bufferLength] = 0;
return result;
}
@@ -211,7 +220,8 @@ QTextDecoder::~QTextDecoder()
// ================
KWQTextDecoder::KWQTextDecoder(CFStringEncoding e, KWQEncodingFlags f)
- : _encoding(e), _littleEndian(f & ::LittleEndian), _atStart(true), _numBufferedBytes(0), _converter(0)
+ : _encoding(e), _littleEndian(f & ::LittleEndian), _atStart(true), _error(false)
+ , _numBufferedBytes(0), _converter(0)
{
}
@@ -284,122 +294,195 @@ QString KWQTextDecoder::convertUTF16(const unsigned char *s, int length)
return result;
}
-QString KWQTextDecoder::convertUsingTEC(const UInt8 *chs, int len, bool flush)
+OSStatus KWQTextDecoder::createTECConverter()
{
- OSStatus status;
+ const CFStringEncoding encoding = effectiveEncoding(_encoding);
+
+ if (_cachedConverterEncoding == encoding) {
+ _converter = _cachedConverter;
+ _cachedConverter = 0;
+ _cachedConverterEncoding = kCFStringEncodingInvalidId;
+ TECClearConverterContextInfo(_converter);
+ } else {
+ OSStatus status = TECCreateConverter(&_converter, encoding,
+ CreateTextEncoding(kTextEncodingUnicodeDefault, kTextEncodingDefaultVariant, kUnicode16BitFormat));
+ if (status) {
+ ERROR("the Text Encoding Converter won't convert from text encoding 0x%X, error %d", encoding, status);
+ return status;
+ }
+
+ TECSetBasicOptions(_converter, kUnicodeForceASCIIRangeMask);
+ }
- CFStringEncoding encoding = effectiveEncoding(_encoding);
+ return noErr;
+}
- // Get a converter for the passed-in encoding.
- if (!_converter) {
- if (_cachedConverterEncoding == encoding) {
- _converter = _cachedConverter;
- _cachedConverter = 0;
- _cachedConverterEncoding = kCFStringEncodingInvalidId;
- TECClearConverterContextInfo(_converter);
- } else {
- status = TECCreateConverter(&_converter, encoding,
- CreateTextEncoding(kTextEncodingUnicodeDefault, kTextEncodingDefaultVariant, kUnicode16BitFormat));
- if (status) {
- ERROR("the Text Encoding Converter won't convert from text encoding 0x%X, error %d", encoding, status);
- return QString();
+void KWQTextDecoder::appendOmittingBOMs(QString &s, const UniChar *characters, int byteCount)
+{
+ ASSERT(byteCount % sizeof(UniChar) == 0);
+ int start = 0;
+ int characterCount = byteCount / sizeof(UniChar);
+ for (int i = 0; i != characterCount; ++i) {
+ if (characters[i] == BOM) {
+ if (start != i) {
+ s.append(reinterpret_cast<const QChar *>(&characters[start]), i - start);
}
+ start = i + 1;
+ }
+ }
+ if (start != characterCount) {
+ s.append(reinterpret_cast<const QChar *>(&characters[start]), characterCount - start);
+ }
+}
+
+OSStatus KWQTextDecoder::convertOneChunkUsingTEC(const unsigned char *inputBuffer, int inputBufferLength, int &inputLength,
+ void *outputBuffer, int outputBufferLength, int &outputLength)
+{
+ OSStatus status;
+ unsigned long bytesRead = 0;
+ unsigned long bytesWritten = 0;
- TECSetBasicOptions(_converter, kUnicodeForceASCIIRangeMask);
+ if (_numBufferedBytes != 0) {
+ // Finish converting a partial character that's in our buffer.
+
+ // First, fill the partial character buffer with as many bytes as are available.
+ ASSERT(_numBufferedBytes < sizeof(_bufferedBytes));
+ const int spaceInBuffer = sizeof(_bufferedBytes) - _numBufferedBytes;
+ const int bytesToPutInBuffer = MIN(spaceInBuffer, inputBufferLength);
+ ASSERT(bytesToPutInBuffer != 0);
+ memcpy(_bufferedBytes + _numBufferedBytes, inputBuffer, bytesToPutInBuffer);
+
+ // Now, do a conversion on the buffer.
+ status = TECConvertText(_converter, _bufferedBytes, _numBufferedBytes + bytesToPutInBuffer, &bytesRead,
+ reinterpret_cast<unsigned char *>(outputBuffer), outputBufferLength, &bytesWritten);
+
+ if (status == kTECPartialCharErr && bytesRead == 0) {
+ // Handle the case where the partial character was not converted.
+ if (bytesToPutInBuffer >= spaceInBuffer) {
+ ERROR("TECConvertText gave a kTECPartialCharErr but read none of the %u bytes in the buffer", sizeof(_bufferedBytes));
+ _numBufferedBytes = 0;
+ status = kTECUnmappableElementErr; // should never happen, but use this error code
+ } else {
+ // Tell the caller we read all the source bytes and keep them in the buffer.
+ _numBufferedBytes += bytesToPutInBuffer;
+ bytesRead = bytesToPutInBuffer;
+ status = noErr;
+ }
+ } else {
+ // We are done with the partial character buffer.
+ // Also, we have read some of the bytes from the main buffer.
+ if (bytesRead > _numBufferedBytes) {
+ bytesRead -= _numBufferedBytes;
+ } else {
+ ERROR("TECConvertText accepted some bytes it previously rejected with kTECPartialCharErr");
+ bytesRead = 0;
+ }
+ _numBufferedBytes = 0;
}
+ } else {
+ status = TECConvertText(_converter, inputBuffer, inputBufferLength, &bytesRead,
+ static_cast<unsigned char *>(outputBuffer), outputBufferLength, &bytesWritten);
+ }
+
+ inputLength = bytesRead;
+ outputLength = bytesWritten;
+ return status;
+}
+
+QString KWQTextDecoder::convertUsingTEC(const unsigned char *chs, int len, bool flush)
+{
+ // Get a converter for the passed-in encoding.
+ if (!_converter && createTECConverter() != noErr) {
+ return QString();
}
QString result;
- const UInt8 *sourcePointer = chs;
- unsigned long sourceLength = len;
-
- for (;;) {
- UniChar buffer[4096];
- unsigned long bytesWritten = 0;
- bool doingFlush = false;
+ const unsigned char *sourcePointer = chs;
+ int sourceLength = len;
+ UniChar buffer[4096];
+
+ while (sourceLength) {
+ int bytesRead;
+ int bytesWritten;
+ OSStatus status = convertOneChunkUsingTEC(sourcePointer, sourceLength, bytesRead, buffer, sizeof(buffer), bytesWritten);
+ ASSERT(bytesRead <= sourceLength);
+ sourcePointer += bytesRead;
+ sourceLength -= bytesRead;
- if (sourceLength == 0) {
- if (!flush) {
- // Done.
+ switch (status) {
+ case noErr:
+ case kTECOutputBufferFullStatus:
break;
- }
- doingFlush = true;
- }
-
- if (doingFlush) {
- status = TECFlushText(_converter,
- reinterpret_cast<UInt8 *>(buffer), sizeof(buffer), &bytesWritten);
- } else {
- unsigned long bytesRead = 0;
- status = TECConvertText(_converter, sourcePointer, sourceLength, &bytesRead,
- reinterpret_cast<UInt8 *>(buffer), sizeof(buffer), &bytesWritten);
- sourcePointer += bytesRead;
- sourceLength -= bytesRead;
- }
- if (bytesWritten) {
- ASSERT(bytesWritten % sizeof(UniChar) == 0);
- int start = 0;
- int characterCount = bytesWritten / sizeof(UniChar);
- for (int i = 0; i != characterCount; ++i) {
- if (buffer[i] == BOM) {
- if (start != i) {
- result.append(reinterpret_cast<QChar *>(&buffer[start]), i - start);
- }
- start = i + 1;
+ case kTextMalformedInputErr:
+ case kTextUndefinedElementErr:
+ // FIXME: Put an FFFD character into the output string?
+ TECClearConverterContextInfo(_converter);
+ if (sourceLength) {
+ sourcePointer += 1;
+ sourceLength -= 1;
}
+ break;
+ case kTECPartialCharErr: {
+ // Put the partial character into the buffer.
+ ASSERT(_numBufferedBytes == 0);
+ const int bufferSize = sizeof(_numBufferedBytes);
+ if (sourceLength < bufferSize) {
+ memcpy(_bufferedBytes, sourcePointer, sourceLength);
+ _numBufferedBytes = sourceLength;
+ } else {
+ ERROR("TECConvertText gave a kTECPartialCharErr, but left %u bytes in the buffer", sourceLength);
+ }
+ sourceLength = 0;
+ break;
}
- if (start != characterCount) {
- result.append(reinterpret_cast<QChar *>(&buffer[start]), characterCount - start);
- }
- }
- if (status == kTextMalformedInputErr || status == kTextUndefinedElementErr) {
- // FIXME: Put in FFFD character here?
- TECClearConverterContextInfo(_converter);
- if (sourceLength) {
- sourcePointer += 1;
- sourceLength -= 1;
- }
- status = noErr;
- }
- if (status == kTECOutputBufferFullStatus) {
- continue;
- }
- if (status != noErr) {
- ERROR("text decoding failed with error %d", status);
- break;
- }
-
- if (doingFlush) {
- // Done.
- break;
+ default:
+ ERROR("text decoding failed with error %d", status);
+ _error = true;
+ return QString();
}
+
+ appendOmittingBOMs(result, buffer, bytesWritten);
}
+ if (flush) {
+ unsigned long bytesWritten = 0;
+ TECFlushText(_converter, reinterpret_cast<unsigned char *>(buffer), sizeof(buffer), &bytesWritten);
+ appendOmittingBOMs(result, buffer, bytesWritten);
+ }
+
// Workaround for a bug in the Text Encoding Converter (see bug 3225472).
// Simplified Chinese pages use the code U+A3A0 to mean "full-width space".
// But GB18030 decodes it to U+E5E5, which is correct in theory but not in practice.
// To work around, just change all occurences of U+E5E5 to U+3000 (ideographic space).
- if (encoding == kCFStringEncodingGB_18030_2000) {
+ if (_encoding == kCFStringEncodingGB_18030_2000) {
result.replace(0xE5E5, 0x3000);
}
return result;
}
-QString KWQTextDecoder::convert(const char *chs, int len, bool flush)
+QString KWQTextDecoder::convert(const unsigned char *chs, int len, bool flush)
{
if (_encoding == kCFStringEncodingUnicode) {
- return convertUTF16(reinterpret_cast<const unsigned char *>(chs), len);
+ return convertUTF16(chs, len);
}
- return convertUsingTEC(reinterpret_cast<const UInt8 *>(chs), len, flush);
+
+#if TEST_PARTIAL_CHARACTER_HANDLING
+ QString result;
+ for (int i = 0; i != len; ++i)
+ result += convertUsingTEC(chs + i, 1, flush && i == len - 1);
+ return result;
+#else
+ return convertUsingTEC(chs, len, flush);
+#endif
}
QString KWQTextDecoder::toUnicode(const char *chs, int len, bool flush)
{
ASSERT_ARG(len, len >= 0);
- if (!chs || len <= 0) {
+ if (_error || !chs || len <= 0) {
return QString();
}
@@ -412,8 +495,8 @@ QString KWQTextDecoder::toUnicode(const char *chs, int len, bool flush)
int numBufferedBytes = _numBufferedBytes;
int buf1Len = numBufferedBytes;
int buf2Len = len;
- const char *buf1 = _bufferedBytes;
- const char *buf2 = chs;
+ const unsigned char *buf1 = _bufferedBytes;
+ const unsigned char *buf2 = reinterpret_cast<const unsigned char *>(chs);
unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
@@ -447,7 +530,7 @@ QString KWQTextDecoder::toUnicode(const char *chs, int len, bool flush)
if (numBufferedBytes == 0) {
return convert(chs, len, flush);
}
- char bufferedBytes[sizeof(_bufferedBytes)];
+ unsigned char bufferedBytes[sizeof(_bufferedBytes)];
memcpy(bufferedBytes, _bufferedBytes, numBufferedBytes);
_numBufferedBytes = 0;
return convert(bufferedBytes, numBufferedBytes, false) + convert(chs, len, flush);
--
WebKit Debian packaging
More information about the Pkg-webkit-commits
mailing list