[PATCH] Fix unwanted ISO-8859-1 -> UTF-8 conversion in CGI::Util::escape(). (Closes: #555733)

Wed Nov 11 19:07:17 UTC 2009

---
 debian/changelog                     |    7 +++
 debian/patches/cgi-util-escape.patch |   89 ++++++++++++++++++++++++++++++++++
 debian/patches/series                |    1 +
 3 files changed, 97 insertions(+), 0 deletions(-)
 create mode 100644 debian/patches/cgi-util-escape.patch

diff --git a/debian/changelog b/debian/changelog
index fc24cc2..69cedf9 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,10 @@
+libcgi-pm-perl (3.38-2lenny1) UNRELEASED; urgency=low
+
+  * Fix unwanted ISO-8859-1 -> UTF-8 conversion in CGI::Util::escape().
+    (Closes: #555733)
+
+ -- Niko Tyni <ntyni at debian.org>  Wed, 11 Nov 2009 21:06:40 +0200
+
 libcgi-pm-perl (3.38-2) unstable; urgency=medium
 
   * Do not ship CGI/Fast.pm, avoiding conflict with libcgi-fast-perl. Instead,
diff --git a/debian/patches/cgi-util-escape.patch b/debian/patches/cgi-util-escape.patch
new file mode 100644
index 0000000..5d7ccbb
--- /dev/null
+++ b/debian/patches/cgi-util-escape.patch
@@ -0,0 +1,89 @@
+fix unwanted ISO-8859-1 -> UTF-8 conversion in CGI::Util::escape().
+
+Closes: #555733
+
+Fix from upstream 3.43.
+
+diff --git a/CGI/Util.pm b/CGI/Util.pm
+index 9230eb9..c19fde5 100644
+--- a/CGI/Util.pm
++++ b/CGI/Util.pm
+@@ -211,16 +211,24 @@ sub unescape {
+ }
+ 
+ # URL-encode data
++#
++# We cannot use the %u escapes, they were rejected by W3C, so the official
++# way is %XX-escaped utf-8 encoding.
++# Naturally, Unicode strings have to be converted to their utf-8 byte
++# representation.  (No action is required on 5.6.)
++# Byte strings were traditionally used directly as a sequence of octets.
++# This worked if they actually represented binary data (i.e. in CGI::Compress).
++# This also worked if these byte strings were actually utf-8 encoded; e.g.,
++# when the source file used utf-8 without the appropriate "use utf8;".
++# This fails if the byte string is actually a Latin 1 encoded string, but it
++# was always so and cannot be fixed without breaking the binary data case.
++# -- Stepan Kasal <skasal at redhat.com>
++#
+ sub escape {
+   shift() if @_ > 1 and ( ref($_[0]) || (defined $_[1] && $_[0] eq $CGI::DefaultClass));
+   my $toencode = shift;
+   return undef unless defined($toencode);
+-  $toencode = eval { pack("C*", unpack("U0C*", $toencode))} || pack("C*", unpack("C*", $toencode));
+-
+-  # force bytes while preserving backward compatibility -- dankogai
+-  # but commented out because it was breaking CGI::Compress -- lstein
+-  # $toencode = eval { pack("U*", unpack("U0C*", $toencode))} || pack("C*", unpack("C*", $toencode));
+-
++  utf8::encode($toencode) if ($] > 5.007 && utf8::is_utf8($toencode));
+     if ($EBCDIC) {
+       $toencode=~s/([^a-zA-Z0-9_.~-])/uc sprintf("%%%02x",$E2A[ord($1)])/eg;
+     } else {
+diff --git a/t/util-58.t b/t/util-58.t
+index 70a6189..7dc81c5 100644
+--- a/t/util-58.t
++++ b/t/util-58.t
+@@ -1,16 +1,29 @@
++# test CGI::Util::escape
++use Test::More tests => 4;
++use_ok("CGI::Util");
++
++# Byte strings should be escaped byte by byte:
++# 1) not a valid utf-8 sequence:
++my $uri = "pe\x{f8}\x{ed}\x{e8}ko.ogg";
++is(CGI::Util::escape($uri), "pe%F8%ED%E8ko.ogg", "Escape a Latin-2 string");
++
++# 2) is a valid utf-8 sequence, but not a UTF-8-flagged string
++#    This happens often: people write utf-8 strings to source, but forget
++#    to tell perl about it by "use utf8;"--this is obviously wrong, but we
++#    have to handle it gracefully, for compatibility with CGI.pm under
++#    perl-5.8.x
+ #
++$uri = "pe\x{c5}\x{99}\x{c3}\x{ad}\x{c4}\x{8d}ko.ogg";
++is(CGI::Util::escape($uri), "pe%C5%99%C3%AD%C4%8Dko.ogg",
++	"Escape an utf-8 byte string");
++
++SKIP:
++{
+ # This tests CGI::Util::escape() when fed with UTF-8-flagged string
+ # -- dankogai
+-BEGIN {
+-    if ($] < 5.008) {
+-       print "1..0 # \$] == $] < 5.008\n";
+-       exit(0);
+-    }
+-}
+-
+-use Test::More tests => 2;
+-use_ok("CGI::Util");
+-my $uri = "\x{5c0f}\x{98fc} \x{5f3e}.txt"; # KOGAI, Dan, in Kanji
++	skip("Unicode strings not available in $]", 1) if ($] < 5.008);
++	$uri = "\x{5c0f}\x{98fc} \x{5f3e}.txt"; # KOGAI, Dan, in Kanji
+ is(CGI::Util::escape($uri), "%E5%B0%8F%E9%A3%BC%20%E5%BC%BE.txt",
+-   "# Escape string with UTF-8 flag");
++   		"Escape string with UTF-8 flag");
++}
+ __END__
+-- 
+1.5.6.5
+
diff --git a/debian/patches/series b/debian/patches/series
index e27d884..2d6b8c2 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1 +1,2 @@
 man-cgi-fast.patch
+cgi-util-escape.patch
-- 
1.5.6.5


--4Ckj6UjgE2iN1+kY--