[Pkg-puppet-devel] [SCM] Puppet packaging for Debian branch, experimental, updated. debian/2.6.8-1-844-g7ec39d5

Jesse Wolfe jes5199 at gmail.com
Tue May 10 08:02:18 UTC 2011


The following commit has been merged in the experimental branch:
commit 095675711a89d836f4d0f10978ed5759b93fe76f
Author: Jesse Wolfe <jes5199 at gmail.com>
Date:   Mon Nov 22 15:17:51 2010 -0800

    Fix #5261 Don't escape Unicode characters in PSON
    
    This patch removes the escaping of valid UTF-8 sequences as "\uXXXX".
    This code was unreliable, as it relied on Iconv's ability to convert
    those codepoints between UTF-8 and UTF-16, but some versions of Iconv
    barf on some valid codepoints.
    
    Invalid UTF-8 sequences are still passed through unchanged. We believe
    that this is fine; if you are concerned about compliance with the JSON
    standard, what we are doing is equivalent to:
      * interpreting binary files as Latin-1 encoded character sequences
      * JSON-encoding those characters according to RFC 4627
      * outputting the JSON as Latin-1
    This allows all raw binary files to be transmitted losslessly.
    
    Paired-With: Paul Berry <paul at puppetlabs.com>

diff --git a/lib/puppet/external/pson/pure/generator.rb b/lib/puppet/external/pson/pure/generator.rb
index 4180be5..89a0c62 100644
--- a/lib/puppet/external/pson/pure/generator.rb
+++ b/lib/puppet/external/pson/pure/generator.rb
@@ -44,34 +44,13 @@ module PSON
       string << '' # XXX workaround: avoid buffer sharing
       string.force_encoding(Encoding::ASCII_8BIT)
       string.gsub!(/["\\\x0-\x1f]/) { MAP[$MATCH] }
-      string.gsub!(/(
-        (?:
-          [\xc2-\xdf][\x80-\xbf]    |
-          [\xe0-\xef][\x80-\xbf]{2} |
-          [\xf0-\xf4][\x80-\xbf]{3}
-            )+ |
-            [\x80-\xc1\xf5-\xff]       # invalid
-              )/nx) { |c|
-                c.size == 1 and raise GeneratorError, "invalid utf8 byte: '#{c}'"
-                s = PSON::UTF8toUTF16.iconv(c).unpack('H*')[0]
-                s.gsub!(/.{4}/n, '\\\\u\&')
-              }
-      string.force_encoding(Encoding::UTF_8)
       string
     rescue Iconv::Failure => e
       raise GeneratorError, "Caught #{e.class}: #{e}"
     end
   else
     def utf8_to_pson(string) # :nodoc:
-      string.
-        gsub(/["\\\x0-\x1f]/n) { MAP[$MATCH] }.
-        gsub(/((?:
-          [\xc2-\xdf][\x80-\xbf]    |
-          [\xe0-\xef][\x80-\xbf]{2} |
-          [\xf0-\xf4][\x80-\xbf]{3}
-            )+)/nx) { |c|
-        PSON::UTF8toUTF16.iconv(c).unpack('H*')[0].gsub(/.{4}/n, '\\\\u\&')
-      }
+      string.gsub(/["\\\x0-\x1f]/n) { MAP[$MATCH] }
     end
   end
   module_function :utf8_to_pson
diff --git a/spec/unit/util/pson_spec.rb b/spec/unit/util/pson_spec.rb
index d02d285..474ddaf 100755
--- a/spec/unit/util/pson_spec.rb
+++ b/spec/unit/util/pson_spec.rb
@@ -35,4 +35,19 @@ describe Puppet::Util::Pson do
     bin_string = (1..20000).collect { |i| ((17*i+13*i*i) % 255).chr }.join
     PSON.parse(%Q{{ "type": "foo", "data": #{bin_string.to_pson} }})["data"].should == bin_string
   end
+
+  it "should be able to handle UTF8 that isn't a real unicode character" do
+    s = ["\355\274\267"]
+    PSON.parse( [s].to_pson ).should == [s]
+  end
+
+  it "should be able to handle UTF8 for \\xFF" do
+    s = ["\xc3\xbf"]
+    PSON.parse( [s].to_pson ).should == [s]
+  end
+
+  it "should be able to handle invalid UTF8 bytes" do
+    s = ["\xc3\xc3"]
+    PSON.parse( [s].to_pson ).should == [s]
+  end
 end

-- 
Puppet packaging for Debian



More information about the Pkg-puppet-devel mailing list