r31022 - in /branches/upstream/libjson-xs-perl/current: Changes MANIFEST META.yml README XS.pm XS.xs t/20_faihu.t t/99_binary.t

ansgar-guest at users.alioth.debian.org ansgar-guest at users.alioth.debian.org
Sun Feb 22 14:06:06 UTC 2009


Author: ansgar-guest
Date: Sun Feb 22 14:06:01 2009
New Revision: 31022

URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=31022
Log:
[svn-upgrade] Integrating new upstream version, libjson-xs-perl (2.232)

Added:
    branches/upstream/libjson-xs-perl/current/t/20_faihu.t
Modified:
    branches/upstream/libjson-xs-perl/current/Changes
    branches/upstream/libjson-xs-perl/current/MANIFEST
    branches/upstream/libjson-xs-perl/current/META.yml
    branches/upstream/libjson-xs-perl/current/README
    branches/upstream/libjson-xs-perl/current/XS.pm
    branches/upstream/libjson-xs-perl/current/XS.xs
    branches/upstream/libjson-xs-perl/current/t/99_binary.t

Modified: branches/upstream/libjson-xs-perl/current/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libjson-xs-perl/current/Changes?rev=31022&op=diff
==============================================================================
--- branches/upstream/libjson-xs-perl/current/Changes (original)
+++ branches/upstream/libjson-xs-perl/current/Changes Sun Feb 22 14:06:01 2009
@@ -1,11 +1,24 @@
 Revision history for Perl extension JSON::XS
+
+2.232 Sun Feb 22 11:12:25 CET 2009
+	- use an exponential algorithm to extend strings, to
+          help platforms with bad or abysmal==windows memory
+          allocator performance, at the expense of some memory
+          wastage (use shrink to recover this extra memory).
+          (nicely analysed by Dmitry Karasik).
+
+2.2311 Thu Feb 19 02:12:54 CET 2009
+        - add a section "JSON and ECMAscript" to explain some
+          incompatibilities between the two (problem was noted by
+          various people).
+	- add t/20_faihu.t.
 
 2.231 Thu Nov 20 04:59:08 CET 2008
 	- work around 5.10.0 magic bugs where manipulating magic values
           (such as $1) would permanently damage them as perl would
           ignore the magicalness, by making a full copy of the string,
           reported by Dmitry Karasik.
-        - work around spurious wanrings under older perl 5.8's.
+        - work around spurious warnings under older perl 5.8's.
 
 2.23 Mon Sep 29 05:08:29 CEST 2008
 	- fix a compilation problem when perl is not using char * as, well,

Modified: branches/upstream/libjson-xs-perl/current/MANIFEST
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libjson-xs-perl/current/MANIFEST?rev=31022&op=diff
==============================================================================
--- branches/upstream/libjson-xs-perl/current/MANIFEST (original)
+++ branches/upstream/libjson-xs-perl/current/MANIFEST Sun Feb 22 14:06:01 2009
@@ -28,6 +28,7 @@
 t/17_relaxed.t
 t/18_json_checker.t
 t/19_incr.t
+t/20_faihu.t
 t/99_binary.t
 typemap
 META.yml                                 Module meta-data (added by MakeMaker)

Modified: branches/upstream/libjson-xs-perl/current/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libjson-xs-perl/current/META.yml?rev=31022&op=diff
==============================================================================
--- branches/upstream/libjson-xs-perl/current/META.yml (original)
+++ branches/upstream/libjson-xs-perl/current/META.yml Sun Feb 22 14:06:01 2009
@@ -1,12 +1,18 @@
 --- #YAML:1.0
-name:                JSON-XS
-version:             2.231
-abstract:            ~
-license:             ~
-author:              ~
-generated_by:        ExtUtils::MakeMaker version 6.42
-distribution_type:   module
-requires:     
+name:               JSON-XS
+version:            2.232
+abstract:           ~
+author:  []
+license:            unknown
+distribution_type:  module
+configure_requires:
+    ExtUtils::MakeMaker:  0
+requires:  {}
+no_index:
+    directory:
+        - t
+        - inc
+generated_by:       ExtUtils::MakeMaker version 6.48
 meta-spec:
-    url:     http://module-build.sourceforge.net/META-spec-v1.3.html
-    version: 1.3
+    url:      http://module-build.sourceforge.net/META-spec-v1.4.html
+    version:  1.4

Modified: branches/upstream/libjson-xs-perl/current/README
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libjson-xs-perl/current/README?rev=31022&op=diff
==============================================================================
--- branches/upstream/libjson-xs-perl/current/README (original)
+++ branches/upstream/libjson-xs-perl/current/README Sun Feb 22 14:06:01 2009
@@ -686,16 +686,19 @@
 
     $json->incr_skip
         This will reset the state of the incremental parser and will remove
-        the parsed text from the input buffer. This is useful after
+        the parsed text from the input buffer so far. This is useful after
         "incr_parse" died, in which case the input buffer and incremental
         parser state is left unchanged, to skip the text parsed so far and
         to reset the parse state.
 
+        The difference to "incr_reset" is that only text until the parse
+        error occurred is removed.
+
     $json->incr_reset
         This completely resets the incremental parser, that is, after this
         call, it will be as if the parser had never parsed anything.
 
-        This is useful if you want ot repeatedly parse JSON objects and want
+        This is useful if you want to repeatedly parse JSON objects and want
         to ignore any trailing data, which means you have to reset the
         parser after each successful decode.
 
@@ -1070,6 +1073,69 @@
         is not 8-bit clean or the encoding might be mangled in between (e.g.
         in mail), and works because ASCII is a proper subset of most 8-bit
         and multibyte encodings in use in the world.
+
+  JSON and ECMAscript
+    JSON syntax is based on how literals are represented in javascript (the
+    not-standardised predecessor of ECMAscript) which is presumably why it
+    is called "JavaScript Object Notation".
+
+    However, JSON is not a subset (and also not a superset of course) of
+    ECMAscript (the standard) or javascript (whatever browsers actually
+    implement).
+
+    If you want to use javascript's "eval" function to "parse" JSON, you
+    might run into parse errors for valid JSON texts, or the resulting data
+    structure might not be queryable:
+
+    One of the problems is that U+2028 and U+2029 are valid characters
+    inside JSON strings, but are not allowed in ECMAscript string literals,
+    so the following Perl fragment will not output something that can be
+    guaranteed to be parsable by javascript's "eval":
+
+       use JSON::XS;
+
+       print encode_json [chr 0x2028];
+
+    The right fix for this is to use a proper JSON parser in your javascript
+    programs, and not rely on "eval" (see for example Douglas Crockford's
+    json2.js parser).
+
+    If this is not an option, you can, as a stop-gap measure, simply encode
+    to ASCII-only JSON:
+
+       use JSON::XS;
+
+       print JSON::XS->new->ascii->encode ([chr 0x2028]);
+
+    Note that this will enlarge the resulting JSON text quite a bit if you
+    have many non-ASCII characters. You might be tempted to run some regexes
+    to only escape U+2028 and U+2029, e.g.:
+
+       # DO NOT USE THIS!
+       my $json = JSON::XS->new->utf8->encode ([chr 0x2028]);
+       $json =~ s/\xe2\x80\xa8/\\u2028/g; # escape U+2028
+       $json =~ s/\xe2\x80\xa9/\\u2029/g; # escape U+2029
+       print $json;
+
+    Note that *this is a bad idea*: the above only works for U+2028 and
+    U+2029 and thus only for fully ECMAscript-compliant parsers. Many
+    existing javascript implementations, however, have issues with other
+    characters as well - using "eval" naively simply *will* cause problems.
+
+    Another problem is that some javascript implementations reserve some
+    property names for their own purposes (which probably makes them
+    non-ECMAscript-compliant). For example, Iceweasel reserves the
+    "__proto__" property name for its own purposes.
+
+    If that is a problem, you could try to filter the resulting JSON
+    output for these property strings, e.g.:
+
+       $json =~ s/"__proto__"\s*:/"__proto__renamed":/g;
+
+    This works because "__proto__" is not valid outside of strings, so every
+    occurrence of ""__proto__"\s*:" must be a string used as property name.
+
+    If you know of other incompatibilities, please let me know.
 
   JSON and YAML
     You often hear that JSON is a subset of YAML. This is, however, a mass

Modified: branches/upstream/libjson-xs-perl/current/XS.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libjson-xs-perl/current/XS.pm?rev=31022&op=diff
==============================================================================
--- branches/upstream/libjson-xs-perl/current/XS.pm (original)
+++ branches/upstream/libjson-xs-perl/current/XS.pm Sun Feb 22 14:06:01 2009
@@ -104,7 +104,7 @@
 no warnings;
 use strict;
 
-our $VERSION = '2.231';
+our $VERSION = '2.232';
 our @ISA = qw(Exporter);
 
 our @EXPORT = qw(encode_json decode_json to_json from_json);
@@ -768,17 +768,21 @@
 
 =item $json->incr_skip
 
-This will reset the state of the incremental parser and will remove the
-parsed text from the input buffer. This is useful after C<incr_parse>
-died, in which case the input buffer and incremental parser state is left
-unchanged, to skip the text parsed so far and to reset the parse state.
+This will reset the state of the incremental parser and will remove
+the parsed text from the input buffer so far. This is useful after
+C<incr_parse> died, in which case the input buffer and incremental parser
+state is left unchanged, to skip the text parsed so far and to reset the
+parse state.
+
+The difference to C<incr_reset> is that only text until the parse error
+occurred is removed.
 
 =item $json->incr_reset
 
 This completely resets the incremental parser, that is, after this call,
 it will be as if the parser had never parsed anything.
 
-This is useful if you want ot repeatedly parse JSON objects and want to
+This is useful if you want to repeatedly parse JSON objects and want to
 ignore any trailing data, which means you have to reset the parser after
 each successful decode.
 
@@ -1181,6 +1185,71 @@
 proper subset of most 8-bit and multibyte encodings in use in the world.
 
 =back
+
+
+=head2 JSON and ECMAscript
+
+JSON syntax is based on how literals are represented in javascript (the
+not-standardised predecessor of ECMAscript) which is presumably why it is
+called "JavaScript Object Notation".
+
+However, JSON is not a subset (and also not a superset of course) of
+ECMAscript (the standard) or javascript (whatever browsers actually
+implement).
+
+If you want to use javascript's C<eval> function to "parse" JSON, you
+might run into parse errors for valid JSON texts, or the resulting data
+structure might not be queryable:
+
+One of the problems is that U+2028 and U+2029 are valid characters inside
+JSON strings, but are not allowed in ECMAscript string literals, so the
+following Perl fragment will not output something that can be guaranteed
+to be parsable by javascript's C<eval>:
+
+   use JSON::XS;
+
+   print encode_json [chr 0x2028];
+
+The right fix for this is to use a proper JSON parser in your javascript
+programs, and not rely on C<eval> (see for example Douglas Crockford's
+F<json2.js> parser).
+
+If this is not an option, you can, as a stop-gap measure, simply encode to
+ASCII-only JSON:
+
+   use JSON::XS;
+
+   print JSON::XS->new->ascii->encode ([chr 0x2028]);
+
+Note that this will enlarge the resulting JSON text quite a bit if you
+have many non-ASCII characters. You might be tempted to run some regexes
+to only escape U+2028 and U+2029, e.g.:
+
+   # DO NOT USE THIS!
+   my $json = JSON::XS->new->utf8->encode ([chr 0x2028]);
+   $json =~ s/\xe2\x80\xa8/\\u2028/g; # escape U+2028
+   $json =~ s/\xe2\x80\xa9/\\u2029/g; # escape U+2029
+   print $json;
+
+Note that I<this is a bad idea>: the above only works for U+2028 and
+U+2029 and thus only for fully ECMAscript-compliant parsers. Many existing
+javascript implementations, however, have issues with other characters as
+well - using C<eval> naively simply I<will> cause problems.
+
+Another problem is that some javascript implementations reserve
+some property names for their own purposes (which probably makes
+them non-ECMAscript-compliant). For example, Iceweasel reserves the
+C<__proto__> property name for its own purposes.
+
+If that is a problem, you could try to filter the resulting JSON
+output for these property strings, e.g.:
+
+   $json =~ s/"__proto__"\s*:/"__proto__renamed":/g;
+
+This works because C<__proto__> is not valid outside of strings, so every
+occurrence of C<"__proto__"\s*:> must be a string used as property name.
+
+If you know of other incompatibilities, please let me know.
 
 
 =head2 JSON and YAML

Modified: branches/upstream/libjson-xs-perl/current/XS.xs
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libjson-xs-perl/current/XS.xs?rev=31022&op=diff
==============================================================================
--- branches/upstream/libjson-xs-perl/current/XS.xs (original)
+++ branches/upstream/libjson-xs-perl/current/XS.xs Sun Feb 22 14:06:01 2009
@@ -197,7 +197,7 @@
   if (expect_false (enc->cur + len >= enc->end))
     {
       STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
-      SvGROW (enc->sv, cur + len + 1);
+      SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
       enc->cur = SvPVX (enc->sv) + cur;
       enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
     }
@@ -972,7 +972,11 @@
 
         if (sv)
           {
-            SvGROW (sv, SvCUR (sv) + len + 1);
+            STRLEN cur = SvCUR (sv);
+
+            if (SvLEN (sv) <= cur + len)
+              SvGROW (sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
+
             memcpy (SvPVX (sv) + SvCUR (sv), buf, len);
             SvCUR_set (sv, SvCUR (sv) + len);
           }
@@ -1807,7 +1811,11 @@
             {
               STRLEN len;
               const char *str = SvPV (jsonstr, len);
-              SvGROW (self->incr_text, SvCUR (self->incr_text) + len + 1);
+              STRLEN cur = SvCUR (self->incr_text);
+
+              if (SvLEN (self->incr_text) <= cur + len)
+                SvGROW (self->incr_text, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
+
               Move (str, SvEND (self->incr_text), len, char);
               SvCUR_set (self->incr_text, SvCUR (self->incr_text) + len);
               *SvEND (self->incr_text) = 0; // this should basically be a nop, too, but make sure it's there

Added: branches/upstream/libjson-xs-perl/current/t/20_faihu.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libjson-xs-perl/current/t/20_faihu.t?rev=31022&op=file
==============================================================================
--- branches/upstream/libjson-xs-perl/current/t/20_faihu.t (added)
+++ branches/upstream/libjson-xs-perl/current/t/20_faihu.t Sun Feb 22 14:06:01 2009
@@ -1,0 +1,28 @@
+#! perl
+
+# adapted from a test by Aristotle Pagaltzis (http://intertwingly.net/blog/2007/11/15/Astral-Plane-Characters-in-Json)
+
+use strict;
+use warnings;
+
+use JSON::XS;
+use Encode qw(encode decode);
+
+use Test::More tests => 3;
+
+my ($faihu, $faihu_json, $roundtrip, $js) = "\x{10346}";
+
+$js = JSON::XS->new->allow_nonref->ascii;
+$faihu_json = $js->encode($faihu);
+$roundtrip = $js->decode($faihu_json);
+is ($roundtrip, $faihu, 'JSON in ASCII roundtrips correctly');
+
+$js = JSON::XS->new->allow_nonref->utf8;
+$faihu_json = $js->encode ($faihu);
+$roundtrip = $js->decode ($faihu_json);
+is ($roundtrip, $faihu, 'JSON in UTF-8 roundtrips correctly');
+
+$js = JSON::XS->new->allow_nonref;
+$faihu_json = encode 'UTF-16BE', $js->encode ($faihu);
+$roundtrip = $js->decode( decode 'UTF-16BE', $faihu_json);
+is ($roundtrip, $faihu, 'JSON with external recoding roundtrips correctly' );

Modified: branches/upstream/libjson-xs-perl/current/t/99_binary.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libjson-xs-perl/current/t/99_binary.t?rev=31022&op=diff
==============================================================================
--- branches/upstream/libjson-xs-perl/current/t/99_binary.t (original)
+++ branches/upstream/libjson-xs-perl/current/t/99_binary.t Sun Feb 22 14:06:01 2009
@@ -31,7 +31,7 @@
    ok ($_[0] eq JSON::XS->new->shrink->decode ($js)->[0], 7);
 }
 
-srand 0; # doesn't help too much, but its at leats more deterministic
+srand 0; # doesn't help too much, but it's at least more deterministic
 
 for (1..768) {
    test join "", map chr ($_ & 255), 0..$_;
@@ -39,3 +39,4 @@
    test join "", map chr ($_ * 97 & ~0x4000), 0..$_;
    test join "", map chr (rand (2**20) & ~0x800), 0..$_;
 }
+




More information about the Pkg-perl-cvs-commits mailing list