r63565 - in /trunk/libregexp-grammars-perl: Changes MANIFEST META.yml README debian/changelog lib/Regexp/Grammars.pm t/arg.t t/backref_ARG.t t/lookaheads.t

carnil at users.alioth.debian.org carnil at users.alioth.debian.org
Mon Oct 11 06:06:04 UTC 2010


Author: carnil
Date: Mon Oct 11 06:05:47 2010
New Revision: 63565

URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=63565
Log:
New upstream release

Added:
    trunk/libregexp-grammars-perl/t/lookaheads.t
      - copied unchanged from r63564, branches/upstream/libregexp-grammars-perl/current/t/lookaheads.t
Modified:
    trunk/libregexp-grammars-perl/Changes
    trunk/libregexp-grammars-perl/MANIFEST
    trunk/libregexp-grammars-perl/META.yml
    trunk/libregexp-grammars-perl/README
    trunk/libregexp-grammars-perl/debian/changelog
    trunk/libregexp-grammars-perl/lib/Regexp/Grammars.pm
    trunk/libregexp-grammars-perl/t/arg.t
    trunk/libregexp-grammars-perl/t/backref_ARG.t

Modified: trunk/libregexp-grammars-perl/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libregexp-grammars-perl/Changes?rev=63565&op=diff
==============================================================================
--- trunk/libregexp-grammars-perl/Changes (original)
+++ trunk/libregexp-grammars-perl/Changes Mon Oct 11 06:05:47 2010
@@ -128,3 +128,13 @@
     * Documented more explicitly that start-pattern is supposed to act
       like a regular regex (or a token) wrt to whitespace
 
+
+
+1.011  Sun Oct 10 18:57:10 2010
+
+    * Added <!RULENAME> as alias for: (?! <.RULENAME> )
+
+    * Added <?RULENAME> as alias for: (?= <.RULENAME> ) 
+      (and made it work around normal lookahead/capture problem)
+
+    * Fixed major bugs in <:arg> handling

Modified: trunk/libregexp-grammars-perl/MANIFEST
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libregexp-grammars-perl/MANIFEST?rev=63565&op=diff
==============================================================================
--- trunk/libregexp-grammars-perl/MANIFEST (original)
+++ trunk/libregexp-grammars-perl/MANIFEST Mon Oct 11 06:05:47 2010
@@ -55,4 +55,5 @@
 t/matchpos.t
 t/repop_ws.t
 t/top_is_token.t
+t/lookaheads.t
 META.yml                                 Module meta-data (added by MakeMaker)

Modified: trunk/libregexp-grammars-perl/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libregexp-grammars-perl/META.yml?rev=63565&op=diff
==============================================================================
--- trunk/libregexp-grammars-perl/META.yml (original)
+++ trunk/libregexp-grammars-perl/META.yml Mon Oct 11 06:05:47 2010
@@ -1,6 +1,6 @@
 --- #YAML:1.0
 name:                Regexp-Grammars
-version:             1.010
+version:             1.011
 abstract:            Add grammatical parsing features to Perl 5.10 regexes
 license:             ~
 author:              

Modified: trunk/libregexp-grammars-perl/README
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libregexp-grammars-perl/README?rev=63565&op=diff
==============================================================================
--- trunk/libregexp-grammars-perl/README (original)
+++ trunk/libregexp-grammars-perl/README Mon Oct 11 06:05:47 2010
@@ -1,4 +1,4 @@
-Regexp::Grammars version 1.010
+Regexp::Grammars version 1.011
 
 This module adds a small number of new regex constructs that can be used
 within Perl 5.10 patterns to implement complete recursive-descent parsing.

Modified: trunk/libregexp-grammars-perl/debian/changelog
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libregexp-grammars-perl/debian/changelog?rev=63565&op=diff
==============================================================================
--- trunk/libregexp-grammars-perl/debian/changelog (original)
+++ trunk/libregexp-grammars-perl/debian/changelog Mon Oct 11 06:05:47 2010
@@ -1,5 +1,6 @@
-libregexp-grammars-perl (1.010-2) UNRELEASED; urgency=low
+libregexp-grammars-perl (1.011-1) UNRELEASED; urgency=low
 
+  * New upstream release
   * Update carnil's email address
 
  -- Salvatore Bonaccorso <carnil at debian.org>  Sun, 10 Oct 2010 15:03:33 +0200

Modified: trunk/libregexp-grammars-perl/lib/Regexp/Grammars.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libregexp-grammars-perl/lib/Regexp/Grammars.pm?rev=63565&op=diff
==============================================================================
--- trunk/libregexp-grammars-perl/lib/Regexp/Grammars.pm (original)
+++ trunk/libregexp-grammars-perl/lib/Regexp/Grammars.pm Mon Oct 11 06:05:47 2010
@@ -7,7 +7,7 @@
 use Scalar::Util qw< blessed >;
 use Data::Dumper qw< Dumper  >;
 
-our $VERSION = '1.010';
+our $VERSION = '1.011';
 
 # Load the module...
 sub import {
@@ -822,6 +822,9 @@
     # Replace negative lookahead with one that works under R::G...
     $regex =~ s{\(\?!}{(?!(?!)|}gxms;
     # ToDo: Also replace positive lookahead with one that works under R::G...
+    #       This replacement should be of the form:
+    #           $regex =~ s{\(\?=}{(?!(?!)|(?!(?!)|}gxms;
+    #       but need to find a way to insert the extra ) at the other end
 
     return $is_comment ? q{} : $regex;
 }
@@ -1125,7 +1128,7 @@
     }
 
     # Determine save behaviour...
-    my $is_noncapturing = $savemode eq 'noncapturing';
+    my $is_noncapturing = $savemode =~ /noncapturing|lookahead/;
     my $is_listifying   = $savemode eq 'list';
 
     my $save_code =
@@ -1151,11 +1154,15 @@
                  : substr($postmodifier,0,1) eq '?'   ? 'any match'
                  :                                      'the match'
                  ;
+    my $do_something_with = $savemode eq 'neglookahead' ? 'lookahead for anything except'
+                          : $savemode eq 'poslookahead' ? 'lookahead for'
+                          :                               'match'
+                          ;
     if ($debug_build) {
         _debug_notify( info =>
                                  "   |",
                                  "   |...Treating $construct as:",
-                                 "   |      |  match the subrule <$subrule> $repeatedly",
+                                 "   |      |  $do_something_with the subrule <$subrule> $repeatedly",
             (defined $arg_desc ? "   |      |  passing the args: ($arg_desc)"
             :                    ()
             ),
@@ -1234,6 +1241,9 @@
             (?<self_subrule_scalar_nocap>
                    \.                            \s* (?<subrule>(?&QUALIDENT)) \s* (?<args>(?&ARGLIST)) \s*
             )
+          | (?<self_subrule_lookahead>
+                   (?<sign> \? | \! )            \s* (?<subrule>(?&QUALIDENT)) \s* (?<args>(?&ARGLIST)) \s*
+            )
           | (?<self_subrule_scalar>
                                                  \s* (?<subrule>(?&QUALIDENT)) \s* (?<args>(?&ARGLIST)) \s*
 
@@ -1256,7 +1266,7 @@
                        (?<alias>(?&IDENT)) \s* = \s* : (?<subrule>(?&QUALIDENT)) \s*
             )
           | (?<alias_argrule_list>
-                   \[  (?<alias>(?&IDENT)) \s* = \s* : (?<subrule>(?&QUALIDENT)) \s*
+                   \[  (?<alias>(?&IDENT)) \s* = \s* : (?<subrule>(?&QUALIDENT)) \s*  \]
             )
 
           | (?<alias_parens_scalar_nocap>
@@ -1415,7 +1425,25 @@
                 );
             }
 
-        # Translate subrule calls of the form: <RULENAME>...
+        # Translate subrule calls of the form: <?RULENAME> and <!RULENAME>...
+            elsif ($+{self_subrule_lookahead}) {
+
+                # Determine type of lookahead, and work around capture problem...
+                my ($type, $pre, $post) = ( 'neglookahead', '(?!(?!)|', ')' );
+                if ($+{sign} eq '?') {
+                    $type = 'poslookahead';
+                    $pre  x= 2;
+                    $post x= 2;
+                }
+
+                $pre . _translate_subrule_call(
+                    $curr_construct, qq{'$+{subrule}'}, $+{subrule}, $+{args}, $type, q{},
+                    $compiletime_debugging_requested,
+                    $runtime_debugging_requested,
+                    $subrule_names_ref,
+                  )
+                . $post;
+            }
             elsif ($+{self_subrule_scalar_nocap}) {
                 _translate_subrule_call(
                     $curr_construct, qq{'$+{subrule}'}, $+{subrule}, $+{args}, 'noncapturing', $+{modifier},
@@ -1463,7 +1491,7 @@
             elsif ($+{self_argrule_scalar}) {
                 my $pattern = qq{(??{;\$Regexp::Grammars::RESULT_STACK[-1]{'\@'}{'$+{subrule}'} // '(?!)'})};
                 _translate_subpattern(
-                    $curr_construct, qq{'$+{subrule}'}, $+{subrule}, 'noncapturing', $+{modifier},
+                    $curr_construct, qq{'$+{subrule}'}, $pattern, 'noncapturing', $+{modifier},
                     $compiletime_debugging_requested, $runtime_debugging_requested,
                     "in \$ARG{'$+{subrule}'}"
                 );
@@ -2068,7 +2096,7 @@
 
 =head1 VERSION
 
-This document describes Regexp::Grammars version 1.010
+This document describes Regexp::Grammars version 1.011
 
 
 =head1 SYNOPSIS
@@ -2170,6 +2198,9 @@
 
     <RULENAME(...)>          Call named subrule, passing args to it
 
+    <!RULENAME>              Call subrule and fail if it matches
+    <!RULENAME(...)>         (shorthand for (?!<.RULENAME>) )
+
     <:IDENT>                 Match contents of $ARG{IDENT} as a pattern
     <\:IDENT>                Match contents of $ARG{IDENT} as a literal
     </:IDENT>                Match closing delimiter for $ARG{IDENT}
@@ -2192,6 +2223,7 @@
 
     <.SUBRULE>               Call subrule (one of the above forms),
                              but don't save the result in %MATCH
+
 
     <[SUBRULE]>              Call subrule (one of the above forms), but
                              append result instead of overwriting it
@@ -3060,6 +3092,40 @@
 L<result distillation|"Result distillation">.
 
 
+=head2 Lookahead (zero-width) subrules
+
+Non-capturing subrule calls can be used in normal lookaheads:
+
+    <rule: qualified_typename>
+        # A valid typename and has a :: in it...
+        (?= <.typename> )  [^\s:]+ :: \S+
+
+    <rule: identifier>
+        # An alpha followed by alnums (but not a valid typename)...
+        (?! <.typename> )    [^\W\d]\w*
+
+but the syntax is a little unwieldy. More importantly, an internal
+problem with backtracking causes positive lookaheads to mess up
+the module's named capturing mechanism.
+
+So Regexp::Grammars provides two shorthands:
+
+    <!typename>        same as: (?! <.typename> )
+    <?typename>        same as: (?= <.typename> ) ...but works correctly!
+
+These two constructs can also be called with arguments, if necessary:
+
+    <rule: Command>
+        <Keyword>
+        (?:
+            <!Terminator(:Keyword)>  <Args=(\S+)>
+        )?
+        <Terminator(:Keyword)>
+
+Note that, as the above equivalences imply, neither of these forms of a
+subrule call ever captures what it matches.
+
+
 =head2 Matching separated lists
 
 One of the commonest tasks in text parsing is to match a list of unspecified
@@ -3421,7 +3487,7 @@
 useful when refactoring subrules. For example, instead of:
 
     <rule: Command>
-        <Keyword>  <Command>  end_ <\Keyword>
+        <Keyword>  <CommandBody>  end_ <\Keyword>
 
     <rule: Placeholder>
         <Keyword>    \.\.\.   end_ <\Keyword>
@@ -3429,7 +3495,7 @@
 you could parameterize the Terminator rule, like so:
 
     <rule: Command>
-        <Keyword>  <Command>  <Terminator(:Keyword)>
+        <Keyword>  <CommandBody>  <Terminator(:Keyword)>
 
     <rule: Placeholder>
         <Keyword>    \.\.\.   <Terminator(:Keyword)>
@@ -5291,12 +5357,13 @@
 places a subrule call within a positive look-ahead, since
 these don't play nicely with the data stack.
 
-This may be an internal problem with perl itself or it may simply be
-a bug (or perhaps an intrinsic limitation) in the current implementation.
-Investigations are proceeding.
+This seems to be an internal problem with perl itself.
+Investigations, and attempts at a workaround, are proceeding.
 
 For the time being, you need to make sure that grammar rules don't appear
-inside a positive lookahead.
+inside a positive lookahead, or use the
+L<<< C<< <?RULENAME> >> construct | "Lookahead (zero-width) subrules" >>>
+instead.
 
 =item *
 

Modified: trunk/libregexp-grammars-perl/t/arg.t
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libregexp-grammars-perl/t/arg.t?rev=63565&op=diff
==============================================================================
--- trunk/libregexp-grammars-perl/t/arg.t (original)
+++ trunk/libregexp-grammars-perl/t/arg.t Mon Oct 11 06:05:47 2010
@@ -19,11 +19,12 @@
             <content=(.+?)>
         <[revkeyword=unkeyword(?{ keyword => scalar reverse $MATCH{keyword} })]>
 
+
         <rule: unkeyword>
             (??{ quotemeta( ($ARG{prefix}//q{}) . $ARG{keyword} ) })
 
         <token: dekeyword>
-            <terminator=:delim>
+            (<:delim>) <terminator=(?{$CAPTURE})>
     }xms;
 };
 

Modified: trunk/libregexp-grammars-perl/t/backref_ARG.t
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libregexp-grammars-perl/t/backref_ARG.t?rev=63565&op=diff
==============================================================================
--- trunk/libregexp-grammars-perl/t/backref_ARG.t (original)
+++ trunk/libregexp-grammars-perl/t/backref_ARG.t Mon Oct 11 06:05:47 2010
@@ -10,6 +10,7 @@
         <keyword=(\w+)>
             <content=(.+?)>
         <end_keyword (:keyword)>
+
       | <keyword=(\w+)>
             <content=(.+?)>
         <rev_keyword(:keyword)>




More information about the Pkg-perl-cvs-commits mailing list