r63565 - in /trunk/libregexp-grammars-perl: Changes MANIFEST META.yml README debian/changelog lib/Regexp/Grammars.pm t/arg.t t/backref_ARG.t t/lookaheads.t
carnil at users.alioth.debian.org
carnil at users.alioth.debian.org
Mon Oct 11 06:06:04 UTC 2010
Author: carnil
Date: Mon Oct 11 06:05:47 2010
New Revision: 63565
URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=63565
Log:
New upstream release
Added:
trunk/libregexp-grammars-perl/t/lookaheads.t
- copied unchanged from r63564, branches/upstream/libregexp-grammars-perl/current/t/lookaheads.t
Modified:
trunk/libregexp-grammars-perl/Changes
trunk/libregexp-grammars-perl/MANIFEST
trunk/libregexp-grammars-perl/META.yml
trunk/libregexp-grammars-perl/README
trunk/libregexp-grammars-perl/debian/changelog
trunk/libregexp-grammars-perl/lib/Regexp/Grammars.pm
trunk/libregexp-grammars-perl/t/arg.t
trunk/libregexp-grammars-perl/t/backref_ARG.t
Modified: trunk/libregexp-grammars-perl/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libregexp-grammars-perl/Changes?rev=63565&op=diff
==============================================================================
--- trunk/libregexp-grammars-perl/Changes (original)
+++ trunk/libregexp-grammars-perl/Changes Mon Oct 11 06:05:47 2010
@@ -128,3 +128,13 @@
* Documented more explicitly that start-pattern is supposed to act
like a regular regex (or a token) with respect to whitespace
+
+
+1.011 Sun Oct 10 18:57:10 2010
+
+ * Added <!RULENAME> as alias for: (?! <.RULENAME> )
+
+ * Added <?RULENAME> as alias for: (?= <.RULENAME> )
+ (and made it work around normal lookahead/capture problem)
+
+ * Fixed major bugs in <:arg> handling
Modified: trunk/libregexp-grammars-perl/MANIFEST
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libregexp-grammars-perl/MANIFEST?rev=63565&op=diff
==============================================================================
--- trunk/libregexp-grammars-perl/MANIFEST (original)
+++ trunk/libregexp-grammars-perl/MANIFEST Mon Oct 11 06:05:47 2010
@@ -55,4 +55,5 @@
t/matchpos.t
t/repop_ws.t
t/top_is_token.t
+t/lookaheads.t
META.yml Module meta-data (added by MakeMaker)
Modified: trunk/libregexp-grammars-perl/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libregexp-grammars-perl/META.yml?rev=63565&op=diff
==============================================================================
--- trunk/libregexp-grammars-perl/META.yml (original)
+++ trunk/libregexp-grammars-perl/META.yml Mon Oct 11 06:05:47 2010
@@ -1,6 +1,6 @@
--- #YAML:1.0
name: Regexp-Grammars
-version: 1.010
+version: 1.011
abstract: Add grammatical parsing features to Perl 5.10 regexes
license: ~
author:
Modified: trunk/libregexp-grammars-perl/README
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libregexp-grammars-perl/README?rev=63565&op=diff
==============================================================================
--- trunk/libregexp-grammars-perl/README (original)
+++ trunk/libregexp-grammars-perl/README Mon Oct 11 06:05:47 2010
@@ -1,4 +1,4 @@
-Regexp::Grammars version 1.010
+Regexp::Grammars version 1.011
This module adds a small number of new regex constructs that can be used
within Perl 5.10 patterns to implement complete recursive-descent parsing.
Modified: trunk/libregexp-grammars-perl/debian/changelog
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libregexp-grammars-perl/debian/changelog?rev=63565&op=diff
==============================================================================
--- trunk/libregexp-grammars-perl/debian/changelog (original)
+++ trunk/libregexp-grammars-perl/debian/changelog Mon Oct 11 06:05:47 2010
@@ -1,5 +1,6 @@
-libregexp-grammars-perl (1.010-2) UNRELEASED; urgency=low
+libregexp-grammars-perl (1.011-1) UNRELEASED; urgency=low
+ * New upstream release
* Update carnil's email address
-- Salvatore Bonaccorso <carnil at debian.org> Sun, 10 Oct 2010 15:03:33 +0200
Modified: trunk/libregexp-grammars-perl/lib/Regexp/Grammars.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libregexp-grammars-perl/lib/Regexp/Grammars.pm?rev=63565&op=diff
==============================================================================
--- trunk/libregexp-grammars-perl/lib/Regexp/Grammars.pm (original)
+++ trunk/libregexp-grammars-perl/lib/Regexp/Grammars.pm Mon Oct 11 06:05:47 2010
@@ -7,7 +7,7 @@
use Scalar::Util qw< blessed >;
use Data::Dumper qw< Dumper >;
-our $VERSION = '1.010';
+our $VERSION = '1.011';
# Load the module...
sub import {
@@ -822,6 +822,9 @@
# Replace negative lookahead with one that works under R::G...
$regex =~ s{\(\?!}{(?!(?!)|}gxms;
# ToDo: Also replace positive lookahead with one that works under R::G...
+ # This replacement should be of the form:
+ # $regex =~ s{\(\?=}{(?!(?!)|(?!(?!)|}gxms;
+ # but need to find a way to insert the extra ) at the other end
return $is_comment ? q{} : $regex;
}
@@ -1125,7 +1128,7 @@
}
# Determine save behaviour...
- my $is_noncapturing = $savemode eq 'noncapturing';
+ my $is_noncapturing = $savemode =~ /noncapturing|lookahead/;
my $is_listifying = $savemode eq 'list';
my $save_code =
@@ -1151,11 +1154,15 @@
: substr($postmodifier,0,1) eq '?' ? 'any match'
: 'the match'
;
+ my $do_something_with = $savemode eq 'neglookahead' ? 'lookahead for anything except'
+ : $savemode eq 'poslookahead' ? 'lookahead for'
+ : 'match'
+ ;
if ($debug_build) {
_debug_notify( info =>
" |",
" |...Treating $construct as:",
- " | | match the subrule <$subrule> $repeatedly",
+ " | | $do_something_with the subrule <$subrule> $repeatedly",
(defined $arg_desc ? " | | passing the args: ($arg_desc)"
: ()
),
@@ -1234,6 +1241,9 @@
(?<self_subrule_scalar_nocap>
\. \s* (?<subrule>(?&QUALIDENT)) \s* (?<args>(?&ARGLIST)) \s*
)
+ | (?<self_subrule_lookahead>
+ (?<sign> \? | \! ) \s* (?<subrule>(?&QUALIDENT)) \s* (?<args>(?&ARGLIST)) \s*
+ )
| (?<self_subrule_scalar>
\s* (?<subrule>(?&QUALIDENT)) \s* (?<args>(?&ARGLIST)) \s*
@@ -1256,7 +1266,7 @@
(?<alias>(?&IDENT)) \s* = \s* : (?<subrule>(?&QUALIDENT)) \s*
)
| (?<alias_argrule_list>
- \[ (?<alias>(?&IDENT)) \s* = \s* : (?<subrule>(?&QUALIDENT)) \s*
+ \[ (?<alias>(?&IDENT)) \s* = \s* : (?<subrule>(?&QUALIDENT)) \s* \]
)
| (?<alias_parens_scalar_nocap>
@@ -1415,7 +1425,25 @@
);
}
- # Translate subrule calls of the form: <RULENAME>...
+ # Translate subrule calls of the form: <?RULENAME> and <!RULENAME>...
+ elsif ($+{self_subrule_lookahead}) {
+
+ # Determine type of lookahead, and work around capture problem...
+ my ($type, $pre, $post) = ( 'neglookahead', '(?!(?!)|', ')' );
+ if ($+{sign} eq '?') {
+ $type = 'poslookahead';
+ $pre x= 2;
+ $post x= 2;
+ }
+
+ $pre . _translate_subrule_call(
+ $curr_construct, qq{'$+{subrule}'}, $+{subrule}, $+{args}, $type, q{},
+ $compiletime_debugging_requested,
+ $runtime_debugging_requested,
+ $subrule_names_ref,
+ )
+ . $post;
+ }
elsif ($+{self_subrule_scalar_nocap}) {
_translate_subrule_call(
$curr_construct, qq{'$+{subrule}'}, $+{subrule}, $+{args}, 'noncapturing', $+{modifier},
@@ -1463,7 +1491,7 @@
elsif ($+{self_argrule_scalar}) {
my $pattern = qq{(??{;\$Regexp::Grammars::RESULT_STACK[-1]{'\@'}{'$+{subrule}'} // '(?!)'})};
_translate_subpattern(
- $curr_construct, qq{'$+{subrule}'}, $+{subrule}, 'noncapturing', $+{modifier},
+ $curr_construct, qq{'$+{subrule}'}, $pattern, 'noncapturing', $+{modifier},
$compiletime_debugging_requested, $runtime_debugging_requested,
"in \$ARG{'$+{subrule}'}"
);
@@ -2068,7 +2096,7 @@
=head1 VERSION
-This document describes Regexp::Grammars version 1.010
+This document describes Regexp::Grammars version 1.011
=head1 SYNOPSIS
@@ -2170,6 +2198,9 @@
<RULENAME(...)> Call named subrule, passing args to it
+ <!RULENAME> Call subrule and fail if it matches
+ <!RULENAME(...)> (shorthand for (?!<.RULENAME>) )
+
<:IDENT> Match contents of $ARG{IDENT} as a pattern
<\:IDENT> Match contents of $ARG{IDENT} as a literal
</:IDENT> Match closing delimiter for $ARG{IDENT}
@@ -2192,6 +2223,7 @@
<.SUBRULE> Call subrule (one of the above forms),
but don't save the result in %MATCH
+
<[SUBRULE]> Call subrule (one of the above forms), but
append result instead of overwriting it
@@ -3060,6 +3092,40 @@
L<result distillation|"Result distillation">.
+=head2 Lookahead (zero-width) subrules
+
+Non-capturing subrule calls can be used in normal lookaheads:
+
+ <rule: qualified_typename>
+ # Is a valid typename and has a :: in it...
+ (?= <.typename> ) [^\s:]+ :: \S+
+
+ <rule: identifier>
+ # An alpha followed by alnums (but not a valid typename)...
+ (?! <.typename> ) [^\W\d]\w*
+
+but the syntax is a little unwieldy. More importantly, an internal
+problem with backtracking causes positive lookaheads to mess up
+the module's named capturing mechanism.
+
+So Regexp::Grammars provides two shorthands:
+
+ <!typename> same as: (?! <.typename> )
+ <?typename> same as: (?= <.typename> ) ...but works correctly!
+
+These two constructs can also be called with arguments, if necessary:
+
+ <rule: Command>
+ <Keyword>
+ (?:
+ <!Terminator(:Keyword)> <Args=(\S+)>
+ )?
+ <Terminator(:Keyword)>
+
+Note that, as the above equivalences imply, neither of these forms of a
+subrule call ever captures what it matches.
+
+
=head2 Matching separated lists
One of the commonest tasks in text parsing is to match a list of unspecified
@@ -3421,7 +3487,7 @@
useful when refactoring subrules. For example, instead of:
<rule: Command>
- <Keyword> <Command> end_ <\Keyword>
+ <Keyword> <CommandBody> end_ <\Keyword>
<rule: Placeholder>
<Keyword> \.\.\. end_ <\Keyword>
@@ -3429,7 +3495,7 @@
you could parameterize the Terminator rule, like so:
<rule: Command>
- <Keyword> <Command> <Terminator(:Keyword)>
+ <Keyword> <CommandBody> <Terminator(:Keyword)>
<rule: Placeholder>
<Keyword> \.\.\. <Terminator(:Keyword)>
@@ -5291,12 +5357,13 @@
places a subrule call within a positive look-ahead, since
these don't play nicely with the data stack.
-This may be an internal problem with perl itself or it may simply be
-a bug (or perhaps an intrinsic limitation) in the current implementation.
-Investigations are proceeding.
+This seems to be an internal problem with perl itself.
+Investigations, and attempts at a workaround, are proceeding.
For the time being, you need to make sure that grammar rules don't appear
-inside a positive lookahead.
+inside a positive lookahead, or use the
+L<<< C<< <?RULENAME> >> construct | "Lookahead (zero-width) subrules" >>>
+instead.
=item *
Modified: trunk/libregexp-grammars-perl/t/arg.t
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libregexp-grammars-perl/t/arg.t?rev=63565&op=diff
==============================================================================
--- trunk/libregexp-grammars-perl/t/arg.t (original)
+++ trunk/libregexp-grammars-perl/t/arg.t Mon Oct 11 06:05:47 2010
@@ -19,11 +19,12 @@
<content=(.+?)>
<[revkeyword=unkeyword(?{ keyword => scalar reverse $MATCH{keyword} })]>
+
<rule: unkeyword>
(??{ quotemeta( ($ARG{prefix}//q{}) . $ARG{keyword} ) })
<token: dekeyword>
- <terminator=:delim>
+ (<:delim>) <terminator=(?{$CAPTURE})>
}xms;
};
Modified: trunk/libregexp-grammars-perl/t/backref_ARG.t
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libregexp-grammars-perl/t/backref_ARG.t?rev=63565&op=diff
==============================================================================
--- trunk/libregexp-grammars-perl/t/backref_ARG.t (original)
+++ trunk/libregexp-grammars-perl/t/backref_ARG.t Mon Oct 11 06:05:47 2010
@@ -10,6 +10,7 @@
<keyword=(\w+)>
<content=(.+?)>
<end_keyword (:keyword)>
+
| <keyword=(\w+)>
<content=(.+?)>
<rev_keyword(:keyword)>
More information about the Pkg-perl-cvs-commits
mailing list