[Pkg-puppet-devel] [SCM] Puppet packaging for Debian branch, upstream, updated. puppet-0.24.5-rc3-1456-g2f0b1e5

Markus Roberts Markus at reality.com
Tue Oct 27 17:06:00 UTC 2009


The following commit has been merged in the upstream branch:
commit ce46be5773656f68eddc7edd6212e283b46f9320
Author: Markus Roberts <Markus at reality.com>
Date:   Wed Sep 23 16:58:14 2009 -0700

    Proposed alternative solution for #2664 (REGEX / DIV lexing)
    
    This is my proposed attack on the lexing problem, with a few minor
    cleanups to simplify its integration.  The strategy:
    
    * Annotate tokens with a method "acceptable?" that determines
      whether they can be generated in a given context.  Have this
      default to true.
    * Give the lexer the notion of a context; initialize it and
      update it as needed.  The present context records the name of
      the last significant token generated and a start_of_line flag.
    * When a token is found to match, check whether it is acceptable
      in the present context before generating it (sketched below).
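    
    In outline, the annotation pattern looks roughly like this (a
    condensed sketch, not the patch itself; names follow the diff
    below):
    
        class Token
            # By default a token may be generated in any context.
            def acceptable?(context = {})
                true
            end
        end
    
        regex_token = Token.new
    
        # A context-sensitive token overrides the default; for REGEX
        # the override consults the name of the last significant
        # token generated:
        def regex_token.acceptable?(context = {})
            [:NODE, :LBRACE, :RBRACE, :MATCH, :NOMATCH, :COMMA].include? context[:after]
        end
    
        regex_token.acceptable?(:after => :MATCH)   # => true
        regex_token.acceptable?(:after => :NAME)    # => false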
    
    These changes don't result in any change in behaviour by
    themselves, but they enable:
    
    * Giving the REGEX token an acceptable? rule that only permits a
      regular expression in specific contexts (illustrated below).
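    
    Concretely, the distinction the rule draws (the cases are taken
    from the tests below):
    
        $x = $a/$b/$c       # chained division: each '/' follows a value
        $y =~ /foo/         # a regex: the '/' follows the match operator
        node /www\d+/ { }   # a regex: the '/' follows NODE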
    
    The other changes were a fix for the scan bug Brice reported, an
    adjusted test, and some cleanup of the cluttered conditions in
    the context collection path.
    
    Added tests and subsumed the change restricting REGEX to a single line.
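    
    For reference, the selection rule in find_regex_token amounts to
    "longest acceptable match wins".  A self-contained toy version
    (assuming Ruby's stdlib StringScanner; illustrative only, not the
    patch itself):
    
        require 'strscan'
    
        Token = Struct.new(:name, :regex) do
            # Default: acceptable in any context.
            def acceptable?(context = {})
                true
            end
        end
    
        div   = Token.new(:DIV,   %r{/})
        regex = Token.new(:REGEX, %r{/[^/\n]*/})
        def regex.acceptable?(context = {})
            # Abridged from the patch's full context list.
            [:NODE, :MATCH, :NOMATCH].include? context[:after]
        end
    
        # Among tokens whose regex matches at the scan position, keep
        # the longest one that also accepts the current context.
        def find_regex_token(scanner, tokens, context)
            best_token, best_length = nil, 0
            tokens.each do |token|
                if (length = scanner.match?(token.regex)) and token.acceptable?(context)
                    best_token, best_length = token, length if length > best_length
                end
            end
            return best_token, scanner.scan(best_token.regex) if best_token
        end
    
        find_regex_token(StringScanner.new("/foo/ 1"), [div, regex], :after => :MATCH)
        # => [REGEX token, "/foo/"]
        find_regex_token(StringScanner.new("/foo/ 1"), [div, regex], :after => :NAME)
        # => [DIV token, "/"]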

diff --git a/lib/puppet/parser/lexer.rb b/lib/puppet/parser/lexer.rb
index 0db6c22..bb4fdf9 100644
--- a/lib/puppet/parser/lexer.rb
+++ b/lib/puppet/parser/lexer.rb
@@ -11,7 +11,7 @@ end
 module Puppet::Parser; end
 
 class Puppet::Parser::Lexer
-    attr_reader :last, :file
+    attr_reader :last, :file, :lexing_context
 
     attr_accessor :line, :indefine
 
@@ -41,6 +41,11 @@ class Puppet::Parser::Lexer
                 @name.to_s
             end
         end
+        
+        def acceptable?(context={})
+            # By default tokens are acceptable in any context
+            true 
+        end
     end
 
     # Maintain a list of tokens.
@@ -171,7 +176,7 @@ class Puppet::Parser::Lexer
         [self,value]
     end
 
-    TOKENS.add_token :REGEX, %r{/[^/\n]*/} do |lexer, value|
+    regex_token = TOKENS.add_token :REGEX, %r{/[^/\n]*/} do |lexer, value|
         # Make sure we haven't matched an escaped /
         while value[-2..-2] == '\\'
             other = lexer.scan_until(%r{/})
@@ -181,6 +186,10 @@ class Puppet::Parser::Lexer
         [self, Regexp.new(regex)]
     end
 
+    def regex_token.acceptable?(context={})
+        [:NODE,:LBRACE,:RBRACE,:MATCH,:NOMATCH,:COMMA].include? context[:after]
+    end
+
     TOKENS.add_token :RETURN, "\n", :skip => true, :incr_line => true, :skip_text => true
 
     TOKENS.add_token :SQUOTE, "'" do |lexer, value|
@@ -286,36 +295,28 @@ class Puppet::Parser::Lexer
     # Find the next token that matches a regex.  We look for these first.
     def find_regex_token
         @regex += 1
-        matched_token = nil
-        value = ""
-        length = 0
+        best_token = nil
+        best_length = 0
 
         # I tried optimizing based on the first char, but it had
         # a slightly negative effect and was a good bit more complicated.
         TOKENS.regex_tokens.each do |token|
-            next unless match_length = @scanner.match?(token.regex)
-
-            # We've found a longer match
-            if match_length > length
-                value = @scanner.scan(token.regex)
-                length = value.length
-                matched_token = token
+            if length = @scanner.match?(token.regex) and token.acceptable?(lexing_context)
+                # We've found a longer match
+                if length > best_length
+                    best_length = length
+                    best_token = token
+                end
             end
         end
 
-        return matched_token, value
+        return best_token, @scanner.scan(best_token.regex) if best_token
     end
 
     # Find the next token, returning the string and the token.
     def find_token
         @find += 1
-        matched_token, value = find_regex_token
-
-        unless matched_token
-            matched_token, value = find_string_token
-        end
-
-        return matched_token, value
+        find_regex_token || find_string_token
     end
 
     def indefine?
@@ -345,6 +346,7 @@ class Puppet::Parser::Lexer
         @indefine = false
         @expected = []
         @commentstack = [ ['', @line] ]
+        @lexing_context = {:after => nil, :start_of_line => true}
     end
 
     # Make any necessary changes to the token and/or value.
@@ -417,17 +419,11 @@ class Puppet::Parser::Lexer
                 raise "Could not match '%s'" % nword
             end
 
-            if matched_token.name == :RETURN
-                # this matches a blank line
-                if @last_return
-                    # eat the previously accumulated comments
-                    getcomment
-                end
-                # since :RETURN skips, we won't survive to munge_token
-                @last_return = true
-            else
-                @last_return = false
-            end
+            newline = matched_token.name == :RETURN
+
+            # this matches a blank line; eat the previously accumulated comments
+            getcomment if lexing_context[:start_of_line] and newline
+            lexing_context[:start_of_line] = newline
 
             final_token, token_value = munge_token(matched_token, value)
 
@@ -436,6 +432,8 @@ class Puppet::Parser::Lexer
                 next
             end
 
+            lexing_context[:after]         = final_token.name unless newline
+
             value = token_value[:value]
 
             if match = @@pairs[value] and final_token.name != :DQUOTE and final_token.name != :SQUOTE
diff --git a/spec/unit/parser/lexer.rb b/spec/unit/parser/lexer.rb
index 3c73ca9..959f360 100755
--- a/spec/unit/parser/lexer.rb
+++ b/spec/unit/parser/lexer.rb
@@ -464,18 +464,39 @@ describe Puppet::Parser::Lexer::TOKENS[:REGEX] do
         @token.regex.should_not =~ "/this is \n a regex/"
     end
 
-    describe "when including escaped slashes" do
-        before { @lexer = Puppet::Parser::Lexer.new }
-
-        it "should not consider escaped slashes to be the end of a regex" do
-            @lexer.string = "/this \\/ foo/"
+    describe "when scanning" do
+        def tokens_scanned_from(s)
+            lexer = Puppet::Parser::Lexer.new
+            lexer.string = s
             tokens = []
-            @lexer.scan do |name, value|
+            lexer.scan do |name, value|
                 tokens << value
             end
-            tokens[0][:value].should == Regexp.new("this / foo")
+            tokens[0..-2]
         end
-    end
+
+        it "should not consider escaped slashes to be the end of a regex" do
+            tokens_scanned_from("$x =~ /this \\/ foo/").last[:value].should == Regexp.new("this / foo")
+        end
+
+        it "should not lex chained division as a regex" do
+            tokens_scanned_from("$x = $a/$b/$c").any? {|t| t[:value].class == Regexp }.should == false
+        end
+
+        it "should accept a regular expression after NODE" do
+            tokens_scanned_from("node /www.*\.mysite\.org/").last[:value].should == Regexp.new("www.*\.mysite\.org")
+        end
+
+        it "should accept regular expressions in a CASE" do
+            s = %q{case $variable {
+                "something": {$othervar = 4096 / 2}
+                /regex/: {notice("this notably sucks")}
+                }
+            }
+            tokens_scanned_from(s)[12][:value].should == Regexp.new("regex")
+        end
+ 
+   end
 
 
     it "should return the REGEX token and a Regexp" do

-- 
Puppet packaging for Debian


