[Pkg-puppet-devel] [SCM] Puppet packaging for Debian branch, experimental, updated. debian/2.6.8-1-844-g7ec39d5

Daniel Pittman daniel at rimspace.net
Tue May 10 08:04:21 UTC 2011


The following commit has been merged in the experimental branch:
commit 34a57b846319f2962f78b161cd80b15f491e1086
Author: Daniel Pittman <daniel at rimspace.net>
Date:   Sat Jan 22 21:41:52 2011 -0800

    Feature #2597 -- really find and report cycles.
    
    This implements Tarjan's algorithm for finding strongly connected components
    in a directed graph, and leverages that to find cycles.
    
    This allows us to report the minimum set of nodes in each cycle, as well as
    reporting each cycle discretely if there are multiple of them.
    
    While this can still produce overwhelming and/or unhelpful output, it
    represents a large step forward in delivering useful information when a cycle
    is detected.
    
    This presently reports the set of nodes that contain the cycle, in no
    particular order, rather than the set of edges connecting those nodes.
    
    Sadly, it also suffers a limitation: the implementation of Tarjan's algorithm
    used to find strongly connected components is recursive, so is limited by the
    maximum Ruby stack depth to dependency chains less than 1,000 nodes deep.
    
    While this is probably not a limit in practice, it is a nasty limitation, and
    other considerations (like Ruby stack consumption across versions) could
    trigger this much sooner than is desirable.

diff --git a/lib/puppet/simple_graph.rb b/lib/puppet/simple_graph.rb
index f9a665d..b900793 100644
--- a/lib/puppet/simple_graph.rb
+++ b/lib/puppet/simple_graph.rb
@@ -1,9 +1,11 @@
-#  Created by Luke A. Kanies on 2007-11-07.
-#  Copyright (c) 2007. All rights reserved.
+# Created by Luke A. Kanies on 2007-11-07.
+# Cycle detection and reporting by Daniel Pittman 2011-01-22
+# Copyright (c) 2007, 2010.  All rights reserved.
 
 require 'puppet/external/dot'
 require 'puppet/relationship'
 require 'set'
+require 'ostruct'
 
 # A hopefully-faster graph class to replace the use of GRATR.
 class Puppet::SimpleGraph
@@ -92,40 +94,79 @@ class Puppet::SimpleGraph
     vertices
   end
 
-  # Provide a topological sort with cycle reporting
-  def topsort_with_cycles
-    degree = {}
-    zeros = []
-    result = []
-
-    # Collect each of our vertices, with the number of in-edges each has.
-    vertices.each do |v|
-      edges = @in_to[v].dup
-      zeros << v if edges.empty?
-      degree[v] = edges
+  # This is a simple implementation of Tarjan's algorithm to find strongly
+  # connected components in the graph; this is a fairly ugly implementation,
+  # because I can't just decorate the vertices themselves.
+  #
+  # This method has an unhealthy relationship with the find_cycles_in_graph
+  # method below, which contains the knowledge of how the state object is
+  # maintained.
+  def tarjan(v, s)
+    s.index[v]   = s.n
+    s.lowlink[v] = s.n
+    s.n = s.n + 1
+
+    s.s.push v
+
+    @out_from[v].each do |edge|
+      to = edge[0]
+
+      if ! s.index[to] then
+        tarjan(to, s)
+        s.lowlink[v] = [s.lowlink[v], s.lowlink[to]].min
+      elsif s.s.member? to then
+        # Performance note: the stack membership test *should* be done with a
+        # constant time check, but I was lazy and used something that is
+        # likely to be O(N) where N is the stack depth; this will bite us
+        # eventually, and should be improved before the change lands.
+        #
+        # OTOH, this is only invoked on a very cold path, when things have
+        # gone wrong anyhow, right now.  I feel that getting the code out is
+        # worth more than that final performance boost. --daniel 2011-01-22
+        s.lowlink[v] = [s.lowlink[v], s.index[to]].min
+      end
     end
 
-    # Iterate over each 0-degree vertex, decrementing the degree of
-    # each of its out-edges.
-    while v = zeros.pop
-      result << v
-      @out_from[v].each { |v2,es|
-        degree[v2].delete(v)
-        zeros << v2 if degree[v2].empty?
-      }
+    if s.lowlink[v] == s.index[v] then
+      # REVISIT: Surely there must be a nicer way to partition this around an
+      # index, but I don't know what it is.  This works. :/ --daniel 2011-01-22
+      #
+      # Performance note: this might also suffer an O(stack depth) performance
+      # hit, better replaced with something that is O(1) for splitting the
+      # stack into parts.
+      tmp = s.s.slice!(0, s.s.index(v))
+      s.scc.push s.s
+      s.s = tmp
     end
+  end
 
-    # If we have any vertices left with non-zero in-degrees, then we've found a cycle.
-    if cycles = degree.values.reject { |ns| ns.empty? } and cycles.length > 0
-      message = cycles.collect { |edges|
-        '(' + edges.collect { |e| e[1].to_s }.join(", ") + ')'
-      }.join("\n")
-      raise Puppet::Error, "Found dependency cycles in the following relationships:\n" +
-        message + "\n" +
-        "Try the '--graph' option and opening the '.dot' file in OmniGraffle or GraphViz"
+  # Find all cycles in the graph by detecting all the strongly connected
+  # components, then eliminating everything with a size of one as
+  # uninteresting - which it is, because it can't be a cycle. :)
+  #
+  # This has an unhealthy relationship with the 'tarjan' method above, which
+  # it uses to implement the detection of strongly connected components.
+  def find_cycles_in_graph
+    state = OpenStruct.new :n => 0, :index => {}, :lowlink => {}, :s => [], :scc => []
+
+    # we usually have a disconnected graph, must walk all possible roots
+    vertices.each do |vertex|
+      if ! state.index[vertex] then
+        tarjan vertex, state
+      end
     end
 
-    result
+    return state.scc.select { |c| c.length > 1 }
+  end
+
+  def report_cycles_in_graph
+    cycles = find_cycles_in_graph
+    n = cycles.length           # where is "pluralize"? --daniel 2011-01-22
+    s = n == 1 ? '' : 's'
+
+    raise Puppet::Error, "Found #{n} dependency cycle#{s}:\n" +
+      cycles.collect { |c| '(' + c.join(", ") + ')' }.join("\n") + "\n" +
+      "Try the '--graph' option and opening the '.dot' file in OmniGraffle or GraphViz"
   end
 
   # Provide a topological sort.
@@ -152,7 +193,7 @@ class Puppet::SimpleGraph
 
     # If we have any vertices left with non-zero in-degrees, then we've found a cycle.
     if cycles = degree.values.reject { |ns| ns == 0  } and cycles.length > 0
-      topsort_with_cycles
+      report_cycles_in_graph
     end
 
     result
diff --git a/spec/unit/simple_graph_spec.rb b/spec/unit/simple_graph_spec.rb
index 58978f2..a8e0c44 100755
--- a/spec/unit/simple_graph_spec.rb
+++ b/spec/unit/simple_graph_spec.rb
@@ -305,10 +305,59 @@ describe Puppet::SimpleGraph do
 
     it "should produce the correct relationship text" do
       add_edges :a => :b, :b => :a
-      want = %r{following relationships:\n\(b => a\)\n\(a => b\)}
+      want = %r{Found 1 dependency cycle:\n\(a, b\)\nTry}
       expect { @graph.topsort }.to raise_error(Puppet::Error, want)
     end
 
+    it "cycle discovery should be the minimum cycle for a simple graph" do
+      add_edges "a" => "b"
+      add_edges "b" => "a"
+      add_edges "b" => "c"
+
+      cycles = nil
+      expect { cycles = @graph.find_cycles_in_graph.sort }.should_not raise_error
+      cycles.should be == [["a", "b"]]
+    end
+
+    it "cycle discovery should handle two distinct cycles" do
+      add_edges "a" => "a1", "a1" => "a"
+      add_edges "b" => "b1", "b1" => "b"
+
+      cycles = nil
+      expect { cycles = @graph.find_cycles_in_graph.sort }.should_not raise_error
+      cycles.should be == [["a", "a1"], ["b", "b1"]]
+    end
+
+    it "cycle discovery should handle two cycles in a connected graph" do
+      add_edges "a" => "b", "b" => "c", "c" => "d"
+      add_edges "a" => "a1", "a1" => "a"
+      add_edges "c" => "c1", "c1" => "c2", "c2" => "c3", "c3" => "c"
+
+      cycles = nil
+      expect { cycles = @graph.find_cycles_in_graph.sort }.should_not raise_error
+      cycles.should be == [%w{a a1}, %w{c c1 c2 c3}]
+    end
+
+    it "cycle discovery should handle a complicated cycle" do
+      add_edges "a" => "b", "b" => "c"
+      add_edges "a" => "c"
+      add_edges "c" => "c1", "c1" => "a"
+      add_edges "c" => "c2", "c2" => "b"
+
+      cycles = nil
+      expect { cycles = @graph.find_cycles_in_graph.sort }.should_not raise_error
+      cycles.should be == [%w{a b c c1 c2}]
+    end
+
+    it "cycle discovery should not fail with large data sets" do
+      limit = 1000
+      (1..(limit - 1)).each do |n| add_edges n.to_s => (n+1).to_s end
+
+      cycles = nil
+      expect { cycles = @graph.find_cycles_in_graph.sort }.should_not raise_error
+      cycles.should be == []
+    end
+
     # Our graph's add_edge method is smart enough not to add
     # duplicate edges, so we use the objects, which it doesn't
     # check.

-- 
Puppet packaging for Debian



More information about the Pkg-puppet-devel mailing list