[Debtags-commits] [svn] r1409 - autodebtag/trunk/dbacl

Enrico Zini enrico at costa.debian.org
Thu Oct 27 16:42:07 UTC 2005


Author: enrico
Date: Thu Oct 27 16:42:06 2005
New Revision: 1409

Modified:
   autodebtag/trunk/dbacl/debtags-ai
Log:
More debtags-ai improvements

Modified: autodebtag/trunk/dbacl/debtags-ai
==============================================================================
--- autodebtag/trunk/dbacl/debtags-ai	(original)
+++ autodebtag/trunk/dbacl/debtags-ai	Thu Oct 27 16:42:06 2005
@@ -4,14 +4,30 @@
 use strict;
 use warnings;
 
+# 0: Be silent
+# 1: Print terse progress info
+# 2: Be annoying
 my $verbose = 1;
 
+# Minimum cardinality a tag should have to be considered for testing when
+# generating patches
+my $tag_min_card = 800;
+
+# Matches tags we don't want to consider anyway
+my $tag_blacklist = qr/^special::/;
+
+# Minimum percentage of a dbacl result to be considered good
+my $sure_perc = 85;
+
+# What should we read as the package cache
+my $pkgcache_source = "apt-cache dumpavail |";
+
 sub train_all ();
 sub read_apt ();
 sub patch_package ($);
 
 my %pkgdata;
-my $pkg_count = 0;
+my $pkg_count = 1;
 my %tags;
 my @interesting_tags;
 
@@ -38,7 +54,8 @@
 	for my $tag (keys %tags)
 	{
 		push @interesting_tags, $tag 
-			if $tag !~ /^(special|culture|accessibility|hardware)::/;
+			if scalar keys %{$tags{$tag}} >= $tag_min_card
+			   and $tag !~ $tag_blacklist;
 	}
 
 	my %pkgs_done;
@@ -60,6 +77,10 @@
 		}
 		close(IN);
 	}
+	my $count_total = scalar keys %pkgdata;
+	my $count_reallydone = 0;
+	my $count_done = scalar keys %pkgs_done;
+	my $time_used = 0;
 	for my $pkg (keys %pkgdata)
 	{
 		if (-f 'patch-stop')
@@ -69,12 +90,24 @@
 			last;
 		}
 
+		$count_done++;
 		if (not exists $pkgs_done{$pkg})
 		{
-			print STDERR "Doing: $pkg\n" if $verbose > 0;
-			patch_package($pkg)
+			#print STDERR "Doing: $pkg\n" if $verbose > 0;
+			my ($user, $system) = patch_package($pkg);
+			$time_used += $user + $system;
+			$count_reallydone++;
+			printf STDERR "$pkg done.  It took %f (user), %f (system), %f (total)\n", $user, $system, $user + $system
+				if ($verbose > 1);
+			if ($verbose == 1 && $count_reallydone % 20 == 0)
+			{
+				printf STDERR "Computing on %d tags. (%d/%d, %.3f seconds per package)\r",
+					scalar(@interesting_tags),
+					$count_done, $count_total, $time_used / $count_reallydone;
+			}
 		}
 	}
+	printf STDERR "\n" if $verbose == 1;
 } else {
 	usage();
 }
@@ -100,7 +133,8 @@
 	close $rdrfh;
 	waitpid $pid, 0;
 	$out =~ /^(not-)?.+ # (\d+)%\n$/ or die "Can't parse output line \"$out\"";
-	return (not defined $1), $2;
+	return (0, $2) if (defined $1);
+	return (1, $2);
 }
 
 # Output a tag patch for the given package, computed by dbacl
@@ -114,7 +148,7 @@
 	{
 		my ($has, $perc) = testtag($pkg, $tag);
 		next if not defined $perc;
-		next if $perc < 90;
+		next if $perc < $sure_perc;
 		if ($has)
 		{
 			if (exists $tags{$tag}{$pkg})
@@ -134,18 +168,15 @@
 			}
 		}
 	}
-	print $pkg, ": ", join(', ', @patch), "\n";
+	print $pkg, ": ", join(', ', @patch), "\n" if @patch;
 
-	if ($verbose > 1)
+	my @end = times();
+	my @ela;
+	for (my $i = 0; $i < 4; $i++)
 	{
-		my @end = times();
-		my @ela;
-		for (my $i = 0; $i < 4; $i++)
-		{
-			$ela[$i] = $end[$i] - $start[$i];
-		}
-		printf STDERR "$pkg done.  It took %f (user), %f (system), %f (total)\n", $ela[0] + $ela[2], $ela[1] + $ela[3],  $ela[0] + $ela[2] + $ela[1] + $ela[3];
+		$ela[$i] = $end[$i] - $start[$i];
 	}
+	return $ela[0] + $ela[2], $ela[1] + $ela[3]
 }
 
 
@@ -156,7 +187,7 @@
 # Read apt database
 sub read_apt ()
 {
-	open IN, "apt-cache dumpavail |" or die "Can't read package cache: $!";
+	open IN, "$pkgcache_source" or die "Can't read package cache from \"$pkgcache_source\": $!";
 	local $/ = "\n\n";
 	while (my $rec = <IN>)
 	{



More information about the Debtags-commits mailing list