[Collab-qa-commits] r1531 - udd/udd

Lucas Nussbaum lucas at alioth.debian.org
Thu Jul 23 14:05:41 UTC 2009


Author: lucas
Date: 2009-07-23 14:05:39 +0000 (Thu, 23 Jul 2009)
New Revision: 1531

Modified:
   udd/udd/bugs_gatherer.pl
   udd/udd/carnivore_gatherer.py
   udd/udd/ddtp_gatherer.py
   udd/udd/debtags_gatherer.py
   udd/udd/ftpnew_gatherer.py
   udd/udd/lintian_gatherer.py
   udd/udd/orphaned_packages_gatherer.py
   udd/udd/packages_gatherer.py
   udd/udd/popcon_gatherer.py
   udd/udd/screenshot_gatherer.py
   udd/udd/sources_gatherer.py
   udd/udd/testing_migrations_gatherer.py
   udd/udd/ubuntu_bugs_gatherer.py
   udd/udd/upload_history_gatherer.py
Log:
Add ANALYZE at the end of all importers so that PostgreSQL refreshes its planner statistics for the data we just imported.

Modified: udd/udd/bugs_gatherer.pl
===================================================================
--- udd/udd/bugs_gatherer.pl	2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/bugs_gatherer.pl	2009-07-23 14:05:39 UTC (rev 1531)
@@ -288,6 +288,19 @@
 		}
 	}
 	print "Inserting bugs: ",(time() - $t),"s\n" if $timing;
+
+	$dbh->commit();
+	print "Committing bugs: ",(time() - $t),"s\n" if $timing;
+
+	foreach my $postfix (qw{_packages _merged_with _found_in _fixed_in _tags}, '') {
+		my $sth = $dbh->prepare("ANALYZE $table$postfix");
+		$sth->execute() or die $!;
+	}
+
+	my $sth = $dbh->prepare("ANALYZE ".$src_config{'usertags-table'});
+	$sth->execute() or die $!;
+
+	print "Analyzing bugs: ",(time() - $t),"s\n" if $timing;
 }
 
 sub main {
@@ -323,8 +336,6 @@
 		exit(1)
 	}
 
-	$dbh->commit();
-	print "Committing bugs: ",(time() - $t),"s\n" if $timing;
 }
 
 main();

Modified: udd/udd/carnivore_gatherer.py
===================================================================
--- udd/udd/carnivore_gatherer.py	2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/carnivore_gatherer.py	2009-07-23 14:05:39 UTC (rev 1531)
@@ -107,6 +107,8 @@
             if info["name"] not in record:
               record[info["name"]] = set()
             record[info["name"]].add(content.rstrip())
+    for table in ['emails', 'names', 'keys', 'login']:
+      cur.execute("ANALYZE %s" % my_config["%s-table" % table])
 
 if __name__ == '__main__':
   main()

Modified: udd/udd/ddtp_gatherer.py
===================================================================
--- udd/udd/ddtp_gatherer.py	2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/ddtp_gatherer.py	2009-07-23 14:05:39 UTC (rev 1531)
@@ -137,6 +137,7 @@
           print >>stderr, "Error reading %s (%s)" % (dir+filename, err)
 
     cur.execute("DEALLOCATE ddtp_insert")
+    cur.execute("ANALYZE %s" % my_config['table'])
 
 if __name__ == '__main__':
   main()

Modified: udd/udd/debtags_gatherer.py
===================================================================
--- udd/udd/debtags_gatherer.py	2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/debtags_gatherer.py	2009-07-23 14:05:39 UTC (rev 1531)
@@ -70,6 +70,7 @@
             cur.execute('EXECUTE debtags_insert (%s, %s)' \
                             % (quote(pkg), quote(tag)))
         cur.execute('DEALLOCATE debtags_insert')
+        cur.execute("ANALYZE %s" % conf['table'])
 
 
 def test():

Modified: udd/udd/ftpnew_gatherer.py
===================================================================
--- udd/udd/ftpnew_gatherer.py	2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/ftpnew_gatherer.py	2009-07-23 14:05:39 UTC (rev 1531)
@@ -446,6 +446,8 @@
     cur.execute("DEALLOCATE ftpnew_insert_source")
     cur.execute("DEALLOCATE ftpnew_insert_package")
     cur.execute("DEALLOCATE ftpnew_check_existing_package")
+    cur.execute("ANALYZE %s" % my_config["table_sources"])
+    cur.execute("ANALYZE %s" % my_config["table_packages"])
 
 if __name__ == '__main__':
   main()

Modified: udd/udd/lintian_gatherer.py
===================================================================
--- udd/udd/lintian_gatherer.py	2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/lintian_gatherer.py	2009-07-23 14:05:39 UTC (rev 1531)
@@ -70,6 +70,7 @@
         print "Can't parse line %d: %s" % (line_number, line.rstrip())
 
     cur.execute("DEALLOCATE lintian_insert")
+    cur.execute("ANALYZE %s" % my_config["table"])
 
 if __name__ == '__main__':
   main()

Modified: udd/udd/orphaned_packages_gatherer.py
===================================================================
--- udd/udd/orphaned_packages_gatherer.py	2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/orphaned_packages_gatherer.py	2009-07-23 14:05:39 UTC (rev 1531)
@@ -70,5 +70,6 @@
         except IntegrityError, message:
           print "Integrity Error inserting bug " + str(row[0]) + " " + m.group(2)
           continue
+    cur2.execute("ANALYZE %s" % self.my_config['table'])
 
 # vim:set et tabstop=2:

Modified: udd/udd/packages_gatherer.py
===================================================================
--- udd/udd/packages_gatherer.py	2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/packages_gatherer.py	2009-07-23 14:05:39 UTC (rev 1531)
@@ -211,6 +211,10 @@
       SELECT DISTINCT distribution, release, component, architecture
       FROM %s""" % (table + '_distrelcomparch', table))
 
+    cur.execute("ANALYZE %s" % table)
+    cur.execute("ANALYZE %s" % table + '_summary')
+    cur.execute("ANALYZE %s" % table + '_distrelcomparch')
+
     self.print_warnings()
 
   def print_warnings(self):

Modified: udd/udd/popcon_gatherer.py
===================================================================
--- udd/udd/popcon_gatherer.py	2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/popcon_gatherer.py	2009-07-23 14:05:39 UTC (rev 1531)
@@ -90,6 +90,9 @@
       WHERE %(table)s.package = pkgs.package
       GROUP BY pkgs.source;
       """ % my_config)
+    cur.execute("ANALYZE " + table)
+    cur.execute("ANALYZE " + table_src)
+    cur.execute("ANALYZE " + table_src_average)
 
 if __name__ == '__main__':
   main()

Modified: udd/udd/screenshot_gatherer.py
===================================================================
--- udd/udd/screenshot_gatherer.py	2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/screenshot_gatherer.py	2009-07-23 14:05:39 UTC (rev 1531)
@@ -62,6 +62,7 @@
         print >>stderr, "Unable to inject data for package %s. %s" % (res['name'], err)
         print >>stderr,  "-->", res
     cur.execute("DEALLOCATE screenshots_insert")
+    cur.execute("ANALYZE %s" % my_config['table'])
 
 if __name__ == '__main__':
   main()

Modified: udd/udd/sources_gatherer.py
===================================================================
--- udd/udd/sources_gatherer.py	2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/sources_gatherer.py	2009-07-23 14:05:39 UTC (rev 1531)
@@ -48,25 +48,25 @@
     d = {}
     for k in sources_gatherer.mandatory:
       if k not in control:
-	raise "Mandatory field %s not specified" % k
+        raise "Mandatory field %s not specified" % k
       d[k] = control[k]
     for k in sources_gatherer.non_mandatory:
       if k in control:
-	d[k] = control[k]
+        d[k] = control[k]
       else:
-	d[k] = None
+        d[k] = None
     
     d['Vcs-Type'] = None
     d['Vcs-Url'] = None
     for vcs in sources_gatherer.vcs:
       if control.has_key("Vcs-"+vcs):  
         d['Vcs-Type'] = vcs
-	d['Vcs-Url'] = control["Vcs-"+vcs]
-	break
+        d['Vcs-Url'] = control["Vcs-"+vcs]
+        break
       elif control.has_key("X-Vcs-"+vcs):  
         d['Vcs-Type'] = vcs
-	d['Vcs-Url'] = control["X-Vcs-"+vcs]
-	break
+        d['Vcs-Url'] = control["X-Vcs-"+vcs]
+        break
     if control.has_key("Vcs-Browser"):  
         d['Vcs-Browser'] = control["Vcs-Browser"]
     elif control.has_key("X-Vcs-Browser"):  
@@ -139,56 +139,59 @@
     for comp in src_cfg['components']:
       path = os.path.join(src_cfg['directory'], comp, 'source', 'Sources.gz')
       cur.execute("DELETE from %s WHERE Distribution = '%s' AND\
-	release = '%s' AND component = '%s'"\
-	% (table, src_cfg['distribution'], src_cfg['release'], comp))
+        release = '%s' AND component = '%s'"\
+        % (table, src_cfg['distribution'], src_cfg['release'], comp))
       cur.execute("DELETE from %s WHERE Distribution = '%s' AND\
-	release = '%s' AND component = '%s'"\
-	% (utable, src_cfg['distribution'], src_cfg['release'], comp))
+        release = '%s' AND component = '%s'"\
+        % (utable, src_cfg['distribution'], src_cfg['release'], comp))
       try:
-	query = """PREPARE source_insert as INSERT INTO %s
-	  (Source, Version, Maintainer, Maintainer_name, Maintainer_email, Format, Files, Uploaders, Bin,
-	  Architecture, Standards_Version, Homepage, Build_Depends,
-	  Build_Depends_Indep, Build_Conflicts, Build_Conflicts_Indep, Priority,
-	  Section, Vcs_Type, Vcs_Url, Vcs_Browser, python_version, checksums_sha1,
-	  checksums_sha256, original_maintainer, dm_upload_allowed,
-	  Distribution, Release, Component)
-	VALUES
-	  ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16,
-	  $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, '%s', '%s', '%s')"""\
-	  % (table, src_cfg['distribution'], src_cfg['release'], comp)
-	cur.execute(query)
-	query = """PREPARE uploader_insert as INSERT INTO %s
-	  (Source, Version, Distribution, Release, Component, Uploader, Name, Email) VALUES
-	  ($1, $2, '%s', '%s', '%s', $3, $4, $5) """ % \
-	(utable, src_cfg['distribution'], src_cfg['release'], comp)
-	cur.execute(query)
+        query = """PREPARE source_insert as INSERT INTO %s
+          (Source, Version, Maintainer, Maintainer_name, Maintainer_email, Format, Files, Uploaders, Bin,
+          Architecture, Standards_Version, Homepage, Build_Depends,
+          Build_Depends_Indep, Build_Conflicts, Build_Conflicts_Indep, Priority,
+          Section, Vcs_Type, Vcs_Url, Vcs_Browser, python_version, checksums_sha1,
+          checksums_sha256, original_maintainer, dm_upload_allowed,
+          Distribution, Release, Component)
+        VALUES
+          ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16,
+          $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, '%s', '%s', '%s')"""\
+          % (table, src_cfg['distribution'], src_cfg['release'], comp)
+        cur.execute(query)
+        query = """PREPARE uploader_insert as INSERT INTO %s
+          (Source, Version, Distribution, Release, Component, Uploader, Name, Email) VALUES
+          ($1, $2, '%s', '%s', '%s', $3, $4, $5) """ % \
+        (utable, src_cfg['distribution'], src_cfg['release'], comp)
+        cur.execute(query)
 
-#	aux.print_debug("Reading file " + path)
-	# Copy content from gzipped file to temporary file, so that apt_pkg is
-	# used by debian_bundle
-	tmp = tempfile.NamedTemporaryFile()
-	file = gzip.open(path)
-	tmp.write(file.read())
-	file.close()
-	tmp.seek(0)
-#	aux.print_debug("Importing from " + path)
-	self.import_sources(open(tmp.name))
-	tmp.close()
+#        aux.print_debug("Reading file " + path)
+        # Copy content from gzipped file to temporary file, so that apt_pkg is
+        # used by debian_bundle
+        tmp = tempfile.NamedTemporaryFile()
+        file = gzip.open(path)
+        tmp.write(file.read())
+        file.close()
+        tmp.seek(0)
+#        aux.print_debug("Importing from " + path)
+        self.import_sources(open(tmp.name))
+        tmp.close()
       except IOError, (e, message):
-	print "Could not read packages from %s: %s" % (path, message)
+        print "Could not read packages from %s: %s" % (path, message)
       cur.execute("DEALLOCATE source_insert")
       cur.execute("DEALLOCATE uploader_insert")
 
+    cur.execute('ANALYZE %s' % table)
+    cur.execute('ANALYZE %s' % utable)
+
     self.print_warnings()
 
   def setup(self):
     if 'schema-dir' in self.config['general']:
       schema_dir = self.config['general']['schema-dir']
       if 'sources-schema' in self.my_config:
-	schema = schema_dir + '/' + self.my_config['sources-schema']
-	self.eval_sql_file(schema, self.my_config)
+        schema = schema_dir + '/' + self.my_config['sources-schema']
+        self.eval_sql_file(schema, self.my_config)
       else:
-	raise Exception("'packages-schema' not specified for source " + self.source)
+        raise Exception("'packages-schema' not specified for source " + self.source)
     else:
       raise Exception("'schema-dir' not specified")
 

Modified: udd/udd/testing_migrations_gatherer.py
===================================================================
--- udd/udd/testing_migrations_gatherer.py	2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/testing_migrations_gatherer.py	2009-07-23 14:05:39 UTC (rev 1531)
@@ -21,35 +21,36 @@
     self.assert_my_config('path')
 
   def run(self):
-      src_cfg = self.my_config
+    src_cfg = self.my_config
 
-      c = self.connection.cursor()
+    c = self.connection.cursor()
 
-      c.execute("DELETE FROM migrations")
+    c.execute("DELETE FROM migrations")
 
-      c.execute("PREPARE mig_insert AS INSERT INTO migrations (source, in_testing, testing_version, in_unstable, unstable_version, sync, sync_version, first_seen) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)")
+    c.execute("PREPARE mig_insert AS INSERT INTO migrations (source, in_testing, testing_version, in_unstable, unstable_version, sync, sync_version, first_seen) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)")
       
-      f = open(src_cfg['path'])
-      for line in f:
-	(package, in_testing, testing_version, in_unstable, unstable_version, sync, sync_version, first_seen) = line.split()
-	for field in ('in_testing', 'in_unstable', 'sync', 'first_seen'):
-	  is_null = False
-	  exec "is_null = %s == ZERO_DATE" % field
-	  if is_null:
-	    exec "%s = 'NULL'" % field
-	  else:
-	    exec "%s = quote(%s)" % (field, field)
+    f = open(src_cfg['path'])
+    for line in f:
+      (package, in_testing, testing_version, in_unstable, unstable_version, sync, sync_version, first_seen) = line.split()
+      for field in ('in_testing', 'in_unstable', 'sync', 'first_seen'):
+        is_null = False
+        exec "is_null = %s == ZERO_DATE" % field
+        if is_null:
+          exec "%s = 'NULL'" % field
+        else:
+          exec "%s = quote(%s)" % (field, field)
 
-	for field in ('package', 'testing_version', 'unstable_version', 'sync_version'):
-	  is_null = False
-	  exec "is_null = %s == '-'" % field
-	  if is_null:
-	    exec "%s = 'NULL'" % field
-	  else:
-	    exec "%s = quote(%s)" % (field, field)
-	  
-	c.execute("EXECUTE mig_insert(%s, %s, %s, %s, %s, %s, %s, %s)" \
-	    % (package, in_testing, testing_version, in_unstable, unstable_version, sync, sync_version, first_seen))
+      for field in ('package', 'testing_version', 'unstable_version', 'sync_version'):
+        is_null = False
+        exec "is_null = %s == '-'" % field
+        if is_null:
+          exec "%s = 'NULL'" % field
+        else:
+          exec "%s = quote(%s)" % (field, field)
+        
+      c.execute("EXECUTE mig_insert(%s, %s, %s, %s, %s, %s, %s, %s)" \
+         % (package, in_testing, testing_version, in_unstable, unstable_version, sync, sync_version, first_seen))
 
-      c.execute("DEALLOCATE mig_insert")
+    c.execute("DEALLOCATE mig_insert")
+    c.execute("ANALYZE migrations")
 

Modified: udd/udd/ubuntu_bugs_gatherer.py
===================================================================
--- udd/udd/ubuntu_bugs_gatherer.py	2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/ubuntu_bugs_gatherer.py	2009-07-23 14:05:39 UTC (rev 1531)
@@ -62,6 +62,11 @@
       except Empty:
         if httpq.qsize() == 0:
           ok = False
+    c.execute("analyze ubuntu_bugs_subscribers")
+    c.execute("analyze ubuntu_bugs_duplicates")
+    c.execute("analyze ubuntu_bugs_tags")
+    c.execute("analyze ubuntu_bugs_tasks")
+    c.execute("analyze ubuntu_bugs")
 
   def fetch_all_bugs(self):
     fh = urllib.urlopen('https://launchpad.net/ubuntu/+bugs-text')

Modified: udd/udd/upload_history_gatherer.py
===================================================================
--- udd/udd/upload_history_gatherer.py	2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/upload_history_gatherer.py	2009-07-23 14:05:39 UTC (rev 1531)
@@ -113,3 +113,6 @@
     cursor.executemany(query_archs, uploads_archs)
     cursor.executemany(query_closes, uploads_closes)
     cursor.execute("DEALLOCATE uh_insert")
+    cursor.execute("ANALYZE " + self.my_config['table'] + '_architecture')
+    cursor.execute("ANALYZE " + self.my_config['table'] + '_closes')
+    cursor.execute("ANALYZE " + self.my_config['table'])




More information about the Collab-qa-commits mailing list