[med-svn] r2352 - trunk/community/talks/200808_debconf8

tille at alioth.debian.org tille at alioth.debian.org
Mon Jul 28 12:35:25 UTC 2008


Author: tille
Date: 2008-07-28 12:35:23 +0000 (Mon, 28 Jul 2008)
New Revision: 2352

Modified:
   trunk/community/talks/200808_debconf8/archives.sql
   trunk/community/talks/200808_debconf8/get-archive-pages
   trunk/community/talks/200808_debconf8/liststat
Log:
Use some SQL functions to simplify queries about list statistics


Modified: trunk/community/talks/200808_debconf8/archives.sql
===================================================================
--- trunk/community/talks/200808_debconf8/archives.sql	2008-07-28 12:31:31 UTC (rev 2351)
+++ trunk/community/talks/200808_debconf8/archives.sql	2008-07-28 12:35:23 UTC (rev 2352)
@@ -15,5 +15,135 @@
    ts        date
 );
 
+CREATE LANGUAGE plpgsql ;
+
+/*
+ * Build a query string for several purposes
+ *   ARG1: Query to obtain wanted columns
+ *   ARG2: Feature that is queried
+ * See below how this helper is used.
+ */
+CREATE OR REPLACE FUNCTION BuildQueryCDDsHelper(text, text)
+    RETURNS text AS '
+    DECLARE
+       IterQuery  ALIAS FOR \$1 ;
+       Feature    ALIAS FOR \$2 ;
+       ret        text ;
+       union      text ;
+       query1     text ;
+       query2     text ;
+       r1         RECORD ;
+       r2         RECORD ;
+       ri         RECORD ;
+       qi         RECORD ;
+    BEGIN
+
+    query1 := IterQuery ;
+    query2 := query1;
+    union  := '''' ;
+    ret    := ''SELECT yearmonth'' ;
+
+    FOR r1 IN EXECUTE query1 LOOP
+    	ret := ret || '', CAST(SUM("'' || r1.feature || ''") AS int) AS "'' || r1.feature || ''"'' ;
+    END LOOP;
+
+    ret := ret || ''
+  FROM (
+'' ;
+
+    FOR r1 IN EXECUTE query1 LOOP
+       ret   := ret || union || ''    SELECT yearmonth'' ;
+       union := ''
+    UNION
+'';
+       FOR r2 IN EXECUTE query2 LOOP
+       	   IF r1.feature = r2.feature THEN
+	      ret := ret || '', COUNT(*)'' ;
+	   ELSE
+	      ret := ret || '', 0'' ;
+	   END IF;
+	   ret := ret || '' AS "'' || r2.feature || ''"'';
+       END LOOP ;
+       ret := ret || ''
+       FROM listarchive 
+       WHERE '' || Feature || '' = '''''' || r1.feature || '''''' GROUP BY yearmonth'';
+    END LOOP ;
+
+    ret := ret || ''
+  ) zw
+  GROUP BY yearmonth
+  ORDER BY yearmonth;'' ;
+
+    RETURN ret;
+  END; ' LANGUAGE 'plpgsql';
+
+/*
+ * This query returns stats about all mailing lists
+ */
+
+CREATE OR REPLACE FUNCTION BuildQueryCDDs()
+    RETURNS text AS '
+    DECLARE
+       ret        text ;
+
+    BEGIN
+
+    ret := BuildQueryCDDsHelper(
+               ''SELECT project AS feature, COUNT(*) AS num FROM listarchive GROUP BY project ORDER BY num DESC;'',
+               ''project'') ;
+    return ret ;
+  END; ' LANGUAGE 'plpgsql';
+
+/*
+ * This query returns stats about the ARG2 most active authors in a specific
+ * Mailing list (ARG1)
+ */
+
+CREATE OR REPLACE FUNCTION BuildQueryAuthors(text, int)
+    RETURNS text AS '
+    DECLARE
+       Project    ALIAS FOR \$1 ;
+       NumAuthors ALIAS FOR \$2 ;
+       ret        text ;
+
+    BEGIN
+
+    ret := BuildQueryCDDsHelper(
+               ''SELECT author AS feature, COUNT(*) AS num FROM listarchive
+                 WHERE project = '''''' || Project || '''''' AND author IN (
+      SELECT author FROM (SELECT author, count(*) as anz From listarchive where project = '''''' || Project || ''''''
+           GROUP BY author ORDER BY anz DESC LIMIT '' || NumAuthors || '') AS zw)
+   GROUP BY author ORDER BY num DESC;'',
+               ''author'') ;
+    return ret ;
+  END; ' LANGUAGE 'plpgsql';
+
+
+CREATE OR REPLACE FUNCTION CompareCDDs()
+    RETURNS SETOF RECORD AS '
+    DECLARE
+       ret        text ;
+       query      text ;
+       r          RECORD ;
+
+    BEGIN
+
+    SELECT INTO query BuildQueryCDDs() ;
+
+    FOR r IN EXECUTE query LOOP
+        RETURN NEXT r;
+    END LOOP;
+
+  END; ' LANGUAGE 'plpgsql';
+
+/*
+ * You might call this as, but there is less chance to get column names right.
+ *  SELECT * FROM CompareCDDs() AS
+ *   ( yearmonth date, cdd1 int, cdd2 int, cdd3 int, cdd4 int, cdd5 int, cdd6 int, cdd7 int, cdd8 int );
+ *
+ * That's why we use the shell script wrappers ...
+ */
+
 COMMIT;
 EOT
+

Modified: trunk/community/talks/200808_debconf8/get-archive-pages
===================================================================
--- trunk/community/talks/200808_debconf8/get-archive-pages	2008-07-28 12:31:31 UTC (rev 2351)
+++ trunk/community/talks/200808_debconf8/get-archive-pages	2008-07-28 12:35:23 UTC (rev 2352)
@@ -102,11 +102,11 @@
 			    $_ =~ s/^\[[^\]]+\]\s*([^\s]+)/$1/ ; # Remove other list markers (but only if something is following)
 			    $_ =~ s/\s*\(fwd\)\s*//i ; # Remove (fwd)
 			    $subject = $_ ;
-			    if ( $subject =~ /^[&#x\d;\sA-F\?]+$/ ) {
+			    if ( $subject =~ /^[-&#x\d;\sA-F\?:,]+$/ ) {
 				print "Potential SPAM line - strange subject: $project $year-$month: $subject\n";
 				$spamlines++ ;
 			    } else {
-				if ( $author =~ /^[&#x\d;\sA-F\?]+$/ ||
+				if ( $author =~ /^[-&#x\d;\sA-F\?:,]+$/ ||
                                      $author =~ /info/i ) { # never had a non-spam message from an author whos name contains info
 				    print "Potential SPAM line - strange author: $project $year-$month: $author\n";
 				    $spamlines++ ;

Modified: trunk/community/talks/200808_debconf8/liststat
===================================================================
--- trunk/community/talks/200808_debconf8/liststat	2008-07-28 12:31:31 UTC (rev 2351)
+++ trunk/community/talks/200808_debconf8/liststat	2008-07-28 12:35:23 UTC (rev 2352)
@@ -1,55 +1,8 @@
-#!/bin/sh
+#!/bin/sh -x
 DATFILE="liststat.dat"
-psql cddlistarchives >"$DATFILE"  <<EOT
+psql -t cddlistarchives -c 'SELECT BuildQueryCDDs() ;' | \
+   psql cddlistarchives \
+   >"$DATFILE"
 
-SELECT EXTRACT (year FROM yearmonth) || '-' ||
-       CASE WHEN EXTRACT (month FROM yearmonth) < 10 THEN '0' ELSE '' END || EXTRACT (month FROM yearmonth) AS "YearMonth",
-       SUM(med) AS med, SUM(edu) AS edu, SUM(jr) AS jr, SUM(nonprofit) AS nonprofit, SUM(lex) AS lex,
-       SUM(enterprise) AS enterprise, SUM(accessibility) AS accessibility, SUM(desktop) AS desktop
-  FROM (
-    SELECT yearmonth, COUNT(*) AS med, 0        AS edu, 0        AS jr, 0        AS nonprofit, 0        AS lex,
-                      0        AS enterprise, 0        AS accessibility, 0        AS desktop
-       FROM listarchive 
-       WHERE project = 'med' GROUP BY yearmonth
-    UNION
-    SELECT yearmonth, 0        AS med, COUNT(*) AS edu, 0        AS jr, 0        AS nonprofit, 0        AS lex,
-                      0        AS enterprise, 0        AS accessibility, 0        AS desktop
-       FROM listarchive 
-       WHERE project = 'edu' GROUP BY yearmonth
-    UNION
-    SELECT yearmonth, 0        AS med, 0        AS edu, COUNT(*) AS jr, 0        AS nonprofit, 0        AS lex,
-                      0        AS enterprise, 0        AS accessibility, 0        AS desktop
-       FROM listarchive 
-       WHERE project = 'jr' GROUP BY yearmonth
-    UNION
-    SELECT yearmonth, 0        AS med, 0        AS edu, 0        AS jr, COUNT(*) AS nonprofit, 0        AS lex,
-                      0        AS enterprise, 0        AS accessibility, 0        AS desktop
-       FROM listarchive 
-       WHERE project = 'nonprofit' GROUP BY yearmonth
-    UNION
-    SELECT yearmonth, 0        AS med, 0        AS edu, 0        AS jr, 0        AS nonprofit, COUNT(*) AS lex,
-                      0        AS enterprise, 0        AS accessibility, 0        AS desktop
-       FROM listarchive 
-       WHERE project = 'lex' GROUP BY yearmonth
-    UNION
-    SELECT yearmonth, 0        AS med, 0        AS edu, 0        AS jr, 0        AS nonprofit, 0        AS lex,
-                      COUNT(*) AS enterprise, 0        AS accessibility, 0        AS desktop
-       FROM listarchive 
-       WHERE project = 'jr' GROUP BY yearmonth
-    UNION
-    SELECT yearmonth, 0        AS med, 0        AS edu, 0        AS jr, 0        AS nonprofit, 0        AS lex,
-                      0        AS enterprise, COUNT(*) AS accessibility, 0        AS desktop
-       FROM listarchive 
-       WHERE project = 'nonprofit' GROUP BY yearmonth
-    UNION
-    SELECT yearmonth, 0        AS med, 0        AS edu, 0        AS jr, 0        AS nonprofit, 0        AS lex,
-                      0        AS enterprise, 0        AS accessibility, COUNT(*) AS desktop
-       FROM listarchive 
-       WHERE project = 'lex' GROUP BY yearmonth
-  ) zw
-  GROUP BY yearmonth
-  ORDER BY yearmonth;
-
-EOT
-
 sed -i -e '/^[-+]\+$/d' -e '/^([0-9]\+ [A-Za-z]\+)$/d' -e 's/[[:space:]]*|[[:space:]]*/\t/g' "$DATFILE"
+




More information about the debian-med-commit mailing list