[Collab-qa-commits] r1698 - in udd: . scripts sql udd

Andreas Tille tille at alioth.debian.org
Thu Feb 25 08:56:08 UTC 2010


Author: tille
Date: 2010-02-25 08:56:08 +0000 (Thu, 25 Feb 2010)
New Revision: 1698

Added:
   udd/config-i18n-apps.yaml
   udd/scripts/fetch_i18n-apps.sh
   udd/sql/i18n-apps.sql
   udd/udd/i18n_apps_gatherer.py
Log:
Inject information about translated applications in Debian, as collected by the I18N team, into UDD.  Please do not use this code for the moment: while it already does some useful things, it needs further discussion with the I18N people.


Added: udd/config-i18n-apps.yaml
===================================================================
--- udd/config-i18n-apps.yaml	                        (rev 0)
+++ udd/config-i18n-apps.yaml	2010-02-25 08:56:08 UTC (rev 1698)
@@ -0,0 +1,25 @@
+# Configuration for importing the I18N team's translation data into UDD.
+general:
+  dbname: udd
+  dbport: 5441
+  # Maps the source type used below to the Python module implementing it
+  types:
+    i18n-apps: module udd.i18n_apps_gatherer
+  timestamp-dir: /org/udd.debian.org/timestamps
+  lock-dir: /org/udd.debian.org/locks
+  archs:
+   [alpha, amd64, arm, armeb, armel, hppa, hurd-i386,
+    i386, i486, ia64, kfreebsd-amd64, kfreebsd-i386, mips,
+    mipsel, powerpc, ppc64, s390, sparc, all, any, lpia, m32r, s390x, sh3,
+    sh3eb, sh4, sh4eb, sh, knetbsd-i386, netbsd-alpha, sparc64,
+    netbsd-i386, hurd-powerpc, kfreebsd-powerpc, netbsd-powerpc, hurd-sparc,
+    kfreebsd-sparc, netbsd-sparc, darwin-i386, freebsd-i386, openbsd-i386, darwin-powerpc]
+
+# The gatherer insists on the keys path, files, table_apps and table_debconf.
+i18n-apps:
+   type: i18n-apps
+   # Script that downloads sid.gz and squeeze.gz into the directory given as path
+   update-command: /org/udd.debian.org/udd/scripts/fetch_i18n-apps.sh
+   path: /org/udd.debian.org/mirrors/i18n-apps
+   # The fetch script hard-codes this same base URL
+   mirror: http://i18n.debian.net/material/data
+   files: .*\.gz
+   # Space-separated list; the gatherer reads <path>/<release>.gz for each entry
+   releases: sid squeeze
+   # Table filled from the PO: fields
+   table_apps: i18n_apps
+   # Table emptied by the gatherer; nothing is inserted into it in this revision
+   table_debconf: po_debconf
+

Added: udd/scripts/fetch_i18n-apps.sh
===================================================================
--- udd/scripts/fetch_i18n-apps.sh	                        (rev 0)
+++ udd/scripts/fetch_i18n-apps.sh	2010-02-25 08:56:08 UTC (rev 1698)
@@ -0,0 +1,7 @@
+#!/bin/sh
+# Download the I18N team's translation data for the UDD i18n-apps gatherer.
+# The files are stored under the release names the gatherer is configured
+# with: unstable.gz becomes sid.gz and testing.gz becomes squeeze.gz.
+TARGETDIR=/org/udd.debian.org/mirrors/i18n-apps
+mkdir -p "$TARGETDIR"
+# ${TARGETDIR:?} makes the shell abort instead of expanding to "/*" should
+# the variable ever be empty.
+rm -rf "${TARGETDIR:?}"/*
+wget -q http://i18n.debian.net/material/data/unstable.gz -O "${TARGETDIR}/sid.gz"
+wget -q http://i18n.debian.net/material/data/testing.gz -O "${TARGETDIR}/squeeze.gz"
+


Property changes on: udd/scripts/fetch_i18n-apps.sh
___________________________________________________________________
Added: svn:executable
   + *

Added: udd/sql/i18n-apps.sql
===================================================================
--- udd/sql/i18n-apps.sql	                        (rev 0)
+++ udd/sql/i18n-apps.sql	2010-02-25 08:56:08 UTC (rev 1698)
@@ -0,0 +1,34 @@
+-- Applications containing po files.
+-- Filled by udd/i18n_apps_gatherer.py from the '!'-separated entries of the
+-- "PO:" field; per package and language only one po file is kept.
+CREATE TABLE i18n_apps (
+    package         text,
+    version         debversion,
+    release         text,
+    maintainer      text,
+    po_file         text,
+      -- *.pot files are ignored (the gatherer also skips *.templates)!
+    language        text,
+    ID              text,  -- third field of a PO entry; its meaning is still unclear
+    pkg_version_lang text, -- fourth field of a PO entry; its purpose is still unclear
+    last_translator text,
+    language_team   text,
+    PRIMARY KEY (package, version, release, language)
+);
+
+-- Packages containing debconf translation in po files.
+-- Intended for the "PODEBCONF:" field.  NOTE: the gatherer added in this
+-- revision only truncates and analyzes this table; it does not insert rows yet.
+CREATE TABLE po_debconf (
+    package         text,
+    version         debversion,
+    release         text,
+    maintainer      text,
+    po_file         text,
+      -- *.pot files are to be ignored!
+    language        text,
+    ID              text,  -- meaning of this field is still unclear
+    pkg_version_lang text, -- purpose of this field is still unclear
+    last_translator text,
+    language_team   text,
+    PRIMARY KEY (package, version, release, language)
+);
+

Added: udd/udd/i18n_apps_gatherer.py
===================================================================
--- udd/udd/i18n_apps_gatherer.py	                        (rev 0)
+++ udd/udd/i18n_apps_gatherer.py	2010-02-25 08:56:08 UTC (rev 1698)
@@ -0,0 +1,194 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+This script imports information about translated applications
+inside Debian packages.
+"""
+
+from aux import quote
+from gatherer import gatherer
+import re
+from debian_bundle import deb822
+from os import stat
+from sys import stderr, exit
+from filecmp import cmp
+import gzip
+# import bz2
+from psycopg2 import IntegrityError, InternalError
+
+debug=0
+
+# Matches a decimal numeric character reference such as "&#233;".  The
+# digits are captured so that the reference can be turned into a character.
+check_char_re = re.compile(r'&#([0-9]+);')
+
+def replace_special_char(string):
+  """Replace decimal character references ("&#233;") by the character itself.
+
+  A string without any reference is returned unchanged.  Otherwise the
+  result is a UTF-8 encoded byte string in which every reference has been
+  replaced; all other text, including ';' and '&#' sequences which are not
+  part of a reference, is kept as it is.  References whose number is not a
+  valid code point are left untouched.
+  """
+  if not check_char_re.search(string):
+    return string
+
+  try:
+    to_char, text_type = unichr, unicode      # Python 2
+  except NameError:
+    to_char, text_type = chr, str             # Python 3
+  # Byte strings are edited in place (they may already contain UTF-8 data),
+  # text strings are edited as text and encoded once at the end.
+  is_text = isinstance(string, text_type)
+
+  def expand(match):
+    try:
+      char = to_char(int(match.group(1)))
+    except (ValueError, OverflowError):
+      # number outside of the range of valid code points
+      return match.group(0)
+    if is_text:
+      return char
+    return char.encode('utf-8')
+
+  newstring = check_char_re.sub(expand, string)
+  if is_text:
+    return newstring.encode('utf-8')
+  return newstring
+
+def get_gatherer(connection, config, source):
+  """Build the i18n_apps_gatherer object for the given connection, config and source."""
+  new_gatherer = i18n_apps_gatherer(connection, config, source)
+  return new_gatherer
+
+class pkg_info():
+  """Data collected for one package of one release.
+
+  version and maintainer start out as empty strings, po_info and
+  debconfpo_info as empty dictionaries; the caller fills them in.
+  """
+
+  def __init__(self, package, release):
+    self.package, self.release = package, release
+    self.version = self.maintainer = ''
+    self.po_info, self.debconfpo_info = {}, {}
+
+  def __str__(self):
+    # package, maintainer and version on the first line, po_info dictionary on the second
+    shown = (self.package, self.maintainer, self.version, self.po_info)
+    return "Package %s: %s, %s\n%s" % shown
+
+class po_info():
+  """Parsed form of one '!'-separated po file entry.
+
+  The fields are, in this order: po file name, language, ID,
+  pkg_version_lang and optionally last translator and language team.
+  infofields holds the number of fields found.  It is 0 for entries which
+  have to be skipped: *.pot and *.templates files, entries with fewer than
+  four fields and entries with a language shorter than two characters.
+  Missing translator or team information is stored as the string 'NULL'.
+  """
+
+  def __init__(self, poline):
+    # Defaults first, so that skipped entries have every attribute as well
+    self.infofields       = 0
+    self.po_file          = ''
+    self.language         = ''
+    self.ID               = ''
+    self.pkg_version_lang = ''
+    self.last_translator  = 'NULL'
+    self.language_team    = 'NULL'
+
+    po = poline.strip().split('!')
+    # ignore .pot and .templates files
+    if po[0].endswith(('.pot', '.templates')):
+      return
+    # The first four fields are read unconditionally below
+    if len(po) < 4:
+      stderr.write("Incomplete po entry '%s' (%i fields).\n" % (poline.strip(), len(po)))
+      return
+
+    self.po_file          = po[0]
+    self.language         = po[1]
+    if len(self.language) < 2:
+      stderr.write("Invalid language '%s'. Po filename is %s.\n" % (self.language, self.po_file))
+      return
+    self.ID               = po[2]       # Need to ask Nicolas for the meaning of this
+    self.pkg_version_lang = po[3]       # Meaning is unclear
+
+    # sometimes last translator and language translation team are missing
+    if len(po) > 4:
+      self.last_translator = replace_special_char(po[4])
+    if len(po) > 5:
+      self.language_team = replace_special_char(po[5])
+
+    # Keep track of the number of information fields given for a po file.
+    # In case there is more than one po file for a language in a package
+    # the one containing more information is preferred.
+    self.infofields = len(po)
+
+  def __str__(self):
+    return "Po file %s (%s fields): %s\n%s" % \
+        (self.po_file, self.infofields, self.language, self.last_translator)
+
+  def __cmp__(self, other):
+    # Entries with more fields compare as greater
+    return self.infofields - other.infofields
+
+class i18n_apps_gatherer(gatherer):
+  """Gatherer importing translation information of packages into UDD.
+
+  For every configured release the file <path>/<release>.gz is parsed and
+  one row per package and language is inserted into the table named by
+  'table_apps'.  The table named by 'table_debconf' is emptied and analyzed
+  as well, but nothing is inserted into it yet.
+  """
+
+  def __init__(self, connection, config, source):
+    """Check the configuration and prepare the insert statement."""
+    gatherer.__init__(self, connection, config, source)
+    self.assert_my_config('path', 'files', 'table_apps', 'table_debconf')
+    my_config = self.my_config
+
+    # Package which is currently processed by run()
+    self.pkg = None
+
+    cur = self.cursor()
+    # create prepared statements here!
+    query = """PREPARE i18n_apps_insert
+                   (text, text, text, text, text, text, text, text, text, text)
+                AS INSERT INTO %s
+                   (package, version, release, maintainer, po_file, language,
+                    id, pkg_version_lang, last_translator, language_team)
+                    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)""" % (my_config['table_apps'])
+    cur.execute(query)
+
+  def _input_file(self, rel):
+    """Return the name of the input file of release rel."""
+    return self.my_config['path']+'/'+rel+'.gz'
+
+  def _check_input_files(self, releases):
+    """Exit with status 1 if the input file of a release is missing or empty."""
+    for rel in releases:
+      filename = self._input_file(rel)
+      try:
+        size = stat(filename).st_size
+      except OSError:
+        # stat() raises for a file which does not exist
+        size = 0
+      if size < 1:
+        stderr.write("File %s for release %s does not exist or is empty\n" % (filename, rel))
+        exit(1)
+
+  def _collect_po_info(self, po_field):
+    """Store the best po_info per language of a PO field in self.pkg.po_info."""
+    for poline in po_field.split("\n"):
+      # ignore first empty line
+      if len(poline) <= 1:
+        continue
+      poinfo = po_info(poline)
+      if poinfo.infofields == 0:
+        continue
+      # Sometimes there is more than one po file in a package.  We inject the file
+      # which contains better info about translator
+      # Attention: For the current application it is completely sufficient that we
+      #            keep the information *that* a package contains translation for
+      #            a certain language in UDD.  Other applications might need more
+      #            complete information.
+      if poinfo.language in self.pkg.po_info:
+        self.pkg.po_info[poinfo.language] = max(self.pkg.po_info[poinfo.language], poinfo)
+      else:
+        self.pkg.po_info[poinfo.language] = poinfo
+
+  def _insert_po_info(self, cur):
+    """Insert one row per language of self.pkg using the prepared statement."""
+    for poinfo in self.pkg.po_info.values():
+      query = "EXECUTE i18n_apps_insert (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" % \
+                (quote(self.pkg.package), quote(self.pkg.version), quote(self.pkg.release), \
+                 quote(self.pkg.maintainer), quote(poinfo.po_file), quote(poinfo.language), \
+                 quote(poinfo.ID), quote(poinfo.pkg_version_lang), \
+                 quote(poinfo.last_translator), quote(poinfo.language_team))
+
+      try:
+        cur.execute(query)
+      except IntegrityError, err:
+        print str(err).strip()
+        print poinfo.infofields, poinfo.po_file, poinfo.language, self.pkg
+      except InternalError, err:
+        print "InternalError:", err
+        print poinfo.infofields, poinfo.po_file, poinfo.language, self.pkg
+        print query
+        exit(-1)
+      except UnicodeEncodeError, err:
+        print err
+        print query
+
+  def _import_stanza(self, cur, stanza, rel):
+    """Import the PO information of one stanza of the file of release rel."""
+    # First entry is no real package but a date entry
+    if not stanza.has_key('Version'):
+      return
+    # Packages without any language information are irrelevant
+    if not stanza.has_key('PO') and not stanza.has_key('PODEBCONF'):
+      return
+    self.pkg             = pkg_info(stanza['Package'], rel)
+    self.pkg.version     = stanza['Version']
+    self.pkg.maintainer  = stanza['Maintainer']
+    # PODEBCONF information is not imported yet
+    if not stanza.has_key('PO'):
+      return
+    self._collect_po_info(stanza['PO'])
+    self._insert_po_info(cur)
+
+  def run(self):
+    """Empty both tables and import the input files of all releases."""
+    my_config = self.my_config
+    #start harassing the DB, preparing the final inserts and making place
+    #for the new data:
+    cur = self.cursor()
+
+    releases=my_config['releases'].split(' ')
+
+    # verify whether input files are properly downloaded before any
+    # existing data is removed
+    self._check_input_files(releases)
+
+    # Clean up tables
+    query = "TRUNCATE %s; TRUNCATE %s;" % ( my_config['table_apps'], my_config['table_debconf'])
+    cur.execute(query)
+
+    for rel in releases:
+      filename = self._input_file(rel)
+      g = gzip.GzipFile(filename)
+      try:
+        try:
+          for stanza in deb822.Sources.iter_paragraphs(g, shared_storage=False):
+            self._import_stanza(cur, stanza, rel)
+        except IOError, err:
+          # a file which can not be read does not stop the other releases
+          stderr.write("Error reading %s (%s)\n" % (filename, err))
+      finally:
+        g.close()
+
+    cur.execute("ANALYZE %s" % my_config['table_apps'])
+    cur.execute("ANALYZE %s" % my_config['table_debconf'])
+
+if __name__ == '__main__':
+  # This module defines no main(); the gatherer object is obtained through
+  # get_gatherer().  Say so instead of failing with a NameError.
+  stderr.write("i18n_apps_gatherer can not be run as a script, use get_gatherer() instead\n")
+  exit(1)
+
+# vim:set et tabstop=2:
+




More information about the Collab-qa-commits mailing list