r392 - /debtorrent/trunk/uniquely_projectb.py
camrdale-guest at users.alioth.debian.org
camrdale-guest at users.alioth.debian.org
Wed Jun 25 20:28:54 UTC 2008
Author: camrdale-guest
Date: Wed Jun 25 20:28:54 2008
New Revision: 392
URL: http://svn.debian.org/wsvn/debtorrent/?sc=1&rev=392
Log:
Add the new uniquely script that uses the projectb DB.
Added:
debtorrent/trunk/uniquely_projectb.py (with props)
Added: debtorrent/trunk/uniquely_projectb.py
URL: http://svn.debian.org/wsvn/debtorrent/debtorrent/trunk/uniquely_projectb.py?rev=392&op=file
==============================================================================
--- debtorrent/trunk/uniquely_projectb.py (added)
+++ debtorrent/trunk/uniquely_projectb.py Wed Jun 25 20:28:54 2008
@@ -1,0 +1,431 @@
+#!/usr/bin/env python
+
+"""Process a Release file, creating, finding and updating any torrent files."""
+
+import sha
+import sys
+import gzip
+import pgdb
+from bz2 import BZ2File
+from math import ceil
+from os import remove, rename, system
+from os.path import exists
+from time import strftime, gmtime
+from debian_bundle import deb822
+from tempfile import mkstemp
+
# The piece size to use (must match the '-extrapieces' file's piece size)
DEFAULT_PIECESIZE = 512*1024

# The extension of the Packages files to read
EXTENSION = ".gz"

# The fields to hash to determine the torrent identifier
# (can not contain Date, Infohash, NextPiece or OriginalPieces)
DEFAULT_HASH_FIELDS = ["Codename", "Suite", "Component", "Architecture",
                       "PieceSize", "OriginalDate"]

# The tracker announce URL to use
DEFAULT_TRACKER = "http://dttracker.debian.net:6969/announce"

# The order to write the headers in (headers not listed won't be written)
HEADER_ORDER = ["Torrent", "Infohash", "InfohashArchs", "OriginalDate", "Date",
                "PieceSize", "NextPiece", "OriginalPieces", "Codename", "Suite",
                "Component", "Architecture", "Tracker", "TorrentHashFields"]

# The maximum increase in the size of the torrent before it is reset
# (a torrent is recreated once NextPiece >= OriginalPieces * this factor)
MAX_SIZE_INCREASE = 2.0
+
class Torrent(deb822._multivalued):
    """Parser for unique piece number torrent files.

    Extends deb822's multivalued parser so that the 'PieceNumbers'
    field is read as a list of entries, each with a 'number' and a
    'file' sub-field.
    """

    _multivalued_fields = {
        "piecenumbers": ["number", "file"],
    }
+
def read_release(filename):
    """Read the headers and Packages file names from a Release file.

    @type filename: C{string}
    @param filename: the Release file to read
    @rtype: C{dictionary}, C{list} of C{string}
    @return: the headers and full file names of Packages files

    """

    # The Packages files are listed relative to the Release file's directory
    release_dir = filename.rsplit('/', 1)[0]
    headers = {}
    packages = []

    f = open(filename, 'r')
    try:
        rel = deb822.Release(f)
        for header in rel:
            if header.lower() not in ["md5sum", "sha1", "sha256"]:
                # Read the headers from the file
                headers[header] = rel[header]

        # Read the Packages file names from each checksum section
        # (the same file is usually listed in several sections, so
        # duplicates are skipped)
        for section in ('MD5Sum', 'SHA1', 'SHA256'):
            for entry in rel.get(section, []):
                if not entry['name'].endswith("Packages" + EXTENSION):
                    continue
                package = release_dir + "/" + entry['name']
                if package not in packages:
                    packages.append(package)
    finally:
        # Close the Release file even if parsing fails
        f.close()

    return headers, packages
+
def get_old(old_file):
    """Read the headers and piece ordering data from an old file.

    If the file is missing or can not be parsed, it is deleted and empty
    results are returned so that a new torrent will be created.

    @type old_file: C{string}
    @param old_file: the old piece ordering file to open
    @rtype: C{dictionary}, C{dictionary}
    @return: the old piece ordering (keys are the file names, values are the
        starting piece number) and headers

    """

    pieces = {}
    headers = {}

    try:
        f = gzip.open(old_file, 'r')
        try:
            tor = Torrent(f)
            for header in tor:
                if header.lower() != 'piecenumbers':
                    # Read the headers from the file
                    headers[header] = tor[header]

            # Read the piece ordering data from the file
            for piece in tor['PieceNumbers']:
                pieces[piece['file']] = int(piece['number'])
        finally:
            # Close the file even if parsing fails part-way through
            f.close()
    except Exception:
        # Corrupt or missing file: delete it and return empty variables to
        # create a new torrent.  (The previous bare 'except:' also swallowed
        # KeyboardInterrupt/SystemExit, and returned partially-read data.)
        if exists(old_file):
            remove(old_file)
        pieces = {}
        headers = {}

    return pieces, headers
+
def update_headers(headers, release_headers, component, arch):
    """Update the headers with new fields from the Release file.

    The torrent identifier is the SHA-1 of the 'TorrentHashFields' header
    values.  If the identifier changed, or the torrent has grown past
    MAX_SIZE_INCREASE times its original size, the torrent is reset.

    @type headers: C{dictionary}
    @param headers: the headers from the piece ordering file (modified)
    @type release_headers: C{dictionary}
    @param release_headers: the headers from the Release file
    @type component: C{string}
    @param component: the component name (e.g. main, contrib, non-free)
    @type arch: C{string}
    @param arch: the architecture name (e.g. i386, amd64, all)
    @rtype: C{boolean}
    @return: whether a new torrent has been created

    """

    # Set any required Release headers
    if len(release_headers.get("Date", "")) == 0:
        # Use today's date
        release_headers["Date"] = strftime('%a, %d %b %Y %H:%M:%S +0000', gmtime())

    # Create/update the headers
    headers.setdefault("OriginalDate", release_headers["Date"])
    headers["Date"] = release_headers["Date"]
    headers.setdefault("PieceSize", str(DEFAULT_PIECESIZE))
    headers.setdefault("NextPiece", str(0))
    headers["Codename"] = release_headers.get("Codename", "")
    headers["Suite"] = release_headers.get("Suite", "")
    headers["Component"] = component
    headers["Architecture"] = arch
    headers.setdefault("Tracker", DEFAULT_TRACKER)
    headers.setdefault("TorrentHashFields", " ".join(DEFAULT_HASH_FIELDS))

    def hash_fields():
        # Hash the identifying header fields to get the torrent identifier
        # (factored out: the same computation was previously duplicated)
        sha1 = sha.new()
        for header in headers["TorrentHashFields"].split():
            sha1.update(headers[header])
        return sha1.hexdigest()

    # Check if the hash has changed or the torrent is too big
    if (headers.get("Torrent", "") == hash_fields() and
        int(headers.get("NextPiece")) <
        int(headers.get("OriginalPieces", "0"))*MAX_SIZE_INCREASE):
        return False

    # If it has, then reset the torrent to create a new one
    # (OriginalDate changes, so the identifier must be recomputed)
    headers["OriginalDate"] = release_headers["Date"]
    headers["NextPiece"] = str(0)
    headers.pop("OriginalPieces", "")
    headers["Torrent"] = hash_fields()

    return True
+
def get_new(db, suite, codename, component, arch, old_files):
    """Read the new piece data from the projectb database.

    Queries the database for the .deb files in the suite/component/arch,
    finding old files and copying their data to the new ordering, and
    collecting any new files found for later processing.  The old_files
    input is modified by removing the found files from it.

    @type db: C{DB-APIv2 connection}
    @param db: an open connection to the projectb database
    @type suite: C{string}
    @param suite: the suite name (e.g. testing, unstable)
    @type codename: C{string}
    @param codename: the codename of the suite (e.g. sid, lenny) (unused)
    @type component: C{string}
    @param component: the component name (e.g. main, contrib, non-free)
    @type arch: C{string}
    @param arch: the architecture name (e.g. i386, amd64, all)
    @type old_files: C{dictionary}
    @param old_files: the original piece ordering, keys are the file names,
        values are the starting piece number
    @rtype: C{dictionary}, C{list} of (C{string}, C{long})
    @return: the new piece ordering (keys are the starting piece numbers,
        values are the file names), and the newly found files with their sizes

    """

    c = db.cursor()
    # Use DB-API parameter binding instead of string concatenation to avoid
    # SQL injection and quoting problems (pgdb's paramstyle is 'pyformat')
    c.execute("select location.path, files.filename, files.size "
              "from binaries join files on binaries.file = files.id "
              "join location on files.location = location.id "
              "join architecture on binaries.architecture = architecture.id "
              "join bin_associations on binaries.id = bin_associations.bin "
              "join suite on bin_associations.suite = suite.id "
              "join component on location.component = component.id "
              "where suite_name = %(suite)s and component.name = %(component)s "
              "and arch_string = %(arch)s "
              "order by location.path, files.filename",
              {'suite': suite, 'component': component, 'arch': arch})

    pieces = {}
    new_pieces = []

    while True:
        res = c.fetchone()
        if not res:
            break
        # NOTE(review): assumes location.path ends with '/' so the last path
        # component joins cleanly with the file name -- confirm against DB
        filename = '/'.join(res[0].split('/')[-2:]) + res[1]
        size = res[2]
        if not filename.endswith('.deb'):
            continue
        # Check which torrent to add the info to
        if filename in old_files:
            # Found old file, so keep its existing piece number
            pieces[old_files[filename]] = filename
            del old_files[filename]
        else:
            # Found new file, save it for later processing
            new_pieces.append((filename, long(size)))

    c.close()

    return pieces, new_pieces
+
def add_new(pieces, new_pieces, headers):
    """Add the newly found files to the end of the piece ordering.

    Adds new files to the end of the piece ordering. The 'pieces' input is
    modified by having the new pieces added to it. The 'new_pieces' input
    list is sorted. The 'NextPiece' header in the input 'headers' is updated.

    @type pieces: C{dictionary}
    @param pieces: the current piece ordering, keys are the starting piece
        numbers, values are the file names
    @type new_pieces: C{list} of (C{string}, C{long})
    @param new_pieces: the file name and file size of the new files that have
        been found and are to be added to the piece ordering
    @type headers: C{dictionary}
    @param headers: the headers from the piece ordering file

    """

    # Get the needed header information
    next_piece = int(headers["NextPiece"])
    piece_size = int(headers["PieceSize"])

    # Sort by name so duplicate entries become adjacent and the resulting
    # piece ordering is deterministic
    new_pieces.sort()
    previous_file = ""
    previous_size = 0
    for (new_file, size) in new_pieces:
        if new_file == previous_file:
            # Duplicate entry: only a problem if the sizes disagree
            if size != previous_size:
                print("WARNING: multiple files with different size: " + new_file)
        else:
            pieces[next_piece] = new_file
            # Number of pieces the file occupies, rounding up
            next_piece += int(ceil(size/float(piece_size)))

        previous_file = new_file
        previous_size = size

    # Set the final header values
    headers["NextPiece"] = str(next_piece)
    headers.setdefault("OriginalPieces", headers["NextPiece"])
+
def write_file(filename, pieces, headers):
    """Print the new data to the file.

    The data is first written to a '.new' file, which is then renamed over
    the target so readers never see a half-written file.

    @type filename: C{string}
    @param filename: the file to write to
    @type pieces: C{dictionary}
    @param pieces: the current piece ordering, keys are the starting piece
        numbers, values are the file names
    @type headers: C{dictionary}
    @param headers: the headers from the piece ordering file

    """

    f = gzip.open(filename + '.new', 'w')
    try:
        # Write the headers in canonical order (unlisted headers are dropped)
        for header in HEADER_ORDER:
            if header in headers:
                f.write("%s: %s\n" % (header, headers[header]))
        f.write("PieceNumbers:\n")

        # Write the starting piece numbers, right-aligned to the width of
        # the largest piece number.  Guard against an empty ordering, which
        # previously crashed on max() of an empty sequence.
        if pieces:
            format_string = " %" + str(len(str(max(pieces)))) + "d %s\n"
            for p in sorted(pieces):
                f.write(format_string % (p, pieces[p]))
    finally:
        # Close the handle even if a write fails
        f.close()

    rename(filename + '.new', filename)
+
def run(db, releasefile):
    """Process a single Release file.

    For each component in the Release file, creates or updates one piece
    ordering file for the pseudo-architecture 'all' and one per real
    architecture, using the current contents of the projectb database.

    @type db: C{DB-APIv2 connection}
    @param db: an open connection to the projectb database
    @type releasefile: C{string}
    @param releasefile: the Release file to process

    """

    # Process the Release file
    print "Processing: %s" % releasefile
    release_headers, packages = read_release(releasefile)

    suite = release_headers['Suite']
    codename = release_headers["Codename"]
    # Piece ordering files are named:
    #   dists_<codename>_<component>_binary-<arch>_Packages-torrent.gz
    torrent_prefix = "dists_" + codename + "_"
    torrent_suffix = "_Packages-torrent.gz"

    for component in release_headers["Components"].split():
        # Get the old 'all' data
        all_file = torrent_prefix + component + "_binary-all" + torrent_suffix
        print all_file + ": reading ...",
        sys.stdout.flush()
        old_all_pieces, all_headers = get_old(all_file)

        # First update the 'all' headers
        if update_headers(all_headers, release_headers, component, "all"):
            # If it has, then reset the torrent
            print "new torrent created ...",
            sys.stdout.flush()
            old_all_pieces = {}

        # Parse the database for the new data
        print "updating ...",
        sys.stdout.flush()
        all_pieces, all_new_pieces = get_new(db, suite, codename, component, 'all',
                                             old_all_pieces)

        # Add the old removed pieces so out-of-date mirrors will work too
        for file in old_all_pieces:
            all_pieces[old_all_pieces[file]] = file

        # If there were 'all' files found
        if all_pieces or all_new_pieces:
            # Process the new 'all' files found
            add_new(all_pieces, all_new_pieces, all_headers)

            # Write the all_headers
            print "writing ...",
            sys.stdout.flush()
            write_file(all_file, all_pieces, all_headers)
        else:
            # No 'all' packages in this component: remove any stale file
            print "empty ...",
            if exists(all_file):
                remove(all_file)

        print "done."

        for arch in release_headers["Architectures"].split():
            torrent_file = torrent_prefix + component + "_binary-" + arch + torrent_suffix

            # Find the Packages file that will be parsed
            found = False
            for filename in packages:
                if (filename.find(component) >= 0 and
                    filename.find("binary-"+arch) >= 0):
                    found = True
                    break
            if not found:
                # No Packages file: remove any stale torrent and move on
                print "WARNING: no matching Packages file for component %s, arch %s" % (component, arch)
                if exists(torrent_file):
                    remove(torrent_file)
                continue
            # Each Packages file is consumed at most once; leftovers are
            # reported at the end
            packages.pop(packages.index(filename))

            # Get the old data for this torrent, if any existed
            print torrent_file + ": reading ...",
            sys.stdout.flush()
            old_pieces, headers = get_old(torrent_file)

            # Update the headers from the Release file ones
            if update_headers(headers, release_headers, component, arch):
                print "new torrent created ...",
                sys.stdout.flush()
                old_pieces = {}

            # Parse the database for the new data
            print "updating ...",
            sys.stdout.flush()
            pieces, new_pieces = get_new(db, suite, codename, component, arch,
                                         old_pieces)

            # Add the old removed pieces so out-of-date mirrors will work too
            for file in old_pieces:
                pieces[old_pieces[file]] = file

            if pieces or new_pieces:
                # Add any new pieces to the end of pieces
                add_new(pieces, new_pieces, headers)

                # Write the headers
                print "writing ...",
                sys.stdout.flush()
                write_file(torrent_file, pieces, headers)
            else:
                print "empty ...",
                if exists(torrent_file):
                    remove(torrent_file)

            print "done."

    if packages:
        print "The following packages files were not used:"
        for package in packages:
            print " %s" % package
+
+if __name__ == '__main__':
+ if len(sys.argv) >= 2:
+ db = pgdb.connect(database = 'projectb')
+ for file in sys.argv[1:]:
+ run(db, file)
+ db.close()
+ else:
+ print "Usage: " + sys.argv[0] + " Releasefile [Releasefile ...]"
Propchange: debtorrent/trunk/uniquely_projectb.py
------------------------------------------------------------------------------
svn:executable = *
More information about the Debtorrent-commits
mailing list