Bug#325090: Last improvement and cleanup

Daniel 'NebuchadnezzaR' Dehennin nebuchadnezzar at asgardr.info
Fri Aug 26 17:28:22 UTC 2005


Hello,

Now it seems to be ready for production, I remove the __readline and
add a getline to Compress::Zlib and Compress:Bzip2, it's more clean
now.

I put the my out of the while loop (my is expansive).

I post to rt.cpan.org now
(https://rt.cpan.org/NoAuth/Bug.html?id=14329)

Thanks.
-- 
Daniel 'NebuchadnezzaR' Dehennin
Récupérer ma clef GPG:
gpg --keyserver pgp.mit.edu --recv-keys 0x2A408F69
-------------- next part --------------
--- Packages.pm.old	2005-08-26 02:58:55.000000000 +0200
+++ Packages.pm	2005-08-26 19:02:21.000000000 +0200
@@ -1,25 +1,70 @@
 use strict;
 package Parse::Debian::Packages;
-our $VERSION = '0.01';
+our $VERSION = "0.02";
+
+use File::MMagic;
+use FileHandle;
+use Compress::Zlib;
+use Compress::Bzip2;
 
 sub new {
     my $class = shift;
-    my $fh = shift;
+    my $file = shift;
+    my $fh;
 
-    return bless { fh => $fh }, $class;
+    if (! ref $file) {
+	# Caller give us a filename
+	return undef unless -f $file;
+
+	# Default magic is ok for application/x-gzip application/x-bzip2 and text/plain
+	my $magic = File::MMagic->new();
+	my $type = $magic->checktype_filename($file);
+
+      SWITCH: for ($type) {
+	  /text\/plain/ && do {
+	      $fh = new FileHandle;
+	      $fh->open("< $file") or return undef;
+	      last;
+	  };
+	  
+	  /application\/x-gzip/ && do {
+	      $fh = gzopen ($file, "rb") or return undef;
+	      last;
+	  };
+	  
+	  /application\/x-bzip2/ && do {
+	      $fh = bzopen ($file, "rb") or return undef;
+	      last;
+	  };
+	  # It's not a supported file format
+	  return undef;
+      }
+    	return bless { FH => $fh, FIELDS_IGNORED => {}}, $class;
+    } else {
+    	return bless { FH => $file, FIELDS_IGNORED => {}}, $class;
+    }
 }
 
 sub next {
     my $self = shift;
-    my $fh   = $self->{fh};
 
     my %parsed;
-    while (<$fh>) {
+    my ($key, $value);
+    my ($md5, $size, $filename);
+
+    while ($_ = $self->{FH}->getline) {
         last if /^$/;
-        if (my ($key, $value) = m/^(.*): (.*)/) {
-            $parsed{$key} = $value;
-        }
-        else {
+
+        if (($key, $value) = m/^([^\s:]*):\s?(.*)/) {
+	    # Do not add an empty Files key when parsing Sources
+            $parsed{$key} = $value unless $key eq "Files"
+		or exists $self->{FIELDS_IGNORED}->{$key};
+
+        } elsif (!exists $self->{FIELDS_IGNORED}->{Files}
+		 and ($md5, $size, $filename) = /^\s(\w{32})\s(\d+)\s(.*)/) {
+	    $parsed{Files}{$filename} = { size => $size, MD5sum => $md5 };
+	} elsif (! /^\s\w{32}\s\d+\s.*/
+		and !exists $self->{FIELDS_IGNORED}->{body}) { # Do not include Sources Files as body
             s/ //;
             s/^\.$//;
             $parsed{body} .= $_;
@@ -29,7 +74,31 @@
     return %parsed;
 }
 
-1;
+sub ignore_fileds {
+    my $self = shift;
+     if (@_) {
+	 return $self->{FIELDS_IGNORED} = {} if (@_[0] eq "reset");
+
+ 	 return map { $self->{FIELDS_IGNORED}->{$_} = 1
+			  unless exists $self->{FIELDS_IGNORED}->{$_} } @_;
+     } else {
+	 return sort keys %{$self->{FIELDS_IGNORED}};
+     }
+}
+
+## getline like with FileHandle
+sub Compress::Zlib::getline {
+    my ($self, $line) = shift;
+    $self->gzreadline($line) ? return $line : return '';
+}
+
+sub Compress::Bzip2::getline {
+    my ($self, $line) = shift;
+    $self->bzreadline($line) ? return $line : return '';
+}
+
+
+1
 
 
 =head1 NAME
@@ -40,24 +109,58 @@
 
  use YAML;
  use IO::File;
+ use FileHandle;
  use Parse::Debian::Packages;
- my $fh = IO::File->new("Packages");
 
- my $parser = Parse::Debian::Packages->new( $fh );
- while (my %package = $parser->next) {
+ my $pkg_file = "Packages";
+ my $src_file = "Sources";
+ my $other_src_file = "Sources.bz2";
+
+ my $fh_io = IO::File->new($pkg_file);
+ my $fh_FH = new FileHandle;
+ $fh_FH->open("< $src_file");
+
+ my $parser_on_io = Parse::Debian::Packages->new( $fh_io );
+ my $parser_on_FH = Parse::Debian::Packages->new( $fh_FH );
+ my $parser_on_filename = Parse::Debian::Packages->new( $other_src_file );
+
+ parser_on_io->ignore_fileds("Description", "body");
+ parser_on_FH->ignore_fileds("Build-Depends", "Files");
+
+ my %pkg_with_io = $parser_on_io->next;
+ my %pkg_with_FH = $parser_on_FH->next;
+ my %pkg_with_filename = $parser_on_filename->next;
+
+ print Dump \%pkg_with_io;
+ print Dump \%pkg_with_FH;
+ print Dump \%pkg_with_filename;
+
+ while (my %package = $parser_on_io->next) {
      print Dump \%package;
  }
 
 =head1 DESCRIPTION
 
-This module parses the Packages files used by the debian package
-management tools.
+This module parses the Packages and Sources files used by the debian
+package management tools.
 
 It presents itself as an iterator.  Each call of the ->next method
 will return the next package found in the file.
 
-For laziness, we take a filehandle in to the constructor.  Please open
-the file for us.
+You can pass a FileHandle to the constructor or a filename, the
+advantage of the filename is that you can parse plain/text, gziped or
+bziped files.
+
+If the filename passed to the constructor doesn't repressent a file in
+supported format (text/plain, application/x-gzip, application/x-bzip2)
+or if that file can not be open, new() return undef.
+
+You can ignore some fileds with ignore_fileds() method, it take a list
+of filed names you find in Packages or Sources files, with two special
+filed:
+    - body which correspond to the long description of Packages,
+    - reset to reset the list of ignored fileds, this one must be the
+    first argument of ignore_fileds() call.
 
 =head1 AUTHOR
 


More information about the pkg-perl-maintainers mailing list