r30562 - in /branches/upstream/perlindex/current: ChangeLog META.yml Makefile.PL perlindex.PL t/basic.t

dmn at users.alioth.debian.org dmn at users.alioth.debian.org
Tue Feb 10 14:01:36 UTC 2009


Author: dmn
Date: Tue Feb 10 14:01:22 2009
New Revision: 30562

URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=30562
Log:
[svn-upgrade] Integrating new upstream version, perlindex (1.605)

Modified:
    branches/upstream/perlindex/current/ChangeLog
    branches/upstream/perlindex/current/META.yml
    branches/upstream/perlindex/current/Makefile.PL
    branches/upstream/perlindex/current/perlindex.PL
    branches/upstream/perlindex/current/t/basic.t

Modified: branches/upstream/perlindex/current/ChangeLog
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/perlindex/current/ChangeLog?rev=30562&op=diff
==============================================================================
--- branches/upstream/perlindex/current/ChangeLog (original)
+++ branches/upstream/perlindex/current/ChangeLog Tue Feb 10 14:01:22 2009
@@ -1,3 +1,27 @@
+2008-11-17  Ulrich Pfeifer  <Ulrich.Pfeifer at VerizonBusiness.com>
+
+	* perlindex.PL: applied typo patch from Slaven Rezic
+	(http://rt.cpan.org/Ticket/Display.html?id=40890)
+
+2008-10-19  Ulrich Pfeifer  <Ulrich.Pfeifer at VerizonBusiness.com>
+
+	* perlindex.PL: Garbage collect can not change the index while
+	scanning it.  Doing a copy now.
+
+	* perlindex.PL: Fixed $gc_required scoping error.  Removed code
+	duplication. Added progress indication for GC collect. 
+
+	* perlindex.PL: Fixed the indexing of the default directories
+	(code duplication needs to be removed) and added checking for
+	removed files (http://rt.cpan.org/Ticket/Display.html?id=39863).
+
+2008-10-19  Ulrich Pfeifer  <pfeifer at wait.de>
+
+	* perlindex.PL: Added support for updating documents as requested
+	by SREZIC in http://rt.cpan.org/Ticket/Display.html?id=39862.
+	Update is transparent - except for the time used in the garbage
+	collect phase.
+
 2006-07-02  Ulrich Pfeifer  <upf at de.uu.net>
 
 	* perlindex.PL (index): moving the check for Pod::Text in the

Modified: branches/upstream/perlindex/current/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/perlindex/current/META.yml?rev=30562&op=diff
==============================================================================
--- branches/upstream/perlindex/current/META.yml (original)
+++ branches/upstream/perlindex/current/META.yml Tue Feb 10 14:01:22 2009
@@ -1,10 +1,12 @@
-# http://module-build.sourceforge.net/META-spec.html
-#XXXXXXX This is a prototype!!!  It will change in the future!!! XXXXX#
-name:         perlindex
-version:      1.502
-version_from: 
-installdirs:  site
-requires:
-
-distribution_type: module
-generated_by: ExtUtils::MakeMaker version 6.17
+--- #YAML:1.0
+name:                perlindex
+version:             1.605
+abstract:            ~
+license:             ~
+author:              ~
+generated_by:        ExtUtils::MakeMaker version 6.42
+distribution_type:   module
+requires:     
+meta-spec:
+    url:     http://module-build.sourceforge.net/META-spec-v1.3.html
+    version: 1.3

Modified: branches/upstream/perlindex/current/Makefile.PL
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/perlindex/current/Makefile.PL?rev=30562&op=diff
==============================================================================
--- branches/upstream/perlindex/current/Makefile.PL (original)
+++ branches/upstream/perlindex/current/Makefile.PL Tue Feb 10 14:01:22 2009
@@ -2,15 +2,15 @@
 #                              -*- Mode: Perl -*- 
 # Author          : Ulrich Pfeifer
 # Created On      : Tue May 27 17:27:28 1997
-# Last Modified On: Sun Jul  2 10:32:23 2006
+# Last Modified On: Mon Nov 17 19:47:45 2008
 # Language        : CPerl
-# Update Count    : 30
+# Update Count    : 36
 #
 # (C) Copyright 1997-2005, Ulrich Pfeifer, all rights reserved.  This
 # file is free software; you can redistribute it and/or modify it
 # under the same terms as Perl itself.
 
-$VERSION = "1.502";
+$VERSION = "1.605";
 
 use ExtUtils::MakeMaker;
 # See lib/ExtUtils/MakeMaker.pm for details of how to influence

Modified: branches/upstream/perlindex/current/perlindex.PL
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/perlindex/current/perlindex.PL?rev=30562&op=diff
==============================================================================
--- branches/upstream/perlindex/current/perlindex.PL (original)
+++ branches/upstream/perlindex/current/perlindex.PL Tue Feb 10 14:01:22 2009
@@ -34,9 +34,9 @@
 #                              -*- Mode: Perl -*- 
 # Author          : Ulrich Pfeifer
 # Created On      : Mon Jan 22 13:00:41 1996
-# Last Modified On: Sun Jul  2 10:39:22 2006
+# Last Modified On: Sun Oct 19 18:10:35 2008
 # Language        : Perl
-# Update Count    : 349
+# Update Count    : 398
 # Status          : Unknown, Use with caution!
 # 
 # (C) Copyright 1996-2005, Ulrich Pfeifer, all rights reserved.
@@ -44,8 +44,8 @@
 # under the same terms as Perl itself.
 # 
 # 
-# %SEEN is used to store the absolute pathes to files which have been
-#       indexed. Probably this could be replaced by %FN.
+# %SEEN                is used to store the mtime and absolute pathes to
+#                      files which have been indexed.
 # 
 # %FN   $FN{'last'}    greatest documentid
 #       $FN{$did}      a pair of $mtf and $filename where $mtf is the
@@ -129,7 +129,7 @@
 $debug       = 0;
 $opt_index   = '';                # make perl -w happy
 $opt_menu    = 1;
-$opt_maxhits = 15;
+$opt_maxhits = 20;
 $opt_cbreak  = 1;
 &GetOptions(
             'index',
@@ -149,6 +149,8 @@
     $opt_dict ||= 100;
 }
 
+my $GC_REQUIRED = 0; # garbage collect required? Global variable.
+
 if ($opt_index) {
 
     # check whether we can use Pod::Text to extract POD
@@ -194,17 +196,58 @@
             &find($dir);
         }
     }
+
     for $name (@ARGV) {
-        my $fns = $name;
-        $fns =~ s:\Q$prefix/::;
-        next if $SEEN{$fns};
-        next unless -f $name;
-        if ($name !~ /(~|,v)$/) {
-            $did = $FN{'last'}++;
-            $SEEN{$fns} = &index($name, $fns, $did); 
-        }
-    }
-    untie %IF;
+      add_to_index($name);
+    }
+    # Check if all (previuosly) indexed files are still available
+    # This may take some time.
+    warn "Validating index ...\n";
+    while (my ($fns, $value) = each %SEEN) {
+      my $path = $fns; $path = $prefix.'/'.$path unless $path =~ m:^/:;
+      unless (-f $path) {
+        my ($mtime, $did) = unpack "$p$p", $value;
+        # mark document as deleted
+        warn "Marking document $did ($fns) as deleted\n";
+        delete $FN{$did};
+        delete $SEEN{$fns};
+        $GC_REQUIRED++;
+      }
+    }
+    if ($GC_REQUIRED) {
+      print STDERR "Garbage collecting\r";
+      # garbage collection, this is awfully slow
+      my $progress = 0;
+      my $words    = keys %IF;
+      my %if_new;
+      tie (%if_new,   AnyDBM_File, "$IDIR/index_if.new",   O_CREAT|O_RDWR, 0644)
+        or die "Could not tie $IDIR/index_if: $!\n";
+      while (my ($word,$list) = each %IF) {
+        print STDERR "Garbage collecting ".(++$progress)."/".$words."\r";
+        my %post = unpack($p.'*',$list);
+
+        #delete $IF{$word};
+        while (my ($did,$tf) = each %post) {
+          if (exists $FN{$did}) {
+            $if_new{$word} = '' unless defined $if{$word}; # perl -w
+            $if_new{$word} .= pack($p.$p, $did, $tf);
+          } else {
+            $IDF{$word}--;
+          }
+        }
+      }
+      untie %if_new;
+      untie %IF;
+      opendir(IDX, $IDIR) or die "Could not read dir '$IDIR': $!";
+      for $file (readdir DIR) {
+        my $old = $file;
+        if ($file =~ s/^index_if\.new/index_if/) {
+          rename "$IDIR/$old", "$IDIR/$file";
+        }
+      }
+      print STDERR "\rGarbage collecting ... done";
+    }
+    untie %IF unless $GC_REQUIRED;
     untie %IDF;
     untie %FN;
     untie %SEEN;
@@ -237,14 +280,8 @@
     if ($name eq $man3direxp) {
         $prune = 1;
     }
-    $fns =~ s:\Q$prefix/::;
-    return if $SEEN{$fns};
-    return unless -f $_;
-    if ($name =~ /man|bin|\.(pod|pm)$/) {
-        if (!/(~|,v)$/) {
-            $did = $FN{'last'}++;
-            $SEEN{$fns} = &index($name, $fns, $did); 
-        }
+    if (-f $_ and $name =~ /man|bin|\.(pod|pm|txt)$/) {
+      add_to_index($name);
     }
 }
 
@@ -254,9 +291,18 @@
     my $did = shift;
     my %tf;
     my $maxtf = 0;
-    my $pod = ($fns =~ /\.pod|man/);
-
-    if($PodText and -T $fn) {
+
+    if ($fn =~ /\.txt$/) {
+       open (IN, "<$fn") || warn "Could not open $fn: $!\n", return (0);
+       while ($line = <IN>) {
+         warn "=> $line\n" if $debug;
+         for $word (&normalize($line)) {
+             next if $stop{$word};
+             $tf{$word}++;
+         }
+       }
+       close IN;
+    } elsif($PodText and -T $fn) {
       my $result;
       my $parser = Pod::Text->new(sentence => 0, width => 78);
       if ($IoScalar) {
@@ -319,6 +365,32 @@
     1;
 }
 
+sub add_to_index {
+  my ($name) = @_;
+  my $fns = $name; $fns =~ s:\Q$prefix/::;
+
+  if (exists $SEEN{$fns}) {
+    my ($mtime, $did) = unpack "$p$p", $SEEN{$fns};
+    if ((stat $name)[9] > $mtime) {
+      # mark document as deleted
+      delete $FN{$did};
+      warn "Marking document $did ($name) as deleted\n";
+      $GC_REQUIRED++;
+    } else {
+      # index up to date
+      next;
+    }
+  }
+  next unless -f $name;
+  if ($name !~ /(~|,v)$/) {
+    $did = $FN{'last'}++;
+    if (&index($name, $fns, $did)) {
+      my ($mtime) = (stat $name)[9];
+      $SEEN{$fns} = pack "$p$p", (stat $name)[9], $did;
+    }
+  }
+}
+
 sub normalize {
     my $line = join ' ', @_;
     my @result;
@@ -355,6 +427,8 @@
         my %post = unpack($p.'*',$IF{$word});
         my $idf = log($FN{'last'}/$IDF{$word});
         for $did (keys %post) {
+            # skip deleted documents
+            next unless exists $FN{$did};
             my ($maxtf) = unpack($p, $FN{$did});
             $score{$did} = 0 unless defined $score{$did}; # perl -w 
             $score{$did} += $post{$did} / $maxtf * $idf;
@@ -391,15 +465,32 @@
             last if $answer =~ /^q/i;
             $answer = ($s{substr($answer,0,1)})-1;
             if ($answer >= 0 and $answer <= $#menu) {
-                my $selection = $menu[$answer];
-                if ($selection =~ m:/man:) {
+                my $selection = $menu[$answer]; chomp($selection);
+                my ($no, $score, $path) = split ' ', $selection, 3;
+                $path = $prefix.'/'.$path unless $path =~ m:^/:;
+
+                if ($path =~ /\.txt$/) {
+                    my $pdf = $path; $pdf =~ s:pages/(\S+)_(\d+)\.txt$:$1.pdf:;
+                    my $page = $2+0;
+                    my $endp = $page+2;
+                    my $tmp  = "/tmp/perlinde$$.pdf";
+
+                    if (-f $pdf) {
+                       print  "pdftk A=$pdf cat $page-$endp output $tmp\n";
+                       system "pdftk", "A=".$pdf, 'cat', "$page-$endp", 'output', $tmp and
+                       system "pdftk", "A=".$pdf, 'cat', "$page-end",   'output', $tmp;
+                       system "acroread", $tmp;
+                       unlink $tmp;
+                    } else {
+                       print STDERR "$pager '$path'\n";
+                       system $pager, $path;
+                    }
+                } elsif ($selection =~ m:/man:) {
                     my ($page, $sect) = 
                         ($selection =~ m:([^/]*)\.(.{1,3})$:);
                     print STDERR "Running man $sect $page\n";
                     system 'man', $sect, $page;
                 } else {
-                    my ($path) = ($selection =~ m:(\S+)$:);
-                    $path = $prefix.'/'.$path unless $path =~ m:^/:;
                     print STDERR "Running pod2man $path\n";
                     system "pod2man --official $path | $nroff -man | $pager";
                 }

Modified: branches/upstream/perlindex/current/t/basic.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/perlindex/current/t/basic.t?rev=30562&op=diff
==============================================================================
--- branches/upstream/perlindex/current/t/basic.t (original)
+++ branches/upstream/perlindex/current/t/basic.t Tue Feb 10 14:01:22 2009
@@ -5,7 +5,7 @@
 # Author          : Ulrich Pfeifer
 # Created On      : Wed Jun 18 19:44:37 2003
 # Last Modified By: Ulrich Pfeifer
-# Last Modified On: Thu Jun 19 11:25:36 2003
+# Last Modified On: Tue Oct 21 10:29:42 2008
 # Language        : CPerl
 # 
 # (C) Copyright 2003, UUNET Deutschland GmbH, Germany
@@ -16,6 +16,7 @@
     if (!eval {
 	require File::Temp;
 	require File::Spec;
+	require Cwd;
 	1;
     }) {
 	print "1..0 # SKIP: File::Temp and/or File::Spec not available, skipping tests\n";
@@ -37,10 +38,11 @@
 }
 
 my $tmp = tempdir(CLEANUP => 1);
+my $cwd = Cwd::getcwd();
 
 ok(
    run(
-       "-Mblib ./perlindex -idir $tmp --index README MANIFEST perlindex.PL",
+       "-Mblib ./perlindex -idir $tmp --index $cwd/README $cwd/MANIFEST $cwd/perlindex.PL",
        sub { print "[[$_[0]]]\n"; $_[0] =~ /MANIFEST/ }
       )
   );




More information about the Pkg-perl-cvs-commits mailing list