r30562 - in /branches/upstream/perlindex/current: ChangeLog META.yml Makefile.PL perlindex.PL t/basic.t
dmn at users.alioth.debian.org
dmn at users.alioth.debian.org
Tue Feb 10 14:01:36 UTC 2009
Author: dmn
Date: Tue Feb 10 14:01:22 2009
New Revision: 30562
URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=30562
Log:
[svn-upgrade] Integrating new upstream version, perlindex (1.605)
Modified:
branches/upstream/perlindex/current/ChangeLog
branches/upstream/perlindex/current/META.yml
branches/upstream/perlindex/current/Makefile.PL
branches/upstream/perlindex/current/perlindex.PL
branches/upstream/perlindex/current/t/basic.t
Modified: branches/upstream/perlindex/current/ChangeLog
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/perlindex/current/ChangeLog?rev=30562&op=diff
==============================================================================
--- branches/upstream/perlindex/current/ChangeLog (original)
+++ branches/upstream/perlindex/current/ChangeLog Tue Feb 10 14:01:22 2009
@@ -1,3 +1,27 @@
+2008-11-17 Ulrich Pfeifer <Ulrich.Pfeifer at VerizonBusiness.com>
+
+ * perlindex.PL: applied typo patch from Slaven Rezic
+ (http://rt.cpan.org/Ticket/Display.html?id=40890)
+
+2008-10-19 Ulrich Pfeifer <Ulrich.Pfeifer at VerizonBusiness.com>
+
+ * perlindex.PL: Garbage collect can not change the index while
+ scanning it. Doing a copy now.
+
+ * perlindex.PL: Fixed $gc_required scoping error. Removed code
+ duplication. Added progress indication for GC collect.
+
+ * perlindex.PL: Fixed the indexing of the default directories
+ (code duplication needs to be removed) and added checking for
+ removed files (http://rt.cpan.org/Ticket/Display.html?id=39863).
+
+2008-10-19 Ulrich Pfeifer <pfeifer at wait.de>
+
+ * perlindex.PL: Added support for updating documents as requested
+ by SREZIC in http://rt.cpan.org/Ticket/Display.html?id=39862.
+ Update is transparent - except for the time used in the garbage
+ collect phase.
+
2006-07-02 Ulrich Pfeifer <upf at de.uu.net>
* perlindex.PL (index): moving the check for Pod::Text in the
Modified: branches/upstream/perlindex/current/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/perlindex/current/META.yml?rev=30562&op=diff
==============================================================================
--- branches/upstream/perlindex/current/META.yml (original)
+++ branches/upstream/perlindex/current/META.yml Tue Feb 10 14:01:22 2009
@@ -1,10 +1,12 @@
-# http://module-build.sourceforge.net/META-spec.html
-#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX#
-name: perlindex
-version: 1.502
-version_from:
-installdirs: site
-requires:
-
-distribution_type: module
-generated_by: ExtUtils::MakeMaker version 6.17
+--- #YAML:1.0
+name: perlindex
+version: 1.605
+abstract: ~
+license: ~
+author: ~
+generated_by: ExtUtils::MakeMaker version 6.42
+distribution_type: module
+requires:
+meta-spec:
+ url: http://module-build.sourceforge.net/META-spec-v1.3.html
+ version: 1.3
Modified: branches/upstream/perlindex/current/Makefile.PL
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/perlindex/current/Makefile.PL?rev=30562&op=diff
==============================================================================
--- branches/upstream/perlindex/current/Makefile.PL (original)
+++ branches/upstream/perlindex/current/Makefile.PL Tue Feb 10 14:01:22 2009
@@ -2,15 +2,15 @@
# -*- Mode: Perl -*-
# Author : Ulrich Pfeifer
# Created On : Tue May 27 17:27:28 1997
-# Last Modified On: Sun Jul 2 10:32:23 2006
+# Last Modified On: Mon Nov 17 19:47:45 2008
# Language : CPerl
-# Update Count : 30
+# Update Count : 36
#
# (C) Copyright 1997-2005, Ulrich Pfeifer, all rights reserved. This
# file is free software; you can redistribute it and/or modify it
# under the same terms as Perl itself.
-$VERSION = "1.502";
+$VERSION = "1.605";
use ExtUtils::MakeMaker;
# See lib/ExtUtils/MakeMaker.pm for details of how to influence
Modified: branches/upstream/perlindex/current/perlindex.PL
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/perlindex/current/perlindex.PL?rev=30562&op=diff
==============================================================================
--- branches/upstream/perlindex/current/perlindex.PL (original)
+++ branches/upstream/perlindex/current/perlindex.PL Tue Feb 10 14:01:22 2009
@@ -34,9 +34,9 @@
# -*- Mode: Perl -*-
# Author : Ulrich Pfeifer
# Created On : Mon Jan 22 13:00:41 1996
-# Last Modified On: Sun Jul 2 10:39:22 2006
+# Last Modified On: Sun Oct 19 18:10:35 2008
# Language : Perl
-# Update Count : 349
+# Update Count : 398
# Status : Unknown, Use with caution!
#
# (C) Copyright 1996-2005, Ulrich Pfeifer, all rights reserved.
@@ -44,8 +44,8 @@
# under the same terms as Perl itself.
#
#
-# %SEEN is used to store the absolute pathes to files which have been
-# indexed. Probably this could be replaced by %FN.
+# %SEEN is used to store the mtime and absolute pathes to
+# files which have been indexed.
#
# %FN $FN{'last'} greatest documentid
# $FN{$did} a pair of $mtf and $filename where $mtf is the
@@ -129,7 +129,7 @@
$debug = 0;
$opt_index = ''; # make perl -w happy
$opt_menu = 1;
-$opt_maxhits = 15;
+$opt_maxhits = 20;
$opt_cbreak = 1;
&GetOptions(
'index',
@@ -149,6 +149,8 @@
$opt_dict ||= 100;
}
+my $GC_REQUIRED = 0; # garbage collect required? Global variable.
+
if ($opt_index) {
# check whether we can use Pod::Text to extract POD
@@ -194,17 +196,58 @@
&find($dir);
}
}
+
for $name (@ARGV) {
- my $fns = $name;
- $fns =~ s:\Q$prefix/::;
- next if $SEEN{$fns};
- next unless -f $name;
- if ($name !~ /(~|,v)$/) {
- $did = $FN{'last'}++;
- $SEEN{$fns} = &index($name, $fns, $did);
- }
- }
- untie %IF;
+ add_to_index($name);
+ }
+ # Check if all (previuosly) indexed files are still available
+ # This may take some time.
+ warn "Validating index ...\n";
+ while (my ($fns, $value) = each %SEEN) {
+ my $path = $fns; $path = $prefix.'/'.$path unless $path =~ m:^/:;
+ unless (-f $path) {
+ my ($mtime, $did) = unpack "$p$p", $value;
+ # mark document as deleted
+ warn "Marking document $did ($fns) as deleted\n";
+ delete $FN{$did};
+ delete $SEEN{$fns};
+ $GC_REQUIRED++;
+ }
+ }
+ if ($GC_REQUIRED) {
+ print STDERR "Garbage collecting\r";
+ # garbage collection, this is awfully slow
+ my $progress = 0;
+ my $words = keys %IF;
+ my %if_new;
+ tie (%if_new, AnyDBM_File, "$IDIR/index_if.new", O_CREAT|O_RDWR, 0644)
+ or die "Could not tie $IDIR/index_if: $!\n";
+ while (my ($word,$list) = each %IF) {
+ print STDERR "Garbage collecting ".(++$progress)."/".$words."\r";
+ my %post = unpack($p.'*',$list);
+
+ #delete $IF{$word};
+ while (my ($did,$tf) = each %post) {
+ if (exists $FN{$did}) {
+ $if_new{$word} = '' unless defined $if{$word}; # perl -w
+ $if_new{$word} .= pack($p.$p, $did, $tf);
+ } else {
+ $IDF{$word}--;
+ }
+ }
+ }
+ untie %if_new;
+ untie %IF;
+ opendir(IDX, $IDIR) or die "Could not read dir '$IDIR': $!";
+ for $file (readdir DIR) {
+ my $old = $file;
+ if ($file =~ s/^index_if\.new/index_if/) {
+ rename "$IDIR/$old", "$IDIR/$file";
+ }
+ }
+ print STDERR "\rGarbage collecting ... done";
+ }
+ untie %IF unless $GC_REQUIRED;
untie %IDF;
untie %FN;
untie %SEEN;
@@ -237,14 +280,8 @@
if ($name eq $man3direxp) {
$prune = 1;
}
- $fns =~ s:\Q$prefix/::;
- return if $SEEN{$fns};
- return unless -f $_;
- if ($name =~ /man|bin|\.(pod|pm)$/) {
- if (!/(~|,v)$/) {
- $did = $FN{'last'}++;
- $SEEN{$fns} = &index($name, $fns, $did);
- }
+ if (-f $_ and $name =~ /man|bin|\.(pod|pm|txt)$/) {
+ add_to_index($name);
}
}
@@ -254,9 +291,18 @@
my $did = shift;
my %tf;
my $maxtf = 0;
- my $pod = ($fns =~ /\.pod|man/);
-
- if($PodText and -T $fn) {
+
+ if ($fn =~ /\.txt$/) {
+ open (IN, "<$fn") || warn "Could not open $fn: $!\n", return (0);
+ while ($line = <IN>) {
+ warn "=> $line\n" if $debug;
+ for $word (&normalize($line)) {
+ next if $stop{$word};
+ $tf{$word}++;
+ }
+ }
+ close IN;
+ } elsif($PodText and -T $fn) {
my $result;
my $parser = Pod::Text->new(sentence => 0, width => 78);
if ($IoScalar) {
@@ -319,6 +365,32 @@
1;
}
+sub add_to_index {
+ my ($name) = @_;
+ my $fns = $name; $fns =~ s:\Q$prefix/::;
+
+ if (exists $SEEN{$fns}) {
+ my ($mtime, $did) = unpack "$p$p", $SEEN{$fns};
+ if ((stat $name)[9] > $mtime) {
+ # mark document as deleted
+ delete $FN{$did};
+ warn "Marking document $did ($name) as deleted\n";
+ $GC_REQUIRED++;
+ } else {
+ # index up to date
+ next;
+ }
+ }
+ next unless -f $name;
+ if ($name !~ /(~|,v)$/) {
+ $did = $FN{'last'}++;
+ if (&index($name, $fns, $did)) {
+ my ($mtime) = (stat $name)[9];
+ $SEEN{$fns} = pack "$p$p", (stat $name)[9], $did;
+ }
+ }
+}
+
sub normalize {
my $line = join ' ', @_;
my @result;
@@ -355,6 +427,8 @@
my %post = unpack($p.'*',$IF{$word});
my $idf = log($FN{'last'}/$IDF{$word});
for $did (keys %post) {
+ # skip deleted documents
+ next unless exists $FN{$did};
my ($maxtf) = unpack($p, $FN{$did});
$score{$did} = 0 unless defined $score{$did}; # perl -w
$score{$did} += $post{$did} / $maxtf * $idf;
@@ -391,15 +465,32 @@
last if $answer =~ /^q/i;
$answer = ($s{substr($answer,0,1)})-1;
if ($answer >= 0 and $answer <= $#menu) {
- my $selection = $menu[$answer];
- if ($selection =~ m:/man:) {
+ my $selection = $menu[$answer]; chomp($selection);
+ my ($no, $score, $path) = split ' ', $selection, 3;
+ $path = $prefix.'/'.$path unless $path =~ m:^/:;
+
+ if ($path =~ /\.txt$/) {
+ my $pdf = $path; $pdf =~ s:pages/(\S+)_(\d+)\.txt$:$1.pdf:;
+ my $page = $2+0;
+ my $endp = $page+2;
+ my $tmp = "/tmp/perlinde$$.pdf";
+
+ if (-f $pdf) {
+ print "pdftk A=$pdf cat $page-$endp output $tmp\n";
+ system "pdftk", "A=".$pdf, 'cat', "$page-$endp", 'output', $tmp and
+ system "pdftk", "A=".$pdf, 'cat', "$page-end", 'output', $tmp;
+ system "acroread", $tmp;
+ unlink $tmp;
+ } else {
+ print STDERR "$pager '$path'\n";
+ system $pager, $path;
+ }
+ } elsif ($selection =~ m:/man:) {
my ($page, $sect) =
($selection =~ m:([^/]*)\.(.{1,3})$:);
print STDERR "Running man $sect $page\n";
system 'man', $sect, $page;
} else {
- my ($path) = ($selection =~ m:(\S+)$:);
- $path = $prefix.'/'.$path unless $path =~ m:^/:;
print STDERR "Running pod2man $path\n";
system "pod2man --official $path | $nroff -man | $pager";
}
Modified: branches/upstream/perlindex/current/t/basic.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/perlindex/current/t/basic.t?rev=30562&op=diff
==============================================================================
--- branches/upstream/perlindex/current/t/basic.t (original)
+++ branches/upstream/perlindex/current/t/basic.t Tue Feb 10 14:01:22 2009
@@ -5,7 +5,7 @@
# Author : Ulrich Pfeifer
# Created On : Wed Jun 18 19:44:37 2003
# Last Modified By: Ulrich Pfeifer
-# Last Modified On: Thu Jun 19 11:25:36 2003
+# Last Modified On: Tue Oct 21 10:29:42 2008
# Language : CPerl
#
# (C) Copyright 2003, UUNET Deutschland GmbH, Germany
@@ -16,6 +16,7 @@
if (!eval {
require File::Temp;
require File::Spec;
+ require Cwd;
1;
}) {
print "1..0 # SKIP: File::Temp and/or File::Spec not available, skipping tests\n";
@@ -37,10 +38,11 @@
}
my $tmp = tempdir(CLEANUP => 1);
+my $cwd = Cwd::getcwd();
ok(
run(
- "-Mblib ./perlindex -idir $tmp --index README MANIFEST perlindex.PL",
+ "-Mblib ./perlindex -idir $tmp --index $cwd/README $cwd/MANIFEST $cwd/perlindex.PL",
sub { print "[[$_[0]]]\n"; $_[0] =~ /MANIFEST/ }
)
);
More information about the Pkg-perl-cvs-commits
mailing list