r9166 - in /branches/upstream/libhtml-highlight-perl: ./ current/ current/Changes current/Highlight.pm current/MANIFEST current/Makefile.PL current/README current/TODO current/VERSION current/test.pl
emhn-guest at users.alioth.debian.org
emhn-guest at users.alioth.debian.org
Fri Nov 9 23:56:49 UTC 2007
Author: emhn-guest
Date: Fri Nov 9 23:56:48 2007
New Revision: 9166
URL: http://svn.debian.org/wsvn/?sc=1&rev=9166
Log:
[svn-inject] Installing original source of libhtml-highlight-perl
Added:
branches/upstream/libhtml-highlight-perl/
branches/upstream/libhtml-highlight-perl/current/
branches/upstream/libhtml-highlight-perl/current/Changes
branches/upstream/libhtml-highlight-perl/current/Highlight.pm
branches/upstream/libhtml-highlight-perl/current/MANIFEST
branches/upstream/libhtml-highlight-perl/current/Makefile.PL
branches/upstream/libhtml-highlight-perl/current/README
branches/upstream/libhtml-highlight-perl/current/TODO
branches/upstream/libhtml-highlight-perl/current/VERSION
branches/upstream/libhtml-highlight-perl/current/test.pl
Added: branches/upstream/libhtml-highlight-perl/current/Changes
URL: http://svn.debian.org/wsvn/branches/upstream/libhtml-highlight-perl/current/Changes?rev=9166&op=file
==============================================================================
--- branches/upstream/libhtml-highlight-perl/current/Changes (added)
+++ branches/upstream/libhtml-highlight-perl/current/Changes Fri Nov 9 23:56:48 2007
@@ -1,0 +1,7 @@
+
+0.20 2001/08/15
+ - documentation fixes
+ - first CPAN release
+
+0.10 2001/06/22
+ - original version;
Added: branches/upstream/libhtml-highlight-perl/current/Highlight.pm
URL: http://svn.debian.org/wsvn/branches/upstream/libhtml-highlight-perl/current/Highlight.pm?rev=9166&op=file
==============================================================================
--- branches/upstream/libhtml-highlight-perl/current/Highlight.pm (added)
+++ branches/upstream/libhtml-highlight-perl/current/Highlight.pm Fri Nov 9 23:56:48 2007
@@ -1,0 +1,395 @@
+
+package HTML::Highlight;
+
+use locale;
+
+use strict;
+use Carp;
+
+BEGIN {
+ use vars qw ($VERSION @ISA);
+ $VERSION = 0.20;
+ @ISA = ();
+}
+
+END { }
+
+my $MIN_SECTION_LENGTH = 60;
+my $DEFAULT_SECTION_LENGTH = 80;
+
+sub new {
+ $_ = shift;
+ my $class = ref($_) || $_;
+
+ croak ('HTML::Highlight - even number of parameters expected.')
+ if (@_ % 2);
+
+ # set the defaults
+ my $self = {
+ words => [],
+ wildcards => [],
+ colors => [
+ '#ffff66',
+ '#A0FFFF',
+ '#99ff99',
+ '#ff9999',
+ '#ff66ff'
+ ],
+ czech_language => 0,
+ debug => 0
+ };
+
+ bless ($self, $class);
+
+ # get parameters, overiding the defaults
+ for (my $i = 0; $i <= $#_; $i += 2) {
+ exists ( $self->{lc($_[$i])} ) or
+ croak ('HTML::Highlight - invalid parameter ' . $_[$i] . '.');
+ $self->{lc($_[$i])} = $_[($i + 1)];
+ }
+
+ croak ('HTML::Highlight - "words" and "wildcards" parameters must be references to arrays')
+ if (ref($self->{words}) ne 'ARRAY' or ref($self->{wildcards}) ne 'ARRAY');
+
+ require CzFast if ($self->{czech_language});
+
+ return $self;
+}
+
+
+sub highlight {
+ my $self = shift;
+ my $document = shift;
+
+ croak ('HTML::Highlight - no document defined')
+ if (not defined($document));
+ return '' if (length($document) == 0);
+
+ my $doc = $document;
+
+ for (my $i = 0, my $cindex = 0; $i < @{$self->{words}}; $i++, $cindex++) {
+ my $color;
+ my $out;
+ if ($self->{colors}->[$cindex]) {
+ $color = $self->{colors}->[$cindex];
+ }
+ else {
+ $cindex = 0;
+ $color = $self->{colors}->[$cindex];
+ }
+ while($doc) {
+ if ($doc !~ /(.*?)(<.*?>)(.*)/s) {
+ $out .= $self->_highlight($doc, $i, $color);
+ last;
+ }
+ else {
+ my $str = $1;
+ my $html = $2;
+ my $rest = $3;
+ $out .= $self->_highlight($str, $i, $color);
+ $out .= $html;
+ $doc = $rest;
+ }
+ }
+ $doc = $out;
+ }
+
+return $doc;
+}
+
+sub preview_context {
+ my $self = shift;
+ my $document = shift;
+ my $sectlen = shift;
+
+ $self->{context} = {};
+ $self->{sectlen} = $sectlen >= $MIN_SECTION_LENGTH ?
+ $sectlen : $DEFAULT_SECTION_LENGTH;
+ $self->{sections} = [];
+
+ $document =~ s/<.*?>//g;
+
+ for (my $i = 0; $i < @{$self->{words}}; $i++) {
+ my $pattern = $self->{czech_language} ?
+ &CzFast::czregexp($self->{words}->[$i]) :
+ $self->{words}->[$i];
+
+ my $wildcard = $self->{wildcards}->[$i];
+ my $regexp;
+
+ if ($wildcard eq '%') {
+ $regexp = "${pattern}\\w*";
+ }
+ elsif ($wildcard eq '*') {
+ $regexp = "${pattern}s?";
+ }
+ else {
+ $regexp = $pattern;
+ }
+
+ if (not $self->{context}->{$pattern}
+ and not grep (/$regexp/i, values %{$self->{context}})) {
+ my $chars = int(($self->{sectlen} - length($pattern)) / 2);
+ print "Chars: $chars\n" if ($self->{debug});
+ if ($document =~ /(?:^|\W)(.{0,$chars})(\W+|^)($regexp)(\W+|$)(.{0,$chars})(?:\W|$)/six) {
+ my $section = $1.$2.$3.$4.$5;
+ $self->{context}->{$pattern} = $section;
+ push(@{$self->{sections}}, $section);
+ }
+ }
+ }
+
+ return $self->{sections};
+}
+
+#########################
+#### private methods ####
+#########################
+
+sub _highlight {
+ my $self = shift;
+ my $str = shift;
+ my $word = shift;
+ my $color = shift;
+
+ my $pattern = $self->{words}->[$word];
+ $pattern = &CzFast::czregexp($pattern) if ($self->{czech_language});
+
+ my $wildcard = $self->{wildcards}->[$word];
+ my $regexp;
+
+ if ($wildcard eq '%') {
+ my $pat = $self->{czech_language} ? &_cz_pattern : '\w*';
+ $regexp = "${pattern}$pat";
+ }
+ elsif ($wildcard eq '*') {
+ $regexp = "${pattern}s?";
+ }
+ else {
+ $regexp = $pattern;
+ }
+
+ print "$str: $pattern | $wildcard | $regexp | $color\n" if ($self->{debug});
+ $str =~ s!(\W+|^)($regexp)!$1<span style="background-color: $color">$2</span>!sig;
+ return $str;
+}
+
+sub _cz_pattern {
+ my @chars;
+ my $pat = '(';
+ foreach my $char ('a'..'z') {
+ push(@chars, &CzFast::czregexp($char));
+ }
+ $pat .= join('|', at chars);
+ $pat .= ')*';
+ return $pat;
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+B<HTML::Highlight - A module to highlight words or patterns in HTML documents>
+
+=head1 SYNOPSIS
+
+ use HTML::Highlight;
+
+ # create the highlighter object
+
+ my $hl = new HTML::Highlight (
+ words => [
+ 'word',
+ 'any',
+ 'car',
+ 'some phrase'
+ ],
+ wildcards => [
+ undef,
+ '%',
+ '*',
+ undef
+ ],
+ colors => [
+ '#FF0000',
+ 'red',
+ 'green',
+ 'rgb(255, 0, 0)'
+ ],
+ czech_language => 0,
+ debug => 0
+ );
+
+ # Remember that you don't need to specify your own colors.
+ # The default colors should be optimal.
+
+ # Now you can use the object to highlight patterns in a document
+ # by passing content of the document to its highlight() method.
+ # The highlighter object "remembers" its configuration.
+
+ my $highlighted_document = $hl->highlight($document);
+
+
+=head1 MOTIVATION
+
+This module was originaly created to work together with fulltext
+indexing module DBIx::TextIndex to highlight search results.
+
+A need for a highlighter that takes wildcard matches and HTML tags into
+account and supports czech language (or other Slavic languages) was
+the motivation to create this module.
+
+=head1 DESCRIPTION
+
+This module provides Google-like highlighting of words or patterns in HTML
+documents. This feature is typically used to highlight search results.
+
+
+=item The construcutor:
+
+ my $hl = new HTML::Highlight (
+ words => [],
+ wildcards => [],
+ colors => [],
+ czech_language => 0,
+ debug => 0
+ );
+
+This is a constructor of the highlighter object. It takes an array of
+even number of parameters.
+
+
+The B<words> parameter is a reference to an array of words to highlight.
+
+The B<wildcards> parameter is a reference to an array of wildcards, that
+are applied to corresponding words in the B<words> array.
+
+A wildcard can be either undef or one of '%' or '*'.
+
+B<The "%" character> means "match any characters":
+
+ "%" applied to 'car' ==> matches "car", "cars", "careful", ...
+
+
+B<The "*" character> means "match also plural form of the word":
+
+ "*" applied to 'car' ==> matches only "car" or "cars"
+
+
+B<An undefined wildcard> means "match exactly the corresponding word":
+
+ undefined wildcard applied to 'car' ==> matches only "car"
+
+
+
+The B<colors> parameter is a reference to an array of CSS color
+identificators, that are used to highlight the corresponding words in
+the B<words> array.
+
+Default Google-like colors are used if you don't specify your own
+colors. Number of colors can be lower than number of words - in this case
+the colors are rotated and some of the words are therefore
+highlighted using the same color.
+
+The highlighter takes HTML tags into account and therefore does not
+"highlight" a word or a pattern inside a tag.
+
+A support for diacritics insenstive matching for ISO-8859-2 languages (for
+for example the czech language) can be activated using the B<czech_language>
+option. This feature requires a module B<CzFast> that is available on CPAN in
+a directory of author TRIPIE or at http://geocities.com/tripiecz/.
+
+B<Your system's locales must be set correctly to use the
+czech_language feature.>
+
+
+=item highlight
+
+ my $hl_document = $hl->highlight($document);
+
+The only parameter is a document in that you want
+to highlight the words that were passed to the constructor of the
+highlighter object. The method returns a version of the document in which
+the words are highlighted.
+
+
+=item preview_context
+
+ my $sections = $hl->preview_context($document, $num);
+
+
+This method takes two parameters. The first one is the document you
+want to scan for the words that were passed to the constructor of the
+highlighter object. The second parameter is an optional integer
+that specifies maximum number of characters in each of the context
+sections (see below). This parameter defaults to 80
+characters if it's not specified. Minimum allowed value of this
+parameter is 60.
+
+The method returns a reference to an array of sections of the document
+in which the words that were passed to the constructor appear.
+HTML tags are removed before the document is proccessed and are
+not present in the ouput.
+This feature is typically used in search engines to preview a context
+in which words from a search query appear in the resulting documents.
+The words are always in the middle of each of the sections. The
+number of sections this method returns is equal to the number of words
+passed to the constructor of the highlighter object.
+That means only the first occurence of each of the words is taken into
+account.
+
+=head1 SUPPORT
+
+No official support is provided, but I welcome any comments, patches
+and suggestions on my email.
+
+=head1 BUGS
+
+I am aware of no bugs.
+
+=head1 AVAILABILITY
+
+ http://geocities.com/tripiecz/
+
+=head1 AUTHOR
+
+B<Tomas Styblo>, tripie at cpan.org, CPAN-ID TRIPIE
+
+Prague, the Czech republic
+
+=head1 LICENSE
+
+HTML::Highlight - A module to highlight words or patterns in HTML documents
+
+Copyright (C) 2000 Tomas Styblo (tripie at cpan.org)
+
+This module is free software; you can redistribute it and/or modify it
+under the terms of either:
+
+a) the GNU General Public License as published by the Free Software
+Foundation; either version 1, or (at your option) any later version,
+or
+
+b) the "Artistic License" which comes with this module.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See either
+the GNU General Public License or the Artistic License for more details.
+
+You should have received a copy of the Artistic License with this
+module, in the file Artistic. If not, I'll be glad to provide one.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+USA
+
+=head1 SEE ALSO
+
+perl(1).
+
+=cut
Added: branches/upstream/libhtml-highlight-perl/current/MANIFEST
URL: http://svn.debian.org/wsvn/branches/upstream/libhtml-highlight-perl/current/MANIFEST?rev=9166&op=file
==============================================================================
--- branches/upstream/libhtml-highlight-perl/current/MANIFEST (added)
+++ branches/upstream/libhtml-highlight-perl/current/MANIFEST Fri Nov 9 23:56:48 2007
@@ -1,0 +1,8 @@
+Changes
+MANIFEST
+Makefile.PL
+README
+Highlight.pm
+test.pl
+TODO
+VERSION
Added: branches/upstream/libhtml-highlight-perl/current/Makefile.PL
URL: http://svn.debian.org/wsvn/branches/upstream/libhtml-highlight-perl/current/Makefile.PL?rev=9166&op=file
==============================================================================
--- branches/upstream/libhtml-highlight-perl/current/Makefile.PL (added)
+++ branches/upstream/libhtml-highlight-perl/current/Makefile.PL Fri Nov 9 23:56:48 2007
@@ -1,0 +1,10 @@
+
+use ExtUtils::MakeMaker;
+# See lib/ExtUtils/MakeMaker.pm for details of how to influence
+# the contents of the Makefile that is written.
+
+WriteMakefile(
+ 'NAME' => 'HTML::Highlight',
+ 'VERSION_FROM' => 'Highlight.pm'
+);
+
Added: branches/upstream/libhtml-highlight-perl/current/README
URL: http://svn.debian.org/wsvn/branches/upstream/libhtml-highlight-perl/current/README?rev=9166&op=file
==============================================================================
--- branches/upstream/libhtml-highlight-perl/current/README (added)
+++ branches/upstream/libhtml-highlight-perl/current/README Fri Nov 9 23:56:48 2007
@@ -1,0 +1,58 @@
+
+
+HTML::Highlight - A module to highlight words or patterns in HTML documents
+
+This module provides colour Google-style highlighting of words or
+patterns in HTML documents. This feature is typically used to highlight
+search results. Each specified pattern or word is highlighted using
+a different color.
+
+This module was originaly created to work together with fulltext
+indexing module DBIx::TextIndex, but it can be used independently.
+
+Need for a highlighter that supports wildcards and phrases and takes
+HTML tags into account was my motivation to create the module.
+
+
+ INSTALLATION:
+
+ perl Makefile.PL
+ make
+ make test
+
+ (become root)
+
+ make install
+
+
+ DOCUMENTATION:
+
+ perldoc HTML::Highlight
+
+
+ LICENSE
+
+ Copyright (C) 2000 Tomas Styblo (tripie at cpan.org)
+
+ This module is free software; you can redistribute it and/or modify it
+ under the terms of either:
+
+ a) the GNU General Public License as published by the Free Software
+ Foundation; either version 1, or (at your option) any later version,
+ or
+
+ b) the "Artistic License" which comes with this module.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See either
+ the GNU General Public License or the Artistic License for more details.
+
+ You should have received a copy of the Artistic License with this
+ module, in the file ARTISTIC. If not, I'll be glad to provide one.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ USA
+
Added: branches/upstream/libhtml-highlight-perl/current/TODO
URL: http://svn.debian.org/wsvn/branches/upstream/libhtml-highlight-perl/current/TODO?rev=9166&op=file
==============================================================================
(empty)
Added: branches/upstream/libhtml-highlight-perl/current/VERSION
URL: http://svn.debian.org/wsvn/branches/upstream/libhtml-highlight-perl/current/VERSION?rev=9166&op=file
==============================================================================
--- branches/upstream/libhtml-highlight-perl/current/VERSION (added)
+++ branches/upstream/libhtml-highlight-perl/current/VERSION Fri Nov 9 23:56:48 2007
@@ -1,0 +1,1 @@
+0.20
Added: branches/upstream/libhtml-highlight-perl/current/test.pl
URL: http://svn.debian.org/wsvn/branches/upstream/libhtml-highlight-perl/current/test.pl?rev=9166&op=file
==============================================================================
--- branches/upstream/libhtml-highlight-perl/current/test.pl (added)
+++ branches/upstream/libhtml-highlight-perl/current/test.pl Fri Nov 9 23:56:48 2007
@@ -1,0 +1,90 @@
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.pl'
+
+######################### We start with some black magic to print on failure.
+
+BEGIN { $| = 1; print "1..last_test_to_print\n"; }
+END { print "not ok 1\n" unless $loaded; }
+
+use HTML::Highlight;
+# require "Highlight.pm";
+
+###
+
+print "2..first highlighting test\n";
+
+my $doc = qq{
+<html>
+<text tag="value">Blah</text>
+Textual car misinformation reality sex pride is your own destiny
+purity seduction of miserable voices characters. For all your needs in this
+world of piss and misbehaviour. We all are bullshit people here. No way from
+this horrible place of death and sorrow.
+<character>Text</character>
+Contextual
+</html>
+};
+
+my $hl = new HTML::Highlight (
+ words => [ 'blah', 'text', 'character', 'span' ],
+ wildcards => [ undef, '%', '*' ],
+ colors => [ 'red', 'green' ],
+ debug => 0
+);
+
+my $hldoc = $hl->highlight($doc);
+
+if ($hldoc eq qq{
+<html>
+<text tag="value"><span style="background-color: red">Blah</span></text>
+<span style="background-color: green">Textual</span> car misinformation reality sex pride is your own destiny
+purity seduction of miserable voices <span style="background-color: red">characters</span>. For all your needs in this
+world of piss and misbehaviour. We all are bullshit people here. No way from
+this horrible place of death and sorrow.
+<character><span style="background-color: green">Text</span></character>
+Contextual
+</html>
+}) {
+ print "ok 2\n";
+}
+else {
+ print "not ok 2\n";
+}
+
+#print "$doc\n";
+#print "$hldoc\n";
+
+###
+
+print "3..first preview context test\n";
+
+my $sections = $hl->preview_context($doc, 10);
+
+=item
+print "$sections\n";
+my $len = @{$sections};
+print "len = $len\n";
+{
+ local $, = "\n---\n";
+ print @{$sections};
+}
+print "\n";
+=cut
+
+if ($sections->[0] eq qq{
+
+Blah
+Textual car misinformation reality sex}
+ and $sections->[1] eq qq{seduction of miserable voices characters. For all your needs in this
+world of}) {
+ print "ok 3\n";
+}
+else {
+ print "not ok 3\n";
+}
+
+###
+
+$loaded = 1;
+print "ok 1\n";
More information about the Pkg-perl-cvs-commits
mailing list