r25418 - in /branches/upstream/html-munger: ./ current/ current/Changes current/MANIFEST current/Makefile.PL current/Munger.pm current/README current/test.pl

gregoa at users.alioth.debian.org gregoa at users.alioth.debian.org
Fri Sep 19 20:53:23 UTC 2008


Author: gregoa
Date: Fri Sep 19 20:53:20 2008
New Revision: 25418

URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=25418
Log:
[svn-inject] Installing original source of html-munger

Added:
    branches/upstream/html-munger/
    branches/upstream/html-munger/current/
    branches/upstream/html-munger/current/Changes
    branches/upstream/html-munger/current/MANIFEST
    branches/upstream/html-munger/current/Makefile.PL
    branches/upstream/html-munger/current/Munger.pm
    branches/upstream/html-munger/current/README
    branches/upstream/html-munger/current/test.pl

Added: branches/upstream/html-munger/current/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/html-munger/current/Changes?rev=25418&op=file
==============================================================================
--- branches/upstream/html-munger/current/Changes (added)
+++ branches/upstream/html-munger/current/Changes Fri Sep 19 20:53:20 2008
@@ -1,0 +1,1 @@
+0.01	- first public release

Added: branches/upstream/html-munger/current/MANIFEST
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/html-munger/current/MANIFEST?rev=25418&op=file
==============================================================================
--- branches/upstream/html-munger/current/MANIFEST (added)
+++ branches/upstream/html-munger/current/MANIFEST Fri Sep 19 20:53:20 2008
@@ -1,0 +1,6 @@
+MANIFEST
+Changes
+Makefile.PL
+Munger.pm
+test.pl
+README

Added: branches/upstream/html-munger/current/Makefile.PL
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/html-munger/current/Makefile.PL?rev=25418&op=file
==============================================================================
--- branches/upstream/html-munger/current/Makefile.PL (added)
+++ branches/upstream/html-munger/current/Makefile.PL Fri Sep 19 20:53:20 2008
@@ -1,0 +1,6 @@
+use ExtUtils::MakeMaker;
+
+# Generate the Makefile for the HTML::Munger distribution.
+#   NAME         - the module being built
+#   VERSION_FROM - file from which the distribution version is taken
+#   PREREQ_PM    - modules that must be installed first; Munger.pm loads
+#                  HTML::Parser, so declare it (0 = any version)
+WriteMakefile(
+    'NAME'	   => 'HTML::Munger',
+    'VERSION_FROM' => 'Munger.pm',
+    'PREREQ_PM'    => { 'HTML::Parser' => 0 },
+);

Added: branches/upstream/html-munger/current/Munger.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/html-munger/current/Munger.pm?rev=25418&op=file
==============================================================================
--- branches/upstream/html-munger/current/Munger.pm (added)
+++ branches/upstream/html-munger/current/Munger.pm Fri Sep 19 20:53:20 2008
@@ -1,0 +1,229 @@
+#!/usr/local/bin/perl -w
+
+package HTML::Munger;
+
+use strict;
+use integer;
+
+use HTML::Parser;
+
+# Package globals: the module version and the parent class list.
+use vars qw($VERSION @ISA);
+
+$VERSION = '0.01';
+# Inherit from HTML::Parser.  (This line had been mangled into " at ISA",
+# which is not valid Perl and left the package without a parent class.)
+@ISA     = ('HTML::Parser');
+
+# constructor
+#
+# Takes no arguments besides the invocant, which may be a class name or an
+# existing object.  Builds the object through the parent class constructor,
+# starts it off with no text filter registered, and returns it blessed into
+# the invocant's class.  Returns undef when there is no usable invocant.
+sub new {
+    my $invocant = shift;
+    return undef unless $invocant;
+
+    # An object invocant contributes its class; a plain string is the class.
+    my $class = ref($invocant) ? ref($invocant) : $invocant;
+
+    my $self = $class->SUPER::new();
+    $self->{'munger'} = undef;
+
+    return bless($self, $class);
+}
+
+
+# munging code
+#
+# munge($URL, $selfURL, $content)
+#
+# Runs $content through the parser and returns the rewritten HTML that the
+# parser callbacks accumulate in $self->{'munged'}.
+#   $URL     - address of the page being munged; its host and directory are
+#              stored in the object as 'host' and 'dir'
+#   $selfURL - address of the filtering program, stored as 'selfURL'
+#   $content - the HTML text to process
+# Returns undef when any argument is missing or empty.
+sub munge {
+    my $self           = shift || return undef;
+    $self->{'URL'}     = shift || return undef;
+    $self->{'selfURL'} = shift || return undef;
+    my $content        = shift;
+
+    # Test for emptiness rather than truth, so that a document consisting
+    # solely of "0" is still processed.
+    return undef unless defined($content) && length($content);
+
+    $self->{'munged'} = '';
+
+    # Reduce the page URL to "host/path".  The query string and fragment are
+    # dropped first because they may contain slashes of their own, and the
+    # scheme (http, https, ...) is dropped because it is not part of the host.
+    my $location = $self->{'URL'};
+    $location =~ s/[?#].*\z//s;
+    $location =~ s/\A[a-z][a-z0-9+.\-]*:\/\///i;
+
+    # parse the hostname: everything before the first slash
+    my($host, $path) = ($location =~ /\A([^\/]*)(\/.*)?\z/s);
+    $self->{'host'} = $host;
+
+    # parse the directory: the path with the document name removed.  It
+    # always starts and ends with a slash; a URL without a path gives "/".
+    $path = '/' unless defined($path);
+    $path =~ s/[^\/]*\z//;
+    $self->{'dir'} = $path;
+
+    # debugging
+    $self->{'munged'} .= '<!-- URL = ' . $self->{'URL'} . ', host = '
+                      .  $self->{'host'} . ', dir = ' . $self->{'dir'}
+                      .  ' -->';
+
+    $self->parse($content);
+    $self->eof();
+
+    return $self->{'munged'};
+}
+
+
+# Registers the filter callback.  The code reference is kept in the
+# object's 'munger' slot; calling without one clears the slot.
+# Always returns undef.
+sub set_munger {
+    my $self = shift;
+
+    $self->{'munger'} = shift;
+
+    return undef;
+}
+
+
+# Parser callback for a markup declaration.  Appends the declaration text
+# to the output, wrapped in "<!" and ">".  Always returns undef.
+sub declaration {
+    my $self = shift;
+    my $decl = shift;
+
+    $self->{'munged'} .= '<!' . $decl . '>';
+
+    return undef;
+}
+
+
+# Parser callback for a start tag.  Re-emits the tag into the output with
+# its attributes in their original order, rewriting link attributes on the
+# way:
+#   - src, href, codebase, action and background values are made absolute
+#     using the 'host' and 'dir' fields of the object
+#   - href values, and src values on frame tags, are additionally prefixed
+#     with 'selfURL' so that following them passes back through the filter
+#
+#   $tag     - tag name
+#   $attr    - hash reference mapping attribute names to values
+#   $attrseq - array reference listing the attribute names in order
+# Always returns undef.
+sub start {
+    my($self, $tag, $attr, $attrseq) = @_;
+
+    $self->{'munged'} .= "<$tag";
+    foreach my $name (@{$attrseq}) {
+        my $value = $attr->{$name};
+
+        # an attribute without a value is emitted as a bare name
+        if (!defined($value)) {
+            $self->{'munged'} .= " $name";
+            next;
+        }
+
+        # a few attributes get munged up.  The whole name has to match, so
+        # that look-alikes such as data-src, srcset or hreflang are left
+        # untouched.
+        if ($name =~ /\A(?:src|href|codebase|action|background)\z/i) {
+            if ($value =~ /\A[a-z][a-z0-9+.\-]*:/i) {
+                # this is an absolute URL (it starts with a scheme), so we
+                # do nothing.  A colon further along, e.g. in a query
+                # string, does not make a URL absolute.
+            } elsif ($value =~ /\A\/\//) {
+                # this is a protocol-relative URL which already names its
+                # own host, so only the scheme is missing
+                $value = "http:" . $value;
+            } elsif ($value =~ /\A\//) {
+                # this is an absolute pathname URL (begins with /)
+                # so we prepend the hostname
+                $value = "http://" . $self->{'host'} . $value;
+            } else {
+                # assume this is a relative URL, so we'll add both the
+                # hostname and directory
+                $value = "http://" . $self->{'host'} .
+                         $self->{'dir'} . $value;
+            }
+        }
+
+        # and a couple of others are further munged
+        if (($name =~ /\Ahref\z/i)
+           || (($tag =~ /^frame/i) && ($name =~ /\Asrc\z/i))) {
+            $value = $self->{'selfURL'} . $value;
+        }
+
+        # HTML::Parser hands over attribute values with entities already
+        # decoded, so the characters that would break a double-quoted
+        # attribute have to be encoded again on the way out.
+        $value =~ s/&/&amp;/g;
+        $value =~ s/"/&quot;/g;
+
+        $self->{'munged'} .= " $name=\"$value\"";
+    }
+    $self->{'munged'} .= ">";
+
+    return(undef);
+}
+
+
+# Parser callback for an end tag.  The tag's original source text is
+# appended to the output unchanged; the tag name is not needed.
+# Always returns undef.
+sub end {
+    my($self, $origtext) = @_[0, 2];
+
+    $self->{'munged'} .= $origtext;
+
+    return undef;
+}
+
+
+# Parser callback for a run of text.  When a filter has been stored in the
+# 'munger' slot, the text is passed to it and whatever it returns is
+# appended to the output; otherwise the text is appended as it is.
+# Always returns undef.
+sub text {
+    my $self   = shift;
+    my $chunk  = shift;
+    my $filter = $self->{'munger'};
+
+    $self->{'munged'} .= defined($filter) ? $filter->($chunk) : $chunk;
+
+    return undef;
+}
+
+
+# Parser callback for an HTML comment.  Appends the comment text to the
+# output with the "<!--" and "-->" delimiters put back around it.
+# Always returns undef.
+sub comment {
+    my $self = shift;
+    my $body = shift;
+
+    $self->{'munged'} .= '<!--' . $body . '-->';
+
+    return undef;
+}
+
+1;
+__END__
+
+=head1 NAME
+
+HTML::Munger - Module which simplifies the creation of web filters.
+
+=head1 SYNOPSIS
+
+ use HTML::Munger;
+
+ $munger = new HTML::Munger;
+ $munger->set_munger(\&filter_function);
+ $output = $munger->munge($URL, $selfURL, $input);
+
+=head1 DESCRIPTION
+
+HTML::Munger is a simple module which allows easy creation of web page
+filtering software.  It was first written to build the pootifier at
+http://pootpoot.com/?pootify
+
+The main task which this module performs is attempting to make all the
+relative links on the filtered page absolute, so that images and hyperlinks
+work correctly.  It also makes frames and hyperlinks properly filter back
+through the filter.
+
+This leaves two major tasks for the user of HTML::Munger: fetching the original
+page, and building a simple munging function.
+
+=head2 API
+
+There are really only three important functions you need to know how to call
+in order to use this module:
+
+=over 3
+
+=item B<new>
+
+This is a simple constructor, which takes no arguments aside from the implicit
+class.  It returns a blessed reference which is used to call the other methods.
+
+=item B<set_munger>
+
+This method registers the filtering function you want to be called to produce
+the filtered text.  The function specified will be called repeatedly with
+short blocks of text.  For example, given the following HTML:
+
+ <P>Hello</P><CENTER>The quick brown <I>fox</I></CENTER>
+
+The filtering function would be called three times, with 'Hello',
+'The quick brown ', and 'fox', respectively, as input.  The filter function
+is expected to return a string which will replace the given input in the
+output of the munge() call.
+
+=item B<munge>
+
+This method takes three arguments.  The first is the URL of the page which is
+being munged.  Note that the 'munge' method does NOT fetch the page for you!
+It needs this information in order to make relative links in the page absolute.
+The second argument is the URL of the filtering program.  This is used to
+make all hyperlinks and frames pass back through the filter.  Finally, it takes
+the input HTML as its third argument.  This method returns the munged HTML
+string, which can then be further parsed or sent to the user.
+
+=back
+
+=head1 BUGS
+
+Hopefully none.
+
+=head1 AUTHOR
+
+J. David Lowe, dlowe at pootpoot.com
+
+=head1 SEE ALSO
+
+perl(1), HTML::Parser(3)
+
+=cut

Added: branches/upstream/html-munger/current/README
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/html-munger/current/README?rev=25418&op=file
==============================================================================
--- branches/upstream/html-munger/current/README (added)
+++ branches/upstream/html-munger/current/README Fri Sep 19 20:53:20 2008
@@ -1,0 +1,62 @@
+HTML::Munger - Module which simplifies the creation of web filters.
+
+Copyright
+=========
+
+Copyright (c) 1997-2000 J. David Lowe.  All rights reserved.  This program is
+free software; you can redistribute it and/or modify it under the same terms
+as Perl itself. 
+
+
+Description
+===========
+
+HTML::Munger is a simple module which allows easy creation of web page
+filtering software, such as those listed here:
+
+http://dir.yahoo.com/Entertainment/Humor/By_Topic/Computers_and_Internet/Filters/Web_Based/
+
+It was first written to build the pootifier:
+
+http://pootpoot.com/?pootify
+
+The main task which this module performs is attempting to make all the
+relative links on the filtered page absolute, so that images and hyperlinks
+work correctly.  It also makes frames and hyperlinks properly filter back
+through the filter.
+
+This leaves two major tasks for the user of HTML::Munger: fetching the
+original page, and building a simple munging function.
+
+
+Prerequisites
+=============
+
+You'll need Perl and the HTML::Parser module.
+
+
+Building
+========
+
+  # perl Makefile.PL
+  # make
+
+
+Installing
+==========
+
+  # make test
+  # make install
+
+
+Additional Information
+======================
+
+If you're using this module, let me know!  It's always fascinating to hear
+what uses people have found for it.
+
+There is a web page for HTML::Munger at http://pootpoot.com/~dlowe/HTML-Munger/
+The latest version can always be found there.
+
+You can contact me directly at dlowe at pootpoot.com.  I'm happy to answer any
+questions you might have.

Added: branches/upstream/html-munger/current/test.pl
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/html-munger/current/test.pl?rev=25418&op=file
==============================================================================
--- branches/upstream/html-munger/current/test.pl (added)
+++ branches/upstream/html-munger/current/test.pl Fri Sep 19 20:53:20 2008
@@ -1,0 +1,20 @@
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.pl'
+
+######################### We start with some black magic to print on failure.
+
+# Change 1..1 below to 1..last_test_to_print .
+# (It may become useful if the test is moved to ./t subdirectory.)
+
+# Runs at compile time, before the `use` below: turn on autoflush and
+# announce a plan of exactly one test.
+BEGIN { $| = 1; print "1..1\n"; }
+# Runs when the program exits: if $loaded was never set (the module failed
+# to load, so execution never reached the assignment), report test 1 as
+# failed.
+END {print "not ok 1\n" unless $loaded;}
+use HTML::Munger;
+# Reaching this point means the module compiled and loaded.
+$loaded = 1;
+print "ok 1\n";
+
+######################### End of black magic.
+
+# Insert your test code below (better if it prints "ok 13"
+# (correspondingly "not ok 13") depending on the success of chunk 13
+# of the test code):
+




More information about the Pkg-perl-cvs-commits mailing list