r12039 - in /branches/upstream/libhtml-copy-perl: ./ current/ current/bin/ current/lib/ current/lib/HTML/ current/t/

mogaal-guest at users.alioth.debian.org mogaal-guest at users.alioth.debian.org
Sat Jan 5 07:00:46 UTC 2008


Author: mogaal-guest
Date: Sat Jan  5 07:00:46 2008
New Revision: 12039

URL: http://svn.debian.org/wsvn/?sc=1&rev=12039
Log:
[svn-inject] Installing original source of libhtml-copy-perl

Added:
    branches/upstream/libhtml-copy-perl/
    branches/upstream/libhtml-copy-perl/current/
    branches/upstream/libhtml-copy-perl/current/Changes
    branches/upstream/libhtml-copy-perl/current/MANIFEST
    branches/upstream/libhtml-copy-perl/current/META.yml
    branches/upstream/libhtml-copy-perl/current/Makefile.PL   (with props)
    branches/upstream/libhtml-copy-perl/current/README
    branches/upstream/libhtml-copy-perl/current/bin/
    branches/upstream/libhtml-copy-perl/current/bin/htmlcopy   (with props)
    branches/upstream/libhtml-copy-perl/current/lib/
    branches/upstream/libhtml-copy-perl/current/lib/HTML/
    branches/upstream/libhtml-copy-perl/current/lib/HTML/Copy.pm
    branches/upstream/libhtml-copy-perl/current/t/
    branches/upstream/libhtml-copy-perl/current/t/00-load.t
    branches/upstream/libhtml-copy-perl/current/t/parse.t

Added: branches/upstream/libhtml-copy-perl/current/Changes
URL: http://svn.debian.org/wsvn/branches/upstream/libhtml-copy-perl/current/Changes?rev=12039&op=file
==============================================================================
--- branches/upstream/libhtml-copy-perl/current/Changes (added)
+++ branches/upstream/libhtml-copy-perl/current/Changes Sat Jan  5 07:00:46 2008
@@ -1,0 +1,30 @@
+Revision history for Perl extension HTML::Copy.
+
+1.22 2007-08-10
+    * add HTTP::Headers to Makefile.PL as a prerequired module(PREREQ_PM).
+
+1.21 2007-08-09
+    * copy_to and parse_to can be accept a path of directory.
+    * automatically make missing directory
+    
+1.2 2007-05-31
+    * support XHTML
+    * process with taking account of <base> tag
+    * use URI module instead of File::Spec to convert link path.
+        * HTML::Copy may work on non-unix platform.
+    
+1.13 2007-03-22
+    * fix a problem that processing a link including an anchor is not correct.
+
+1.12 2007-02-22
+    * fix error in POD
+    * use Class::Accessor
+    * fix invalid warnning when a file does not exist
+    
+1.11 2007-02-19
+    * blush up POD
+    * blush up code
+    * add use 5.008
+    
+1.1
+    * First version to update CPAN

Added: branches/upstream/libhtml-copy-perl/current/MANIFEST
URL: http://svn.debian.org/wsvn/branches/upstream/libhtml-copy-perl/current/MANIFEST?rev=12039&op=file
==============================================================================
--- branches/upstream/libhtml-copy-perl/current/MANIFEST (added)
+++ branches/upstream/libhtml-copy-perl/current/MANIFEST Sat Jan  5 07:00:46 2008
@@ -1,0 +1,9 @@
+Changes
+README
+MANIFEST
+Makefile.PL
+lib/HTML/Copy.pm
+bin/htmlcopy
+t/00-load.t
+t/parse.t
+META.yml                                 Module meta-data (added by MakeMaker)

Added: branches/upstream/libhtml-copy-perl/current/META.yml
URL: http://svn.debian.org/wsvn/branches/upstream/libhtml-copy-perl/current/META.yml?rev=12039&op=file
==============================================================================
--- branches/upstream/libhtml-copy-perl/current/META.yml (added)
+++ branches/upstream/libhtml-copy-perl/current/META.yml Sat Jan  5 07:00:46 2008
@@ -1,0 +1,15 @@
+# http://module-build.sourceforge.net/META-spec.html
+#XXXXXXX This is a prototype!!!  It will change in the future!!! XXXXX#
+name:         HTML-Copy
+version:      1.22
+version_from: lib/HTML/Copy.pm
+installdirs:  site
+requires:
+    Class::Accessor:               0
+    HTML::Parser:                  3.4
+    HTTP::Headers:                 0
+    Test::More:                    0
+    URI:                           0
+
+distribution_type: module
+generated_by: ExtUtils::MakeMaker version 6.17

Added: branches/upstream/libhtml-copy-perl/current/Makefile.PL
URL: http://svn.debian.org/wsvn/branches/upstream/libhtml-copy-perl/current/Makefile.PL?rev=12039&op=file
==============================================================================
--- branches/upstream/libhtml-copy-perl/current/Makefile.PL (added)
+++ branches/upstream/libhtml-copy-perl/current/Makefile.PL Sat Jan  5 07:00:46 2008
@@ -1,0 +1,22 @@
+#!perl -w
+use 5.008;
+use strict;
+use ExtUtils::MakeMaker;
+
+my @programs_to_install = qw(htmlcopy);
+
+WriteMakefile(
+    'NAME'          => 'HTML::Copy',
+    'VERSION_FROM'  => 'lib/HTML/Copy.pm',
+    'EXE_FILES'     => [ map {"bin/$_"} @programs_to_install ],
+    'PREREQ_PM'     => {
+        'Test::More'   => 0,
+        'HTML::Parser' => 3.40,
+        'HTTP::Headers' => 0,
+        'Class::Accessor' => 0,
+        'URI' => 0
+    },
+    'dist'          => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', },	
+    'clean'         => { FILES => 'HTML-Copy-*' },
+
+);

Propchange: branches/upstream/libhtml-copy-perl/current/Makefile.PL
------------------------------------------------------------------------------
    svn:executable = 

Added: branches/upstream/libhtml-copy-perl/current/README
URL: http://svn.debian.org/wsvn/branches/upstream/libhtml-copy-perl/current/README?rev=12039&op=file
==============================================================================
--- branches/upstream/libhtml-copy-perl/current/README (added)
+++ branches/upstream/libhtml-copy-perl/current/README Sat Jan  5 07:00:46 2008
@@ -1,0 +1,38 @@
+HTML-Copy
+======================
+
+HTML::Copy copy a HTML file without breaking links in the file. HTML::Copy will be useful to mainten web site and to handle HTML templates. 
+
+This package provide a perl module "HTML::Copy" and a command line tool "htmlcopy".
+
+
+
+== INSTALLATION
+
+To install this module type the following:
+
+   perl Makefile.PL
+   make
+   make test
+   make install
+
+== DEPENDENCIES
+
+This module requires these other modules and libraries:
+
+  HTML::Parser
+  Class::Accessor
+  URI
+
+== COPYRIGHT
+
+Put the correct copyright and licence information here.
+
+Copyright (C) 2007 by Tetsuro KURITA
+mailto:tkurita at mac.com
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.8.3 or,
+at your option, any later version of Perl 5 you may have available.
+
+

Added: branches/upstream/libhtml-copy-perl/current/bin/htmlcopy
URL: http://svn.debian.org/wsvn/branches/upstream/libhtml-copy-perl/current/bin/htmlcopy?rev=12039&op=file
==============================================================================
--- branches/upstream/libhtml-copy-perl/current/bin/htmlcopy (added)
+++ branches/upstream/libhtml-copy-perl/current/bin/htmlcopy Sat Jan  5 07:00:46 2008
@@ -1,0 +1,72 @@
+#!/usr/bin/env perl -w
+
+use strict;
+use File::Basename;
+use File::Spec;
+use HTML::Copy;
+use Getopt::Long;
+use Pod::Usage;
+
+our $VERSION = '1.22';
+
+{
+	my $man = 0;
+	my $help = 0;
+  
+	GetOptions('help|?' => \$help, man => \$man) or pod2usage(2);
+	pod2usage(-exitstatus => 0, -verbose => 1) if $help;
+	pod2usage(-exitstatus => 0, -verbose => 2) if $man;
+
+	if (@ARGV > 2) {
+		pod2usage(-message => 'Too many arguments.', 
+				-exitstatus => 1, -verbose => 1)
+  }
+
+	if (@ARGV < 2) {
+		pod2usage(-message => 'Required arguments is not given.', 
+				-exitstatus => 1, -verbose => 1)
+	}
+
+	my ($source_path, $target_path) = @ARGV;
+
+	my $p = HTML::Copy->new($source_path);
+	#$p->set_encode_suspects(qw/euc-jp shiftjis 7bit-jis/);
+	$p->copy_to($target_path);
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+htmlcopy -- Copy a HTML file without breaking links.
+
+=head1 SYNOPSIS
+
+ htmlcopy [OPTION] SOURCE DESTINATION
+ htmlcopy [OPTION] SOURCE DIRECTORY
+
+=head1 DESCRIPTION
+
+htmlcopy a source HTML file into DESTINATION or DIRECTORY. If the HTML file have links to images, other HTML files, javascripts and cascading style sheets, htmlcopy changing link path in the HTML file to keep the link destination.
+
+=head1 OPTIONS
+
+=over 4
+
+=item -h, --help
+
+Print a brief help message and exits.
+
+=item -m, --man
+
+Prints the manual page and exits.
+
+=back
+
+=head1 AUTHOR
+
+Tetsuro KURITA <tkurita at mac.com>
+
+=cut

Propchange: branches/upstream/libhtml-copy-perl/current/bin/htmlcopy
------------------------------------------------------------------------------
    svn:executable = 

Added: branches/upstream/libhtml-copy-perl/current/lib/HTML/Copy.pm
URL: http://svn.debian.org/wsvn/branches/upstream/libhtml-copy-perl/current/lib/HTML/Copy.pm?rev=12039&op=file
==============================================================================
--- branches/upstream/libhtml-copy-perl/current/lib/HTML/Copy.pm (added)
+++ branches/upstream/libhtml-copy-perl/current/lib/HTML/Copy.pm Sat Jan  5 07:00:46 2008
@@ -1,0 +1,415 @@
+package HTML::Copy;
+
+use 5.008;
+use strict;
+use warnings;
+use File::Spec;
+use File::Basename;
+use File::Path;
+use Cwd;
+use IO::File;
+use utf8;
+use Encode;
+use Encode::Guess;
+use Carp;
+use Data::Dumper;
+
+use HTML::Parser 3.40;
+use HTML::HeadParser;
+use URI::file;
+
+use base qw(HTML::Parser Class::Accessor);
+
+__PACKAGE__->mk_accessors(qw(source_path
+                            destination_path
+                            link_attributes
+                            has_base
+                            source_uri
+                            destination_uri));
+
+#use Data::Dumper;
+
+=head1 NAME
+
+HTML::Copy - copy a HTML file without breaking links.
+
+=head1 VERSION
+
+Version 1.22
+
+=cut
+
+our $VERSION = '1.22';
+
+=head1 SYMPOSIS
+
+  use HTML::Copy;
+  
+  HTML::Copy->htmlcopy($source_path, $destination_path);
+  
+  # or
+  
+  $p = HTML::Copy->new($source_path);
+  $p->copy_to($destination_path);
+
+=head1 DESCRIPTION
+
+This module is to copy a HTML file without beaking links in the file. This module is a sub class of HTML::Parser.
+
+=head1 REQUIRED MODULES
+
+=over 2
+
+=item L<HTML::Parser>
+
+=back
+
+=head1 CLASS METHODS
+
+=head2 htmlcopy
+
+    HTML::Copy->htmlcopy($source_path, $destination_path);
+
+Parse contents of $source_path, change links and write into $destination_path.
+
+=cut
+
+sub htmlcopy($$$) {
+    my ($class, $source_path, $destination_path) = @_;
+    my $p = $class->new($source_path);
+    return $p->copy_to($destination_path);
+}
+
+=head2 parse_file
+
+    $html_text = HTML::Copy->parse_file($source_path, $destination_path);
+
+Parse contents of $source_path and change links to copy into $destination_path. But don't make $destination_path. Just return modified HTML. The encoding of strings is converted into utf8.
+
+=cut
+
+sub parse_file($$$) {
+    my ($class, $source_path, $destination_path) = @_;
+    my $p = $class->new($source_path);
+    return $p->parse_to($destination_path);
+}
+
+
+=head1 CONSTRUCTOR METHODS
+
+=head2 new
+
+    $p = HTML::Copy->new($source_path);
+
+Make an instance of this module.
+
+=cut
+
+sub new {
+    my $class = shift @_;
+    my $self = $class->SUPER::new();
+    if (@_ > 1) {
+        my %args = @_;
+        my @keys = keys %args;
+        @$self{@keys} = @args{@keys};
+    } else {
+        $self->source_path(shift @_);
+    }
+    
+    if ($self->source_path) {
+        (-e $self->source_path) or croak $self->source_path." is not found.\n";
+        $self->source_path($self->source_path);
+    }
+    
+    $self->link_attributes(['src', 'href', 'background', 'csref', 'livesrc']);
+    # 'livesrc' and 'csref' are uesed in Adobe GoLive
+    $self->has_base(0);
+    
+    return $self;
+}
+
+
+=head1 INSTANCE METHODS
+
+=head2 copy_to
+
+    $p->copy_to($destination_path)
+
+Parse contents of $source_path given in new method, change links and write into $destination_path.
+
+=cut
+
+sub copy_to {
+    my ($self, $destination_path) = @_;
+    $destination_path = $self->set_destination($destination_path);
+    my $io_layer = $self->io_layer();
+    
+    my $fh = IO::File->new($destination_path, ">$io_layer");
+    
+    if (defined $fh) {
+        $self->{'outputHTML'} = $fh;
+        $self->SUPER::parse($self->{'source_html'});
+        $self->eof;
+        $fh->close;
+    } else {
+        die "can't open $destination_path.";
+    }
+    
+    return $self->destination_path;
+}
+
+=head2 parse_to
+
+    $p->parse_to($destination_path)
+
+Parse contents of $source_path given in new method, change links and return HTML contents to wirte $destination_path. Unlike copy_to, $destination_path will not created.
+
+=cut
+
+sub parse_to {
+    my ($self, $destination_path) = @_;
+    $destination_path = $self->set_destination($destination_path);
+    $self->io_layer;
+    
+    my $output = '';
+    my $fh = IO::File->new(\$output, ">:utf8");
+    $self->{'outputHTML'} = $fh;
+    $self->SUPER::parse($self->{'source_html'});
+    $self->eof;
+    $fh->close;
+    return decode_utf8($output);
+}
+
+=head1 ACCESSOR METHODS
+
+=head2 io_layer
+
+    $p->io_layer;
+    $p->io_layer(':utf8');
+
+Get and set PerlIO layer to read $source_path and to write $destination_path. Usualy it was automatically determined by $source_path's charset tag. If charset is not specified, Encode::Guess module will be used.
+
+=cut
+
+sub io_layer {
+    my $self = shift @_;
+    if (@_) {
+        $self->{'io_layer'} = shift @_;
+    }
+    else {
+        unless ($self->{'io_layer'}) {
+            $self->{'io_layer'} = $self->check_io_layer();
+        }
+    }
+    
+    return $self->{'io_layer'};
+}
+
+=head2 encode_suspects
+
+    @suspects = $p->encode_sustects;
+    $p->encode_suspects(qw/shiftjis euc-jp/);
+
+Add suspects of text encoding to guess the text encoding of the source HTML. If the source HTML have charset tag, it is not requred to add suspects.
+
+=cut
+
+sub encode_suspects {
+    my $self = shift @_;
+    
+    if (@_) {
+        my @suspects = @_;
+        $self->{'EncodeSuspects'} = \@suspects;
+    }
+    
+    if (my $suspects_ref = $self->{'EncodeSuspects'}) {
+        return @$suspects_ref;
+    }
+    else {
+        return ();
+    }
+}
+
+=head2 source_html
+
+    $p->source_html;
+
+Obtain source HTML's contents
+
+=cut
+
+sub source_html {
+    my ($self) = @_;
+    $self->io_layer;
+    return $self->{'source_html'};
+}
+
+=head1 AUTHOR
+
+Tetsuro KURITA <tkurita at mac.com>
+
+=cut
+
+##== overriding methods of HTML::Parser
+
+sub declaration { $_[0]->output("<!$_[1]>")     }
+sub process     { $_[0]->output($_[2])          }
+sub comment     { $_[0]->output("<!--$_[1]-->") }
+sub end         { $_[0]->output($_[2])          }
+sub text        { $_[0]->output($_[1])          }
+
+sub start {
+    my ($self, $tag, $attr_dict, $attr_names, $tag_text) = @_; 
+    
+    unless ($self->has_base) {
+        if ($tag eq 'base') {
+            $self->has_base(1);
+        }
+        
+        my $is_changed = 0;
+        foreach my $an_attr (@{$self->link_attributes}) {
+            if (exists($attr_dict->{$an_attr})){
+                my $link_path = $attr_dict->{$an_attr};
+                next if ($link_path =~ /^\$/);
+                my $uri = URI->new($link_path);
+                next if ($uri->scheme);
+                $is_changed = 1;
+                $attr_dict->{$an_attr} = $self->change_link($uri);
+            }
+        }
+    
+        if ($is_changed) {
+            my $attrs_text = $self->build_attributes($attr_dict, $attr_names);
+            $tag_text = "<$tag $attrs_text>";
+        }
+    }
+    
+    $self->output($tag_text);
+}
+
+##== private functions
+
+sub set_destination {
+    my ($self, $destination_path) = @_;
+
+    if (-d $destination_path) {
+        my $file_name = basename($self->source_path);
+        $destination_path = File::Spec->catfile($destination_path, $file_name);
+    } else {
+        mkpath(dirname($destination_path));
+    }
+
+    return $self->destination_path($destination_path);
+}
+
+sub check_encoding {
+    my ($self) = @_;
+    my $data;
+    open my $in, "<", $self->source_path;
+    {local $/; $data = <$in>;}
+    close $in;
+    
+    my $p = HTML::HeadParser->new;
+    $p->utf8_mode(1);
+    $p->parse($data);
+    my $content_type = $p->header('content-type');
+    my $encoding = '';
+    if ($content_type) {
+        if ($content_type =~ /charset\s*=(.+)/) {
+            $encoding = $1;
+        }
+    }
+    
+    unless ($encoding) {
+        my $decoder;
+        if (my @suspects = $self->encode_suspects) {
+            $decoder = Encode::Guess->guess($data, @suspects);
+        }
+        else {
+            $decoder = Encode::Guess->guess($data);
+        }
+        
+        ref($decoder) or 
+                    die("Can't guess encoding of ".$self->source_path);
+                    
+        $encoding = $decoder->name;
+    }
+    
+    $self->{'source_html'} = Encode::decode($encoding, $data);
+    
+    return $encoding;
+}
+
+sub check_io_layer {
+    my ($self) = @_;
+    my $encoding = $self->check_encoding;
+    return '' unless ($encoding);
+    
+    my $io_layer = '';
+    if (grep {/$encoding/} ('utf8', 'utf-8', 'UTF-8') ) {
+        $io_layer = ":utf8";
+    }
+    else {
+        $io_layer = ":encoding($encoding)";
+    }
+    return $io_layer;
+}
+
+sub build_attributes {
+    my ($self, $attr_dict, $attr_names) = @_;
+    my @attrs = ();
+    foreach my $attr_name (@{$attr_names}) {
+        if ($attr_name eq '/') {
+            push @attrs, '/';
+        } else {
+            my $attr_value = $attr_dict->{$attr_name};
+            push @attrs, "$attr_name=\"$attr_value\"";
+        }
+    }
+    return join(' ', @attrs);
+}
+
+sub change_link {
+    my ($self, $uri) = @_;
+    my $result_uri;
+    my $abs_uri = $uri->abs( $self->source_uri );
+    my $abs_path = $abs_uri->file;
+
+    if (-e $abs_path) {
+        $result_uri = $abs_uri->rel($self->destination_uri);
+    } else {
+        warn("$abs_path is not found.\nThe link to this path is not changed.\n");
+        $result_uri = $uri;
+    }
+    
+    return $result_uri->as_string;
+}
+
+sub output {
+    my ($self, $out_text) = @_;
+    $self->{'outputHTML'}->print($out_text);
+}
+
+sub source_path {
+    my $self = shift @_;
+    
+    if (@_) {
+        my $path = Cwd::abs_path(shift @_);
+        $self->{'source_path'} = $path;
+        $self->source_uri(URI::file->new($path));
+    }
+    return $self->{'source_path'};
+}
+
+sub destination_path {
+    my $self = shift @_;
+    
+    if (@_) {
+        my $path = Cwd::abs_path(shift @_);
+        $self->{'destination_path'} = $path;
+        $self->destination_uri(URI::file->new($path));
+    }
+    return $self->{'destination_path'};
+}
+
+1;

Added: branches/upstream/libhtml-copy-perl/current/t/00-load.t
URL: http://svn.debian.org/wsvn/branches/upstream/libhtml-copy-perl/current/t/00-load.t?rev=12039&op=file
==============================================================================
--- branches/upstream/libhtml-copy-perl/current/t/00-load.t (added)
+++ branches/upstream/libhtml-copy-perl/current/t/00-load.t Sat Jan  5 07:00:46 2008
@@ -1,0 +1,9 @@
+#!perl -T
+use 5.008;
+use Test::More tests => 1;
+
+BEGIN {
+	use_ok( 'HTML::Copy' );
+}
+
+diag( "Testing HTML::Copy $HTML::Copy::VERSION, Perl $], $^X" );

Added: branches/upstream/libhtml-copy-perl/current/t/parse.t
URL: http://svn.debian.org/wsvn/branches/upstream/libhtml-copy-perl/current/t/parse.t?rev=12039&op=file
==============================================================================
--- branches/upstream/libhtml-copy-perl/current/t/parse.t (added)
+++ branches/upstream/libhtml-copy-perl/current/t/parse.t Sat Jan  5 07:00:46 2008
@@ -1,0 +1,250 @@
+#escapeChars {return}
+#use lib '../lib';
+use strict;
+use warnings;
+use HTML::Copy;
+use utf8;
+use File::Spec;
+#use Data::Dumper;
+
+use Test::More tests => 12;
+
+my $linked_html = <<EOT;
+<!DOCTYPE html>
+<html>
+</html>
+EOT
+
+my $linked_file_name = "linked$$.html";
+open(my $linked_fh, ">", $linked_file_name);
+print $linked_fh $linked_html;
+close $linked_fh;
+
+##== HTML data without charsets
+my $source_html_nocharset = <<EOT;
+<!DOCTYPE html>
+<html>
+ああ
+<a href="$linked_file_name#anchor"></a>
+<frame src="$linked_file_name">
+<img src="$linked_file_name">
+<script src="$linked_file_name"></script>
+<link href="$linked_file_name">
+</html>
+EOT
+
+my $result_html_nocharset = <<EOT;
+<!DOCTYPE html>
+<html>
+ああ
+<a href="../$linked_file_name#anchor"></a>
+<frame src="../$linked_file_name">
+<img src="../$linked_file_name">
+<script src="../$linked_file_name"></script>
+<link href="../$linked_file_name">
+</html>
+EOT
+
+##== write test data
+my $sub_dir_name = "sub$$";
+mkdir($sub_dir_name);
+my $src_file_name = "file$$.html";
+my $destination = File::Spec->catfile($sub_dir_name, $src_file_name);
+
+##== Test code with no charsets HTML
+open(my $src_fh, ">:utf8", $src_file_name);
+print $src_fh $source_html_nocharset;
+close $src_fh;
+
+##=== parse_to UTF-8
+my $p = HTML::Copy->new($src_file_name);
+my $copy_html = $p->parse_to($destination);
+
+ok($copy_html eq $result_html_nocharset, "parse_to no charset UTF-8");
+
+##=== copty_to UTF8
+$p->copy_to($destination);
+open(my $in, "<".$p->io_layer(), $destination);
+{local $/; $copy_html = <$in>};
+close $in;
+unlink($destination);
+
+ok($copy_html eq $result_html_nocharset, "copy_to no charset UTF-8");
+
+##=== write data with shift_jis
+open($src_fh, ">:encoding(shiftjis)", $src_file_name);
+print $src_fh $source_html_nocharset;
+close $src_fh;
+
+##=== parse_to shift_jis
+$p = HTML::Copy->new($src_file_name);
+$p->encode_suspects("shiftjis");
+$copy_html = $p->parse_to("$sub_dir_name/$src_file_name");
+
+ok($copy_html eq $result_html_nocharset, "parse_to no charset shift_jis");
+
+##=== copy_to shift_jis
+$p->copy_to($destination);
+open($in, "<".$p->io_layer, $destination);
+{local $/; $copy_html = <$in>};
+close $in;
+unlink($destination);
+
+ok($copy_html eq $result_html_nocharset, "copy_to no charset shift_jis");
+
+##== HTML with charset uft-8
+my $src_html_utf8 = <<EOT;
+<!DOCTYPE html>
+<html>
+<head>
+<meta http-equiv="content-type" content="text/html;charset=utf-8">
+</head>
+ああ
+<a href="$linked_file_name"></a>
+<frame src="$linked_file_name">
+<img src="$linked_file_name">
+<script src="$linked_file_name"></script>
+<link href="$linked_file_name">
+</html>
+EOT
+
+my $result_html_utf8 = <<EOT;
+<!DOCTYPE html>
+<html>
+<head>
+<meta http-equiv="content-type" content="text/html;charset=utf-8">
+</head>
+ああ
+<a href="../$linked_file_name"></a>
+<frame src="../$linked_file_name">
+<img src="../$linked_file_name">
+<script src="../$linked_file_name"></script>
+<link href="../$linked_file_name">
+</html>
+EOT
+
+##== Test code with charset utf-8
+open($src_fh, ">:utf8", $src_file_name);
+print $src_fh $src_html_utf8;
+close $src_fh;
+
+##=== parse_to
+$p = HTML::Copy->new($src_file_name);
+$copy_html = $p->parse_to($destination);
+
+ok($copy_html eq $result_html_utf8, "parse_to charset UTF-8");
+
+##=== copy_to
+$p->copy_to($destination);
+open($in, "<".$p->io_layer(), $destination);
+{local $/; $copy_html = <$in>};
+close $in;
+unlink($destination);
+
+ok($copy_html eq $result_html_utf8, "copy_to charset UTF-8");
+
+##== HTML with charset shift_jis
+my $src_html_shiftjis = <<EOT;
+<!DOCTYPE html>
+<html>
+<head>
+<meta http-equiv="content-type" content="text/html;charset=shift_jis">
+</head>
+ああ
+<a href="$linked_file_name"></a>
+<frame src="$linked_file_name">
+<img src="$linked_file_name">
+<script src="$linked_file_name"></script>
+<link href="$linked_file_name">
+</html>
+EOT
+
+my $result_html_shiftjis = <<EOT;
+<!DOCTYPE html>
+<html>
+<head>
+<meta http-equiv="content-type" content="text/html;charset=shift_jis">
+</head>
+ああ
+<a href="../$linked_file_name"></a>
+<frame src="../$linked_file_name">
+<img src="../$linked_file_name">
+<script src="../$linked_file_name"></script>
+<link href="../$linked_file_name">
+</html>
+EOT
+
+##== Test code with charset shift_jis
+open($src_fh, ">:encoding(shiftjis)", $src_file_name);
+print $src_fh $src_html_shiftjis;
+close $src_fh;
+
+##=== parse_to
+$p = HTML::Copy->new($src_file_name);
+$p->encode_suspects("shiftjis");
+$copy_html = $p->parse_to($destination);
+
+ok($copy_html eq $result_html_shiftjis, "parse_to no charset shift_jis");
+
+##=== copy_to
+$p->copy_to($destination);
+open($in, "<".$p->io_layer, $destination);
+{local $/; $copy_html = <$in>};
+close $in;
+
+ok($copy_html eq $result_html_shiftjis, "copy_to no charset shift_jis");
+unlink($destination);
+
+##== class_methods
+$copy_html = HTML::Copy->parse_file($src_file_name, $destination);
+
+ok($copy_html eq $result_html_shiftjis, "parse_file");
+
+HTML::Copy->htmlcopy($src_file_name, $destination);
+
+open($in, "<".$p->io_layer, $destination);
+{local $/; $copy_html = <$in>};
+close $in;
+
+ok($copy_html eq $result_html_shiftjis, "htmlcopy");
+
+unlink($destination);
+
+##== Test with base url
+my $src_html_base = <<EOT;
+<!DOCTYPE html>
+<html>
+<head>
+<meta http-equiv="content-type" content="text/html;charset=utf-8">
+<base href="http://homepage.mac.com/tkurita/scriptfactory/">
+</head>
+ああ
+<a href="$linked_file_name"></a>
+<frame src="$linked_file_name">
+<img src="$linked_file_name">
+<script src="$linked_file_name"></script>
+<link href="$linked_file_name">
+</html>
+EOT
+
+##== Test code with base url
+open($src_fh, ">:utf8", $src_file_name);
+print $src_fh $src_html_base;
+close $src_fh;
+
+##=== parse_to
+$p = HTML::Copy->new($src_file_name);
+$copy_html = $p->parse_to($destination);
+ok($copy_html eq $src_html_base, "parse_to HTML with base URL");
+
+##=== copy_to
+$p->copy_to($destination);
+open($in, "<".$p->io_layer, $destination);
+{local $/; $copy_html = <$in>};
+close $in;
+
+ok($copy_html eq $src_html_base, "copy_to HTML with base URL");
+unlink($destination);
+
+unlink($linked_file_name, $src_file_name, $destination);
+rmdir($sub_dir_name);




More information about the Pkg-perl-cvs-commits mailing list