r27609 - in /trunk/libhtml-parser-perl: ./ debian/ lib/HTML/ t/
dmn at users.alioth.debian.org
dmn at users.alioth.debian.org
Mon Dec 1 21:22:03 UTC 2008
Author: dmn
Date: Mon Dec 1 21:22:00 2008
New Revision: 27609
URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=27609
Log:
* New upstream release
+ bump years of copyright
Added:
trunk/libhtml-parser-perl/META.yml
- copied unchanged from r27608, branches/upstream/libhtml-parser-perl/current/META.yml
Modified:
trunk/libhtml-parser-perl/Changes
trunk/libhtml-parser-perl/MANIFEST
trunk/libhtml-parser-perl/Makefile.PL
trunk/libhtml-parser-perl/Parser.pm
trunk/libhtml-parser-perl/Parser.xs
trunk/libhtml-parser-perl/README
trunk/libhtml-parser-perl/debian/changelog
trunk/libhtml-parser-perl/debian/copyright
trunk/libhtml-parser-perl/hparser.c
trunk/libhtml-parser-perl/hparser.h
trunk/libhtml-parser-perl/lib/HTML/Entities.pm
trunk/libhtml-parser-perl/lib/HTML/Filter.pm
trunk/libhtml-parser-perl/lib/HTML/HeadParser.pm
trunk/libhtml-parser-perl/lib/HTML/LinkExtor.pm
trunk/libhtml-parser-perl/lib/HTML/PullParser.pm
trunk/libhtml-parser-perl/lib/HTML/TokeParser.pm
trunk/libhtml-parser-perl/t/headparser.t
trunk/libhtml-parser-perl/t/msie-compat.t
trunk/libhtml-parser-perl/t/plaintext.t
trunk/libhtml-parser-perl/t/unicode-bom.t
trunk/libhtml-parser-perl/t/unicode.t
trunk/libhtml-parser-perl/util.c
Modified: trunk/libhtml-parser-perl/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/Changes?rev=27609&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/Changes (original)
+++ trunk/libhtml-parser-perl/Changes Mon Dec 1 21:22:00 2008
@@ -1,3 +1,42 @@
+2008-11-24 Gisle Aas <gisle at ActiveState.com>
+
+ Release 3.59
+
+ Restore perl-5.6 compatibility for HTML::HeadParser.
+
+ Improved META.yml
+
+
+
+2008-11-17 Gisle Aas <gisle at ActiveState.com>
+
+ Release 3.58
+
+ Suppress "Parsing of undecoded UTF-8 will give garbage" warning
+ with attr_encoded [RT#29089]
+
+ HTML::HeadParser:
+ - Recognize the Unicode BOM in utf8_mode as well [RT#27522]
+ - Avoid ending up with '/' keys attribute in Link headers.
+
+
+
+2008-11-16 Gisle Aas <gisle at ActiveState.com>
+
+ Release 3.57
+
+ The <iframe> element content is now parsed in literal mode.
+
+ Parsing of <script> and <style> content ends on the first end tag
+ even when that tag was in a quoted string. That seems to be the
+ behaviour of all modern browsers.
+
+ Implement backquote() attribute as requested by Alex Kapranoff.
+
+ Test and documentation tweaks from Alex Kapranoff.
+
+
+
2007-01-12 Gisle Aas <gisle at ActiveState.com>
Release 3.56
Modified: trunk/libhtml-parser-perl/MANIFEST
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/MANIFEST?rev=27609&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/MANIFEST (original)
+++ trunk/libhtml-parser-perl/MANIFEST Mon Dec 1 21:22:00 2008
@@ -76,3 +76,4 @@
tokenpos.h Dynamically sized token_pos arrays
typemap Convert between HTML::Parser and 'struct p_state'
util.c Some utility functions
+META.yml Module meta-data (added by MakeMaker)
Modified: trunk/libhtml-parser-perl/Makefile.PL
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/Makefile.PL?rev=27609&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/Makefile.PL (original)
+++ trunk/libhtml-parser-perl/Makefile.PL Mon Dec 1 21:22:00 2008
@@ -1,19 +1,31 @@
-require 5.006;
use strict;
use ExtUtils::MakeMaker;
WriteMakefile(
NAME => 'HTML::Parser',
VERSION_FROM => 'Parser.pm',
+ ABSTRACT_FROM => 'Parser.pm',
+ AUTHOR => 'Gisle Aas <gisle at activestate.com>',
+ LICENSE => 'perl',
+
+ MIN_PERL_VERSION => 5.006,
+ PREREQ_PM => {
+ 'HTML::Tagset' => 3,
+ 'XSLoader' => 0,
+ },
+ META_MERGE => {
+ build_requires => { 'Test::More' => 0 },
+ recommends => { 'HTTP::Headers' => 0 },
+ resources => {
+ repository => 'http://gitorious.org/projects/perl-html-parser',
+ MailingList => 'mailto:libwww at perl.org',
+ }
+ },
+
+ DEFINE => "-DMARKED_SECTION",
H => [ "hparser.h", "hctype.h", "tokenpos.h", "pfunc.h",
"hparser.c", "util.c",
],
- PREREQ_PM => {
- 'HTML::Tagset' => 3,
- 'Test::More' => 0, # only needed to run 'make test'
- },
- DEFINE => "-DMARKED_SECTION",
- dist => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', },
clean => { FILES => 'hctype.h pfunc.h' },
);
@@ -28,3 +40,25 @@
$(PERL) mkhctype >hctype.h
'
}
+
+BEGIN {
+ # compatibility with older versions of MakeMaker
+ my $developer = -f "MANIFEST.SKIP";
+ my %mm_req = (
+ LICENCE => 6.31,
+ META_MERGE => 6.45,
+ META_ADD => 6.45,
+ MIN_PERL_VERSION => 6.48,
+ );
+ undef(*WriteMakefile);
+ *WriteMakefile = sub {
+ my %arg = @_;
+ for (keys %mm_req) {
+ unless (eval { ExtUtils::MakeMaker->VERSION($mm_req{$_}) }) {
+ warn "$_ $@" if $developer;
+ delete $arg{$_};
+ }
+ }
+ ExtUtils::MakeMaker::WriteMakefile(%arg);
+ };
+}
Modified: trunk/libhtml-parser-perl/Parser.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/Parser.pm?rev=27609&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/Parser.pm (original)
+++ trunk/libhtml-parser-perl/Parser.pm Mon Dec 1 21:22:00 2008
@@ -1,6 +1,6 @@
package HTML::Parser;
-# Copyright 1996-2007, Gisle Aas.
+# Copyright 1996-2008, Gisle Aas.
# Copyright 1999-2000, Michael A. Chase.
#
# This library is free software; you can redistribute it and/or
@@ -9,7 +9,7 @@
use strict;
use vars qw($VERSION @ISA);
-$VERSION = '3.56'; # $Date: 2007/01/12 09:18:31 $
+$VERSION = "3.59";
require HTML::Entities;
@@ -329,6 +329,14 @@
entities for attribute values decoded. Enabling this attribute leaves
entities alone.
+=item $p->backquote
+
+=item $p->backquote( $bool )
+
+By default, only ' and " are recognized as quote characters around
+attribute values. MSIE also recognizes backquotes for some reason.
+Enabling this attribute provides compatibility with this behaviour.
+
=item $p->boolean_attribute_value( $val )
This method sets the value reported for boolean attributes inside HTML
@@ -351,7 +359,7 @@
the end of the document is parsed in CDATA mode. This historical
behaviour is what at least MSIE does. Enabling this attribute makes
closing "</plaintext>" tag effective and the parsing process will resume
-after seeing this tag. This emulates gecko-based browsers.
+after seeing this tag. This emulates early gecko-based browsers.
=item $p->empty_element_tags
@@ -682,7 +690,7 @@
Dtext causes the decoded text to be passed. General entities are
automatically decoded unless the event was inside a CDATA section or
was between literal start and end tags (C<script>, C<style>,
-C<xmp>, and C<plaintext>).
+C<xmp>, C<iframe> and C<plaintext>).
The Unicode character set is assumed for entity decoding. With Perl
version 5.6 or earlier only the Latin-1 range is supported, and
@@ -701,7 +709,7 @@
Is_cdata causes a TRUE value to be passed if the event is inside a CDATA
section or between literal start and end tags (C<script>,
-C<style>, C<xmp>, and C<plaintext>).
+C<style>, C<xmp>, C<iframe> and C<plaintext>).
if the flag is FALSE for a text event, then you should normally
either use C<dtext> or decode the entities yourself before the text is
@@ -1225,7 +1233,7 @@
=head1 COPYRIGHT
- Copyright 1996-2007 Gisle Aas. All rights reserved.
+ Copyright 1996-2008 Gisle Aas. All rights reserved.
Copyright 1999-2000 Michael A. Chase. All rights reserved.
This library is free software; you can redistribute it and/or
Modified: trunk/libhtml-parser-perl/Parser.xs
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/Parser.xs?rev=27609&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/Parser.xs (original)
+++ trunk/libhtml-parser-perl/Parser.xs Mon Dec 1 21:22:00 2008
@@ -1,5 +1,4 @@
-/* $Id: Parser.xs,v 2.137 2007/01/12 10:18:39 gisle Exp $
- *
+/*
* Copyright 1999-2005, Gisle Aas.
* Copyright 1999-2000, Michael A. Chase.
*
@@ -265,6 +264,7 @@
pstate2->utf8_mode = pstate->utf8_mode;
pstate2->empty_element_tags = pstate->empty_element_tags;
pstate2->xml_pic = pstate->xml_pic;
+ pstate2->backquote = pstate->backquote;
pstate2->bool_attr_val =
SvREFCNT_inc(sv_dup(pstate->bool_attr_val, params));
@@ -447,6 +447,7 @@
HTML::Parser::utf8_mode = 10
HTML::Parser::empty_element_tags = 11
HTML::Parser::xml_pic = 12
+ HTML::Parser::backquote = 13
PREINIT:
bool *attr;
CODE:
@@ -470,8 +471,9 @@
#else
case 10: croak("The utf8_mode does not work with this perl; perl-5.8 or better required");
#endif
- case 11: attr = &pstate->empty_element_tags; break;
+ case 11: attr = &pstate->empty_element_tags; break;
case 12: attr = &pstate->xml_pic; break;
+ case 13: attr = &pstate->backquote; break;
default:
croak("Unknown boolean attribute (%d)", ix);
}
Modified: trunk/libhtml-parser-perl/README
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/README?rev=27609&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/README (original)
+++ trunk/libhtml-parser-perl/README Mon Dec 1 21:22:00 2008
@@ -58,7 +58,7 @@
COPYRIGHT
- © 1995-2007 Gisle Aas. All rights reserved.
+ © 1995-2008 Gisle Aas. All rights reserved.
© 1999-2000 Michael A. Chase. All rights reserved.
This library is free software; you can redistribute it and/or modify
Modified: trunk/libhtml-parser-perl/debian/changelog
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/debian/changelog?rev=27609&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/debian/changelog (original)
+++ trunk/libhtml-parser-perl/debian/changelog Mon Dec 1 21:22:00 2008
@@ -1,8 +1,10 @@
-libhtml-parser-perl (3.56-2) UNRELEASED; urgency=low
+libhtml-parser-perl (3.59-1) UNRELEASED; urgency=low
* Take over for the Debian Perl Group with eloy's permission
-
- -- Damyan Ivanov <dmn at debian.org> Mon, 01 Dec 2008 23:10:05 +0200
+ * New upstream release
+ + bump years of copyright
+
+ -- Damyan Ivanov <dmn at debian.org> Mon, 01 Dec 2008 23:15:48 +0200
libhtml-parser-perl (3.56-1) unstable; urgency=low
Modified: trunk/libhtml-parser-perl/debian/copyright
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/debian/copyright?rev=27609&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/debian/copyright (original)
+++ trunk/libhtml-parser-perl/debian/copyright Mon Dec 1 21:22:00 2008
@@ -13,7 +13,7 @@
COPYRIGHT
- C 1995-2004 Gisle Aas. All rights reserved.
+ C 1995-2008 Gisle Aas. All rights reserved.
C 1999-2000 Michael A. Chase. All rights reserved.
This library is free software; you can redistribute it and/or modify
Modified: trunk/libhtml-parser-perl/hparser.c
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/hparser.c?rev=27609&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/hparser.c (original)
+++ trunk/libhtml-parser-perl/hparser.c Mon Dec 1 21:22:00 2008
@@ -1,6 +1,5 @@
-/* $Id: hparser.c,v 2.134 2007/01/12 10:54:06 gisle Exp $
- *
- * Copyright 1999-2007, Gisle Aas
+/*
+ * Copyright 1999-2008, Gisle Aas
* Copyright 1999-2000, Michael A. Chase
*
* This library is free software; you can redistribute it and/or
@@ -26,6 +25,7 @@
{6, "script", 1},
{5, "style", 1},
{3, "xmp", 1},
+ {6, "iframe", 1},
{9, "plaintext", 1},
{5, "title", 0},
{8, "textarea", 0},
@@ -455,7 +455,7 @@
if (tokens[i+1].beg) {
char *beg = tokens[i+1].beg;
STRLEN len = tokens[i+1].end - beg;
- if (*beg == '"' || *beg == '\'') {
+ if (*beg == '"' || *beg == '\'' || (*beg == '`' && p_state->backquote)) {
assert(len >= 2 && *beg == beg[len-1]);
beg++; len -= 2;
}
@@ -726,8 +726,12 @@
p_state->skipped_text = newSVpvn("", 0);
}
}
- if (a == ARG_ATTR || a == ARG_ATTRARR || a == ARG_DTEXT) {
- p_state->argspec_entity_decode++;
+ if (a == ARG_ATTR || a == ARG_ATTRARR) {
+ if (p_state->argspec_entity_decode != ARG_DTEXT)
+ p_state->argspec_entity_decode = ARG_ATTR;
+ }
+ else if (a == ARG_DTEXT) {
+ p_state->argspec_entity_decode = ARG_DTEXT;
}
}
else {
@@ -1166,7 +1170,7 @@
if (s == end)
goto PREMATURE;
- if (*s == '"' || *s == '\'') {
+ if (*s == '"' || *s == '\'' || (*s == '`' && p_state->backquote)) {
char *str_beg = s;
s++;
while (s < end && *s != *str_beg)
@@ -1337,7 +1341,7 @@
PUSH_TOKEN(s, s);
break;
}
- if (*s == '"' || *s == '\'') {
+ if (*s == '"' || *s == '\'' || (*s == '`' && p_state->backquote)) {
char *str_beg = s;
s++;
while (s < end && *s != *str_beg)
@@ -1548,29 +1552,9 @@
while (p_state->literal_mode) {
char *l = p_state->literal_mode;
- bool skip_quoted_end = (strEQ(l, "script") || strEQ(l, "style"));
- char inside_quote = 0;
- bool escape_next = 0;
char *end_text;
- while (s < end) {
- if (*s == '<' && !inside_quote)
- break;
- if (skip_quoted_end) {
- if (escape_next) {
- escape_next = 0;
- }
- else {
- if (*s == '\\')
- escape_next = 1;
- else if (inside_quote && *s == inside_quote)
- inside_quote = 0;
- else if (*s == '\r' || *s == '\n')
- inside_quote = 0;
- else if (!inside_quote && (*s == '"' || *s == '\''))
- inside_quote = *s;
- }
- }
+ while (s < end && *s != '<') {
s++;
}
@@ -1761,6 +1745,7 @@
if (p_state->literal_mode) {
if (strEQ(p_state->literal_mode, "plaintext") ||
strEQ(p_state->literal_mode, "xmp") ||
+ strEQ(p_state->literal_mode, "iframe") ||
strEQ(p_state->literal_mode, "textarea"))
{
/* rest is considered text */
@@ -1851,6 +1836,7 @@
/* Print warnings if we find unexpected Unicode BOM forms */
#ifdef UNICODE_HTML_PARSER
if (p_state->argspec_entity_decode &&
+ !(p_state->attr_encoded && p_state->argspec_entity_decode == ARG_ATTR) &&
!p_state->utf8_mode && (
(!utf8 && len >= 3 && strnEQ(beg, "\xEF\xBB\xBF", 3)) ||
(utf8 && len >= 6 && strnEQ(beg, "\xC3\xAF\xC2\xBB\xC2\xBF", 6)) ||
Modified: trunk/libhtml-parser-perl/hparser.h
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/hparser.h?rev=27609&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/hparser.h (original)
+++ trunk/libhtml-parser-perl/hparser.h Mon Dec 1 21:22:00 2008
@@ -1,5 +1,4 @@
-/* $Id: hparser.h,v 2.34 2006/04/26 07:01:10 gisle Exp $
- *
+/*
* Copyright 1999-2005, Gisle Aas
* Copyright 1999-2000, Michael A. Chase
*
@@ -109,11 +108,12 @@
bool utf8_mode;
bool empty_element_tags;
bool xml_pic;
+ bool backquote;
/* other configuration stuff */
SV* bool_attr_val;
struct p_handler handlers[EVENT_COUNT];
- bool argspec_entity_decode;
+ int argspec_entity_decode;
/* filters */
HV* report_tags;
Modified: trunk/libhtml-parser-perl/lib/HTML/Entities.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/lib/HTML/Entities.pm?rev=27609&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/lib/HTML/Entities.pm (original)
+++ trunk/libhtml-parser-perl/lib/HTML/Entities.pm Mon Dec 1 21:22:00 2008
@@ -1,6 +1,4 @@
package HTML::Entities;
-
-# $Id: Entities.pm,v 1.35 2006/03/22 09:15:23 gisle Exp $
=head1 NAME
@@ -141,7 +139,7 @@
@EXPORT = qw(encode_entities decode_entities _decode_entities);
@EXPORT_OK = qw(%entity2char %char2entity encode_entities_numeric);
-$VERSION = sprintf("%d.%02d", q$Revision: 1.35 $ =~ /(\d+)\.(\d+)/);
+$VERSION = "3.57";
sub Version { $VERSION; }
require HTML::Parser; # for fast XS implemented decode_entities
Modified: trunk/libhtml-parser-perl/lib/HTML/Filter.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/lib/HTML/Filter.pm?rev=27609&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/lib/HTML/Filter.pm (original)
+++ trunk/libhtml-parser-perl/lib/HTML/Filter.pm Mon Dec 1 21:22:00 2008
@@ -6,7 +6,7 @@
require HTML::Parser;
@ISA=qw(HTML::Parser);
-$VERSION = sprintf("%d.%02d", q$Revision: 2.11 $ =~ /(\d+)\.(\d+)/);
+$VERSION = "3.57";
sub declaration { $_[0]->output("<!$_[1]>") }
sub process { $_[0]->output($_[2]) }
Modified: trunk/libhtml-parser-perl/lib/HTML/HeadParser.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/lib/HTML/HeadParser.pm?rev=27609&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/lib/HTML/HeadParser.pm (original)
+++ trunk/libhtml-parser-perl/lib/HTML/HeadParser.pm Mon Dec 1 21:22:00 2008
@@ -76,7 +76,7 @@
use strict;
use vars qw($VERSION $DEBUG);
#$DEBUG = 1;
-$VERSION = sprintf("%d.%02d", q$Revision: 2.22 $ =~ /(\d+)\.(\d+)/);
+$VERSION = "3.59";
=item $hp = HTML::HeadParser->new
@@ -186,6 +186,7 @@
# <link href="http:..." rel="xxx" rev="xxx" title="xxx">
my $h_val = "<" . delete($attr->{href}) . ">";
for (sort keys %{$attr}) {
+ next if $_ eq "/"; # XHTML junk
$h_val .= qq(; $_="$attr->{$_}");
}
$self->{'header'}->push_header(Link => $h_val);
@@ -208,8 +209,17 @@
sub text
{
my($self, $text) = @_;
- $text =~ s/\x{FEFF}//; # drop Unicode BOM if found
print "TEXT[$text]\n" if $DEBUG;
+ unless ($self->{first_chunk}) {
+ # drop Unicode BOM if found
+ if ($self->utf8_mode) {
+ $text =~ s/^\xEF\xBB\xBF//;
+ }
+ else {
+ $text =~ s/^\x{FEFF}//;
+ }
+ $self->{first_chunk}++;
+ }
my $tag = $self->{tag};
if (!$tag && $text =~ /\S/) {
# Normal text means start of body
@@ -218,6 +228,10 @@
}
return if $tag ne 'title';
$self->{'text'} .= $text;
+}
+
+BEGIN {
+ *utf8_mode = sub { 1 } unless HTML::Entities::UNICODE_SUPPORT;;
}
1;
Modified: trunk/libhtml-parser-perl/lib/HTML/LinkExtor.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/lib/HTML/LinkExtor.pm?rev=27609&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/lib/HTML/LinkExtor.pm (original)
+++ trunk/libhtml-parser-perl/lib/HTML/LinkExtor.pm Mon Dec 1 21:22:00 2008
@@ -1,10 +1,8 @@
package HTML::LinkExtor;
-
-# $Id: LinkExtor.pm,v 1.33 2003/10/10 10:20:56 gisle Exp $
require HTML::Parser;
@ISA = qw(HTML::Parser);
-$VERSION = sprintf("%d.%02d", q$Revision: 1.33 $ =~ /(\d+)\.(\d+)/);
+$VERSION = "3.57";
=head1 NAME
Modified: trunk/libhtml-parser-perl/lib/HTML/PullParser.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/lib/HTML/PullParser.pm?rev=27609&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/lib/HTML/PullParser.pm (original)
+++ trunk/libhtml-parser-perl/lib/HTML/PullParser.pm Mon Dec 1 21:22:00 2008
@@ -1,10 +1,8 @@
package HTML::PullParser;
-
-# $Id: PullParser.pm,v 2.9 2006/04/26 08:00:28 gisle Exp $
require HTML::Parser;
@ISA=qw(HTML::Parser);
-$VERSION = sprintf("%d.%02d", q$Revision: 2.9 $ =~ /(\d+)\.(\d+)/);
+$VERSION = "3.57";
use strict;
use Carp ();
Modified: trunk/libhtml-parser-perl/lib/HTML/TokeParser.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/lib/HTML/TokeParser.pm?rev=27609&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/lib/HTML/TokeParser.pm (original)
+++ trunk/libhtml-parser-perl/lib/HTML/TokeParser.pm Mon Dec 1 21:22:00 2008
@@ -1,10 +1,8 @@
package HTML::TokeParser;
-
-# $Id: TokeParser.pm,v 2.37 2006/04/26 08:00:28 gisle Exp $
require HTML::PullParser;
@ISA=qw(HTML::PullParser);
-$VERSION = sprintf("%d.%02d", q$Revision: 2.37 $ =~ /(\d+)\.(\d+)/);
+$VERSION = "3.57";
use strict;
use Carp ();
Modified: trunk/libhtml-parser-perl/t/headparser.t
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/t/headparser.t?rev=27609&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/t/headparser.t (original)
+++ trunk/libhtml-parser-perl/t/headparser.t Mon Dec 1 21:22:00 2008
@@ -1,7 +1,7 @@
#!perl -w
use strict;
-use Test::More tests => 11;
+use Test::More tests => 13;
{ package H;
sub new { bless {}, shift; }
@@ -52,7 +52,6 @@
<script>
- "</script>"
ignore this
</script>
@@ -66,7 +65,6 @@
<style>
- "</style>"
ignore this too
</style>
@@ -143,7 +141,7 @@
ok(!$p->as_string);
SKIP: {
- skip "Need Unicode support", 2 if $] < 5.008;
+ skip "Need Unicode support", 4 if $] < 5.008;
# Test that the Unicode BOM does not confuse us?
$p = HTML::HeadParser->new(H->new);
@@ -151,4 +149,26 @@
$p->eof;
is($p->header("title"), "Hi <foo>");
+
+ $p = HTML::HeadParser->new(H->new);
+ $p->utf8_mode(1);
+ $p->parse(<<"EOT"); # example from http://rt.cpan.org/Ticket/Display.html?id=27522
+\xEF\xBB\xBF<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html>
+ <head>
+ <title>
+Parkinson's disease</title>
+ <meta name="Keywords" content="brain,disease,dopamine,drug,levodopa,parkinson,patients,symptoms,,Medications, Medications">
+ </meta>
+ \t
+\t<link href="../../css/ummAdam.css" rel="stylesheet" type="text/css" />
+\t<link rel="stylesheet" rev="stylesheet" href="../../css/ummprint.css" media="print" />
+\t
+\t </head>
+ <body>
+EOT
+ $p->eof;
+
+ is($p->header("title"), "Parkinson's disease");
+ is($p->header("link")->[0], '<../../css/ummAdam.css>; rel="stylesheet"; type="text/css"');
}
Modified: trunk/libhtml-parser-perl/t/msie-compat.t
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/t/msie-compat.t?rev=27609&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/t/msie-compat.t (original)
+++ trunk/libhtml-parser-perl/t/msie-compat.t Mon Dec 1 21:22:00 2008
@@ -3,13 +3,13 @@
use strict;
use HTML::Parser;
-use Test::More tests => 2;
+use Test::More tests => 4;
my $TEXT = "";
sub h
{
- my($event, $tagname, $text) = @_;
- for ($event, $tagname, $text) {
+ my($event, $tagname, $text, @attr) = @_;
+ for ($event, $tagname, $text, @attr) {
if (defined) {
s/([\n\r\t])/sprintf "\\%03o", ord($1)/ge;
}
@@ -18,10 +18,10 @@
}
}
- $TEXT .= "[$event,$tagname,$text]\n";
+ $TEXT .= "[$event,$tagname,$text," . join(":", @attr) . "]\n";
}
-my $p = HTML::Parser->new(default_h => [\&h, "event,tagname,text"]);
+my $p = HTML::Parser->new(default_h => [\&h, "event,tagname,text,\@attr"]);
$p->parse("<a>");
$p->parse("</a f>");
$p->parse("</a 'foo<>' 'bar>' x>");
@@ -33,18 +33,18 @@
$p->eof;
is($TEXT, <<'EOT');
-[start_document,<undef>,]
-[start,a,<a>]
-[end,a,</a f>]
-[end,a,</a 'foo<>' 'bar>' x>]
-[end,a,</a "foo<>" "bar>" x>]
-[comment, foo bar,</ foo bar>]
-[comment, "<>" ,</ "<>" >]
-[comment,comment,<!--comment>]
-[text,<undef>,text]
-[comment,comment,<!--comment>]
-[comment,p,<p]
-[end_document,<undef>,]
+[start_document,<undef>,,]
+[start,a,<a>,]
+[end,a,</a f>,]
+[end,a,</a 'foo<>' 'bar>' x>,]
+[end,a,</a "foo<>" "bar>" x>,]
+[comment, foo bar,</ foo bar>,]
+[comment, "<>" ,</ "<>" >,]
+[comment,comment,<!--comment>,]
+[text,<undef>,text,]
+[comment,comment,<!--comment>,]
+[comment,p,<p,]
+[end_document,<undef>,,]
EOT
$TEXT = "";
@@ -52,7 +52,28 @@
$p->eof;
is($TEXT, <<'EOT');
-[start_document,<undef>,]
-[comment,comment,<!comment>]
-[end_document,<undef>,]
+[start_document,<undef>,,]
+[comment,comment,<!comment>,]
+[end_document,<undef>,,]
EOT
+
+$TEXT = "";
+$p->parse(q(<a name=`foo bar`>));
+$p->eof;
+
+is($TEXT, <<'EOT');
+[start_document,<undef>,,]
+[start,a,<a name=`foo bar`>,name:`foo:bar`:bar`]
+[end_document,<undef>,,]
+EOT
+
+$p->backquote(1);
+$TEXT = "";
+$p->parse(q(<a name=`foo bar`>));
+$p->eof;
+
+is($TEXT, <<'EOT');
+[start_document,<undef>,,]
+[start,a,<a name=`foo bar`>,name:foo bar]
+[end_document,<undef>,,]
+EOT
Modified: trunk/libhtml-parser-perl/t/plaintext.t
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/t/plaintext.t?rev=27609&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/t/plaintext.t (original)
+++ trunk/libhtml-parser-perl/t/plaintext.t Mon Dec 1 21:22:00 2008
@@ -1,4 +1,4 @@
-use Test::More tests => 2;
+use Test::More tests => 3;
use strict;
use HTML::Parser;
@@ -43,3 +43,16 @@
is($doc, "start_document:::start:<plaintext>::text:<foo>
:1:end:</plaintext>::text:foo::start:<b>::end:</b>::text:
::end_document::");
+
+@a = ();
+$p->closing_plaintext('yep, emulate gecko (2)');
+$p->parse(<<EOT)->eof;
+<plaintext><foo>
+foo<b></b>
+EOT
+
+$doc = join(":", map { defined $_ ? $_ : "" } @a);
+
+is($doc, "start_document:::start:<plaintext>::text:<foo>
+foo<b></b>
+:1:end_document::");
Modified: trunk/libhtml-parser-perl/t/unicode-bom.t
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/t/unicode-bom.t?rev=27609&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/t/unicode-bom.t (original)
+++ trunk/libhtml-parser-perl/t/unicode-bom.t Mon Dec 1 21:22:00 2008
@@ -37,13 +37,17 @@
$p->parse("\xFE\xFF\0\0<head>Hi there</head>");
$p->eof;
+for (@warn) {
+ s/line (\d+)/line ##/g;
+}
+
is(join("", @warn), <<EOT);
-Parsing of undecoded UTF-8 will give garbage when decoding entities at $0 line 21.
-Parsing of undecoded UTF-8 will give garbage when decoding entities at $0 line 25.
-Parsing of undecoded UTF-16 at $0 line 28.
-Parsing of undecoded UTF-16 at $0 line 31.
-Parsing of undecoded UTF-32 at $0 line 34.
-Parsing of undecoded UTF-32 at $0 line 37.
+Parsing of undecoded UTF-8 will give garbage when decoding entities at $0 line ##.
+Parsing of undecoded UTF-8 will give garbage when decoding entities at $0 line ##.
+Parsing of undecoded UTF-16 at $0 line ##.
+Parsing of undecoded UTF-16 at $0 line ##.
+Parsing of undecoded UTF-32 at $0 line ##.
+Parsing of undecoded UTF-32 at $0 line ##.
EOT
@warn = ();
Modified: trunk/libhtml-parser-perl/t/unicode.t
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/t/unicode.t?rev=27609&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/t/unicode.t (original)
+++ trunk/libhtml-parser-perl/t/unicode.t Mon Dec 1 21:22:00 2008
@@ -2,10 +2,12 @@
use strict;
use HTML::Parser;
-use Test::More tests => 103;
+use Test::More;
+BEGIN {
+ plan skip_all => "This perl does not support Unicode" if $] < 5.008;
+}
-SKIP: {
-skip "This perl does not support Unicode", 103 if $] < 5.008;
+plan tests => 105;
my @warn;
$SIG{__WARN__} = sub {
@@ -180,4 +182,17 @@
ok(HTML::Entities::_probably_utf8_chunk("f\xE2\x99\xA5o\xE2\x99"));
ok(!HTML::Entities::_probably_utf8_chunk("f\xE2"));
ok(!HTML::Entities::_probably_utf8_chunk("f\xE2\x99"));
-}
+
+$p = HTML::Parser->new(
+ api_version => 3,
+ default_h => [\@parsed, 'event, text, tag, attr'],
+ attr_encoded => 1,
+);
+
+@warn = ();
+@parsed = ();
+
+$p->parse($doc)->eof;
+
+ok(!@warn);
+is(@parsed, 9);
Modified: trunk/libhtml-parser-perl/util.c
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/util.c?rev=27609&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/util.c (original)
+++ trunk/libhtml-parser-perl/util.c Mon Dec 1 21:22:00 2008
@@ -1,5 +1,4 @@
-/* $Id: util.c,v 2.30 2006/03/22 09:15:17 gisle Exp $
- *
+/*
* Copyright 1999-2006, Gisle Aas.
*
* This library is free software; you can redistribute it and/or
More information about the Pkg-perl-cvs-commits
mailing list