r27607 - in /branches/upstream/libhtml-parser-perl/current: ./ lib/HTML/ t/

dmn at users.alioth.debian.org dmn at users.alioth.debian.org
Mon Dec 1 21:15:22 UTC 2008


Author: dmn
Date: Mon Dec  1 21:15:19 2008
New Revision: 27607

URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=27607
Log:
[svn-upgrade] Integrating new upstream version, libhtml-parser-perl (3.59)

Added:
    branches/upstream/libhtml-parser-perl/current/META.yml
Modified:
    branches/upstream/libhtml-parser-perl/current/Changes
    branches/upstream/libhtml-parser-perl/current/MANIFEST
    branches/upstream/libhtml-parser-perl/current/Makefile.PL
    branches/upstream/libhtml-parser-perl/current/Parser.pm
    branches/upstream/libhtml-parser-perl/current/Parser.xs
    branches/upstream/libhtml-parser-perl/current/README
    branches/upstream/libhtml-parser-perl/current/hparser.c
    branches/upstream/libhtml-parser-perl/current/hparser.h
    branches/upstream/libhtml-parser-perl/current/lib/HTML/Entities.pm
    branches/upstream/libhtml-parser-perl/current/lib/HTML/Filter.pm
    branches/upstream/libhtml-parser-perl/current/lib/HTML/HeadParser.pm
    branches/upstream/libhtml-parser-perl/current/lib/HTML/LinkExtor.pm
    branches/upstream/libhtml-parser-perl/current/lib/HTML/PullParser.pm
    branches/upstream/libhtml-parser-perl/current/lib/HTML/TokeParser.pm
    branches/upstream/libhtml-parser-perl/current/t/headparser.t
    branches/upstream/libhtml-parser-perl/current/t/msie-compat.t
    branches/upstream/libhtml-parser-perl/current/t/plaintext.t
    branches/upstream/libhtml-parser-perl/current/t/unicode-bom.t
    branches/upstream/libhtml-parser-perl/current/t/unicode.t
    branches/upstream/libhtml-parser-perl/current/util.c

Modified: branches/upstream/libhtml-parser-perl/current/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/Changes?rev=27607&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/Changes (original)
+++ branches/upstream/libhtml-parser-perl/current/Changes Mon Dec  1 21:15:19 2008
@@ -1,3 +1,42 @@
+2008-11-24   Gisle Aas <gisle at ActiveState.com>
+
+     Release 3.59
+
+     Restore perl-5.6 compatibility for HTML::HeadParser.
+
+     Improved META.yml
+
+
+
+2008-11-17   Gisle Aas <gisle at ActiveState.com>
+
+     Release 3.58
+
+     Suppress "Parsing of undecoded UTF-8 will give garbage" warning
+     with attr_encoded [RT#29089]
+
+     HTML::HeadParser:
+       - Recognize the Unicode BOM in utf8_mode as well [RT#27522]
+       - Avoid ending up with '/' keys attribute in Link headers.
+
+
+
+2008-11-16   Gisle Aas <gisle at ActiveState.com>
+
+     Release 3.57
+
+     The <iframe> element content is now parsed in literal mode.
+
+     Parsing of <script> and <style> content ends on the first end tag
+     even when that tag was in a quoted string.  That seems to be the
+     behaviour of all modern browsers.
+
+     Implement backquote() attribute as requested by Alex Kapranoff.
+
+     Test and documentation tweaks from Alex Kapranoff.
+
+
+
 2007-01-12   Gisle Aas <gisle at ActiveState.com>
 
      Release 3.56

Modified: branches/upstream/libhtml-parser-perl/current/MANIFEST
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/MANIFEST?rev=27607&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/MANIFEST (original)
+++ branches/upstream/libhtml-parser-perl/current/MANIFEST Mon Dec  1 21:15:19 2008
@@ -76,3 +76,4 @@
 tokenpos.h		Dynamically sized token_pos arrays
 typemap			Convert between HTML::Parser and 'struct p_state'
 util.c			Some utility functions
+META.yml                                 Module meta-data (added by MakeMaker)

Added: branches/upstream/libhtml-parser-perl/current/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/META.yml?rev=27607&op=file
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/META.yml (added)
+++ branches/upstream/libhtml-parser-perl/current/META.yml Mon Dec  1 21:15:19 2008
@@ -1,0 +1,29 @@
+--- #YAML:1.0
+name:               HTML-Parser
+version:            3.59
+abstract:           HTML parser class
+author:
+    - Gisle Aas <gisle at activestate.com>
+license:            perl
+distribution_type:  module
+configure_requires:
+    ExtUtils::MakeMaker:  0
+build_requires:
+    Test::More:  0
+requires:
+    HTML::Tagset:  3
+    perl:          5.006
+    XSLoader:      0
+resources:
+    MailingList:  mailto:libwww at perl.org
+    repository:   http://gitorious.org/projects/perl-html-parser
+no_index:
+    directory:
+        - t
+        - inc
+generated_by:       ExtUtils::MakeMaker version 6.48
+meta-spec:
+    url:      http://module-build.sourceforge.net/META-spec-v1.4.html
+    version:  1.4
+recommends:
+    HTTP::Headers:  0

Modified: branches/upstream/libhtml-parser-perl/current/Makefile.PL
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/Makefile.PL?rev=27607&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/Makefile.PL (original)
+++ branches/upstream/libhtml-parser-perl/current/Makefile.PL Mon Dec  1 21:15:19 2008
@@ -1,19 +1,31 @@
-require 5.006;
 use strict;
 use ExtUtils::MakeMaker;
 
 WriteMakefile(
     NAME	 => 'HTML::Parser',
     VERSION_FROM => 'Parser.pm',
+    ABSTRACT_FROM => 'Parser.pm',
+    AUTHOR       => 'Gisle Aas <gisle at activestate.com>',
+    LICENSE	 => 'perl',
+
+    MIN_PERL_VERSION => 5.006,
+    PREREQ_PM    => {
+		      'HTML::Tagset' => 3,
+		      'XSLoader' => 0,
+                    },
+    META_MERGE   => {
+        build_requires => { 'Test::More' => 0 },
+        recommends => { 'HTTP::Headers' => 0 },
+        resources => {
+            repository => 'http://gitorious.org/projects/perl-html-parser',
+	    MailingList => 'mailto:libwww at perl.org',
+        }
+    },
+
+    DEFINE       => "-DMARKED_SECTION",
     H            => [ "hparser.h", "hctype.h", "tokenpos.h", "pfunc.h",
 		      "hparser.c", "util.c",
 		    ],
-    PREREQ_PM    => {
-		      'HTML::Tagset' => 3,
-                      'Test::More' => 0,    # only needed to run 'make test'
-                    },
-    DEFINE       => "-DMARKED_SECTION",
-    dist         => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', },
     clean        => { FILES => 'hctype.h pfunc.h' },
 );
 
@@ -28,3 +40,25 @@
 	$(PERL) mkhctype >hctype.h
 '
 }
+
+BEGIN {
+    # compatibility with older versions of MakeMaker
+    my $developer = -f "MANIFEST.SKIP";
+    my %mm_req = (
+        LICENCE => 6.31,
+        META_MERGE => 6.45,
+        META_ADD => 6.45,
+        MIN_PERL_VERSION => 6.48,
+    );
+    undef(*WriteMakefile);
+    *WriteMakefile = sub {
+        my %arg = @_;
+        for (keys %mm_req) {
+            unless (eval { ExtUtils::MakeMaker->VERSION($mm_req{$_}) }) {
+                warn "$_ $@" if $developer;
+                delete $arg{$_};
+            }
+        }
+        ExtUtils::MakeMaker::WriteMakefile(%arg);
+    };
+}

Modified: branches/upstream/libhtml-parser-perl/current/Parser.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/Parser.pm?rev=27607&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/Parser.pm (original)
+++ branches/upstream/libhtml-parser-perl/current/Parser.pm Mon Dec  1 21:15:19 2008
@@ -1,6 +1,6 @@
 package HTML::Parser;
 
-# Copyright 1996-2007, Gisle Aas.
+# Copyright 1996-2008, Gisle Aas.
 # Copyright 1999-2000, Michael A. Chase.
 #
 # This library is free software; you can redistribute it and/or
@@ -9,7 +9,7 @@
 use strict;
 use vars qw($VERSION @ISA);
 
-$VERSION = '3.56';  # $Date: 2007/01/12 09:18:31 $
+$VERSION = "3.59";
 
 require HTML::Entities;
 
@@ -329,6 +329,14 @@
 entities for attribute values decoded.  Enabling this attribute leaves
 entities alone.
 
+=item $p->backquote
+
+=item $p->backquote( $bool )
+
+By default, only ' and " are recognized as quote characters around
+attribute values.  MSIE also recognize backquotes for some reason.
+Enabling this attribute provide compatiblity with this behaviour.
+
 =item $p->boolean_attribute_value( $val )
 
 This method sets the value reported for boolean attributes inside HTML
@@ -351,7 +359,7 @@
 the end of the document is parsed in CDATA mode.  This historical
 behaviour is what at least MSIE does.  Enabling this attribute makes
 closing "</plaintext>" tag effective and the parsing process will resume
-after seeing this tag.  This emulates gecko-based browsers.
+after seeing this tag.  This emulates early gecko-based browsers.
 
 =item $p->empty_element_tags
 
@@ -682,7 +690,7 @@
 Dtext causes the decoded text to be passed.  General entities are
 automatically decoded unless the event was inside a CDATA section or
 was between literal start and end tags (C<script>, C<style>,
-C<xmp>, and C<plaintext>).
+C<xmp>, C<iframe> and C<plaintext>).
 
 The Unicode character set is assumed for entity decoding.  With Perl
 version 5.6 or earlier only the Latin-1 range is supported, and
@@ -701,7 +709,7 @@
 
 Is_cdata causes a TRUE value to be passed if the event is inside a CDATA
 section or between literal start and end tags (C<script>,
-C<style>, C<xmp>, and C<plaintext>).
+C<style>, C<xmp>, C<iframe> and C<plaintext>).
 
 if the flag is FALSE for a text event, then you should normally
 either use C<dtext> or decode the entities yourself before the text is
@@ -1224,7 +1232,7 @@
 
 =head1 COPYRIGHT
 
- Copyright 1996-2007 Gisle Aas. All rights reserved.
+ Copyright 1996-2008 Gisle Aas. All rights reserved.
  Copyright 1999-2000 Michael A. Chase.  All rights reserved.
 
 This library is free software; you can redistribute it and/or

Modified: branches/upstream/libhtml-parser-perl/current/Parser.xs
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/Parser.xs?rev=27607&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/Parser.xs (original)
+++ branches/upstream/libhtml-parser-perl/current/Parser.xs Mon Dec  1 21:15:19 2008
@@ -1,5 +1,4 @@
-/* $Id: Parser.xs,v 2.137 2007/01/12 10:18:39 gisle Exp $
- *
+/* 
  * Copyright 1999-2005, Gisle Aas.
  * Copyright 1999-2000, Michael A. Chase.
  *
@@ -265,6 +264,7 @@
     pstate2->utf8_mode = pstate->utf8_mode;
     pstate2->empty_element_tags = pstate->empty_element_tags;
     pstate2->xml_pic = pstate->xml_pic;
+    pstate2->backquote = pstate->backquote;
 
     pstate2->bool_attr_val =
 	SvREFCNT_inc(sv_dup(pstate->bool_attr_val, params));
@@ -447,6 +447,7 @@
         HTML::Parser::utf8_mode = 10
         HTML::Parser::empty_element_tags = 11
         HTML::Parser::xml_pic = 12
+	HTML::Parser::backquote = 13
     PREINIT:
 	bool *attr;
     CODE:
@@ -470,8 +471,9 @@
 #else
 	case 10: croak("The utf8_mode does not work with this perl; perl-5.8 or better required");
 #endif
-	case 11: attr = &pstate->empty_element_tags;    break;
+	case 11: attr = &pstate->empty_element_tags;   break;
         case 12: attr = &pstate->xml_pic;              break;
+	case 13: attr = &pstate->backquote;            break;
 	default:
 	    croak("Unknown boolean attribute (%d)", ix);
         }

Modified: branches/upstream/libhtml-parser-perl/current/README
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/README?rev=27607&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/README (original)
+++ branches/upstream/libhtml-parser-perl/current/README Mon Dec  1 21:15:19 2008
@@ -58,7 +58,7 @@
 
 COPYRIGHT
 
-  © 1995-2007 Gisle Aas. All rights reserved.
+  © 1995-2008 Gisle Aas. All rights reserved.
   © 1999-2000 Michael A. Chase.  All rights reserved.
 
 This library is free software; you can redistribute it and/or modify

Modified: branches/upstream/libhtml-parser-perl/current/hparser.c
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/hparser.c?rev=27607&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/hparser.c (original)
+++ branches/upstream/libhtml-parser-perl/current/hparser.c Mon Dec  1 21:15:19 2008
@@ -1,6 +1,5 @@
-/* $Id: hparser.c,v 2.134 2007/01/12 10:54:06 gisle Exp $
- *
- * Copyright 1999-2007, Gisle Aas
+/* 
+ * Copyright 1999-2008, Gisle Aas
  * Copyright 1999-2000, Michael A. Chase
  *
  * This library is free software; you can redistribute it and/or
@@ -26,6 +25,7 @@
     {6, "script", 1},
     {5, "style", 1},
     {3, "xmp", 1},
+    {6, "iframe", 1},
     {9, "plaintext", 1},
     {5, "title", 0},
     {8, "textarea", 0},
@@ -455,7 +455,7 @@
 		    if (tokens[i+1].beg) {
 			char *beg = tokens[i+1].beg;
 			STRLEN len = tokens[i+1].end - beg;
-			if (*beg == '"' || *beg == '\'') {
+			if (*beg == '"' || *beg == '\'' || (*beg == '`' && p_state->backquote)) {
 			    assert(len >= 2 && *beg == beg[len-1]);
 			    beg++; len -= 2;
 			}
@@ -726,8 +726,12 @@
 			p_state->skipped_text = newSVpvn("", 0);
                     }
                 }
-		if (a == ARG_ATTR || a == ARG_ATTRARR || a == ARG_DTEXT) {
-		    p_state->argspec_entity_decode++;
+		if (a == ARG_ATTR || a == ARG_ATTRARR) {
+		    if (p_state->argspec_entity_decode != ARG_DTEXT)
+			p_state->argspec_entity_decode = ARG_ATTR;
+		}
+		else if (a == ARG_DTEXT) {
+		    p_state->argspec_entity_decode = ARG_DTEXT;
 		}
 	    }
 	    else {
@@ -1166,7 +1170,7 @@
 	    if (s == end)
 		goto PREMATURE;
 
-	    if (*s == '"' || *s == '\'') {
+	    if (*s == '"' || *s == '\'' || (*s == '`' && p_state->backquote)) {
 		char *str_beg = s;
 		s++;
 		while (s < end && *s != *str_beg)
@@ -1337,7 +1341,7 @@
 		PUSH_TOKEN(s, s);
 		break;
 	    }
-	    if (*s == '"' || *s == '\'') {
+	    if (*s == '"' || *s == '\'' || (*s == '`' && p_state->backquote)) {
 		char *str_beg = s;
 		s++;
 		while (s < end && *s != *str_beg)
@@ -1548,29 +1552,9 @@
 
 	while (p_state->literal_mode) {
 	    char *l = p_state->literal_mode;
-	    bool skip_quoted_end = (strEQ(l, "script") || strEQ(l, "style"));
-	    char inside_quote = 0;
-	    bool escape_next = 0;
 	    char *end_text;
 
-	    while (s < end) {
-		if (*s == '<' && !inside_quote)
-		    break;
-		if (skip_quoted_end) {
-		    if (escape_next) {
-			escape_next = 0;
-		    }
-		    else {
-			if (*s == '\\')
-			    escape_next = 1;
-			else if (inside_quote && *s == inside_quote)
-			    inside_quote = 0;
-			else if (*s == '\r' || *s == '\n')
-			    inside_quote = 0;
-			else if (!inside_quote && (*s == '"' || *s == '\''))
-			    inside_quote = *s;
-		    }
-		}
+	    while (s < end && *s != '<') {
 		s++;
 	    }
 
@@ -1761,6 +1745,7 @@
 		if (p_state->literal_mode) {
 		    if (strEQ(p_state->literal_mode, "plaintext") ||
 			strEQ(p_state->literal_mode, "xmp") ||
+			strEQ(p_state->literal_mode, "iframe") ||
 			strEQ(p_state->literal_mode, "textarea"))
 		    {
 			/* rest is considered text */
@@ -1851,6 +1836,7 @@
 	    /* Print warnings if we find unexpected Unicode BOM forms */
 #ifdef UNICODE_HTML_PARSER
 	    if (p_state->argspec_entity_decode &&
+		!(p_state->attr_encoded && p_state->argspec_entity_decode == ARG_ATTR) &&
 		!p_state->utf8_mode && (
                  (!utf8 && len >= 3 && strnEQ(beg, "\xEF\xBB\xBF", 3)) ||
 		 (utf8 && len >= 6 && strnEQ(beg, "\xC3\xAF\xC2\xBB\xC2\xBF", 6)) ||

Modified: branches/upstream/libhtml-parser-perl/current/hparser.h
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/hparser.h?rev=27607&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/hparser.h (original)
+++ branches/upstream/libhtml-parser-perl/current/hparser.h Mon Dec  1 21:15:19 2008
@@ -1,5 +1,4 @@
-/* $Id: hparser.h,v 2.34 2006/04/26 07:01:10 gisle Exp $
- *
+/* 
  * Copyright 1999-2005, Gisle Aas
  * Copyright 1999-2000, Michael A. Chase
  *
@@ -109,11 +108,12 @@
     bool utf8_mode;
     bool empty_element_tags;
     bool xml_pic;
+    bool backquote;
 
     /* other configuration stuff */
     SV* bool_attr_val;
     struct p_handler handlers[EVENT_COUNT];
-    bool argspec_entity_decode;
+    int argspec_entity_decode;
 
     /* filters */
     HV* report_tags;

Modified: branches/upstream/libhtml-parser-perl/current/lib/HTML/Entities.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/lib/HTML/Entities.pm?rev=27607&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/lib/HTML/Entities.pm (original)
+++ branches/upstream/libhtml-parser-perl/current/lib/HTML/Entities.pm Mon Dec  1 21:15:19 2008
@@ -1,6 +1,4 @@
 package HTML::Entities;
-
-# $Id: Entities.pm,v 1.35 2006/03/22 09:15:23 gisle Exp $
 
 =head1 NAME
 
@@ -141,7 +139,7 @@
 @EXPORT = qw(encode_entities decode_entities _decode_entities);
 @EXPORT_OK = qw(%entity2char %char2entity encode_entities_numeric);
 
-$VERSION = sprintf("%d.%02d", q$Revision: 1.35 $ =~ /(\d+)\.(\d+)/);
+$VERSION = "3.57";
 sub Version { $VERSION; }
 
 require HTML::Parser;  # for fast XS implemented decode_entities

Modified: branches/upstream/libhtml-parser-perl/current/lib/HTML/Filter.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/lib/HTML/Filter.pm?rev=27607&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/lib/HTML/Filter.pm (original)
+++ branches/upstream/libhtml-parser-perl/current/lib/HTML/Filter.pm Mon Dec  1 21:15:19 2008
@@ -6,7 +6,7 @@
 require HTML::Parser;
 @ISA=qw(HTML::Parser);
 
-$VERSION = sprintf("%d.%02d", q$Revision: 2.11 $ =~ /(\d+)\.(\d+)/);
+$VERSION = "3.57";
 
 sub declaration { $_[0]->output("<!$_[1]>")     }
 sub process     { $_[0]->output($_[2])          }

Modified: branches/upstream/libhtml-parser-perl/current/lib/HTML/HeadParser.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/lib/HTML/HeadParser.pm?rev=27607&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/lib/HTML/HeadParser.pm (original)
+++ branches/upstream/libhtml-parser-perl/current/lib/HTML/HeadParser.pm Mon Dec  1 21:15:19 2008
@@ -76,7 +76,7 @@
 use strict;
 use vars qw($VERSION $DEBUG);
 #$DEBUG = 1;
-$VERSION = sprintf("%d.%02d", q$Revision: 2.22 $ =~ /(\d+)\.(\d+)/);
+$VERSION = "3.59";
 
 =item $hp = HTML::HeadParser->new
 
@@ -186,6 +186,7 @@
 	# <link href="http:..." rel="xxx" rev="xxx" title="xxx">
 	my $h_val = "<" . delete($attr->{href}) . ">";
 	for (sort keys %{$attr}) {
+	    next if $_ eq "/";  # XHTML junk
 	    $h_val .= qq(; $_="$attr->{$_}");
 	}
 	$self->{'header'}->push_header(Link => $h_val);
@@ -208,8 +209,17 @@
 sub text
 {
     my($self, $text) = @_;
-    $text =~ s/\x{FEFF}//;  # drop Unicode BOM if found
     print "TEXT[$text]\n" if $DEBUG;
+    unless ($self->{first_chunk}) {
+	# drop Unicode BOM if found
+	if ($self->utf8_mode) {
+	    $text =~ s/^\xEF\xBB\xBF//;
+	}
+	else {
+	    $text =~ s/^\x{FEFF}//;
+	}
+	$self->{first_chunk}++;
+    }
     my $tag = $self->{tag};
     if (!$tag && $text =~ /\S/) {
 	# Normal text means start of body
@@ -218,6 +228,10 @@
     }
     return if $tag ne 'title';
     $self->{'text'} .= $text;
+}
+
+BEGIN {
+    *utf8_mode = sub { 1 } unless HTML::Entities::UNICODE_SUPPORT;;
 }
 
 1;

Modified: branches/upstream/libhtml-parser-perl/current/lib/HTML/LinkExtor.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/lib/HTML/LinkExtor.pm?rev=27607&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/lib/HTML/LinkExtor.pm (original)
+++ branches/upstream/libhtml-parser-perl/current/lib/HTML/LinkExtor.pm Mon Dec  1 21:15:19 2008
@@ -1,10 +1,8 @@
 package HTML::LinkExtor;
-
-# $Id: LinkExtor.pm,v 1.33 2003/10/10 10:20:56 gisle Exp $
 
 require HTML::Parser;
 @ISA = qw(HTML::Parser);
-$VERSION = sprintf("%d.%02d", q$Revision: 1.33 $ =~ /(\d+)\.(\d+)/);
+$VERSION = "3.57";
 
 =head1 NAME
 

Modified: branches/upstream/libhtml-parser-perl/current/lib/HTML/PullParser.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/lib/HTML/PullParser.pm?rev=27607&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/lib/HTML/PullParser.pm (original)
+++ branches/upstream/libhtml-parser-perl/current/lib/HTML/PullParser.pm Mon Dec  1 21:15:19 2008
@@ -1,10 +1,8 @@
 package HTML::PullParser;
-
-# $Id: PullParser.pm,v 2.9 2006/04/26 08:00:28 gisle Exp $
 
 require HTML::Parser;
 @ISA=qw(HTML::Parser);
-$VERSION = sprintf("%d.%02d", q$Revision: 2.9 $ =~ /(\d+)\.(\d+)/);
+$VERSION = "3.57";
 
 use strict;
 use Carp ();

Modified: branches/upstream/libhtml-parser-perl/current/lib/HTML/TokeParser.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/lib/HTML/TokeParser.pm?rev=27607&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/lib/HTML/TokeParser.pm (original)
+++ branches/upstream/libhtml-parser-perl/current/lib/HTML/TokeParser.pm Mon Dec  1 21:15:19 2008
@@ -1,10 +1,8 @@
 package HTML::TokeParser;
-
-# $Id: TokeParser.pm,v 2.37 2006/04/26 08:00:28 gisle Exp $
 
 require HTML::PullParser;
 @ISA=qw(HTML::PullParser);
-$VERSION = sprintf("%d.%02d", q$Revision: 2.37 $ =~ /(\d+)\.(\d+)/);
+$VERSION = "3.57";
 
 use strict;
 use Carp ();

Modified: branches/upstream/libhtml-parser-perl/current/t/headparser.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/t/headparser.t?rev=27607&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/t/headparser.t (original)
+++ branches/upstream/libhtml-parser-perl/current/t/headparser.t Mon Dec  1 21:15:19 2008
@@ -1,7 +1,7 @@
 #!perl -w
 
 use strict;
-use Test::More tests => 11;
+use Test::More tests => 13;
 
 { package H;
   sub new { bless {}, shift; }
@@ -52,7 +52,6 @@
 
 <script>
 
-   "</script>"
     ignore this
 
 </script>
@@ -66,7 +65,6 @@
 
 <style>
 
-   "</style>"
    ignore this too
 
 </style>
@@ -143,7 +141,7 @@
 ok(!$p->as_string);
 
 SKIP: {
-  skip "Need Unicode support", 2 if $] < 5.008;
+  skip "Need Unicode support", 4 if $] < 5.008;
 
   # Test that the Unicode BOM does not confuse us?
   $p = HTML::HeadParser->new(H->new);
@@ -151,4 +149,26 @@
   $p->eof;
 
   is($p->header("title"), "Hi <foo>");
+
+  $p = HTML::HeadParser->new(H->new);
+  $p->utf8_mode(1);
+  $p->parse(<<"EOT");  # example from http://rt.cpan.org/Ticket/Display.html?id=27522
+\xEF\xBB\xBF<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html>
+ <head>
+ <title>
+Parkinson's disease</title>
+ <meta name="Keywords" content="brain,disease,dopamine,drug,levodopa,parkinson,patients,symptoms,,Medications, Medications">
+ </meta>
+ \t
+\t<link href="../../css/ummAdam.css" rel="stylesheet" type="text/css" />
+\t<link rel="stylesheet" rev="stylesheet" href="../../css/ummprint.css" media="print" />
+\t
+\t </head>
+ <body>
+EOT
+  $p->eof;
+
+  is($p->header("title"), "Parkinson's disease");
+  is($p->header("link")->[0], '<../../css/ummAdam.css>; rel="stylesheet"; type="text/css"');
 }

Modified: branches/upstream/libhtml-parser-perl/current/t/msie-compat.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/t/msie-compat.t?rev=27607&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/t/msie-compat.t (original)
+++ branches/upstream/libhtml-parser-perl/current/t/msie-compat.t Mon Dec  1 21:15:19 2008
@@ -3,13 +3,13 @@
 use strict;
 use HTML::Parser;
 
-use Test::More tests => 2;
+use Test::More tests => 4;
 
 my $TEXT = "";
 sub h
 {
-    my($event, $tagname, $text) = @_;
-    for ($event, $tagname, $text) {
+    my($event, $tagname, $text, @attr) = @_;
+    for ($event, $tagname, $text, @attr) {
         if (defined) {
 	    s/([\n\r\t])/sprintf "\\%03o", ord($1)/ge;
 	}
@@ -18,10 +18,10 @@
 	}
     }
 
-    $TEXT .= "[$event,$tagname,$text]\n";
+    $TEXT .= "[$event,$tagname,$text," . join(":", @attr) . "]\n";
 }
 
-my $p = HTML::Parser->new(default_h => [\&h, "event,tagname,text"]);
+my $p = HTML::Parser->new(default_h => [\&h, "event,tagname,text,\@attr"]);
 $p->parse("<a>");
 $p->parse("</a f>");
 $p->parse("</a 'foo<>' 'bar>' x>");
@@ -33,18 +33,18 @@
 $p->eof;
 
 is($TEXT, <<'EOT');
-[start_document,<undef>,]
-[start,a,<a>]
-[end,a,</a f>]
-[end,a,</a 'foo<>' 'bar>' x>]
-[end,a,</a "foo<>" "bar>" x>]
-[comment, foo bar,</ foo bar>]
-[comment, "<>" ,</ "<>" >]
-[comment,comment,<!--comment>]
-[text,<undef>,text]
-[comment,comment,<!--comment>]
-[comment,p,<p]
-[end_document,<undef>,]
+[start_document,<undef>,,]
+[start,a,<a>,]
+[end,a,</a f>,]
+[end,a,</a 'foo<>' 'bar>' x>,]
+[end,a,</a "foo<>" "bar>" x>,]
+[comment, foo bar,</ foo bar>,]
+[comment, "<>" ,</ "<>" >,]
+[comment,comment,<!--comment>,]
+[text,<undef>,text,]
+[comment,comment,<!--comment>,]
+[comment,p,<p,]
+[end_document,<undef>,,]
 EOT
 
 $TEXT = "";
@@ -52,7 +52,28 @@
 $p->eof;
 
 is($TEXT, <<'EOT');
-[start_document,<undef>,]
-[comment,comment,<!comment>]
-[end_document,<undef>,]
+[start_document,<undef>,,]
+[comment,comment,<!comment>,]
+[end_document,<undef>,,]
 EOT
+
+$TEXT = "";
+$p->parse(q(<a name=`foo bar`>));
+$p->eof;
+
+is($TEXT, <<'EOT');
+[start_document,<undef>,,]
+[start,a,<a name=`foo bar`>,name:`foo:bar`:bar`]
+[end_document,<undef>,,]
+EOT
+
+$p->backquote(1);
+$TEXT = "";
+$p->parse(q(<a name=`foo bar`>));
+$p->eof;
+
+is($TEXT, <<'EOT');
+[start_document,<undef>,,]
+[start,a,<a name=`foo bar`>,name:foo bar]
+[end_document,<undef>,,]
+EOT

Modified: branches/upstream/libhtml-parser-perl/current/t/plaintext.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/t/plaintext.t?rev=27607&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/t/plaintext.t (original)
+++ branches/upstream/libhtml-parser-perl/current/t/plaintext.t Mon Dec  1 21:15:19 2008
@@ -1,4 +1,4 @@
-use Test::More tests => 2;
+use Test::More tests => 3;
 
 use strict;
 use HTML::Parser;
@@ -43,3 +43,16 @@
 is($doc, "start_document:::start:<plaintext>::text:<foo>
 :1:end:</plaintext>::text:foo::start:<b>::end:</b>::text:
 ::end_document::");
+
+@a = ();
+$p->closing_plaintext('yep, emulate gecko (2)');
+$p->parse(<<EOT)->eof;
+<plaintext><foo>
+foo<b></b>
+EOT
+
+$doc = join(":", map { defined $_ ? $_ : "" } @a);
+
+is($doc, "start_document:::start:<plaintext>::text:<foo>
+foo<b></b>
+:1:end_document::");

Modified: branches/upstream/libhtml-parser-perl/current/t/unicode-bom.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/t/unicode-bom.t?rev=27607&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/t/unicode-bom.t (original)
+++ branches/upstream/libhtml-parser-perl/current/t/unicode-bom.t Mon Dec  1 21:15:19 2008
@@ -37,13 +37,17 @@
 $p->parse("\xFE\xFF\0\0<head>Hi there</head>");
 $p->eof;
 
+for (@warn) {
+    s/line (\d+)/line ##/g;
+}
+
 is(join("", @warn), <<EOT);
-Parsing of undecoded UTF-8 will give garbage when decoding entities at $0 line 21.
-Parsing of undecoded UTF-8 will give garbage when decoding entities at $0 line 25.
-Parsing of undecoded UTF-16 at $0 line 28.
-Parsing of undecoded UTF-16 at $0 line 31.
-Parsing of undecoded UTF-32 at $0 line 34.
-Parsing of undecoded UTF-32 at $0 line 37.
+Parsing of undecoded UTF-8 will give garbage when decoding entities at $0 line ##.
+Parsing of undecoded UTF-8 will give garbage when decoding entities at $0 line ##.
+Parsing of undecoded UTF-16 at $0 line ##.
+Parsing of undecoded UTF-16 at $0 line ##.
+Parsing of undecoded UTF-32 at $0 line ##.
+Parsing of undecoded UTF-32 at $0 line ##.
 EOT
 
 @warn = ();

Modified: branches/upstream/libhtml-parser-perl/current/t/unicode.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/t/unicode.t?rev=27607&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/t/unicode.t (original)
+++ branches/upstream/libhtml-parser-perl/current/t/unicode.t Mon Dec  1 21:15:19 2008
@@ -2,10 +2,12 @@
 
 use strict;
 use HTML::Parser;
-use Test::More tests => 103;
+use Test::More;
+BEGIN {
+  plan skip_all => "This perl does not support Unicode" if $] < 5.008;
+}
 
-SKIP: {
-skip "This perl does not support Unicode", 103 if $] < 5.008;
+plan tests => 105;
 
 my @warn;
 $SIG{__WARN__} = sub {
@@ -180,4 +182,17 @@
 ok(HTML::Entities::_probably_utf8_chunk("f\xE2\x99\xA5o\xE2\x99"));
 ok(!HTML::Entities::_probably_utf8_chunk("f\xE2"));
 ok(!HTML::Entities::_probably_utf8_chunk("f\xE2\x99"));
-}
+
+$p = HTML::Parser->new(
+    api_version => 3,
+    default_h => [\@parsed, 'event, text, tag, attr'],
+    attr_encoded => 1,
+);
+
+@warn = ();
+@parsed = ();
+
+$p->parse($doc)->eof;
+
+ok(!@warn);
+is(@parsed, 9);

Modified: branches/upstream/libhtml-parser-perl/current/util.c
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/util.c?rev=27607&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/util.c (original)
+++ branches/upstream/libhtml-parser-perl/current/util.c Mon Dec  1 21:15:19 2008
@@ -1,5 +1,4 @@
-/* $Id: util.c,v 2.30 2006/03/22 09:15:17 gisle Exp $
- *
+/* 
  * Copyright 1999-2006, Gisle Aas.
  *
  * This library is free software; you can redistribute it and/or




More information about the Pkg-perl-cvs-commits mailing list