r41816 - in /branches/upstream/libhtml-parser-perl/current: Changes META.yml Parser.pm TODO lib/HTML/HeadParser.pm

carnil-guest at users.alioth.debian.org carnil-guest at users.alioth.debian.org
Fri Aug 14 11:26:08 UTC 2009


Author: carnil-guest
Date: Fri Aug 14 11:25:54 2009
New Revision: 41816

URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=41816
Log:
[svn-upgrade] Integrating new upstream version, libhtml-parser-perl (3.62)

Modified:
    branches/upstream/libhtml-parser-perl/current/Changes
    branches/upstream/libhtml-parser-perl/current/META.yml
    branches/upstream/libhtml-parser-perl/current/Parser.pm
    branches/upstream/libhtml-parser-perl/current/TODO
    branches/upstream/libhtml-parser-perl/current/lib/HTML/HeadParser.pm

Modified: branches/upstream/libhtml-parser-perl/current/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/Changes?rev=41816&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/Changes (original)
+++ branches/upstream/libhtml-parser-perl/current/Changes Fri Aug 14 11:25:54 2009
@@ -1,3 +1,17 @@
+_______________________________________________________________________________
+2009-08-13  Release 3.62
+
+Ville Skyttä (4):
+      HTTP::Header doc typo fix.
+      Do not bother tracking style or script, they're ignored.
+      Bring HTML 5 head elements up to date with WD-html5-20090423.
+      Improve HeadParser performance.
+
+Gisle Aas (1):
+      Doc patch: Make it clearer what the return value from ->parse is
+
+
+
 _______________________________________________________________________________
 2009-06-20  Release 3.61
 

Modified: branches/upstream/libhtml-parser-perl/current/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/META.yml?rev=41816&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/META.yml (original)
+++ branches/upstream/libhtml-parser-perl/current/META.yml Fri Aug 14 11:25:54 2009
@@ -1,6 +1,6 @@
 --- #YAML:1.0
 name:               HTML-Parser
-version:            3.61
+version:            3.62
 abstract:           HTML parser class
 author:
     - Gisle Aas <gisle at activestate.com>

Modified: branches/upstream/libhtml-parser-perl/current/Parser.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/Parser.pm?rev=41816&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/Parser.pm (original)
+++ branches/upstream/libhtml-parser-perl/current/Parser.pm Fri Aug 14 11:25:54 2009
@@ -9,7 +9,7 @@
 use strict;
 use vars qw($VERSION @ISA);
 
-$VERSION = "3.61";
+$VERSION = "3.62";
 
 require HTML::Entities;
 
@@ -240,13 +240,12 @@
 
 =item $p->parse( $string )
 
-Parse $string as the next chunk of the HTML document.  The return
-value is normally a reference to the parser object (i.e. $p).
-Handlers invoked should not attempt to modify the $string in-place until
-$p->parse returns.
-
-If an invoked event handler aborts parsing by calling $p->eof, then
-$p->parse() will return a FALSE value.
+Parse $string as the next chunk of the HTML document.  Handlers invoked should
+not attempt to modify the $string in-place until $p->parse returns.
+
+If an invoked event handler aborts parsing by calling $p->eof, then $p->parse()
+will return a FALSE value.  Otherwise the return value is a reference to the
+parser object ($p).
 
 =item $p->parse( $code_ref )
 

Modified: branches/upstream/libhtml-parser-perl/current/TODO
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/TODO?rev=41816&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/TODO (original)
+++ branches/upstream/libhtml-parser-perl/current/TODO Fri Aug 14 11:25:54 2009
@@ -1,5 +1,5 @@
 TODO
-
+ - Check how we compare to the HTML5 parsing rules
  - limit the length of markup elements that never end.   Perhaps by
    configurable limits on the length that markup can have and still
    be recognized.  Report stuff as 'text' when this happens?
@@ -10,16 +10,12 @@
    to be "script", "style", "title", "iframe", "textarea", "xmp",
    and "plaintext".
 
+
 SGML FEATURES WE WILL PROBABLY IGNORE FOREVER
  - Empty tags: <> </>  (repeat previous start tag)
  - <foo<bar>  (same as <foo><bar>)
  - NET tags <name/.../
  
-
-POSSIBLE OPTIMIZATIONS
- - none that I can think of right now
- - run the profiler
-
 
 MINOR "BUGS" (alias FEATURES)
  - no way to clear "boolean_attribute_value".

Modified: branches/upstream/libhtml-parser-perl/current/lib/HTML/HeadParser.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/lib/HTML/HeadParser.pm?rev=41816&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/lib/HTML/HeadParser.pm (original)
+++ branches/upstream/libhtml-parser-perl/current/lib/HTML/HeadParser.pm Fri Aug 14 11:25:54 2009
@@ -87,7 +87,7 @@
 use strict;
 use vars qw($VERSION $DEBUG);
 #$DEBUG = 1;
-$VERSION = "3.60";
+$VERSION = "3.62";
 
 =item $hp = HTML::HeadParser->new
 
@@ -99,7 +99,7 @@
 of some class that is a or delegates to the C<HTTP::Headers> class.
 
 If no $header is given C<HTML::HeadParser> will create an
-C<HTTP::Header> object by itself (initially empty).
+C<HTTP::Headers> object by itself (initially empty).
 
 =cut
 
@@ -111,7 +111,10 @@
 	$header = HTTP::Headers->new;
     }
 
-    my $self = $class->SUPER::new(api_version => 2,
+    my $self = $class->SUPER::new(api_version => 3,
+				  start_h => ["start", "self,tagname,attr"],
+				  end_h   => ["end",   "self,tagname"],
+				  text_h  => ["text",  "self,text"],
 				  ignore_elements => [qw(script style)],
 				 );
     $self->{'header'} = $header;
@@ -175,7 +178,7 @@
 # <!ENTITY % head.content "TITLE & BASE?">
 # <!ELEMENT HEAD O O (%head.content;) +(%head.misc;)>
 #
-# Added in HTML 5: noscript, eventsource, command
+# Added in HTML 5 as of WD-html5-20090423: noscript, command
 
 sub start
 {
@@ -203,8 +206,7 @@
 	# This is a non-standard header.  Perhaps we should just ignore
 	# this element
 	$self->{'header'}->push_header(Isindex => $attr->{prompt} || '?');
-    } elsif ($tag =~ /^(?:title|(?:no)?script|style|object
-		      |eventsource|command)$/x) {
+    } elsif ($tag =~ /^(?:title|noscript|object|command)$/) {
 	# Just remember tag.  Initialize header when we see the end tag.
 	$self->{'tag'} = $tag;
     } elsif ($tag eq 'link') {




More information about the Pkg-perl-cvs-commits mailing list