[libhtml-tidy-perl] 03/10: Add patches to make the tests pass with tidy-html5 providing libtidy

gregor herrmann gregoa at debian.org
Sat Jul 23 13:30:28 UTC 2016


This is an automated email from the git hooks/post-receive script.

gregoa pushed a commit to branch master
in repository libhtml-tidy-perl.

commit f2652c498511e9de6ce6b06eb0445cfa532ce20e
Author: Simon McVittie <smcv at debian.org>
Date:   Fri Jul 22 09:49:52 2016 +0100

    Add patches to make the tests pass with tidy-html5 providing libtidy
    
    Closes: #829409
---
 debian/changelog                                   |   8 ++
 debian/control                                     |   2 +-
 ...tidy-html5-s-differently-formatted-summar.patch |  24 ++++
 debian/patches/series                              |   6 +
 .../patches/t-allow-tidy-html5-as-generator.patch  |  47 ++++++++
 ...ert-that-tidy-will-add-a-HTML-3.2-doctype.patch |  52 +++++++++
 ...-don-t-assert-that-the-DOCTYPE-is-preserv.patch |  36 ++++++
 ...-HTML5-DOCTYPE-to-get-warnings-about-unes.patch | 128 +++++++++++++++++++++
 ...s.t-adjust-expected-result-for-tidy-html5.patch |  58 ++++++++++
 9 files changed, 360 insertions(+), 1 deletion(-)

diff --git a/debian/changelog b/debian/changelog
index 2c0a999..2101180 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -16,6 +16,14 @@ libhtml-tidy-perl (1.56-2) UNRELEASED; urgency=medium
     upstream.
   * d/patches: put all patches in the git style allowed by DEP-3,
     so they can be manipulated with gbp-pq
+  * d/p/lib-ignore-tidy-html5-s-differently-formatted-summar.patch,
+    d/p/t-allow-tidy-html5-as-generator.patch,
+    d/p/t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch,
+    d/p/t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch,
+    d/p/t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch,
+    d/p/t-venus.t-adjust-expected-result-for-tidy-html5.patch:
+    add patches to make the tests pass with tidy-html5 providing libtidy
+    (Closes: #829409)
 
  -- gregor herrmann <gregoa at debian.org>  Thu, 27 Feb 2014 22:36:29 +0100
 
diff --git a/debian/control b/debian/control
index 0ca02f8..072fde5 100644
--- a/debian/control
+++ b/debian/control
@@ -7,7 +7,7 @@ Section: perl
 Priority: optional
 Build-Depends: debhelper (>= 9.20120312),
                help2man,
-               libtidy-dev,
+               libtidy-dev (>= 1:5.2.0),
                libtest-pod-perl,
                libtest-pod-coverage-perl,
                perl (>= 5.13.11) | libtest-simple-perl (>= 0.98),
diff --git a/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch b/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch
new file mode 100644
index 0000000..04801b2
--- /dev/null
+++ b/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch
@@ -0,0 +1,24 @@
+From: Simon McVittie <smcv at debian.org>
+Date: Fri, 22 Jul 2016 09:19:08 +0100
+Subject: lib: ignore tidy-html5's differently-formatted summary line
+
+Signed-off-by: Simon McVittie <smcv at debian.org>
+---
+ lib/HTML/Tidy.pm | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/lib/HTML/Tidy.pm b/lib/HTML/Tidy.pm
+index a172ea2..aa16c79 100644
+--- a/lib/HTML/Tidy.pm
++++ b/lib/HTML/Tidy.pm
+@@ -265,6 +265,10 @@ sub _parse_errors {
+             # Summary line we don't want
+ 
+         }
++        elsif ( $line =~ /^Tidy found \d+ warnings? and \d+ errors?!/ ) {
++            # Summary line we don't want
++
++        }
+         elsif ( $line eq 'No warnings or errors were found.' ) {
+             # Summary line we don't want
+ 
diff --git a/debian/patches/series b/debian/patches/series
index 4adbeaa..0fa4da0 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,3 +1,9 @@
 remove-tidy_version.patch
 tidy-not-tidyp.patch
+lib-ignore-tidy-html5-s-differently-formatted-summar.patch
+t-allow-tidy-html5-as-generator.patch
+t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch
+t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch
+t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch
+t-venus.t-adjust-expected-result-for-tidy-html5.patch
 fix-error-message-in-webtidy
diff --git a/debian/patches/t-allow-tidy-html5-as-generator.patch b/debian/patches/t-allow-tidy-html5-as-generator.patch
new file mode 100644
index 0000000..c3d6f30
--- /dev/null
+++ b/debian/patches/t-allow-tidy-html5-as-generator.patch
@@ -0,0 +1,47 @@
+From: Simon McVittie <smcv at debian.org>
+Date: Fri, 22 Jul 2016 09:21:39 +0100
+Subject: t: allow tidy-html5 as generator
+
+Signed-off-by: Simon McVittie <smcv at debian.org>
+---
+ t/roundtrip.t | 2 +-
+ t/unicode.t   | 6 ++----
+ 2 files changed, 3 insertions(+), 5 deletions(-)
+
+diff --git a/t/roundtrip.t b/t/roundtrip.t
+index e37cb88..2590795 100644
+--- a/t/roundtrip.t
++++ b/t/roundtrip.t
+@@ -25,7 +25,7 @@ my @messages = $tidy->messages( $clean );
+ 
+ is_deeply( \@messages, [], q{The cleaned stuff shouldn't have any errors} );
+ 
+-$clean =~ s/"(HTML Tidy|tidyp).+w3\.org"/"Tidy"/;
++$clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]+)"/"Tidy"/;
+ 
+ my $expected = do { local $/ = undef; <DATA> };
+ is( $clean, $expected, 'Cleaned up properly' );
+diff --git a/t/unicode.t b/t/unicode.t
+index 2f45384..9ca6370 100644
+--- a/t/unicode.t
++++ b/t/unicode.t
+@@ -30,8 +30,7 @@ ok(utf8::is_utf8($reference), 'reference is utf8');
+ my $clean = $tidy->clean( $html );
+ ok(utf8::is_utf8($clean), 'cleaned output is also unicode');
+ 
+-$clean =~ s/"HTML Tidy.+w3\.org"/"Tidy"/;
+-$clean =~ s/"(HTML Tidy|tidyp).+w3\.org"/"Tidy"/;
++$clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]*)"/"Tidy"/;
+ is($clean, $reference, q{Cleanup didn't break anything});
+ 
+ my @messages = $tidy->messages;
+@@ -49,8 +48,7 @@ subtest 'Try send bytes to clean method.' => sub {
+     ok(!utf8::is_utf8($html), 'html is row bytes');
+     my $clean = $tidy->clean( $html );
+     ok(utf8::is_utf8($clean), 'but cleaned output is string');
+-    $clean =~ s/"HTML Tidy.+w3\.org"/"Tidy"/;
+-    $clean =~ s/"(HTML Tidy|tidyp).+w3\.org"/"Tidy"/;
++    $clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]*)"/"Tidy"/;
+     is($clean, $reference, q{Cleanup didn't break anything});
+ };
+ 
diff --git a/debian/patches/t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch b/debian/patches/t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch
new file mode 100644
index 0000000..abbaffc
--- /dev/null
+++ b/debian/patches/t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch
@@ -0,0 +1,52 @@
+From: Simon McVittie <smcv at debian.org>
+Date: Fri, 22 Jul 2016 09:32:09 +0100
+Subject: t: don't assert that tidy will add a HTML 3.2 doctype
+
+tidy-html5 adds the HTML5 doctype, <!DOCTYPE html>.
+
+Signed-off-by: Simon McVittie <smcv at debian.org>
+---
+ t/roundtrip.t | 3 ++-
+ t/wordwrap.t  | 3 ++-
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/t/roundtrip.t b/t/roundtrip.t
+index 2590795..68be07f 100644
+--- a/t/roundtrip.t
++++ b/t/roundtrip.t
+@@ -26,12 +26,13 @@ my @messages = $tidy->messages( $clean );
+ is_deeply( \@messages, [], q{The cleaned stuff shouldn't have any errors} );
+ 
+ $clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]+)"/"Tidy"/;
++$clean =~ s/<!DOCTYPE html[^>]*>/<!DOCTYPE html>/;
+ 
+ my $expected = do { local $/ = undef; <DATA> };
+ is( $clean, $expected, 'Cleaned up properly' );
+ 
+ __DATA__
+-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <meta name="generator" content="Tidy">
+diff --git a/t/wordwrap.t b/t/wordwrap.t
+index 717d895..3f4daaf 100644
+--- a/t/wordwrap.t
++++ b/t/wordwrap.t
+@@ -11,7 +11,7 @@ my $input=q{Here's some <B>ed and <BR/>eakfest MarkUp};
+ 
+ my $expected=<<'EOD';
+ <!DOCTYPE 
+-html PUBLIC "-//W3C//DTD HTML 3.2//EN">
++html>
+ <html>
+ <head>
+ <title>
+@@ -32,6 +32,7 @@ my $cfg = 't/wordwrap.cfg';
+ my $tidy = HTML::Tidy->new( {config_file => $cfg} );
+ 
+ my $result = $tidy->clean( $input );
++$result =~ s/<!DOCTYPE ?\nhtml[^>]*>/<!DOCTYPE \nhtml>/;
+ my @result = split(/\n/, $result);
+ is_deeply( \@result, \@expected, 'Cleaned stuff looks like what we expected');
+ 
diff --git a/debian/patches/t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch b/debian/patches/t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch
new file mode 100644
index 0000000..48dda49
--- /dev/null
+++ b/debian/patches/t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch
@@ -0,0 +1,36 @@
+From: Simon McVittie <smcv at debian.org>
+Date: Fri, 22 Jul 2016 09:29:39 +0100
+Subject: t/unicode.t: don't assert that the DOCTYPE is preserved
+
+tidy-html5 currently doesn't preserve user-supplied DOCTYPEs
+in output: <https://github.com/htacg/tidy-html5/issues/435>
+
+Signed-off-by: Simon McVittie <smcv at debian.org>
+---
+ t/unicode.html | 2 +-
+ t/unicode.t    | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/t/unicode.html b/t/unicode.html
+index a90f83f..c8d1804 100644
+--- a/t/unicode.html
++++ b/t/unicode.html
+@@ -1,4 +1,4 @@
+-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>日本語のホムページ</title>
+diff --git a/t/unicode.t b/t/unicode.t
+index 9ca6370..679b48a 100644
+--- a/t/unicode.t
++++ b/t/unicode.t
+@@ -53,7 +53,7 @@ subtest 'Try send bytes to clean method.' => sub {
+ };
+ 
+ __DATA__
+-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <meta name="generator" content="Tidy">
diff --git a/debian/patches/t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch b/debian/patches/t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch
new file mode 100644
index 0000000..e010fc9
--- /dev/null
+++ b/debian/patches/t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch
@@ -0,0 +1,128 @@
+From: Simon McVittie <smcv at debian.org>
+Date: Fri, 22 Jul 2016 09:27:26 +0100
+Subject: t: use a pre-HTML5 DOCTYPE to get warnings about unescaped ampersand
+
+HTML5 defines an ampersand followed by whitespace to be unambiguously
+an ampersand, matching what browsers have always done in practice.
+As a result, tidy-html5 does not warn about them when the doctype
+is either HTML5 or missing (lack of a DOCTYPE is treated as HTML5,
+on the basis that HTML5 is a closer match for what browsers actually
+do than any previous standard). Discussion here:
+<https://github.com/htacg/tidy-html5/issues/207>
+
+Adding the DOCTYPE throws off some of the line numbering, which needs
+adjusting.
+
+t/ignore-text.t also seems to rely on the missing DOCTYPE provoking a
+warning, so that it can verify that case-insensitive ignoring works.
+That warning is obviously not going to happen now that we've added a
+DOCTYPE, so add a new error that we can ignore instead.
+
+Signed-off-by: Simon McVittie <smcv at debian.org>
+---
+ t/ignore-text.t |  8 +++++---
+ t/ignore.t      | 10 +++++-----
+ t/levels.t      | 10 +++++-----
+ 3 files changed, 15 insertions(+), 13 deletions(-)
+
+diff --git a/t/ignore-text.t b/t/ignore-text.t
+index 9695a5a..a95e0e0 100644
+--- a/t/ignore-text.t
++++ b/t/ignore-text.t
+@@ -10,8 +10,8 @@ use HTML::Tidy;
+ my $html = do { local $/; <DATA> };
+ 
+ my @expected_messages = split /\n/, q{
+-DATA (24:XX) Warning: unescaped & which should be written as &amp;
+-DATA (24:XX) Warning: unescaped & which should be written as &amp;
++DATA (26:XX) Warning: unescaped & which should be written as &amp;
++DATA (26:XX) Warning: unescaped & which should be written as &amp;
+ };
+ 
+ chomp @expected_messages;
+@@ -22,7 +22,7 @@ IGNORE_BOGOTAG: {
+     isa_ok( $tidy, 'HTML::Tidy' );
+ 
+     $tidy->ignore( text => qr/bogotag/ );
+-    $tidy->ignore( text => [ qr/UNESCAPED/, qr/doctype/i ] );
++    $tidy->ignore( text => [ qr/UNESCAPED/, qr/case-insensitive/i ] );
+     # The qr/UNESCAPED/ should not ignore anything because there's no /i
+     my $rc = $tidy->parse( 'DATA', $html );
+     ok( $rc, 'Parsed OK' );
+@@ -44,6 +44,7 @@ sub munge_returned {
+     }
+ }
+ __DATA__
++<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
+ <HTML>
+ <HEAD>
+ 	<META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=iso-8859-1">
+@@ -67,6 +68,7 @@ DIV.TOC P {
+ </HEAD>
+ <BODY BGCOLOR="white">
+ <BOGOTAG>
++<CASE-INSENSITIVE>
+     <IMG SRC="/pix/petdance-logo-400x312.gif" HEIGHT=312 WIDTH=400 ALT="Andy & Amy's Pet Supplies & Dance Instruction" ALIGN=RIGHT>
+ 	<DIV CLASS="TOC">
+ 	<h2>Perl, Programming & Writing</h2>
+diff --git a/t/ignore.t b/t/ignore.t
+index 3991733..c0a1317 100644
+--- a/t/ignore.t
++++ b/t/ignore.t
+@@ -10,16 +10,15 @@ use HTML::Tidy;
+ my $html = do { local $/ = undef; <DATA> };
+ 
+ my @expected_warnings = split /\n/, q{
+-- (1:1) Warning: missing <!DOCTYPE> declaration
+-- (23:1) Warning: discarding unexpected <bogotag>
+-- (24:XX) Warning: unescaped & which should be written as &amp;
+-- (24:XX) Warning: unescaped & which should be written as &amp;
++- (24:1) Warning: discarding unexpected <bogotag>
++- (25:XX) Warning: unescaped & which should be written as &amp;
++- (25:XX) Warning: unescaped & which should be written as &amp;
+ };
+ chomp @expected_warnings;
+ shift @expected_warnings; # First one's blank
+ 
+ my @expected_errors = split /\n/, q{
+-- (23:1) Error: <bogotag> is not recognized!
++- (24:1) Error: <bogotag> is not recognized!
+ };
+ chomp @expected_errors;
+ shift @expected_errors; # First one's blank
+@@ -71,6 +70,7 @@ sub munge_returned {
+     }
+ }
+ __DATA__
++<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
+ <HTML>
+ <HEAD>
+ 	<META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=iso-8859-1">
+diff --git a/t/levels.t b/t/levels.t
+index 01aeb3b..2ee3162 100644
+--- a/t/levels.t
++++ b/t/levels.t
+@@ -13,11 +13,10 @@ my $rc = $tidy->parse( '-', <DATA> );
+ ok( $rc, 'Parsed OK' );
+ 
+ my @expected = split /\n/, q{
+-- (1:1) Warning: missing <!DOCTYPE> declaration
+-- (23:1) Error: <bogotag> is not recognized!
+-- (23:1) Warning: discarding unexpected <bogotag>
+-- (24:XX) Warning: unescaped & which should be written as &amp;
+-- (24:XX) Warning: unescaped & which should be written as &amp;
++- (24:1) Error: <bogotag> is not recognized!
++- (24:1) Warning: discarding unexpected <bogotag>
++- (25:XX) Warning: unescaped & which should be written as &amp;
++- (25:XX) Warning: unescaped & which should be written as &amp;
+ };
+ chomp @expected;
+ shift @expected; # First one's blank
+@@ -41,6 +40,7 @@ sub munge_returned {
+ }
+ 
+ __DATA__
++<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
+ <HTML>
+ <HEAD>
+ 	<META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=iso-8859-1">
diff --git a/debian/patches/t-venus.t-adjust-expected-result-for-tidy-html5.patch b/debian/patches/t-venus.t-adjust-expected-result-for-tidy-html5.patch
new file mode 100644
index 0000000..004b09f
--- /dev/null
+++ b/debian/patches/t-venus.t-adjust-expected-result-for-tidy-html5.patch
@@ -0,0 +1,58 @@
+From: Simon McVittie <smcv at debian.org>
+Date: Fri, 22 Jul 2016 09:34:53 +0100
+Subject: t/venus.t: adjust expected result for tidy-html5
+
+tidy-html5 allows arbitrary block content inside <address>, whereas
+traditional tidy only allowed inline content.
+Reference: <https://github.com/htacg/tidy-html5/issues/55>
+
+This change will break with traditional tidy, and it isn't clear
+to me how to remain compatible with both.
+
+Signed-off-by: Simon McVittie <smcv at debian.org>
+---
+ t/venus.t | 32 +++++++++++++++++++-------------
+ 1 file changed, 19 insertions(+), 13 deletions(-)
+
+diff --git a/t/venus.t b/t/venus.t
+index 41ee597..bd94d4b 100755
+--- a/t/venus.t
++++ b/t/venus.t
+@@ -72,18 +72,24 @@ __DATA__
+       <a href="../../General/Credits.html">Credits</a> | 
+       <a href="../../General/Feedback.html">Feedback</a> |</h4>
+     </center>
+-    <center>
+-      <p>
+-        <img src="../../WetlandGraphics/GoldbarThread.gif" width="648" height="4" align="bottom" />
+-      </p>
+-    </center>
+-    <div align="center"></div>
+-    <center>
+-      <address>Created for the Museums in the Classroom program sponsored by Illinois State Board of Education, the Brookfield Zoo, the Illinois State Museum., and Kildeer Countryside CCSD 96.</address>
+-      <address> </address>
+-      <address>Authors: Twin Groves Museums in the Classroom Team,</address>
+-      <address>School: Twin Groves Junior High School, Buffalo Grove, Illinois 60089</address>
+-    </center>
+-    <center>Created: 27 June 1998- Updated: 6 October 2003</center>
++    <address>
++      <center>
++        <p>
++          <img src="../../WetlandGraphics/GoldbarThread.gif" width="648" height="4" align="bottom" />
++        </p>
++      </center>
++      <div align="center"></div>
++      <address>
++        <center>
++          <address>Created for the Museums in the Classroom program sponsored by Illinois State Board of Education, the Brookfield Zoo, the Illinois State Museum., and Kildeer Countryside CCSD 96.</address>
++          <address> </address>
++          <address>Authors: Twin Groves Museums in the Classroom Team,</address>
++          <address>School: Twin Groves Junior High School, Buffalo Grove, Illinois 60089</address>
++        </center>
++      </address>
++      <address>
++        <center>Created: 27 June 1998- Updated: 6 October 2003</center>
++      </address>
++    </address>
+   </body>
+ </html>

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libhtml-tidy-perl.git



More information about the Pkg-perl-cvs-commits mailing list