[Po4a-devel][CVS] po4a/lib/Locale/Po4a Xml.pm,1.3,1.4

Sun, 25 Jul 2004 09:07:12 +0000

Update of /cvsroot/po4a/po4a/lib/Locale/Po4a
In directory haydn:/tmp/cvs-serv19936

Modified Files:
	Xml.pm 
Log Message:
- Integrated TODO list into the documentation
- Implemented the "caseinsensitive" option
- Now get_string_until receives a hash of options, and there's a new "unquoted"
  option, to skip matches between quotes. It's now used to improve the end of
  tag search (in case there's a quoted > inside an attribute value, for example)
- tag_in_list adapted to work with the proposed tag options syntax (w<...>)
- Changed found_string to receive a hash with info about what it has found, and
  it creates the comment (doesn't receive it). This way, derived modules have
  all the information to create custom comments
- Implemented the w/W options in front of the tags, to override the default
  wrapping (proposed by Martin)


Index: Xml.pm
===================================================================
RCS file: /cvsroot/po4a/po4a/lib/Locale/Po4a/Xml.pm,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4

--- Xml.pm	19 Jul 2004 14:59:53 -0000	1.3
+++ Xml.pm	25 Jul 2004 09:07:10 -0000	1.4
@@ -114,9 +114,10 @@
 =item wrap
 
 Canonizes the string to translate, considering that whitespaces are not
-important, and wraps the translated document.
+important, and wraps the translated document. This option can be overridden
+by custom tag options. See the "tags" option below.
 
-=item caseinsensitive (TODO)
+=item caseinsensitive
 
 It makes the tags and attributes searching to work in a case insensitive
 way.  If it's defined, it will treat <BooK>laNG and <BOOK>Lang as <book>lang.
@@ -139,6 +140,12 @@
 form <aaa>, but you can join some (<bbb><aaa>) to say that the contents of
 the tag <aaa> will only be translated when it's into a <bbb> tag.
 
+You can also specify some tag options by putting some characters in front of
+the tag hierarchy. For example, you can put 'w' (wrap) or 'W' (don't wrap)
+to override the default behavior specified by the global "wrap" option.
+
+Example: W<chapter><title>
+
 =item attributes (TODO)
 
 Space-separated list of the tag's attributes you want to translate.  You can
@@ -211,14 +218,16 @@
 There you can control which strings you want to translate, and perform
 transformations to them before or after the translation itself.
 
-It receives the extracted text, the reference on where it was, and a
-comment that tells if it's an attribute value, a tag content... It must
-return the text that will replace the original in the translated document.
-Here's a basic example of this function:
+It receives the extracted text, the reference on where it was, and a hash
+that contains extra information to control what strings to translate, how
+to translate them and to generate the comment.
+
+It must return the text that will replace the original in the translated
+document. Here's a basic example of this function:
 
   sub found_string {
-    my ($self,$text,$ref,$comment)=@_;
-    $text = $self->translate($text,$ref,$comment,
+    my ($self,$text,$ref,$options)=@_;
+    $text = $self->translate($text,$ref,"type".$options->{'type'},
       'wrap'=>$self->{options}{'wrap'});
     return $text;
   }
@@ -229,9 +238,25 @@
 =cut
 
 sub found_string {
-	my ($self,$text,$ref,$comment)=@_;
-	$text = $self->translate($text,$ref,$comment,
-		'wrap'=>$self->{options}{'wrap'});
+	my ($self,$text,$ref,$options)=@_;
+
+	my $comment;
+	my $wrap = $self->{options}{'wrap'};
+
+	if ($options->{'type'} eq "tag") {
+		$comment = "Contents of: ".$self->get_path;
+
+		if($options->{'tag_options'} =~ /w/) {
+			$wrap = 1;
+		}
+		if($options->{'tag_options'} =~ /W/) {
+			$wrap = 0;
+		}
+	} else {
+		die dgettext("po4a","po4a::xml: Internal error: unknown string type.")."\n";
+	}
+
+	$text = $self->translate($text,$ref,$comment,'wrap'=>$wrap);
 	return $text;
 }
 
@@ -296,7 +321,7 @@
 #	{	beginning	=> "?",
 #		end		=> "?",
 #		breaking	=> 1,
-#		f_translate	=> \&tag_trans_...},
+#		f_translate	=> \&tag_trans_procins},
 	{	beginning	=> "!DOCTYPE",
 		end		=> "]",
 		breaking	=> 1,
@@ -318,7 +343,7 @@
 
 sub tag_extract_comment {
 	my ($self,$remove)=(shift,shift);
-	my ($eof,@tag)=$self->get_string_until('-->',1,$remove);
+	my ($eof,@tag)=$self->get_string_until('-->',{include=>1,remove=>$remove,unquoted=>1});
 	return ($eof,@tag);
 }
 
@@ -342,7 +367,12 @@
 
 sub tag_extract_doctype {
 	my ($self,$remove)=(shift,shift);
-	my ($eof,@tag)=$self->get_string_until(']>',1,$remove);
+	my ($eof,@tag)=$self->get_string_until(']>',{include=>1,unquoted=>1});
+	if ($eof) {
+		($eof,@tag)=$self->get_string_until('>',{include=>1,remove=>$remove,unquoted=>1});
+	} else {
+		($eof,@tag)=$self->get_string_until(']>',{include=>1,remove=>$remove,unquoted=>1});
+	}
 	return ($eof,@tag);
 }
 
@@ -501,7 +531,7 @@
 		($match1,$match2) = ($tag_types[$i]->{beginning},$tag_types[$i]->{end});
 		if ($line =~ /^<\Q$match1\E/) {
 			if (!defined($tag_types[$i]->{f_extract})) {
-				my ($eof,@lines) = $self->get_string_until(">",1,0);
+				my ($eof,@lines) = $self->get_string_until(">",{include=>1,unquoted=>1});
 				my $line2 = $self->join_lines(@lines);
 #print substr($line2,length($line2)-1-length($match2),1+length($match2))."\n";
 				if (defined($line2) and $line2 =~ /\Q$match2\E>$/) {
@@ -544,7 +574,7 @@
 	if (defined($tag_types[$type]->{f_extract})) {
 		($eof,@tag) = &{$tag_types[$type]->{f_extract}}($self,$remove);
 	} else {
-		($eof,@tag) = $self->get_string_until($match2.">",1,$remove);
+		($eof,@tag) = $self->get_string_until($match2.">",{include=>1,remove=>$remove,unquoted=>1});
 	}
 	$tag[0] =~ /^<\Q$match1\E(.*)$/s;
 	$tag[0] = $1;
@@ -621,9 +651,11 @@
 
 =item tag_in_list
 
-This function returns a boolean value that says if the first argument (a tag
+This function returns a string value that says if the first argument (a tag
 hierarchy) matches any of the tags from the second argument (a list of tags
-or tag hierarchies).
+or tag hierarchies). If it doesn't match, it returns 0. Else, it returns the
+matched tag options (the characters in front of the tag) or 1 (if that tag
+doesn't have options).
 
 =cut
 
@@ -633,12 +665,25 @@
 	my $i = 0;
 	
 	while (!$found && $i < @list) {
-		my $element = $list[$i];
-		if ( $tag =~ /\Q$element\E$/ ) {
-#print $tag."==".$element."\n";
-			$found = 1;
+		$list[$i] =~ /(.*?)(<.*)/;
+		my $options = $1;
+		my $element = $2;
+		if ($self->{options}{'caseinsensitive'}) {
+			if ( $tag =~ /\Q$element\E$/i ) {
+				$found = 1;
+			}
+		} else {
+			if ( $tag =~ /\Q$element\E$/ ) {
+				$found = 1;
+			}
+		}
+		if ($found) {
+			if ($options) {
+				$found = $options;
+			}
+		} else {
+			$i++;
 		}
-		$i++;
 	}
 	return $found;
 }
@@ -708,15 +753,15 @@
 sub treat_content {
 	my $self = shift;
 	my $blank="";
-	my ($eof,@paragraph)=$self->get_string_until('<',0,1);
+	my ($eof,@paragraph)=$self->get_string_until('<',{remove=>1});
 
 	while (!$eof and !$self->breaking_tag) {
 		my @text;
 		# Append the found inline tag
-		($eof,@text)=$self->get_string_until('>',1,1);
+		($eof,@text)=$self->get_string_until('>',{include=>1,remove=>1,unquoted=>1});
 		push @paragraph, @text;
 
-		($eof,@text)=$self->get_string_until('<',0,1);
+		($eof,@text)=$self->get_string_until('<',{remove=>1});
 		if ($#text > 0) {
 			push @paragraph, @text;
 		}
@@ -767,14 +812,29 @@
 
 	if ( length($self->join_lines(@paragraph)) > 0 ) {
 		my $struc = $self->get_path;
-		my $inlist = $self->tag_in_list($struc,@{$self->{tags}});
+		my $options = $self->tag_in_list($struc,@{$self->{tags}});
+		my $inlist;
+		if ($options eq 0) {
+			$inlist = 0;
+			$options = "";
+		} elsif ($options eq 1) {
+			$inlist = 1;
+			$options = "";
+		} else {
+			$inlist = 1;
+		}
 #print $self->{options}{'tagsonly'}."==".$inlist."\n";
 		if ( $self->{options}{'tagsonly'} eq $inlist ) {
 #print "YES\n";
-			$self->pushline($self->found_string($self->join_lines(@paragraph),
-				$paragraph[1],"Content of tag ".$struc));
+			$self->pushline($self->found_string(
+				$self->join_lines(@paragraph),
+				$paragraph[1], {
+					type=>"tag",
+					tag_options=>$options
+				}));
 		} else {
 #print "NO\n";
+#TODO: should print that this tag isn't translated in verbose mode
 			$self->pushline($self->join_lines(@paragraph));
 		}
 	}
@@ -818,20 +878,35 @@
 =item get_string_until
 
 This function returns an array with the lines (and references) from the input
-stream until it finds the first argument.  The second argument is a boolean
-that says if the returned array should contain the searched text or not.  The
-third argument is another boolean that says if the returned stream should be
-removed from the input or not.
+stream until it finds the first argument.  The second argument is an options
+hash. Value 0 means disabled (the default) and 1, enabled.
+
+The valid options are:
+
+=over 4
+
+=item include
+
+This makes the returned array contain the searched text
+
+=item remove
+
+This removes the returned stream from the input
+
+=item unquoted
+
+This ensures that the searched text is outside any quotes
 
 =cut
 
 sub get_string_until {
-	# search = the text we want to find (at the moment it can't have \n's)
-	# include = include the searched text in the returned paragraph
-	# remove = remove the returned text from input or leave it intact
-	my ($self,$search,$include,$remove) = (shift,shift,shift,shift);
-	if (!defined($include)) { $include = 0; }
-	if (!defined($remove)) { $remove = 0; }
+	my ($self,$search) = (shift,shift);
+	my $options = shift;
+	my ($include,$remove,$unquoted) = (0,0,0);
+
+	if (defined($options->{include})) { $include = $options->{include}; }
+	if (defined($options->{remove})) { $remove = $options->{remove}; }
+	if (defined($options->{unquoted})) { $unquoted = $options->{unquoted}; }
 
 	my ($line,$ref) = $self->shiftline();
 	my (@text,$paragraph);
@@ -840,9 +915,16 @@
 	while (defined($line) and !$found) {
 		push @text, ($line,$ref);
 		$paragraph .= $line;
-		if ( $paragraph =~ /.*\Q$search\E.*/s ) {
-			$found = 1;
+		if ($unquoted) {
+			if ( $paragraph =~ /^((\".*?\")|(\'.*?\')|[^\"\'])*\Q$search\E.*/s ) {
+				$found = 1;
+			}
 		} else {
+			if ( $paragraph =~ /.*\Q$search\E.*/s ) {
+				$found = 1;
+			}
+		}
+		if (!$found) {
 			($line,$ref)=$self->shiftline();
 		}
 	}
@@ -850,15 +932,21 @@
 	if (!defined($line)) { $eof = 1; }
 
 	if ( $found ) {
-		if(!$include) {
-			$text[$#text-1] =~ /(.*?)(\Q$search\E.*)/s;
+		$line = "";
+		if($unquoted) {
+			$text[$#text-1] =~ /^(((\".*?\")|(\'.*?\')|[^\"\'])*?\Q$search\E)(.*)/s;
 			$text[$#text-1] = $1;
-			$line = $2;
+			$line = $5;
 		} else {
 			$text[$#text-1] =~ /(.*?\Q$search\E)(.*)/s;
 			$text[$#text-1] = $1;
 			$line = $2;
 		}
+		if(!$include) {
+			$text[$#text-1] =~ /(.*)(\Q$search\E.*)/s;
+			$text[$#text-1] = $1;
+			$line = $2.$line;
+		}
 		if (defined($line) and ($line ne "")) {
 			$self->unshiftline ($line,$text[$#text]);
 		}
@@ -898,6 +986,19 @@
 Well... hmm... If this works for you now, you're using a very simple
 document format ;)
 
+=head1 TODO LIST
+
+ATTRIBUTES
+
+MODIFY TAG TYPES FROM INHERITED MODULES
+(move the tag_types structure inside the $self hash?)
+
+XML HEADER (ENCODING)
+DOCTYPE (ENTITIES)
+INCLUDED FILES
+
+breaking tag inside non-breaking tag (possible?) causes ugly comments
+
 =head1 SEE ALSO
 
 L<po4a(7)>, L<Locale::Po4a::TransTranctor(3pm)>.
@@ -916,27 +1017,3 @@
 =cut
 
 1;
-
-
-##### TODO LIST #####
-#
-#OPTIONS
-#caseinsensitive
-#attributes
-#
-#MODIFY TAG TYPES FROM INHERITED MODULES
-#(move the tag_types structure inside the $self hash?)
-#
-#DOCTYPE (ENTITIES)
-#INCLUDED FILES
-#
-#XML HEADER (ENCODING)
-#
-#breaking tag inside non-breaking tag (possible?) causes ugly comments
-
-#               <abbrev>
-#               W<acronym>
-#               W<arg>
-#               <artheader>
-#    with 'w' meaning wrap (by default) and 'W' meaning don't wrap.
-# there should be the module option to select the default behavior