[libcatmandu-perl] 35/101: Fix Catmandu::Importer::Text split by character
Jonas Smedegaard
dr at jones.dk
Tue Feb 23 13:43:51 UTC 2016
This is an automated email from the git hooks/post-receive script.
js pushed a commit to branch master
in repository libcatmandu-perl.
commit 405e7374ec75b0514dd9be56084b9e823a16c332
Author: Jakob Voss <voss at gbv.de>
Date: Sat Dec 5 14:36:18 2015 +0100
Fix Catmandu::Importer::Text split by character
---
lib/Catmandu/Fix/parse_text.pm | 2 ++
lib/Catmandu/Importer/Text.pm | 45 +++++++++++++++++++++++-------------------
t/Catmandu-Importer-Text.t | 14 ++++++++-----
3 files changed, 36 insertions(+), 25 deletions(-)
diff --git a/lib/Catmandu/Fix/parse_text.pm b/lib/Catmandu/Fix/parse_text.pm
index 3b00ee5..cd0a8b4 100644
--- a/lib/Catmandu/Fix/parse_text.pm
+++ b/lib/Catmandu/Fix/parse_text.pm
@@ -66,4 +66,6 @@ Catmandu::Fix::parse_text - parses a text into an array or hash of values
L<Catmandu::Fix>
+L<Catmandu::Importer::Text>
+
=cut
diff --git a/lib/Catmandu/Importer/Text.pm b/lib/Catmandu/Importer/Text.pm
index 9bf95bf..b398926 100644
--- a/lib/Catmandu/Importer/Text.pm
+++ b/lib/Catmandu/Importer/Text.pm
@@ -10,17 +10,17 @@ use namespace::clean;
with 'Catmandu::Importer';
has pattern => (
- is => 'ro',
- coerce => sub {
- $_[0] =~ /\n/m ? qr{$_[0]}x : qr{$_[0]}
+ is => 'ro',
+ coerce => sub {
+ $_[0] =~ /\n/m ? qr{$_[0]}x : qr{$_[0]};
},
);
has split => (
- is => 'ro',
+ is => 'ro',
coerce => sub {
- length $_[0] == 1 ? $_[0] : qr{$_[0]}
- }
+ length $_[0] == 1 ? quotemeta($_[0]) : qr{$_[0]};
+ }
);
sub generator {
@@ -31,21 +31,23 @@ sub generator {
state $count = 0;
state $line;
- while ( defined($line = $self->readline) ) {
+ while ( defined( $line = $self->readline ) ) {
chomp $line;
next if $pattern and $line !~ $pattern;
my $data = { _id => ++$count };
- if (@+ < 2) { # no capturing groups
+ if ( @+ < 2 ) { # no capturing groups
$data->{text} = $line;
- } elsif (%+) { # named capturing groups
- $data->{match} = { %+ };
- } else { # numbered capturing groups
+ }
+ elsif (%+) { # named capturing groups
+ $data->{match} = {%+};
+ }
+ else { # numbered capturing groups
no strict 'refs';
- $data->{match} = [ map { $$_ } 1.. at +-1 ];
+ $data->{match} = [ map { $$_ } 1 .. @+ - 1 ];
}
-
+
if ($split) {
$data->{text} = [ split $split, $line ];
}
@@ -91,10 +93,11 @@ In Perl code:
=head1 DESCRIPTION
-This L<Catmandu::Importer> reads each line of input as an item with line number
-in field C<_id> and text content in field C<text>. Line separators are not
-included. A regular expression can be specified to only import selected lines
-and parts of lines that match a given pattern.
+This L<Catmandu::Importer> reads textual input line by line. Each line is
+imported as item with line number in field C<_id> and text content in field
+C<text>. Line separators are not included. Lines can further be split by
+character or pattern and a regular expression can be specified to only import
+selected lines and to translate pattern groups to fields.
=head1 CONFIGURATION
@@ -120,7 +123,7 @@ An ARRAY of one or more fixes or file scripts to be applied to imported items.
=item split
-Character or regular expression (given as string with a least two characters),
+Single Character or regular expression (as string with a least two characters),
to split each line. Resulting parts are imported in field C<text> as array.
=item pattern
@@ -144,14 +147,16 @@ or as array with
=head1 METHODS
-Every L<Catmandu::Importer> is a L<Catmandu::Iterable> all its methods are
+Every L<Catmandu::Importer> is a L<Catmandu::Iterable> with all its methods
inherited.
=head1 SEE ALSO
L<Catmandu::Exporter::Text>
-L<awk|https://en.wikipedia.org/wiki/AWK> and
+L<Catmandu::Fix::parse_text>
+
+Unix tools L<awk|https://en.wikipedia.org/wiki/AWK> and
L<sed|https://en.wikipedia.org/wiki/Sed>
=cut
diff --git a/t/Catmandu-Importer-Text.t b/t/Catmandu-Importer-Text.t
index 373c4bc..563cd66 100644
--- a/t/Catmandu-Importer-Text.t
+++ b/t/Catmandu-Importer-Text.t
@@ -9,7 +9,7 @@ my $text = <<EOF;
Roses are red,
Violets are blue,
Sugar is sweet,
-And so are you.
+And so| are you.
EOF
sub text {
@@ -20,13 +20,13 @@ is_deeply text(), [
{_id => 1 , text => "Roses are red,"} ,
{_id => 2 , text => "Violets are blue,"},
{_id => 3 , text => "Sugar is sweet,"},
- {_id => 4 , text => "And so are you."},
+ {_id => 4 , text => "And so| are you."},
], 'simple text import';
is_deeply text( pattern => 'are' ), [
{_id => 1 , text => "Roses are red,"} ,
{_id => 2 , text => "Violets are blue,"},
- {_id => 3 , text => "And so are you."},
+ {_id => 3 , text => "And so| are you."},
], 'simple pattern match';
is_deeply text( pattern => '(\w+)(.).*\.$' ), [
@@ -52,12 +52,16 @@ is_deeply [ map { $_->{text} } @{ text( split => ' ' ) } ],
[ map { [ split ' ', $_ ] } split "\n", $text ],
'split by character';
+is_deeply [ map { $_->{text} } @{ text( split => '|' ) } ],
+ [ map { [ split '\\|', $_ ] } split "\n", $text ],
+ 'split by character (no regexp)';
+
is_deeply [ map { $_->{text} } @{ text( split => 'is|are' ) } ],
[ map { [ split /is|are/, $_ ] } split "\n", $text ],
'split by regexp';
-is_deeply text( split => ' is | are ', pattern => '^And so (.*)' ),
- [ { _id => 1, text => ['And so','you.'], match => ['are you.'] } ],
+is_deeply text( split => ' is | are ', pattern => '^And so. (.*)' ),
+ [ { _id => 1, text => ['And so|','you.'], match => ['are you.'] } ],
'split and pattern';
done_testing;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libcatmandu-perl.git
More information about the Pkg-perl-cvs-commits
mailing list