[libcatmandu-perl] 65/101: Adding seperate TSV export Tab delimited files folow a bit different rules than CSV
Jonas Smedegaard
dr at jones.dk
Tue Feb 23 13:43:56 UTC 2016
This is an automated email from the git hooks/post-receive script.
js pushed a commit to branch master
in repository libcatmandu-perl.
commit fe020d9ae79e12c199a1d8fc6e93250b936d6744
Author: Patrick Hochstenbach <patrick.hochstenbach at ugent.be>
Date: Thu Feb 4 15:35:10 2016 +0100
Adding seperate TSV export
Tab delimited files folow a bit different rules than CSV
---
lib/Catmandu/Exporter/TSV.pm | 130 +++++++++++++++++++++++++++++++++++++++++++
lib/Catmandu/Importer/CSV.pm | 46 ++++++++++-----
lib/Catmandu/Importer/TSV.pm | 120 +++++++++++++++++++++++++++++++++++++++
t/Catmandu-Exporter-TSV.t | 72 ++++++++++++++++++++++++
t/Catmandu-Importer-TSV.t | 47 ++++++++++++++++
5 files changed, 401 insertions(+), 14 deletions(-)
diff --git a/lib/Catmandu/Exporter/TSV.pm b/lib/Catmandu/Exporter/TSV.pm
new file mode 100644
index 0000000..742f6f9
--- /dev/null
+++ b/lib/Catmandu/Exporter/TSV.pm
@@ -0,0 +1,130 @@
+package Catmandu::Exporter::TSV;
+
+use Catmandu::Sane;
+
+our $VERSION = '0.9505';
+
+use Catmandu::Exporter::CSV;
+use Moo;
+use namespace::clean;
+
+with 'Catmandu::TabularExporter';
+
+has csv => (is => 'lazy');
+
+sub _build_csv {
+ my ($self) = @_;
+ my $csv = Catmandu::Exporter::CSV->new(
+ header => $self->header,
+ collect_fields => $self->collect_fields,
+ sep_char => "\t",
+ quote_char => undef,
+ escape_char => undef,
+ file => $self->file,
+ );
+ $csv->{fields} = $self->fields;
+ $csv->{columns} = $self->columns;
+ $csv;
+}
+
+sub add {
+ my ($self, $data) = @_;
+ $self->csv->add($data);
+}
+
+1;
+
+__END__
+
+=pod
+
+=head1 NAME
+
+Catmandu::Exporter::TSV - a tab-delimited TSV exporter
+
+=head1 SYNOPSIS
+
+ # On the command line
+
+ $ catmandu convert JSON to TSV --fields "id,title,year" < data.json
+
+ # In a Perl script
+
+ use Catmandu::Exporter::TSV;
+
+ my $exporter = Catmandu::Exporter::TSV->new(
+ fix => 'myfix.txt',
+ quote_char => '"',
+ sep_char => ',',
+ escape_char => '"' ,
+ always_quote => 1,
+ header => 1);
+
+ $exporter->fields("f1,f2,f3");
+ $exporter->fields([qw(f1 f2 f3)]);
+
+ $exporter->add_many($arrayref);
+ $exporter->add_many($iterator);
+ $exporter->add_many(sub { });
+
+ $exporter->add($hashref);
+
+ printf "exported %d objects\n" , $exporter->count;
+
+=head1 DESCRIPTION
+
+This C<Catmandu::Exporter> exports items as rows with tab-separated values
+(TSV). A header line with field names will be included if option C<header> is
+set. See L<Catmandu::TabularExporter> on how to configure the field mapping
+and column names. Newlines and tabulator values in field values are escaped
+as C<\n>, C<\r>, and C<\t>.
+
+=head1 CONFIGURATION
+
+=over
+
+=item file
+
+Write output to a local file given by its path or file handle. Alternatively a
+scalar reference can be passed to write to a string and a code reference can be
+used to write to a callback function.
+
+=item fh
+
+Write the output to an L<IO::Handle>. If not specified,
+L<Catmandu::Util::io|Catmandu::Util/IO-functions> is used to create the output
+handle from the C<file> argument or by using STDOUT.
+
+=item fix
+
+An ARRAY of one or more fixes or file scripts to be applied to exported items.
+
+=item encoding
+
+Binmode of the output stream C<fh>. Set to "C<:utf8>" by default.
+
+=item fields
+
+See L<Catmandu::TabularExporter>.
+
+=item columns
+
+See L<Catmandu::TabularExporter>.
+
+=item header
+
+Include a header line with column names. Enabled by default.
+
+=back
+
+=head1 METHODS
+
+See L<Catmandu::TabularExporter>, L<Catmandu::Exporter>, L<Catmandu::Addable>,
+L<Catmandu::Fixable>, L<Catmandu::Counter>, and L<Catmandu::Logger> for a full
+list of methods.
+
+=head1 SEE ALSO
+
+L<Catmandu::Importer::TSV>
+
+=cut
diff --git a/lib/Catmandu/Importer/CSV.pm b/lib/Catmandu/Importer/CSV.pm
index 6142fe3..496a922 100644
--- a/lib/Catmandu/Importer/CSV.pm
+++ b/lib/Catmandu/Importer/CSV.pm
@@ -51,13 +51,16 @@ sub _build_csv {
sub generator {
my ($self) = @_;
sub {
- state $fh = $self->fh;
+ state $line = 0;
+ state $fh = $self->fh;
state $csv = do {
if ($self->header) {
if ($self->fields) {
$self->csv->getline($fh);
+ $line++;
} else {
$self->_set_fields($self->csv->getline($fh));
+ $line++;
}
}
if ($self->fields) {
@@ -69,6 +72,7 @@ sub generator {
# generate field names if needed
unless ($self->fields) {
my $row = $csv->getline($fh) // return;
+ $line++;
my $fields = [0 .. (@$row -1)];
$self->_set_fields($fields);
$csv->column_names($fields);
@@ -78,7 +82,16 @@ sub generator {
} +{}, @$fields;
}
- $csv->getline_hr($fh);
+ my $rec = $csv->getline_hr($fh);
+ $line++;
+
+ if (defined $rec || $csv->eof()) {
+ return $rec;
+ }
+ else {
+ my ($cde, $str, $pos) = $csv->error_diag ();
+ die "at line $line (byte $pos) found a Text::CSV parse error($cde) $str";
+ }
};
}
@@ -94,29 +107,34 @@ Catmandu::Importer::CSV - Package that imports CSV data
=head1 SYNOPSIS
- use Catmandu::Importer::CSV;
-
- my $importer = Catmandu::Importer::CSV->new(file => "/foo/bar.csv");
-
- my $n = $importer->each(sub {
- my $hashref = $_[0];
- # ...
- });
-
-Convert CSV to other formats with the catmandu command line client:
+ # From the command line
- # convert CSV file to JSON
+ # convert a CSV file to JSON
catmandu convert CSV to JSON < journals.csv
+
# set column names if CSV file has no header line
echo '12157,"The Journal of Headache and Pain",2193-1801' | \
catmandu convert CSV --header 0 --fields 'id,title,issn' to YAML
+
# set field separator and quote character
echo '12157;$The Journal of Headache and Pain$;2193-1801' | \
catmandu convert CSV --header 0 --fields 'id,title,issn' --sep_char ';' --quote_char '$' to XLSX --file journal.xlsx
+
+ # Or in a Perl script
+
+ use Catmandu;
+
+ my $importer = Catmandu->importer('CSV', file => "/foo/bar.csv");
+
+ my $n = $importer->each(sub {
+ my $hashref = $_[0];
+ # ...
+ });
+
=head1 DESCRIPTION
-This L<Catmandu::Importer> imports comma-separated values (CSV). The object
+The L<Catmandu::Importer> package imports comma-separated values (CSV). The object
fields are read from the CSV header line or given via the C<fields> parameter.
Strings in CSV are quoted by C<quote_char> and fields are separated by
C<sep_char>.
diff --git a/lib/Catmandu/Importer/TSV.pm b/lib/Catmandu/Importer/TSV.pm
new file mode 100644
index 0000000..97833df
--- /dev/null
+++ b/lib/Catmandu/Importer/TSV.pm
@@ -0,0 +1,120 @@
+package Catmandu::Importer::TSV;
+
+use Catmandu::Sane;
+
+our $VERSION = '0.9505';
+
+use Catmandu::Importer::CSV;
+use Moo;
+use namespace::clean;
+
+with 'Catmandu::Importer';
+
+has header => (is => 'ro', default => sub { 1 });
+has fields => (
+ is => 'rwp',
+ coerce => sub {
+ my $fields = $_[0];
+ if (ref $fields eq 'ARRAY') { return $fields }
+ if (ref $fields eq 'HASH') { return [sort keys %$fields] }
+ return [split ',', $fields];
+ },
+);
+
+has csv => (is => 'lazy');
+
+sub _build_csv {
+ my ($self) = @_;
+ my $csv = Catmandu::Importer::CSV->new(
+ header => $self->header,
+ sep_char => "\t",
+ quote_char => undef,
+ escape_char => undef,
+ file => $self->file,
+ );
+ $csv->{fields} = $self->fields;
+ $csv;
+}
+
+sub generator {
+ my ($self) = @_;
+ $self->csv->generator;
+}
+
+1;
+
+__END__
+=pod
+
+=head1 NAME
+
+Catmandu::Importer::TSV - Package that imports tab-separated values
+
+=head1 SYNOPSIS
+
+ # From the command line
+
+ # convert a TSV file to JSON
+ catmandu convert TSV to JSON < journals.tab
+
+ # Or in a Perl script
+
+ use Catmandu;
+
+ my $importer = Catmandu->importer('TSV', file => "/foo/bar.tab");
+
+ my $n = $importer->each(sub {
+ my $hashref = $_[0];
+ # ...
+ });
+
+=head1 DESCRIPTION
+
+The L<Catmandu::Importer> package imports tab-separated values (TSV). The object
+fields are read from the TSV header line or given via the C<fields> parameter.
+
+=head1 CONFIGURATION
+
+=over
+
+=item file
+
+Read input from a local file given by its path. Alternatively a scalar
+reference can be passed to read from a string.
+
+=item fh
+
+Read input from an L<IO::Handle>. If not specified, L<Catmandu::Util::io> is used to
+create the input stream from the C<file> argument or by using STDIN.
+
+=item encoding
+
+Binmode of the input stream C<fh>. Set to C<:utf8> by default.
+
+=item fix
+
+An ARRAY of one or more fixes or file scripts to be applied to imported items.
+
+=item fields
+
+List of fields to be used as columns, given as array reference, comma-separated
+string, or hash reference. If C<header> is C<0> and C<fields> is C<undef> the
+fields will be named by column index ("0", "1", "2", ...).
+
+=item header
+
+Read fields from a header line with the column names, if set to C<1> (the
+default).
+
+=back
+
+=head1 METHODS
+
+Every L<Catmandu::Importer> is a L<Catmandu::Iterable> all its methods are
+inherited. The methods are not idempotent: CSV streams can only be read once.
+
+=head1 SEE ALSO
+
+L<Catmandu::Exporter::TSV>
+
+=cut
diff --git a/t/Catmandu-Exporter-TSV.t b/t/Catmandu-Exporter-TSV.t
new file mode 100644
index 0000000..3ec8cc4
--- /dev/null
+++ b/t/Catmandu-Exporter-TSV.t
@@ -0,0 +1,72 @@
+use strict;
+use warnings;
+use Test::More;
+use Test::Exception;
+
+my $pkg;
+BEGIN {
+ $pkg = 'Catmandu::Exporter::TSV';
+ use_ok $pkg;
+}
+require_ok $pkg;
+
+my $data = [{'a' => 'moose', b => '1'}, {'a' => 'pony', b => '2'}, {'a' => 'shrimp', b => '3'}];
+my $out = "";
+
+my $exporter = $pkg->new(file => \$out);
+isa_ok $exporter, $pkg;
+
+$exporter->add_many($data);
+$exporter->commit;
+
+my $tsv = <<EOF;
+a\tb
+moose\t1
+pony\t2
+shrimp\t3
+EOF
+
+is $out, $tsv, "TSV strings ok";
+is $exporter->count,3, "Count ok";
+
+$data = [{b => '1'}, {'a' => 'pony', b => '2'}, {'a' => 'shrimp', b => '3'}];
+$out = "";
+$exporter = $pkg->new(file => \$out);
+$exporter->add_many($data);
+$exporter->commit;
+$tsv = <<EOF;
+b
+1
+2
+3
+EOF
+is $out, $tsv, "first record determines fields without collect";
+
+$out = "";
+$exporter = $pkg->new(file => \$out, collect_fields => 1);
+$exporter->add_many($data);
+$exporter->commit;
+$tsv = <<EOF;
+a\tb
+\t1
+pony\t2
+shrimp\t3
+EOF
+is $out, $tsv, "collect field names";
+
+$out = "";
+$exporter = $pkg->new(fields => 'a,x', columns => 'Longname,X', file => \$out );
+$exporter->add( { a => 'Hello', b => 'World' } );
+$tsv = "Longname\tX\nHello\t\n";
+is $out, $tsv, "custom column names";
+
+$out="";
+my $fixer = Catmandu->fixer('if exists(foo) reject() end');
+my $importer = Catmandu->importer('JSON', file => 't/csv_test.json');
+
+$exporter = $pkg->new(file => \$out);
+$exporter->add_many($fixer->fix($importer));
+$tsv = "fob\ntest\n";
+is $out, $tsv, "custom column names as HASH with reject fix";
+
+done_testing;
diff --git a/t/Catmandu-Importer-TSV.t b/t/Catmandu-Importer-TSV.t
new file mode 100644
index 0000000..c45f74f
--- /dev/null
+++ b/t/Catmandu-Importer-TSV.t
@@ -0,0 +1,47 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+use Test::More;
+use Test::Exception;
+
+my $pkg;
+BEGIN {
+ $pkg = 'Catmandu::Importer::TSV';
+ use_ok $pkg;
+}
+require_ok $pkg;
+
+my $data = [
+ {name=>'Patrick',age=>'44'},
+ {name=>'Nicolas',age=>'39'},
+];
+
+my $tsv = <<EOF;
+name\tage
+Patrick\t44
+Nicolas\t39
+EOF
+
+my $importer = $pkg->new(file => \$tsv);
+
+isa_ok $importer, $pkg;
+
+is_deeply $importer->to_array, $data;
+
+$data = [
+ {0=>'Patrick',1=>'44'},
+ {0=>'Nicolas',1=>'39'},
+];
+
+$tsv = <<EOF;
+Patrick\t44
+Nicolas\t39
+EOF
+
+$importer = $pkg->new(file => \$tsv, header => 0);
+
+is_deeply $importer->to_array, $data;
+
+done_testing;
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libcatmandu-perl.git
More information about the Pkg-perl-cvs-commits
mailing list