[libcatmandu-perl] 65/101: Adding seperate TSV export Tab delimited files folow a bit different rules than CSV

Jonas Smedegaard dr at jones.dk
Tue Feb 23 13:43:56 UTC 2016


This is an automated email from the git hooks/post-receive script.

js pushed a commit to branch master
in repository libcatmandu-perl.

commit fe020d9ae79e12c199a1d8fc6e93250b936d6744
Author: Patrick Hochstenbach <patrick.hochstenbach at ugent.be>
Date:   Thu Feb 4 15:35:10 2016 +0100

    Adding seperate TSV export
    Tab delimited files folow a bit different rules than CSV
---
 lib/Catmandu/Exporter/TSV.pm | 130 +++++++++++++++++++++++++++++++++++++++++++
 lib/Catmandu/Importer/CSV.pm |  46 ++++++++++-----
 lib/Catmandu/Importer/TSV.pm | 120 +++++++++++++++++++++++++++++++++++++++
 t/Catmandu-Exporter-TSV.t    |  72 ++++++++++++++++++++++++
 t/Catmandu-Importer-TSV.t    |  47 ++++++++++++++++
 5 files changed, 401 insertions(+), 14 deletions(-)

diff --git a/lib/Catmandu/Exporter/TSV.pm b/lib/Catmandu/Exporter/TSV.pm
new file mode 100644
index 0000000..742f6f9
--- /dev/null
+++ b/lib/Catmandu/Exporter/TSV.pm
@@ -0,0 +1,130 @@
+package Catmandu::Exporter::TSV;
+
+use Catmandu::Sane;
+
+our $VERSION = '0.9505';
+
+use Catmandu::Exporter::CSV;
+use Moo;
+use namespace::clean;
+
+with 'Catmandu::TabularExporter';
+
+has csv          => (is => 'lazy');
+
+sub _build_csv {
+    my ($self) = @_;
+    my $csv = Catmandu::Exporter::CSV->new(
+        header      => $self->header,
+        collect_fields => $self->collect_fields,
+        sep_char    => "\t",
+        quote_char  => undef,
+        escape_char => undef,
+        file => $self->file,
+    );
+    $csv->{fields}  = $self->fields;
+    $csv->{columns} = $self->columns;
+    $csv;
+}
+
+sub add {
+    my ($self, $data) = @_;
+    $self->csv->add($data);
+}
+
+1;
+
+__END__
+
+=pod
+
+=head1 NAME
+
+Catmandu::Exporter::TSV - a tab-delimited TSV exporter
+
+=head1 SYNOPSIS
+
+    # On the command line
+
+    $ catmandu convert JSON to TSV --fields "id,title,year" < data.json
+
+    # In a Perl script
+
+    use Catmandu::Exporter::TSV;
+
+    my $exporter = Catmandu::Exporter::TSV->new(
+                fix => 'myfix.txt',
+                quote_char => '"',
+                sep_char => ',',
+                escape_char => '"' ,
+                always_quote => 1,
+                header => 1);
+
+    $exporter->fields("f1,f2,f3");
+    $exporter->fields([qw(f1 f2 f3)]);
+
+    $exporter->add_many($arrayref);
+    $exporter->add_many($iterator);
+    $exporter->add_many(sub { });
+
+    $exporter->add($hashref);
+
+    printf "exported %d objects\n" , $exporter->count;
+
+=head1 DESCRIPTION
+
+This C<Catmandu::Exporter> exports items as rows with tab-separated values
+(TSV). A header line with field names will be included if option C<header> is 
+set. See L<Catmandu::TabularExporter> on how to configure the field mapping 
+and column names. Newlines and tabulator values in field values are escaped 
+as C<\n>, C<\r>, and C<\t>.
+
+=head1 CONFIGURATION
+
+=over
+
+=item file
+
+Write output to a local file given by its path or file handle.  Alternatively a
+scalar reference can be passed to write to a string and a code reference can be
+used to write to a callback function.
+
+=item fh
+
+Write the output to an L<IO::Handle>. If not specified,
+L<Catmandu::Util::io|Catmandu::Util/IO-functions> is used to create the output
+handle from the C<file> argument or by using STDOUT.
+
+=item fix
+
+An ARRAY of one or more fixes or file scripts to be applied to exported items.
+
+=item encoding
+
+Binmode of the output stream C<fh>. Set to "C<:utf8>" by default.
+
+=item fields
+
+See L<Catmandu::TabularExporter>.
+
+=item columns
+
+See L<Catmandu::TabularExporter>.
+
+=item header
+
+Include a header line with column names. Enabled by default.
+
+=back
+
+=head1 METHODS
+
+See L<Catmandu::TabularExporter>, L<Catmandu::Exporter>, L<Catmandu::Addable>,
+L<Catmandu::Fixable>, L<Catmandu::Counter>, and L<Catmandu::Logger> for a full
+list of methods.
+
+=head1 SEE ALSO
+
+L<Catmandu::Importer::TSV>
+
+=cut
diff --git a/lib/Catmandu/Importer/CSV.pm b/lib/Catmandu/Importer/CSV.pm
index 6142fe3..496a922 100644
--- a/lib/Catmandu/Importer/CSV.pm
+++ b/lib/Catmandu/Importer/CSV.pm
@@ -51,13 +51,16 @@ sub _build_csv {
 sub generator {
     my ($self) = @_;
     sub {
-        state $fh = $self->fh;
+        state $line = 0;
+        state $fh  = $self->fh;
         state $csv = do {
             if ($self->header) {
                 if ($self->fields) {
                     $self->csv->getline($fh);
+                    $line++;
                 } else {
                     $self->_set_fields($self->csv->getline($fh));
+                    $line++;
                 }
             }
             if ($self->fields) {
@@ -69,6 +72,7 @@ sub generator {
         # generate field names if needed
         unless ($self->fields) {
             my $row = $csv->getline($fh) // return;
+            $line++;
             my $fields = [0 .. (@$row -1)];
             $self->_set_fields($fields);
             $csv->column_names($fields);
@@ -78,7 +82,16 @@ sub generator {
             } +{}, @$fields;
         }
 
-        $csv->getline_hr($fh);
+        my $rec = $csv->getline_hr($fh);
+        $line++;
+
+        if (defined $rec || $csv->eof()) {
+            return $rec;
+        }
+        else {
+            my ($cde, $str, $pos) = $csv->error_diag ();
+            die "at line $line (byte $pos) found a Text::CSV parse error($cde) $str";
+        }
     };
 }
 
@@ -94,29 +107,34 @@ Catmandu::Importer::CSV - Package that imports CSV data
 
 =head1 SYNOPSIS
 
-    use Catmandu::Importer::CSV;
-
-    my $importer = Catmandu::Importer::CSV->new(file => "/foo/bar.csv");
-
-    my $n = $importer->each(sub {
-        my $hashref = $_[0];
-        # ...
-    });
-
-Convert CSV to other formats with the catmandu command line client:
+    # From the command line
 
-    # convert CSV file to JSON
+    # convert a CSV file to JSON
     catmandu convert CSV to JSON < journals.csv
+
     # set column names if CSV file has no header line
     echo '12157,"The Journal of Headache and Pain",2193-1801' | \
       catmandu convert CSV --header 0 --fields 'id,title,issn' to YAML
+    
     # set field separator and quote character 
     echo '12157;$The Journal of Headache and Pain$;2193-1801' | \
       catmandu convert CSV --header 0 --fields 'id,title,issn' --sep_char ';' --quote_char '$' to XLSX --file journal.xlsx
 
+
+    # Or in a Perl script
+
+    use Catmandu;
+
+    my $importer = Catmandu->importer('CSV', file => "/foo/bar.csv");
+
+    my $n = $importer->each(sub {
+        my $hashref = $_[0];
+        # ...
+    });
+
 =head1 DESCRIPTION
 
-This L<Catmandu::Importer> imports comma-separated values (CSV).  The object
+The L<Catmandu::Importer> package imports comma-separated values (CSV).  The object
 fields are read from the CSV header line or given via the C<fields> parameter.
 Strings in CSV are quoted by C<quote_char> and fields are separated by
 C<sep_char>.
diff --git a/lib/Catmandu/Importer/TSV.pm b/lib/Catmandu/Importer/TSV.pm
new file mode 100644
index 0000000..97833df
--- /dev/null
+++ b/lib/Catmandu/Importer/TSV.pm
@@ -0,0 +1,120 @@
+package Catmandu::Importer::TSV;
+
+use Catmandu::Sane;
+
+our $VERSION = '0.9505';
+
+use Catmandu::Importer::CSV;
+use Moo;
+use namespace::clean;
+
+with 'Catmandu::Importer';
+
+has header => (is => 'ro', default => sub { 1 });
+has fields => (
+    is     => 'rwp',
+    coerce => sub {
+        my $fields = $_[0];
+        if (ref $fields eq 'ARRAY') { return $fields }
+        if (ref $fields eq 'HASH')  { return [sort keys %$fields] }
+        return [split ',', $fields];
+    },
+);
+
+has csv => (is => 'lazy');
+
+sub _build_csv {
+	my ($self) = @_;
+	my $csv = Catmandu::Importer::CSV->new(
+		header => $self->header,
+		sep_char => "\t", 
+		quote_char => undef, 
+		escape_char => undef,
+		file => $self->file,
+	);
+	$csv->{fields} = $self->fields;
+	$csv;
+}
+
+sub generator {
+	my ($self) = @_;
+	$self->csv->generator;
+}
+
+1;
+
+__END__
+=pod
+
+=head1 NAME
+
+Catmandu::Importer::TSV - Package that imports tab-separated values
+
+=head1 SYNOPSIS
+
+    # From the command line
+
+    # convert a TSV file to JSON
+    catmandu convert TSV to JSON < journals.tab
+
+    # Or in a Perl script
+
+    use Catmandu;
+
+    my $importer = Catmandu->importer('TSV', file => "/foo/bar.tab");
+
+    my $n = $importer->each(sub {
+        my $hashref = $_[0];
+        # ...
+    });
+
+=head1 DESCRIPTION
+
+The L<Catmandu::Importer> package imports tab-separated values (TSV).  The object
+fields are read from the TSV header line or given via the C<fields> parameter.
+
+=head1 CONFIGURATION
+
+=over
+
+=item file
+
+Read input from a local file given by its path. Alternatively a scalar
+reference can be passed to read from a string.
+
+=item fh
+
+Read input from an L<IO::Handle>. If not specified, L<Catmandu::Util::io> is used to
+create the input stream from the C<file> argument or by using STDIN.
+
+=item encoding
+
+Binmode of the input stream C<fh>. Set to C<:utf8> by default.
+
+=item fix
+
+An ARRAY of one or more fixes or file scripts to be applied to imported items.
+
+=item fields
+
+List of fields to be used as columns, given as array reference, comma-separated
+string, or hash reference. If C<header> is C<0> and C<fields> is C<undef> the
+fields will be named by column index ("0", "1", "2", ...).
+
+=item header
+
+Read fields from a header line with the column names, if set to C<1> (the
+default).
+
+=back
+
+=head1 METHODS
+
+Every L<Catmandu::Importer> is a L<Catmandu::Iterable> all its methods are
+inherited.  The methods are not idempotent: CSV streams can only be read once.
+
+=head1 SEE ALSO
+
+L<Catmandu::Exporter::TSV>
+
+=cut
diff --git a/t/Catmandu-Exporter-TSV.t b/t/Catmandu-Exporter-TSV.t
new file mode 100644
index 0000000..3ec8cc4
--- /dev/null
+++ b/t/Catmandu-Exporter-TSV.t
@@ -0,0 +1,72 @@
+use strict;
+use warnings;
+use Test::More;
+use Test::Exception;
+
+my $pkg;
+BEGIN {
+    $pkg = 'Catmandu::Exporter::TSV';
+    use_ok $pkg;
+}
+require_ok $pkg;
+
+my $data = [{'a' => 'moose', b => '1'}, {'a' => 'pony', b => '2'}, {'a' => 'shrimp', b => '3'}];
+my $out = "";
+
+my $exporter = $pkg->new(file => \$out);
+isa_ok $exporter, $pkg;
+
+$exporter->add_many($data);
+$exporter->commit;
+
+my $tsv = <<EOF;
+a\tb
+moose\t1
+pony\t2
+shrimp\t3
+EOF
+
+is $out, $tsv, "TSV strings ok";
+is $exporter->count,3, "Count ok";
+
+$data = [{b => '1'}, {'a' => 'pony', b => '2'}, {'a' => 'shrimp', b => '3'}];
+$out = "";
+$exporter = $pkg->new(file => \$out);
+$exporter->add_many($data);
+$exporter->commit;
+$tsv = <<EOF;
+b
+1
+2
+3
+EOF
+is $out, $tsv, "first record determines fields without collect";
+
+$out = "";
+$exporter = $pkg->new(file => \$out, collect_fields => 1);
+$exporter->add_many($data);
+$exporter->commit;
+$tsv = <<EOF;
+a\tb
+\t1
+pony\t2
+shrimp\t3
+EOF
+is $out, $tsv, "collect field names";
+
+$out = "";
+$exporter = $pkg->new(fields => 'a,x', columns => 'Longname,X', file => \$out );
+$exporter->add( { a => 'Hello', b => 'World' } );
+$tsv = "Longname\tX\nHello\t\n";
+is $out, $tsv, "custom column names";
+
+$out="";
+my $fixer    = Catmandu->fixer('if exists(foo) reject() end');
+my $importer = Catmandu->importer('JSON', file => 't/csv_test.json');
+
+$exporter = $pkg->new(file => \$out);
+$exporter->add_many($fixer->fix($importer));
+$tsv = "fob\ntest\n";
+is $out, $tsv, "custom column names as HASH with reject fix";
+
+done_testing;
diff --git a/t/Catmandu-Importer-TSV.t b/t/Catmandu-Importer-TSV.t
new file mode 100644
index 0000000..c45f74f
--- /dev/null
+++ b/t/Catmandu-Importer-TSV.t
@@ -0,0 +1,47 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+use Test::More;
+use Test::Exception;
+
+my $pkg;
+BEGIN {
+    $pkg = 'Catmandu::Importer::TSV';
+    use_ok $pkg;
+}
+require_ok $pkg;
+
+my $data = [
+   {name=>'Patrick',age=>'44'},
+   {name=>'Nicolas',age=>'39'},
+];
+
+my $tsv = <<EOF;
+name\tage
+Patrick\t44
+Nicolas\t39
+EOF
+
+my $importer = $pkg->new(file => \$tsv);
+
+isa_ok $importer, $pkg;
+
+is_deeply $importer->to_array, $data;
+
+$data = [
+   {0=>'Patrick',1=>'44'},
+   {0=>'Nicolas',1=>'39'},
+];
+
+$tsv = <<EOF;
+Patrick\t44
+Nicolas\t39
+EOF
+
+$importer = $pkg->new(file => \$tsv, header => 0);
+
+is_deeply $importer->to_array, $data;
+
+done_testing;
+

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libcatmandu-perl.git



More information about the Pkg-perl-cvs-commits mailing list