[libcatmandu-marc-perl] 21/208: Refactoring all marc processing outside emit functions
Jonas Smedegaard
dr at jones.dk
Sat Oct 28 03:42:31 UTC 2017
This is an automated email from the git hooks/post-receive script.
js pushed a commit to annotated tag upstream/1.19
in repository libcatmandu-marc-perl.
commit 23cfcdb846c96f60baf80b53f9e00df9a4be4ba4
Author: Patrick Hochstenbach <patrick.hochstenbach at ugent.be>
Date: Thu Jun 30 13:27:12 2016 +0200
Refactoring all marc processing outside emit functions
---
README.md | 12 +-
lib/Catmandu/Fix/Inline/marc_add.pm | 56 +--
lib/Catmandu/Fix/Inline/marc_map.pm | 126 +------
lib/Catmandu/Fix/Inline/marc_remove.pm | 31 +-
lib/Catmandu/Fix/Inline/marc_set.pm | 111 +-----
lib/Catmandu/Fix/marc_add.pm | 54 +--
lib/Catmandu/Fix/marc_decode_dollar_subfields.pm | 40 +--
lib/Catmandu/Fix/marc_in_json.pm | 88 +----
lib/Catmandu/Fix/marc_map.pm | 162 ++-------
lib/Catmandu/Fix/marc_remove.pm | 101 +-----
lib/Catmandu/Fix/marc_set.pm | 117 +------
lib/Catmandu/Fix/marc_xml.pm | 15 +-
lib/Catmandu/MARC.pm | 422 ++++++++++++++++++++++-
t/07-inline-fix.t | 6 +-
t/18-inlineable.t | 4 +-
15 files changed, 520 insertions(+), 825 deletions(-)
diff --git a/README.md b/README.md
index 53721b5..29d77d4 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@ Catmandu::MARC - Catmandu modules for working with MARC data
$ catmandu convert MARC --fix myfixes.txt < data.mrc
myfixes:
-
+
marc_map("245a", title)
marc_map("5**", note.$append)
marc_map('710','my.authors.$append')
@@ -60,13 +60,13 @@ Catmandu::MARC - Catmandu modules for working with MARC data
# DESCRIPTION
-With Catmandu, LibreCat tools abstract digital library and research services as data
-warehouse processes. As stores we reuse MongoDB or ElasticSearch providing us with
-developer friendly APIs. Catmandu works with international library standards such as
-MARC, MODS and Dublin Core, protocols such as OAI-PMH, SRU and open repositories such
+With Catmandu, LibreCat tools abstract digital library and research services as data
+warehouse processes. As stores we reuse MongoDB or ElasticSearch providing us with
+developer friendly APIs. Catmandu works with international library standards such as
+MARC, MODS and Dublin Core, protocols such as OAI-PMH, SRU and open repositories such
as DSpace and Fedora. And, of course, we speak the evolving Semantic Web.
-Follow us on [http://librecat.org](http://librecat.org) and read an introduction into Catmandu data
+Follow us on [http://librecat.org](http://librecat.org) and read an introduction into Catmandu data
processing at [https://github.com/LibreCat/Catmandu/wiki](https://github.com/LibreCat/Catmandu/wiki).
# SEE ALSO
diff --git a/lib/Catmandu/Fix/Inline/marc_add.pm b/lib/Catmandu/Fix/Inline/marc_add.pm
index 6cf048e..245a09c 100644
--- a/lib/Catmandu/Fix/Inline/marc_add.pm
+++ b/lib/Catmandu/Fix/Inline/marc_add.pm
@@ -1,8 +1,6 @@
package Catmandu::Fix::Inline::marc_add;
-use Clone qw(clone);
-use Carp;
-use Catmandu::Util qw(:is);
+use Catmandu::MARC;
require Exporter;
@ISA = qw(Exporter);
@@ -13,52 +11,12 @@ our $VERSION = '0.219';
sub marc_add {
my ($data,$marc_path, at subfields) = @_;
- my (%subfields) = @subfields;
- my $ret = defined $data ? clone($data) : { record => [] };
-
- $ret->{'record'} = [] unless $ret->{'record'};
- croak "invalid marc path" unless $marc_path =~ /^\w{3}$/;
-
- my @field = ();
- push @field , $marc_path;
- push @field , $subfields{ind1} // ' ';
- push @field , $subfields{ind2} // ' ';
- for (my $i = 0 ; $i < @subfields ; $i += 2) {
- my $code = $subfields[$i];
- next unless length $code == 1;
- my $value = $subfields[$i+1];
-
- if ($value =~ /^\$\.(\S+)/) {
- my $path = $1;
- $value = Catmandu::Util::data_at($path,$data);
- }
-
- if (is_array_ref $value) {
- for (@$value) {
- push @field , $code;
- push @field , $_;
- }
- }
- elsif (is_hash_ref $value) {
- for (keys %$value) {
- push @field , $code;
- push @field , $value->{$_};
- }
- }
- elsif (is_value($value) && length($value) > 0) {
- push @field , $code;
- push @field , $value;
- }
- }
-
- push @{ $ret->{record} } , \@field;
-
- return $ret;
+ return Catmandu::MARC::marc_add($data, $marc_path, @subfields);
}
=head1 NAME
-Catmandu::Fix::Inline::marc_add- A marc_add-er for Perl scripts
+Catmandu::Fix::Inline::marc_add- A marc_add-er for Perl scripts (DEPRECATED)
=head1 SYNOPSIS
@@ -70,11 +28,13 @@ Catmandu::Fix::Inline::marc_add- A marc_add-er for Perl scripts
# Set to a copy of a deeply nested JSON path
my $data = marc_add($data, '245', a => '$.my.deep.field');
+=head1 DEPRECATED
+
+This module is deprecated. Use the inline functionality of L<Catmandu::Fix::marc_add> instead.
+
=head1 SEE ALSO
-L<Catmandu::Fix::Inline::marc_set> ,
-L<Catmandu::Fix::Inline::marc_map> ,
-L<Catmandu::Fix::Inline::marc_remove>
+L<Catmandu::Fix::marc_add>
=cut
diff --git a/lib/Catmandu/Fix/Inline/marc_map.pm b/lib/Catmandu/Fix/Inline/marc_map.pm
index dcfbc32..1328570 100644
--- a/lib/Catmandu/Fix/Inline/marc_map.pm
+++ b/lib/Catmandu/Fix/Inline/marc_map.pm
@@ -1,6 +1,6 @@
=head1 NAME
-Catmandu::Fix::Inline::marc_map - A marc_map-er for Perl scripts
+Catmandu::Fix::Inline::marc_map - A marc_map-er for Perl scripts (DEPRECATED)
=head1 SYNOPSIS
@@ -24,7 +24,7 @@ Catmandu::Fix::Inline::marc_map - A marc_map-er for Perl scripts
# Get the 245-$c$b$a subfields orders as given in the mapping
$str = marc_map($data,'245cba', -pluck => 1);
- # Get the 008 characters 35-35
+ # Get the 008 characters 35-35
$str = marc_map($data,'008_/35-35');
# Get all 100 subfields except the digits
@@ -46,16 +46,19 @@ Catmandu::Fix::Inline::marc_map - A marc_map-er for Perl scripts
['245' , ' ', ' ' , 'a' , 'Learning Per' , 'c', '/ by Randal L. Schwartz'],
]};
+=head1 DEPRECATED
+
+This module is deprecated. Use the inline functionality of L<Catmandu::Fix::marc_map> instead.
+
=head1 SEE ALSO
-L<Catmandu::Fix::Inline::marc_set> ,
-L<Catmandu::Fix::Inline::marc_add> ,
-L<Catmandu::Fix::Inline::marc_remove>
+L<Catmandu::Fix::Inline::marc_map>
=cut
package Catmandu::Fix::Inline::marc_map;
+use Catmandu::MARC;
require Exporter;
@ISA = qw(Exporter);
@@ -67,118 +70,7 @@ our $VERSION = '0.219';
sub marc_map {
my ($data,$marc_path,%opts) = @_;
- return unless exists $data->{'record'};
-
- my $record = $data->{'record'};
-
- unless (defined $record && ref $record eq 'ARRAY') {
- return wantarray ? () : undef;
- }
-
- my $split = $opts{'-split'};
- my $join_char = $opts{'-join'} // '';
- my $pluck = $opts{'-pluck'};
- my $value_set = $opts{'-value'};
- my $attrs = {};
-
- if ($marc_path =~ /(\S{3})(\[([^,])?,?([^,])?\])?([_a-z0-9^]+)?(\/(\d+)(-(\d+))?)?/) {
- $attrs->{field} = $1;
- $attrs->{ind1} = $3;
- $attrs->{ind2} = $4;
- $attrs->{subfield_regex} = defined $5 ? "[$5]" : "[a-z0-9_]";
- $attrs->{from} = $7;
- $attrs->{to} = $9;
- } else {
- return wantarray ? () : undef;
- }
-
- $attrs->{field_regex} = $attrs->{field};
- $attrs->{field_regex} =~ s/\*/./g;
-
- my $add_subfields = sub {
- my $var = shift;
- my $start = shift;
-
- my @v = ();
-
- if ($pluck) {
- # Treat the subfield_regex as a hash index
- my $_h = {};
- for (my $i = $start; $i < @$var; $i += 2) {
- push @{ $_h->{ $var->[$i] } } , $var->[$i + 1];
- }
- for my $c (split('',$attrs->{subfield_regex})) {
- push @v , @{ $_h->{$c} } if exists $_h->{$c};
- }
- }
- else {
- for (my $i = $start; $i < @$var; $i += 2) {
- if ($var->[$i] =~ /$attrs->{subfield_regex}/) {
- push(@v, $var->[$i + 1]);
- }
- }
- }
-
- return \@v;
- };
-
- my @vals = ();
-
- for my $var (@$record) {
- next if $var->[0] !~ /$attrs->{field_regex}/;
- next if defined $attrs->{ind1} && $var->[1] ne $attrs->{ind1};
- next if defined $attrs->{ind2} && $var->[2] ne $attrs->{ind2};
-
- my $v;
-
- if ($value_set) {
- for (my $i = 3; $i < @$var; $i += 2) {
- if ($var->[$i] =~ /$attrs->{subfield_regex}/) {
- $v = $value_set;
- last;
- }
- }
- }
- else {
- if ($var->[0] =~ /LDR|00./) {
- $v = $add_subfields->($var,3);
- }
- elsif (defined $var->[3] && $var->[3] eq '_') {
- $v = $add_subfields->($var,5);
- }
- else {
- $v = $add_subfields->($var,3);
- }
-
- if (@$v) {
- if (!$split) {
- $v = join $join_char, @$v;
- }
-
- if (defined(my $off = $attrs->{from})) {
- $v = join $join_char, @$v if (ref $v eq 'ARRAY');
- my $len = defined $attrs->{to} ? $attrs->{to} - $off + 1 : 1;
- $v = substr($v,$off,$len);
- }
- }
- }
- if (ref $v eq 'ARRAY' && @$v) {
- push (@vals,@$v);
- }
- elsif (ref $v eq '' && length $v ) {
- push (@vals,$v);
- }
- }
-
- if (wantarray) {
- return @vals;
- }
- elsif (@vals > 0) {
- return join $join_char , @vals;
- }
- else {
- return undef;
- }
+ return Catmandu::MARC::marc_map($data,$marc_path,%opts);
}
1;
diff --git a/lib/Catmandu/Fix/Inline/marc_remove.pm b/lib/Catmandu/Fix/Inline/marc_remove.pm
index ab6a079..048dabc 100644
--- a/lib/Catmandu/Fix/Inline/marc_remove.pm
+++ b/lib/Catmandu/Fix/Inline/marc_remove.pm
@@ -1,7 +1,6 @@
package Catmandu::Fix::Inline::marc_remove;
-use Clone qw(clone);
-use Carp;
+use Catmandu::MARC;
require Exporter;
@ISA = qw(Exporter);
@@ -12,26 +11,12 @@ our $VERSION = '0.219';
sub marc_remove {
my ($data,$marc_path) = @_;
- my $ret = defined $data ? clone($data) : { record => [] };
-
- $ret->{'record'} = [] unless $ret->{'record'};
- croak "invalid marc path" unless $marc_path =~ /^\w{3}$/;
-
- my @fields = ();
- for my $field (@{$ret->{record}}) {
- unless ($field->[0] eq $marc_path) {
- push @fields , $field;
- }
- }
-
- $ret->{record} = \@fields;
-
- return $ret;
+ return Catmandu::MARC::marc_remove($data,$marc_path);
}
=head1 NAME
-Catmandu::Fix::Inline::marc_remove - remove marc fields
+Catmandu::Fix::Inline::marc_remove - remove marc fields (DEPRECATED)
=head1 SYNOPSIS
@@ -39,12 +24,14 @@ Catmandu::Fix::Inline::marc_remove - remove marc fields
my $data = marc_remove($data,'CAT');
+=head1 DEPRECATED
+
+This module is deprecated. Use the inline functionality of L<Catmandu::Fix::marc_remove> instead.
+
=head1 SEE ALSO
-L<Catmandu::Fix::Inline::marc_set> ,
-L<Catmandu::Fix::Inline::marc_add> ,
-L<Catmandu::Fix::Inline::marc_map>
+L<Catmandu::Fix::marc_remove>
=cut
-1;
\ No newline at end of file
+1;
diff --git a/lib/Catmandu/Fix/Inline/marc_set.pm b/lib/Catmandu/Fix/Inline/marc_set.pm
index 808c0c7..006e775 100644
--- a/lib/Catmandu/Fix/Inline/marc_set.pm
+++ b/lib/Catmandu/Fix/Inline/marc_set.pm
@@ -1,8 +1,6 @@
package Catmandu::Fix::Inline::marc_set;
-use Clone qw(clone);
-use Carp;
-use Catmandu::Util qw(:is);
+use Catmandu::MARC;
require Exporter;
@ISA = qw(Exporter);
@@ -13,105 +11,12 @@ our $VERSION = '0.219';
sub marc_set {
my ($data,$marc_path,$value) = @_;
- my $record = $data->{record};
-
- return $data unless defined $record;
-
- if ($value =~ /^\$\.(\S+)/) {
- my $path = $1;
- $value = Catmandu::Util::data_at($path,$data);
- }
-
- if (is_array_ref $value) {
- $value = $value->[-1];
- }
- elsif (is_hash_ref $value) {
- my $last;
- for (keys %$value) {
- $last = $value->{$_};
- }
- $value = $last;
- }
-
- my $field_regex;
- my ($field,$ind1,$ind2,$subfield_regex,$from,$to,$len);
-
- if ($marc_path =~ /(\S{3})(\[(.)?,?(.)?\])?([_a-z0-9])?(\/(\d+)(-(\d+))?)?/) {
- $field = $1;
- $ind1 = $3;
- $ind2 = $4;
- if (defined $5) {
- $subfield_regex = "$5";
- }
- else {
- $subfield_regex = ($field =~ /^LDR|^00/) ? "_" : "a";
- }
- $from = $7;
- $to = $9;
- $len = defined $to ? $to - $from + 1 : 1;
- }
- else {
- confess "invalid marc path";
- }
-
- $field_regex = $field;
- $field_regex =~ s/\*/./g;
-
- for (@$record) {
- if ($_->[0] !~ /$field_regex/) {
- next;
- }
-
- if (defined $ind1) {
- if (!defined $_->[1] || $_->[1] ne $ind1) {
- next;
- }
- }
- if (defined $ind2) {
- if (!defined $_->[2] || $_->[2] ne $ind2) {
- next;
- }
- }
-
- my $start;
-
- if ($_->[0] =~ /^LDR|^00/) {
- $start = 3;
- }
- elsif (defined $_->[5] && $_->[5] eq '_') {
- $start = 5;
- }
- else {
- $start = 3;
- }
-
- my $found = 0;
- for (my $i = $start; $i < @$_; $i += 2) {
-
- if ($_->[$i] eq $subfield_regex) {
- if (defined $from) {
- substr($_->[$i + 1], $from, $len) = $value;
- }
- else {
- $_->[$i + 1] = $value;
- }
-
- $found = 1;
- }
- }
-
- if ($found == 0) {
- push(@$_,$subfield_regex,$value);
- }
-
- }
-
- $data;
+ return Catmandu::MARC::marc_set($data,$marc_path,$value);
}
=head1 NAME
-Catmandu::Fix::Inline::marc_set - A marc_set-er for Perl scripts
+Catmandu::Fix::Inline::marc_set - A marc_set-er for Perl scripts (DEPRECATED)
=head1 SYNOPSIS
@@ -123,11 +28,13 @@ Catmandu::Fix::Inline::marc_set - A marc_set-er for Perl scripts
# Set to a copy of a deeply nested JSON path
my $data = marc_set($data,'245[1]a', '$.my.deep.field');
-=head1 SEE ALSO
+ =head1 DEPRECATED
-L<Catmandu::Fix::Inline::marc_add> ,
-L<Catmandu::Fix::Inline::marc_remove> ,
-L<Catmandu::Fix::Inline::marc_map>
+ This module is deprecated. Use the inline functionality of L<Catmandu::Fix::marc_set> instead.
+
+ =head1 SEE ALSO
+
+ L<Catmandu::Fix::marc_set>
=cut
diff --git a/lib/Catmandu/Fix/marc_add.pm b/lib/Catmandu/Fix/marc_add.pm
index 0b687fe..6322e5c 100644
--- a/lib/Catmandu/Fix/marc_add.pm
+++ b/lib/Catmandu/Fix/marc_add.pm
@@ -1,7 +1,7 @@
package Catmandu::Fix::marc_add;
use Catmandu::Sane;
-use Catmandu::Util qw(:is);
+use Catmandu::MARC;
use Moo;
use Catmandu::Fix::Has;
@@ -9,59 +9,15 @@ with 'Catmandu::Fix::Inlineable';
our $VERSION = '0.219';
-has marc_tag => (fix_arg => 1);
+has marc_path => (fix_arg => 1);
has subfields => (fix_arg => 'collect');
sub fix {
my ($self, $data) = @_;
- my $marc_tag = $self->marc_tag;
-
+ my $marc_path = $self->marc_path;
my @subfields = @{$self->subfields};
- my %subfields = @subfields;
- my $record_key = $subfields{'-record'} // 'record';
- my $marc = $data->{$record_key} // [];
-
- if ($marc_tag =~ /^\w{3}$/) {
- my @field = ();
- push @field , $marc_tag;
- push @field , $subfields{ind1} // ' ';
- push @field , $subfields{ind2} // ' ';
-
-
- for (my $i = 0 ; $i < @subfields ; $i += 2) {
- my $code = $subfields[$i];
- next unless length $code == 1;
- my $value = $subfields[$i+1];
-
- if ($value =~ /^\$\.(\S+)$/) {
- my $path = $1;
- $value = Catmandu::Util::data_at($path,$data);
- }
-
- if (is_array_ref $value) {
- for (@$value) {
- push @field , $code;
- push @field , $_;
- }
- }
- elsif (is_hash_ref $value) {
- for (keys %$value) {
- push @field , $code;
- push @field , $value->{$_};
- }
- }
- elsif (is_value($value) && length($value) > 0) {
- push @field , $code;
- push @field , $value;
- }
- }
-
- push @{ $marc } , \@field if @field > 3;
- }
-
- $data->{$record_key} = $marc;
-
- $data;
+
+ return Catmandu::MARC::marc_add($data,$marc_path, at subfields);
}
=head1 NAME
diff --git a/lib/Catmandu/Fix/marc_decode_dollar_subfields.pm b/lib/Catmandu/Fix/marc_decode_dollar_subfields.pm
index f03cceb..bb4441e 100644
--- a/lib/Catmandu/Fix/marc_decode_dollar_subfields.pm
+++ b/lib/Catmandu/Fix/marc_decode_dollar_subfields.pm
@@ -1,46 +1,20 @@
package Catmandu::Fix::marc_decode_dollar_subfields;
+use Catmandu::Sane;
+use Catmandu::MARC;
use Moo;
-use Data::Dumper;
+use Catmandu::Fix::Has;
with 'Catmandu::Fix::Inlineable';
our $VERSION = '0.219';
+has record => (fix_opt => 1);
+
sub fix {
my ($self,$data) = @_;
-
- my $old_record = $data->{record};
- my $new_record = [];
-
- for my $field (@$old_record) {
- my ($field,$ind1,$ind2, at subfields) = @$field;
-
- my $fixed_field = [$field,$ind1,$ind2];
-
- for (my $i = 0 ; $i < @subfields ; $i += 2) {
- my $code = $subfields[$i];
- my $value = $subfields[$i+1];
-
- # If a subfield contains fields coded like: data$xmore$yevenmore
- # chunks = (data,x,y,evenmore)
- my @chunks = split( /\$([a-z])/, $value );
-
- my $real_value = shift @chunks;
-
- push @$fixed_field , ( $code, $real_value);
-
- while (@chunks) {
- push @$fixed_field , ( splice @chunks, 0, 2 );
- }
- }
-
- push @$new_record , $fixed_field;
- }
-
- $data->{record} = $new_record;
-
- $data;
+ my $record_key = $self->record // 'record';
+ return Catmandu::MARC::marc_decode_dollar_subfields($data, record => $record_key);
}
=head1 NAME
diff --git a/lib/Catmandu/Fix/marc_in_json.pm b/lib/Catmandu/Fix/marc_in_json.pm
index 843b1ca..0b49f5f 100644
--- a/lib/Catmandu/Fix/marc_in_json.pm
+++ b/lib/Catmandu/Fix/marc_in_json.pm
@@ -1,7 +1,7 @@
package Catmandu::Fix::marc_in_json;
use Catmandu::Sane;
-use Catmandu::Util qw(:is);
+use Catmandu::MARC;
use Moo;
use Catmandu::Fix::Has;
@@ -17,90 +17,14 @@ has reverse => (fix_opt => 1);
# http://dilettantes.code4lib.org/blog/2010/09/a-proposal-to-serialize-marc-in-json/
sub fix {
my ($self, $data) = @_;
+ my $record_key = $self->record // 'record';
- $self->reverse ? $self->_json_record($data) : $self->_record_json($data);
-}
-
-sub _json_record {
- my ($self, $data) = @_;
- my $marc_pointer = $self->record // 'record';
-
- my $record = [];
-
- if (is_string($data->{leader})) {
- push @$record , [ 'LDR', undef, undef, '_', $data->{leader} ],
- }
-
- if (is_array_ref($data->{fields})) {
- for my $field (@{$data->{fields}}) {
- next unless is_hash_ref($field);
-
- my ($tag) = keys %$field;
- my $val = $field->{$tag};
-
- if ($tag eq 'FMT' || substr($tag, 0, 2) eq '00') {
- push @$record , [ $tag, undef, undef, '_', $val ],
- }
- elsif (is_hash_ref($val)) {
- my $ind1 = $val->{ind1};
- my $ind2 = $val->{ind2};
- next unless is_array_ref($val->{subfields});
-
- my $sfs = [ '_' , ''];
- for my $sf (@{ $val->{subfields} }) {
- next unless is_hash_ref($sf);
-
- my ($code) = keys %$sf;
- my $sval = $sf->{$code};
-
- push @$sfs , [ $code , $sval];
- }
-
- push @$record , [ $tag , $ind1 , $ind2 , @$sfs];
- }
- }
- }
-
- if (@$record > 0) {
- delete $data->{fields};
- delete $data->{leader};
- $data->{$marc_pointer} = $record;
+ if ($self->reverse) {
+ return Catmandu::MARC::marc_json_to_record($data, record => $record_key);
}
-
- $data;
-}
-
-sub _record_json {
- my ($self, $data) = @_;
- my $marc_pointer = $self->record // 'record';
-
- if (my $marc = delete $data->{$marc_pointer}) {
- for my $field (@$marc) {
- my ($tag, $ind1, $ind2, @subfields) = @$field;
-
- if ($tag eq 'LDR') {
- shift @subfields;
- $data->{leader} = join "", @subfields;
- }
- elsif ($tag eq 'FMT' || substr($tag, 0, 2) eq '00') {
- shift @subfields;
- push @{$data->{fields} ||= []} , { $tag => join "" , @subfields };
- }
- else {
- my @sf;
- my $start = !defined($subfields[0]) || $subfields[0] eq '_' ? 2 : 0;
- for (my $i = $start; $i < @subfields; $i += 2) {
- push @sf, { $subfields[$i] => $subfields[$i+1] };
- }
- push @{$data->{fields} ||= []} , { $tag => {
- subfields => \@sf,
- ind1 => $ind1,
- ind2 => $ind2 } };
- }
- }
+ else {
+ return Catmandu::MARC::marc_record_to_json($data, record => $record_key);
}
-
- $data;
}
=head1 NAME
diff --git a/lib/Catmandu/Fix/marc_map.pm b/lib/Catmandu/Fix/marc_map.pm
index afca073..c9736d1 100644
--- a/lib/Catmandu/Fix/marc_map.pm
+++ b/lib/Catmandu/Fix/marc_map.pm
@@ -1,7 +1,7 @@
package Catmandu::Fix::marc_map;
use Catmandu::Sane;
-use Carp qw(confess);
+use Catmandu::MARC;
use Moo;
use Catmandu::Fix::Has;
@@ -20,140 +20,38 @@ has pluck => (fix_opt => 1);
sub emit {
my ($self,$fixer) = @_;
my $path = $fixer->split_path($self->path);
- my $record_key = $fixer->emit_string($self->record // 'record');
- my $join_char = $fixer->emit_string($self->join // '');
- my $marc_path = $self->marc_path;
-
- my $field_regex;
- my ($field,$ind1,$ind2,$subfield_regex,$from,$to);
-
- if ($marc_path =~ /(\S{3})(\[([^,])?,?([^,])?\])?([_a-z0-9^]+)?(\/(\d+)(-(\d+))?)?/) {
- $field = $1;
- $ind1 = $3;
- $ind2 = $4;
- $subfield_regex = defined $5 ? "[$5]" : "[a-z0-9_]";
- $from = $7;
- $to = $9;
- }
- else {
- confess "invalid marc path";
- }
-
- $field_regex = $field;
- $field_regex =~ s/\*/./g;
-
- my $var = $fixer->var;
- my $vals = $fixer->generate_var;
- my $perl = $fixer->emit_declare_vars($vals, '[]');
-
- $perl .= $fixer->emit_foreach("${var}->{${record_key}}", sub {
- my $var = shift;
- my $v = $fixer->generate_var;
- my $perl = "";
-
- $perl .= "next if ${var}->[0] !~ /${field_regex}/;";
-
- if (defined $ind1) {
- $perl .= "next if (!defined ${var}->[1] || ${var}->[1] ne '${ind1}');";
- }
- if (defined $ind2) {
- $perl .= "next if (!defined ${var}->[2] || ${var}->[2] ne '${ind2}');";
- }
-
- if ($self->value) {
- $perl .= $fixer->emit_declare_vars($v, $fixer->emit_string($self->value));
- $perl .= $fixer->emit_create_path($fixer->var, $path, sub {
- my $var2 = shift;
- my $i = $fixer->generate_var;
- return
- "for (my ${i} = 3; ${i} < \@{${var}}; ${i} += 2) {".
- "if (${var}->[${i}] =~ /${subfield_regex}/) {".
- "${var2} = ${v}; last;".
- "}".
- "}";
- });
- } else {
- my $i = $fixer->generate_var;
- my $add_subfields = sub {
- my $start = shift;
- if ($self->pluck) {
- # Treat the subfield_regex as a hash index
- my $pluck = $fixer->generate_var;
- return
- "my ${pluck} = {};" .
- "for (my ${i} = ${start}; ${i} < \@{${var}}; ${i} += 2) {".
- "push(\@{ ${pluck}->{ ${var}->[${i}] } }, ${var}->[${i} + 1]);" .
- "}" .
- "for my ${i} (split('','${subfield_regex}')) { " .
- "push(\@{${v}}, \@{ ${pluck}->{${i}} }) if exists ${pluck}->{${i}};" .
- "}";
- }
- else {
- # Treat the subfield_regex as regex that needs to match the subfields
- return
- "for (my ${i} = ${start}; ${i} < \@{${var}}; ${i} += 2) {".
- "if (${var}->[${i}] =~ /${subfield_regex}/) {".
- "push(\@{${v}}, ${var}->[${i} + 1]);".
- "}".
- "}";
- }
- };
- $perl .= $fixer->emit_declare_vars($v, "[]");
- $perl .= "if (${var}->[0] =~ /^LDR|^00/) {";
- $perl .= $add_subfields->(3);
- # Old Catmandu::MARC contained a bug/feature to allow
- # for '_' subfields in non-control elements ..for beackwards
- # compatibility we ignore them
- $perl .= "} elsif (defined ${var}->[3] && ${var}->[3] eq '_') {";
- $perl .= $add_subfields->(5);
- $perl .= "} else {";
- $perl .= $add_subfields->(3);
- $perl .= "}";
- $perl .= "if (\@{${v}}) {";
-
- if (!$self->split) {
- $perl .= "${v} = join(${join_char}, \@{${v}});";
- }
- if (defined(my $off = $from)) {
- my $len = defined $to ? $to - $off + 1 : 1;
- $perl .= "${v} = join(${join_char}, \@{${v}}) if (is_array_ref(${v}));";
- $perl .= "if (length(${v}) > ${off}) {" .
- " ${v} = substr(${v}, ${off}, ${len});" .
- "} else {" .
- " ${v} = undef;".
- "}";
+ my $marc_path = $fixer->emit_string($self->marc_path);
+ my $record_opt = $fixer->emit_string($self->record // 'record');
+ my $join_opt = $fixer->emit_string($self->join // '');
+ my $split_opt = $fixer->emit_string($self->split // 0);
+ my $pluck_opt = $fixer->emit_string($self->pluck // 0);
+ my $value_opt = $self->value ?
+ $fixer->emit_string($self->value) : 'undef';
+ my $var = $fixer->var;
+ my $result = $fixer->generate_var;
+
+ my $perl =<<EOF;
+if (my ${result} = Catmandu::MARC::marc_map(
+ ${var},
+ ${marc_path},
+ -split => ${split_opt},
+ -join => ${join_opt},
+ -pluck => ${pluck_opt},
+ -value => ${value_opt}) ) {
+EOF
+ $perl .= $fixer->emit_create_path(
+ $var,
+ $path,
+ sub {
+ my $var2 = shift;
+ "${var2} = ${result}"
}
+ );
- $perl .= $fixer->emit_create_path($fixer->var, $path, sub {
- my $var = shift;
- my $perl = "";
- $perl .= "if (defined ${v}) {";
- if ($self->split) {
- $perl .=
- "${v} = [ ${v} ] unless ref ${v} eq 'ARRAY';" .
- "if (is_array_ref(${var})) {".
- "push \@{${var}}, \@{${v}};".
- "} else {".
- "${var} = [\@{${v}}];".
- "}";
- } else {
- $perl .=
- "if (is_string(${var})) {".
- "${var} = join(${join_char}, ${var}, ${v});".
- "} else {".
- "${var} = ${v};".
- "}";
- }
- $perl .= "}";
- $perl;
- });
-
- $perl .= "}";
- }
- $perl;
- });
-
+ $perl .=<<EOF;
+}
+EOF
$perl;
}
diff --git a/lib/Catmandu/Fix/marc_remove.pm b/lib/Catmandu/Fix/marc_remove.pm
index c055941..6679b2a 100644
--- a/lib/Catmandu/Fix/marc_remove.pm
+++ b/lib/Catmandu/Fix/marc_remove.pm
@@ -1,109 +1,22 @@
package Catmandu::Fix::marc_remove;
use Catmandu::Sane;
-use Carp qw(confess);
+use Catmandu::MARC;
use Moo;
use Catmandu::Fix::Has;
-with 'Catmandu::Fix::Base';
+with 'Catmandu::Fix::Inlineable';
our $VERSION = '0.219';
has marc_path => (fix_arg => 1);
has record => (fix_opt => 1);
-sub emit {
- my ($self,$fixer) = @_;
- my $record_key = $fixer->emit_string($self->record // 'record');
- my $marc_path = $self->marc_path;
-
- my $field_regex;
- my ($field,$ind1,$ind2,$subfield_regex,$from,$to,$len);
-
- if ($marc_path =~ /(\S{3})(\[(.)?,?(.)?\])?([_a-z0-9^]+)?(\/(\d+)(-(\d+))?)?/) {
- $field = $1;
- $ind1 = $3;
- $ind2 = $4;
- $subfield_regex = defined $5 ? "[$5]" : undef;
- $from = $7;
- $to = $9;
- }
- else {
- confess "invalid marc path";
- }
-
- $field_regex = $field;
- $field_regex =~ s/\*/./g;
-
- my $var = $fixer->var;
- my $new_record = $fixer->generate_var;
- my $perl = $fixer->emit_declare_vars($new_record,[]);
-
- $perl .= $fixer->emit_foreach("${var}->{${record_key}}", sub {
- my $var = shift;
- my $perl = "";
-
- $perl .= "if (${var}->[0] =~ /${field_regex}/) { ";
-
- if (defined $ind1) {
- $perl .= "next if (defined ${var}->[1] && ${var}->[1] eq '${ind1}');";
- }
-
- if (defined $ind2) {
- $perl .= "next if (defined ${var}->[2] && ${var}->[2] eq '${ind2}');";
- }
-
- unless (defined $ind1 || defined $ind2 || defined $subfield_regex) {
- $perl .= "next;";
- }
-
- $perl .= "}";
-
- my $i = $fixer->generate_var;
-
- my $new_subf = $fixer->generate_var;
- $perl .= $fixer->emit_declare_vars($new_subf,'[]');
-
- my $del_subfields = sub {
- my $start = shift;
- my $perl =<<EOF;
-${new_subf} = [];
-for (my ${i} = ${start}; ${i} < \@{${var}}; ${i} += 2) {
- unless (${var}->[${i}] =~ /${subfield_regex}/) {
- push \@{${new_subf}} , ${var}->[${i}];
- push \@{${new_subf}} , ${var}->[${i}+1];
- }
-}
-splice \@{${var}} , ${start} , int(\@{${var}}), \@{${new_subf}};
-EOF
- $perl;
- };
-
- if (defined $subfield_regex) {
- $perl .= "if ( ${var}->[0] =~ /${field_regex}/) {";
- $perl .= "if (${var}->[0] =~ /^LDR|^00/) {";
- $perl .= $del_subfields->(3);
-
- # Old Catmandu::MARC contained a bug/feature to allow
- # for '_' subfields in non-control elements ..for backwards
- # compatibility we ignore them
- $perl .= "} elsif (defined ${var}->[5] && ${var}->[5] eq '_') {";
- $perl .= $del_subfields->(5);
- $perl .= "} else {";
-
- $perl .= $del_subfields->(3);
- $perl .= "}";
- $perl .= "}";
- }
-
- $perl .= "push \@${new_record} , ${var} ";
-
- $perl;
- });
-
- $perl .= "${var}->{${record_key}} = ${new_record};";
-
- $perl;
+sub fix {
+ my ($self,$data) = @_;
+ my $marc_path = $self->marc_path;
+ my $record_key = $self->record // 'record';
+ return Catmandu::MARC::marc_remove($data, $marc_path, record => $record_key);
}
=head1 NAME
diff --git a/lib/Catmandu/Fix/marc_set.pm b/lib/Catmandu/Fix/marc_set.pm
index 64c4c93..ab9f367 100644
--- a/lib/Catmandu/Fix/marc_set.pm
+++ b/lib/Catmandu/Fix/marc_set.pm
@@ -1,12 +1,11 @@
package Catmandu::Fix::marc_set;
use Catmandu::Sane;
-use Catmandu::Util qw(:is);
-use Carp qw(confess);
use Moo;
+use Catmandu::MARC;
use Catmandu::Fix::Has;
-with 'Catmandu::Fix::Base';
+with 'Catmandu::Fix::Inlineable';
our $VERSION = '0.219';
@@ -14,114 +13,12 @@ has marc_path => (fix_arg => 1);
has value => (fix_arg => 1);
has record => (fix_opt => 1);
-sub emit {
- my ($self,$fixer) = @_;
- my $record_key = $fixer->emit_string($self->record // 'record');
+sub fix {
+ my ($self,$data) = @_;
my $marc_path = $self->marc_path;
-
- my $field_regex;
- my ($field,$ind1,$ind2,$subfield_regex,$from,$to,$len);
-
- if ($marc_path =~ /(\S{3})(\[(.)?,?(.)?\])?([_a-z0-9])?(\/(\d+)(-(\d+))?)?/) {
- $field = $1;
- $ind1 = $3;
- $ind2 = $4;
- if (defined $5) {
- $subfield_regex = "$5";
- }
- else {
- $subfield_regex = ($field =~ /^LDR|^00/) ? "_" : "a";
- }
- $from = $7;
- $to = $9;
- $len = defined $to ? $to - $from + 1 : 1;
- }
- else {
- confess "invalid marc path";
- }
-
- my $perl = "";
-
- # Find out if we need to insert a literal value or a value from a JSON path
- my $value;
-
- if ($self->value =~ /^\$\.(\S+)$/) {
- my $path = $fixer->split_path($1);
- my $key = pop @$path;
- $value = $fixer->generate_var;
- $perl .= $fixer->emit_declare_vars($value, '""');
- $perl .= $fixer->emit_walk_path($fixer->var, $path, sub {
- my $var = shift;
- $fixer->emit_get_key($var, $key, sub {
- my $var = shift;
- "${value} = ${var};";
- });
- });
- }
- else {
- $value = $fixer->emit_string($self->value);
- }
- ##############
-
- $field_regex = $field;
- $field_regex =~ s/\*/./g;
-
- my $var = $fixer->var;
-
- $perl .= $fixer->emit_foreach("${var}->{${record_key}}", sub {
- my $var = shift;
- my $perl = "";
-
- $perl .= "next unless is_value ${value};";
- $perl .= "next if ${var}->[0] !~ /${field_regex}/;";
-
- if (defined $ind1) {
- $perl .= "next if (!defined ${var}->[1] || ${var}->[1] ne '${ind1}');";
- }
- if (defined $ind2) {
- $perl .= "next if (!defined ${var}->[2] || ${var}->[2] ne '${ind2}');";
- }
-
- my $i = $fixer->generate_var;
- my $set_subfields = sub {
- my $start = shift;
- my $found = $fixer->generate_var;
- my $perl = "my ${found} = 0;".
- "for (my ${i} = ${start}; ${i} < \@{${var}}; ${i} += 2) {".
- "if (${var}->[${i}] eq '${subfield_regex}') {";
- if (defined $from) {
- $perl .= "substr(${var}->[${i}+1],$from,$len) = ${value};";
- }
- else {
- $perl .= "${var}->[${i}+1] = ${value};";
- }
-
- $perl .= "${found} = 1;";
- $perl .= "}".
- "}";
- $perl .= "if (${found} == 0) {".
- "push(\@${var},'${subfield_regex}',${value});".
- "}";
- $perl;
- };
-
- $perl .= "if (${var}->[0] =~ /^LDR|^00/) {";
- $perl .= $set_subfields->(3);
-
- # Old Catmandu::MARC contained a bug/feature to allow
- # for '_' subfields in non-control elements ..for backwards
- # compatibility we ignore them
- $perl .= "} elsif (defined ${var}->[5] && ${var}->[5] eq '_') {";
- $perl .= $set_subfields->(5);
- $perl .= "} else {";
-
- $perl .= $set_subfields->(3);
- $perl .= "}";
-
- $perl;
- });
-
- $perl;
+ my $value = $self->value;
+ my $record_key = $self->record;
+ return Catmandu::MARC::marc_set($data,$marc_path,$value, record => $record_key);
}
=head1 NAME
diff --git a/lib/Catmandu/Fix/marc_xml.pm b/lib/Catmandu/Fix/marc_xml.pm
index b39c433..8f60cea 100644
--- a/lib/Catmandu/Fix/marc_xml.pm
+++ b/lib/Catmandu/Fix/marc_xml.pm
@@ -2,9 +2,7 @@ package Catmandu::Fix::marc_xml;
use Catmandu::Sane;
use Moo;
-use IO::String;
-use Catmandu::Exporter::MARC::XML;
-use Catmandu::Util qw(:is :data);
+use Catmandu::MARC;
use Catmandu::Fix::Has;
with 'Catmandu::Fix::Inlineable';
@@ -16,15 +14,8 @@ has path => (fix_arg => 1);
# Transform a raw MARC array into MARCXML
sub fix {
my ($self, $data) = @_;
- my $path = $self->path;
-
- my $xml;
- my $exporter = Catmandu::Exporter::MARC::XML->new(file => \$xml , xml_declaration => 0 , collection => 0);
- $exporter->add($data);
- $exporter->commit;
-
- $data->{$path} = $xml;
-
+ my $xml = Catmandu::MARC::marc_xml($data);
+ $data->{$self->path} = $xml;
$data;
}
diff --git a/lib/Catmandu/MARC.pm b/lib/Catmandu/MARC.pm
index 721e692..130dd20 100644
--- a/lib/Catmandu/MARC.pm
+++ b/lib/Catmandu/MARC.pm
@@ -1,21 +1,132 @@
package Catmandu::MARC;
+use Catmandu::Sane;
use Catmandu::Util;
use Catmandu::Exporter::MARC::XML;
+use Carp;
our $VERSION = '0.219';
+sub marc_map {
+ my ($data,$marc_path,%opts) = @_;
+ my $record_key = $opts{record} // 'record';
+
+ return undef unless exists $data->{$record_key};
+
+ my $record = $data->{$record_key};
+
+ unless (defined $record && ref $record eq 'ARRAY') {
+ return wantarray ? () : undef;
+ }
+
+ my $split = $opts{'-split'} // 0;
+ my $join_char = $opts{'-join'} // '';
+ my $pluck = $opts{'-pluck'};
+ my $value_set = $opts{'-value'};
+ my $attrs = {};
+
+ my $add_subfields = sub {
+ my ($field,%context) = @_;
+
+ my @v = ();
+
+ if ($pluck) {
+ # Treat the subfield as a hash index
+ my $_h = {};
+ for (my $i = $context{start}; $i < $context{end}; $i += 2) {
+ push @{ $_h->{ $field->[$i] } } , $field->[$i + 1];
+ }
+ for my $c (split('',$context{subfield})) {
+ push @v , @{ $_h->{$c} } if exists $_h->{$c};
+ }
+ }
+ else {
+ for (my $i = $context{start}; $i < $context{end}; $i += 2) {
+ if ($field->[$i] =~ /$context{subfield}/) {
+ push(@v, $field->[$i + 1]);
+ }
+ }
+ }
+
+ return @v ? \@v : undef;
+ };
+
+ my $vals;
+
+ marc_at_field($record, $marc_path, sub {
+ my ($field, %context) = @_;
+ my $v;
+
+ if ($value_set) {
+ for (my $i = $context{start}; $i < $context{end}; $i += 2) {
+ if ($field->[$i] =~ /$context{subfield}/) {
+ $v = $value_set;
+ last;
+ }
+ }
+ }
+ else {
+ $v = $add_subfields->($field,%context);
+
+ if (defined $v && @$v) {
+ if (!$split) {
+ $v = join $join_char, @$v;
+ }
+
+ if (defined(my $off = $context{from})) {
+ $v = join $join_char, @$v if (ref $v eq 'ARRAY');
+ my $len = $context{len};
+ if (length(${v}) > $off) {
+ $v = substr($v, $off, $len);
+ } else {
+ $v = undef;
+ }
+ }
+ }
+ }
+
+ if (defined $v) {
+ if ($split) {
+ $v = [ $v ] unless Catmandu::Util::is_array_ref($v);
+ if (Catmandu::Util::is_array_ref($vals)) {
+ push @$vals , @$v;
+ }
+ else {
+ $vals = [ @$v ];
+ }
+ }
+ else {
+ if (Catmandu::Util::is_string($vals)) {
+ $vals = join $join_char , $vals , $v;
+ }
+ else {
+ $vals = $v;
+ }
+ }
+ }
+ }, subfield_wildcard => 1);
+
+ if (!defined $vals) {
+ return undef;
+ }
+ elsif (wantarray) {
+ return Catmandu::Util::is_array_ref($vals) ? @$vals : ($vals);
+ }
+ else {
+ return $vals;
+ }
+}
+
sub marc_add {
- my ($data,$marc_tag,$subfield_array) = @_;
+ my ($data,$marc_path, at subfields) = @_;
- my @subfields = @{$subfield_array};
my %subfields = @subfields;
my $record_key = $subfields{'-record'} // 'record';
my $marc = $data->{$record_key} // [];
- if ($marc_tag =~ /^\w{3}$/) {
+ if ($marc_path =~ /^\w{3}$/) {
my @field = ();
- push @field , $marc_tag;
+ push @field , $marc_path;
push @field , $subfields{ind1} // ' ';
push @field , $subfields{ind2} // ' ';
@@ -56,20 +167,305 @@ sub marc_add {
$data;
}
+sub marc_set {
+ my ($data,$marc_path,$value,%opts) = @_;
+ my $record_key = $opts{record} // 'record';
+ my $record = $data->{$record_key};
+
+ return $data unless defined $record;
+
+ if ($value =~ /^\$\.(\S+)/) {
+ my $path = $1;
+ $value = Catmandu::Util::data_at($path,$data);
+ }
+
+ if (Catmandu::Util::is_array_ref $value) {
+ $value = $value->[-1];
+ }
+ elsif (Catmandu::Util::is_hash_ref $value) {
+ my $last;
+ for (keys %$value) {
+ $last = $value->{$_};
+ }
+ $value = $last;
+ }
+
+ marc_at_field($record, $marc_path, sub {
+ my ($field,%context) = @_;
+
+ my $found = 0;
+ for (my $i = $context{start}; $i < $context{end}; $i += 2) {
+ if ($field->[$i] eq $context{subfield}) {
+ if (defined $context{from}) {
+ substr($field->[$i + 1], $context{from}, $context{len}) = $value;
+ }
+ else {
+ $field->[$i + 1] = $value;
+ }
+ $found = 1;
+ }
+ }
+
+ if ($found == 0) {
+ push(@$field,$context{subfield},$value);
+ }
+ }, subfield_default => 1);
+
+ $data;
+}
+
+sub marc_remove {
+ my ($data, $marc_path,%opts) = @_;
+ my $record_key = $opts{record} // 'record';
+ my $record = $data->{$record_key};
+
+ my $new_record;
+
+ marc_at_field($record, $marc_path, sub {
+ my ($field,%context) = @_;
+
+ if ($field->[0] =~ /$context{field_regex}/) {
+ if (defined $context{ind1}) {
+ return if (defined $field->[1] && $field->[1] eq $context{ind1});
+ }
+
+ if (defined $context{ind2}) {
+ return if (defined $field->[2] && $field->[2] eq $context{ind2});
+ }
+
+ unless (defined $context{ind1} || defined $context{ind2} || defined $context{subfield}) {
+ return;
+ }
+ }
+
+ if (defined $context{subfield}) {
+ if ( $field->[0] =~ /$context{field_regex}/) {
+ my $new_subf = [];
+ for (my $i = $context{start}; $i < $context{end}; $i += 2) {
+ unless ($field->[$i] =~ /$context{subfield}/) {
+ push @$new_subf , $field->[$i];
+ push @$new_subf , $field->[$i+1];
+ }
+ }
+ splice @$field , $context{start} , int(@$field), @$new_subf;
+ }
+ }
+
+ push @$new_record , $field;
+
+ }, nofilter => 1);
+
+ $data->{$record_key} = $new_record;
+
+ return $data;
+}
+
sub marc_xml {
- my ($data,$path) = @_;
- $path //= 'record';
+ my ($data) = @_;
my $xml;
my $exporter = Catmandu::Exporter::MARC::XML->new(file => \$xml , xml_declaration => 0 , collection => 0);
$exporter->add($data);
$exporter->commit;
- $data->{$path} = $xml;
+ $xml;
+}
+
+sub marc_record_to_json {
+ my ($data,%opts) = @_;
+ my $record_key = $opts{record} // 'record';
+
+ if (my $marc = delete $data->{$record_key}) {
+ for my $field (@$marc) {
+ my ($tag, $ind1, $ind2, @subfields) = @$field;
+
+ if ($tag eq 'LDR') {
+ shift @subfields;
+ $data->{leader} = join "", @subfields;
+ }
+ elsif ($tag eq 'FMT' || substr($tag, 0, 2) eq '00') {
+ shift @subfields;
+ push @{$data->{fields} ||= []} , { $tag => join "" , @subfields };
+ }
+ else {
+ my @sf;
+ my $start = !defined($subfields[0]) || $subfields[0] eq '_' ? 2 : 0;
+ for (my $i = $start; $i < @subfields; $i += 2) {
+ push @sf, { $subfields[$i] => $subfields[$i+1] };
+ }
+ push @{$data->{fields} ||= []} , { $tag => {
+ subfields => \@sf,
+ ind1 => $ind1,
+ ind2 => $ind2 } };
+ }
+ }
+ }
$data;
}
+sub marc_json_to_record {
+ my ($data,%opts) = @_;
+ my $record_key = $opts{record} // 'record';
+
+ my $record = [];
+
+ if (Catmandu::Util::is_string($data->{leader})) {
+ push @$record , [ 'LDR', undef, undef, '_', $data->{leader} ],
+ }
+
+ if (Catmandu::Util::is_array_ref($data->{fields})) {
+ for my $field (@{$data->{fields}}) {
+ next unless Catmandu::Util::is_hash_ref($field);
+
+ my ($tag) = keys %$field;
+ my $val = $field->{$tag};
+
+ if ($tag eq 'FMT' || substr($tag, 0, 2) eq '00') {
+ push @$record , [ $tag, undef, undef, '_', $val ],
+ }
+ elsif (Catmandu::Util::is_hash_ref($val)) {
+ my $ind1 = $val->{ind1};
+ my $ind2 = $val->{ind2};
+ next unless Catmandu::Util::is_array_ref($val->{subfields});
+
+ my $sfs = [ '_' , ''];
+ for my $sf (@{ $val->{subfields} }) {
+ next unless Catmandu::Util::is_hash_ref($sf);
+
+ my ($code) = keys %$sf;
+ my $sval = $sf->{$code};
+
+ push @$sfs , [ $code , $sval];
+ }
+
+ push @$record , [ $tag , $ind1 , $ind2 , @$sfs];
+ }
+ }
+ }
+
+ if (@$record > 0) {
+ delete $data->{fields};
+ delete $data->{leader};
+ $data->{$record_key} = $record;
+ }
+
+ $data;
+}
+
+sub marc_decode_dollar_subfields {
+ my ($data,%opts) = @_;
+ my $record_key = $opts{record} // 'record';
+ my $old_record = $data->{$record_key};
+ my $new_record = [];
+
+ for my $field (@$old_record) {
+ my ($field,$ind1,$ind2, at subfields) = @$field;
+
+ my $fixed_field = [$field,$ind1,$ind2];
+
+ for (my $i = 0 ; $i < @subfields ; $i += 2) {
+ my $code = $subfields[$i];
+ my $value = $subfields[$i+1];
+
+ # If a subfield contains fields coded like: data$xmore$yevenmore
+ # chunks = (data,x,y,evenmore)
+ my @chunks = split( /\$([a-z])/, $value );
+
+ my $real_value = shift @chunks;
+
+ push @$fixed_field , ( $code, $real_value);
+
+ while (@chunks) {
+ push @$fixed_field , ( splice @chunks, 0, 2 );
+ }
+ }
+
+ push @$new_record , $fixed_field;
+ }
+
+ $data->{$record_key} = $new_record;
+
+ $data;
+}
+
+sub marc_at_field {
+ my ($record,$marc_path,$callback,%opts) = @_;
+
+ croak "need a marc_path and callback" unless defined($marc_path) && defined($callback);
+
+ my $field_regex;
+ my ($field,$ind1,$ind2,$subfield_regex,$from,$to,$len);
+
+ if ($marc_path =~ /(\S{3})(\[([^,])?,?([^,])?\])?([_a-z0-9^]+)?(\/(\d+)(-(\d+))?)?/) {
+ $field = $1;
+ $ind1 = $3;
+ $ind2 = $4;
+ if (defined $5) {
+ $subfield_regex = "$5";
+ }
+ elsif ($opts{subfield_default}) {
+ $subfield_regex = $field =~ /^0|LDR/ ? '_' : 'a';
+ }
+ elsif ($opts{subfield_wildcard}) {
+ $subfield_regex = '[a-z0-9_]';
+ }
+ $from = $7;
+ $to = $9;
+ $len = defined $to ? $to - $from + 1 : 1;
+ }
+ else {
+ confess "invalid marc path";
+ }
+
+ $field_regex = $field;
+ $field_regex =~ s/\*/./g;
+
+ for (@$record) {
+ unless ($opts{nofilter}) {
+ if ($_->[0] !~ /$field_regex/) {
+ next;
+ }
+
+ if (defined $ind1) {
+ if (!defined $_->[1] || $_->[1] ne $ind1) {
+ next;
+ }
+ }
+ if (defined $ind2) {
+ if (!defined $_->[2] || $_->[2] ne $ind2) {
+ next;
+ }
+ }
+ }
+
+ my $start;
+
+ if ($_->[0] =~ /^LDR|^00/) {
+ $start = 3;
+ }
+ elsif (defined $_->[5] && $_->[5] eq '_') {
+ $start = 5;
+ }
+ else {
+ $start = 3;
+ }
+
+ $callback->($_,
+ field => $field ,
+ field_regex => $field_regex ,
+ subfield => $subfield_regex ,
+ start => $start ,
+ end => int(@$_) ,
+ ind1 => $ind1 ,
+ ind2 => $ind2 ,
+ from => $from ,
+ to => $to ,
+ len => $len
+ );
+ }
+}
+
1;
__END__
@@ -101,7 +497,7 @@ Catmandu::MARC - Catmandu modules for working with MARC data
$ catmandu convert MARC --fix myfixes.txt < data.mrc
myfixes:
-
+
marc_map("245a", title)
marc_map("5**", note.$append)
marc_map('710','my.authors.$append')
@@ -156,13 +552,13 @@ Catmandu::MARC - Catmandu modules for working with MARC data
=head1 DESCRIPTION
-With Catmandu, LibreCat tools abstract digital library and research services as data
-warehouse processes. As stores we reuse MongoDB or ElasticSearch providing us with
-developer friendly APIs. Catmandu works with international library standards such as
-MARC, MODS and Dublin Core, protocols such as OAI-PMH, SRU and open repositories such
+With Catmandu, LibreCat tools abstract digital library and research services as data
+warehouse processes. As stores we reuse MongoDB or ElasticSearch providing us with
+developer friendly APIs. Catmandu works with international library standards such as
+MARC, MODS and Dublin Core, protocols such as OAI-PMH, SRU and open repositories such
as DSpace and Fedora. And, of course, we speak the evolving Semantic Web.
-Follow us on L<http://librecat.org> and read an introduction into Catmandu data
+Follow us on L<http://librecat.org> and read an introduction into Catmandu data
processing at L<https://github.com/LibreCat/Catmandu/wiki>.
=head1 SEE ALSO
diff --git a/t/07-inline-fix.t b/t/07-inline-fix.t
index 5f7ee0e..bb54e9b 100644
--- a/t/07-inline-fix.t
+++ b/t/07-inline-fix.t
@@ -1,6 +1,6 @@
use strict;
use warnings;
-
+use Catmandu::Util;
use Test::More tests => 18;
use Catmandu::Fix::Inline::marc_map qw(marc_map);
@@ -31,7 +31,7 @@ ok(@$records == 2 , "Found 2 records");
{
my @res = marc_map($records->[0],'630');
- ok(@res == 2 , q|marc_map(630)|);
+ ok(@res == 1 , q|marc_map(630)|);
}
{
@@ -92,4 +92,4 @@ ok(@$records == 2 , "Found 2 records");
{
my @arr = marc_map($records->[1],'020a',-split=>1);
ok @arr == 3;
-}
\ No newline at end of file
+}
diff --git a/t/18-inlineable.t b/t/18-inlineable.t
index ffef943..aeca035 100644
--- a/t/18-inlineable.t
+++ b/t/18-inlineable.t
@@ -1,6 +1,6 @@
use strict;
use warnings;
-
+use Catmandu::Util;
use Test::More tests => 16;
use Catmandu::Fix::marc_map as => 'marc_map';
@@ -33,7 +33,7 @@ ok(@$records == 2 , "Found 2 records");
{
my $res = marc_map($records->[0],'630','test.$append')->{test};
- ok(@$res == 2 , q|marc_map(630)|);
+ ok(Catmandu::Util::is_array_ref($res), q|marc_map(630)|);
}
{
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libcatmandu-marc-perl.git
More information about the Pkg-perl-cvs-commits
mailing list