[libmarc-parser-raw-perl] 05/29: major refactoring
Jonas Smedegaard
dr at jones.dk
Tue Aug 4 11:57:19 UTC 2015
This is an automated email from the git hooks/post-receive script.
js pushed a commit to branch master
in repository libmarc-parser-raw-perl.
commit 799d2bcadc4ec8f1e4cebc8d29987d08a3e375ad
Author: Johann Rolschewski <rolschewski at gmail.com>
Date: Mon May 11 18:18:11 2015 +0200
major refactoring
---
LICENSE | 6 ++---
README.md | 13 ++++++++++
lib/MARC/Parser/RAW.pm | 69 ++++++++++++++++++++++++++++++++------------------
t/01-parser.t | 48 ++++++++++++++++++++++-------------
t/camel.mrc | 2 +-
5 files changed, 92 insertions(+), 46 deletions(-)
diff --git a/LICENSE b/LICENSE
index 48678db..932b9cf 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-This software is copyright (c) 2015 by Johann Rolschewski.
+This software is copyright (c) 2014- by Johann Rolschewski <jorol at cpan.org>.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
@@ -12,7 +12,7 @@ b) the "Artistic License"
--- The GNU General Public License, Version 1, February 1989 ---
-This software is Copyright (c) 2015 by Johann Rolschewski.
+This software is Copyright (c) 2014- by Johann Rolschewski <jorol at cpan.org>.
This is free software, licensed under:
@@ -272,7 +272,7 @@ That's all there is to it!
--- The Artistic License 1.0 ---
-This software is Copyright (c) 2015 by Johann Rolschewski.
+This software is Copyright (c) 2014- by Johann Rolschewski <jorol at cpan.org>.
This is free software, licensed under:
diff --git a/README.md b/README.md
index a9e67ee..d275960 100644
--- a/README.md
+++ b/README.md
@@ -72,6 +72,19 @@ Deserialize a raw MARC record to an ARRAY of ARRAYs.
Split MARC field string in individual components.
+# AUTHOR
+
+Johann Rolschewski <jorol at cpan.org>
+
+# COPYRIGHT
+
+Copyright 2014- Johann Rolschewski
+
+# LICENSE
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
# SEEALSO
[Catmandu](https://metacpan.org/pod/Catmandu), [Catmandu::Importer::MARC](https://metacpan.org/pod/Catmandu::Importer::MARC).
diff --git a/lib/MARC/Parser/RAW.pm b/lib/MARC/Parser/RAW.pm
index f20e6dc..718489f 100644
--- a/lib/MARC/Parser/RAW.pm
+++ b/lib/MARC/Parser/RAW.pm
@@ -7,6 +7,7 @@ use warnings;
use charnames qw< :full >;
use Carp qw(croak carp);
use Encode qw(find_encoding);
+use English;
use Readonly;
Readonly my $LEADER_LEN => 24;
@@ -93,8 +94,7 @@ sub new {
$file or croak "first argument must be a file or filehandle";
if ($encoding) {
- find_encoding($encoding)
- or croak "encoding \"$_[0]\" is not a valid encoding";
+ find_encoding($encoding) or croak "encoding \"$_[0]\" not found";
}
my $self = {
@@ -130,25 +130,24 @@ Reads the next record from MARC input stream. Returns a Perl hash.
sub next {
my $self = shift;
my $fh = $self->{fh};
- local $/ = $END_OF_RECORD;
- if ( my $record = <$fh> ) {
+ local $INPUT_RECORD_SEPARATOR = $END_OF_RECORD;
+ if ( defined (my $raw = <$fh>) ) {
$self->{rec_number}++;
# remove illegal garbage that sometimes occurs between records
- $record
+ $raw
=~ s/^[\N{SPACE}\N{NUL}\N{LINE FEED}\N{CARRIAGE RETURN}\N{SUB}]+//;
- return unless $record;
+ return unless $raw;
- my $record = _decode($record);
- if ( scalar @{$record} > 1 ) {
- return $record;
+
+ if ( my $marc = $self->_decode($raw) ) {
+ return $marc;
+ }
+ else {
+ return $self->next();
}
- carp $record->[0] . $self->{rec_number};
- $self->next();
- }
- else {
- return;
}
+ return;
}
=head2 _decode($record)
@@ -158,34 +157,41 @@ Deserialize a raw MARC record to an ARRAY of ARRAYs.
=cut
sub _decode {
- my $raw = shift;
+ my ( $self, $raw ) = @_;
chop $raw;
my ( $head, @fields ) = split $END_OF_FIELD, $raw;
if ( !@fields ) {
- return ["no fields found in record "];
+ carp "no fields found in record " . $self->{rec_number};
+ return;
}
# ToDO: better RegEX for leader
- if ( $head !~ /(.{$LEADER_LEN})/cg ) {
- return ["no record leader found in record "];
+ my $leader;
+ if ( $head =~ /(.{$LEADER_LEN})/cg ) {
+ $leader = $1;
+ }
+ else {
+ carp "no valid record leader found in record " . $self->{rec_number};
+ return;
}
- my $leader = $1;
- my @tags = $head =~ /\G(\d{3})\d{9}/cg;
+ my @tags = $head =~ /\G(\d{3})\d{9}/cg;
if ( scalar @tags != scalar @fields ) {
- return ["different number of tags and fields in record "];
+ carp "different number of tags and fields in record "
+ . $self->{rec_number};
+ return;
}
if ( $head !~ /\G$/cg ) {
- my $tail = $1 if $head =~ /(.*)/cg;
- return ["incomplete directory entry in record "];
+ carp "incomplete directory entry in record " . $self->{rec_number};
+ return;
}
return [
[ 'LDR', undef, undef, '_', $leader ],
- map [ shift(@tags), _field($_) ],
+ map [ shift(@tags), $self->_field($_) ],
@fields
];
}
@@ -197,7 +203,7 @@ Split MARC field string in individual components.
=cut
sub _field {
- my ($field) = @_;
+ my ( $self, $field ) = @_;
my @chunks = split( /$SUBFIELD_INDICATOR(.)/, $field );
return ( undef, undef, '_', @chunks ) if @chunks == 1;
my @subfields;
@@ -208,6 +214,19 @@ sub _field {
return ( $indicator1, $indicator2, @subfields );
}
+=head1 AUTHOR
+
+Johann Rolschewski E<lt>jorol at cpan.orgE<gt>
+
+=head1 COPYRIGHT
+
+Copyright 2014- Johann Rolschewski
+
+=head1 LICENSE
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
=head1 SEEALSO
L<Catmandu>, L<Catmandu::Importer::MARC>.
diff --git a/t/01-parser.t b/t/01-parser.t
index 422f1b8..e047a34 100644
--- a/t/01-parser.t
+++ b/t/01-parser.t
@@ -4,13 +4,24 @@ use Test::More;
use MARC::Parser::RAW;
-my $failure = eval {MARC::Parser::RAW->new()};
-is( $failure, undef, 'croak missing argument');
+new_ok( 'MARC::Parser::RAW' => ['./t/camel.mrc'] );
+new_ok( 'MARC::Parser::RAW' => ['./t/camel.mrc', 'UTF-8'] );
+can_ok( 'MARC::Parser::RAW', qw{ next });
+my $failure = eval { MARC::Parser::RAW->new() };
+is( $failure, undef, 'croak missing argument' );
+$failure = eval { MARC::Parser::RAW->new('./t/camel.mrk') };
+is( $failure, undef, 'croak cannot find file');
+$failure = eval { MARC::Parser::RAW->new('./t/camel.mrc', 'XXX-0') };
+is( $failure, undef, 'croak unavailable encoding');
my $parser = MARC::Parser::RAW->new('./t/camel.mrc');
-isa_ok( $parser, 'MARC::Parser::RAW' );
my $record = $parser->next();
is_deeply(
+ $record->[0],
+ [ 'LDR', undef, undef, '_', '00755cam 22002414a 4500' ],
+ 'LDR'
+);
+is_deeply(
$record->[1],
[ '001', undef, undef, '_', 'fol05731351 ' ],
'first field'
@@ -20,20 +31,23 @@ is_deeply(
[ '020', ' ', ' ', 'a', '0471383147 (paper/cd-rom : alk. paper)' ],
'sixth field'
);
-$record = $parser->next();
-is_deeply(
- $record->[1],
- [ '001', undef, undef, '_', 'fol05754809 ' ],
- 'first field'
-);
-$parser = MARC::Parser::RAW->new('./t/camel.mrc', 'UTF-8');
-isa_ok( $parser, 'MARC::Parser::RAW' );
-$record = $parser->next();
-is_deeply(
- $record->[1],
- [ '001', undef, undef, '_', 'fol05731351 ' ],
- 'first field'
-);
+{
+ my @warnings;
+ local $SIG{__WARN__} = sub {
+ push @warnings, @_;
+ };
+ my $record = $parser->next();
+ is_deeply(
+ $record->[0],
+ [ 'LDR', undef, undef, '_', '00665nam 22002298a 4500' ],
+ 'skipped faulty records'
+ );
+ is scalar(@warnings), 4, 'got warnings';
+ like $warnings[0], qr{no fields found in record}, 'carp no fields found in record';
+ like $warnings[1], qr{no valid record leader found in record}, 'carp no valid record leader found in record';
+ like $warnings[2], qr{different number of tags and fields in record}, 'carp different number of tags and fields in record';
+ like $warnings[3], qr{incomplete directory entry in record}, 'carp incomplete directory entry in record';
+}
done_testing;
\ No newline at end of file
diff --git a/t/camel.mrc b/t/camel.mrc
index 68d6dad..7127bd2 100644
--- a/t/camel.mrc
+++ b/t/camel.mrc
@@ -1 +1 @@
-00755cam 22002414a 4500001001300000003000600013005001700019008004100036010001700077020004300094040001800137042000800155050002600163082001700189100003100206245005400237260004200291300007200333500003300405650003700438630002500475630001300500
fol05731351
IMchF
20000613133448.0
000107s2000 nyua 001 0 eng
a 00020737
a0471383147 (paper/cd-rom : alk. paper)
aDLCcDLCdDLC
apcc
00aQA76.73.P22bM33 2000
00a005.13/3221
1 aMartinsson, Tobias,d1976-
10aActivePerl [...]
\ No newline at end of file
+00755cam 22002414a 4500001001300000003000600013005001700019008004100036010001700077020004300094040001800137042000800155050002600163082001700189100003100206245005400237260004200291300007200333500003300405650003700438630002500475630001300500
fol05731351
IMchF
20000613133448.0
000107s2000 nyua 001 0 eng
a 00020737
a0471383147 (paper/cd-rom : alk. paper)
aDLCcDLCdDLC
apcc
00aQA76.73.P22bM33 2000
00a005.13/3221
1 aMartinsson, Tobias,d1976-
10aActivePerl [...]
\ No newline at end of file
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmarc-parser-raw-perl.git
More information about the Pkg-perl-cvs-commits
mailing list