r41024 - in /trunk/libmarc-xml-perl: Changes MANIFEST META.yml README debian/README.source debian/changelog debian/compat debian/control debian/rules lib/MARC/File/SAX.pm lib/MARC/File/XML.pm t/escape.mrc t/escape.t
jawnsy-guest at users.alioth.debian.org
jawnsy-guest at users.alioth.debian.org
Thu Jul 30 20:16:49 UTC 2009
Author: jawnsy-guest
Date: Thu Jul 30 20:16:42 2009
New Revision: 41024
URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=41024
Log:
* New upstream release
+ Apply the leader modification for USMARC to signal UTF-8 encoding
+ Escape '<', '>' and '&' used as indicator values and subfield labels
+ Clarify names of header output switches (RT#34082)
+ MARC::File::SAX can also be used to build multiple records now
* Standards-Version 3.8.2
* Depend on debhelper >= 7, compat bumped to 7
* Changed to short rules format
* Rewrote description
Added:
trunk/libmarc-xml-perl/t/escape.mrc
Removed:
trunk/libmarc-xml-perl/debian/README.source
Modified:
trunk/libmarc-xml-perl/Changes
trunk/libmarc-xml-perl/MANIFEST
trunk/libmarc-xml-perl/META.yml
trunk/libmarc-xml-perl/README
trunk/libmarc-xml-perl/debian/changelog
trunk/libmarc-xml-perl/debian/compat
trunk/libmarc-xml-perl/debian/control
trunk/libmarc-xml-perl/debian/rules
trunk/libmarc-xml-perl/lib/MARC/File/SAX.pm
trunk/libmarc-xml-perl/lib/MARC/File/XML.pm
trunk/libmarc-xml-perl/t/escape.t
Modified: trunk/libmarc-xml-perl/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libmarc-xml-perl/Changes?rev=41024&op=diff
==============================================================================
--- trunk/libmarc-xml-perl/Changes (original)
+++ trunk/libmarc-xml-perl/Changes Thu Jul 30 20:16:42 2009
@@ -1,4 +1,22 @@
Revision history for Perl extension MARC-XML
+
+0.91 Tue Jul 28 2009
+ - Nixing the stray space (always been there) in the schemaLocation
+ attribute for single-record output
+ - Properly apply the leader modification for USMARC to signal UTF-8
+ encoding.
+ - Escape '<', '>', and '&' used as indicator values and subfield labels
+ during XML output (Bill Erickson)
+ - CPAN RT#34082: clarify names of header output switches
+
+0.90 Fri Dec 14 2007
+ - modifications to MARC::File::SAX to use LocalName rather than Name
+ Name can contain a namespace prefix and cause parsing to fail
+ Should be ok to rely on LocalName since the parser factory is
+ requiring Namespace support?
+ - MARC::File::SAX also can build up multiple records now, for use
+ in other SAX contexts like Net::OAI::Harvester. This required
+ a few changes in MARC::File::XML as well.
0.88 Wed Nov 28 2007
- String test for subfield code to avoid dropping $0 (Galen Charlton)
Modified: trunk/libmarc-xml-perl/MANIFEST
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libmarc-xml-perl/MANIFEST?rev=41024&op=diff
==============================================================================
--- trunk/libmarc-xml-perl/MANIFEST (original)
+++ trunk/libmarc-xml-perl/MANIFEST Thu Jul 30 20:16:42 2009
@@ -1,19 +1,20 @@
-Changes
-Makefile.PL
-MANIFEST
-README
bin/entity-escape-xml
bin/marc2xml
bin/xml2marc
+Changes
+lib/MARC/File/SAX.pm
lib/MARC/File/XML.pm
-lib/MARC/File/SAX.pm
-t/subfield0.t
-t/subfield0.xml
+Makefile.PL
+MANIFEST
+META.yml
+README
t/batch.t
t/batch.xml
t/encode.t
+t/escape.mrc
t/escape.t
t/namespace.t
t/namespace.xml
t/record.dat
-META.yml
+t/subfield0.t
+t/subfield0.xml
Modified: trunk/libmarc-xml-perl/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libmarc-xml-perl/META.yml?rev=41024&op=diff
==============================================================================
--- trunk/libmarc-xml-perl/META.yml (original)
+++ trunk/libmarc-xml-perl/META.yml Thu Jul 30 20:16:42 2009
@@ -1,16 +1,24 @@
--- #YAML:1.0
-name: MARC-XML
-version: 0.88
-abstract: ~
-license: ~
-generated_by: ExtUtils::MakeMaker version 6.36
-distribution_type: module
-requires:
- MARC::Charset: 0.98
- MARC::Record: 2
- XML::SAX: 0.12
-meta-spec:
- url: http://module-build.sourceforge.net/META-spec-v1.2.html
- version: 1.2
+name: MARC-XML
+version: 0.91
+abstract: ~
author:
- Ed Summers <ehs at pobox.com>
+license: unknown
+distribution_type: module
+configure_requires:
+ ExtUtils::MakeMaker: 0
+build_requires:
+ ExtUtils::MakeMaker: 0
+requires:
+ MARC::Charset: 0.98
+ MARC::Record: 2
+ XML::SAX: 0.12
+no_index:
+ directory:
+ - t
+ - inc
+generated_by: ExtUtils::MakeMaker version 6.52
+meta-spec:
+ url: http://module-build.sourceforge.net/META-spec-v1.4.html
+ version: 1.4
Modified: trunk/libmarc-xml-perl/README
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libmarc-xml-perl/README?rev=41024&op=diff
==============================================================================
--- trunk/libmarc-xml-perl/README (original)
+++ trunk/libmarc-xml-perl/README Thu Jul 30 20:16:42 2009
@@ -25,8 +25,6 @@
COPYRIGHT AND LICENCE
-Copyright (C) 2003,2004 Ed Summers
-
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
Modified: trunk/libmarc-xml-perl/debian/changelog
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libmarc-xml-perl/debian/changelog?rev=41024&op=diff
==============================================================================
--- trunk/libmarc-xml-perl/debian/changelog (original)
+++ trunk/libmarc-xml-perl/debian/changelog Thu Jul 30 20:16:42 2009
@@ -1,4 +1,15 @@
-libmarc-xml-perl (0.88-2) UNRELEASED; urgency=low
+libmarc-xml-perl (0.91-1) UNRELEASED; urgency=low
+
+ [ Jonathan Yu ]
+ * New upstream release
+ + Apply the leader modification for USMARC to signal UTF-8 encoding
+ + Escape '<', '>' and '&' used as indicator values and subfield labels
+ + Clarify names of header output switches (RT#34082)
+ + MARC::File::SAX can also be used to build multiple records now
+ * Standards-Version 3.8.2
+ * Depend on debhelper >= 7, compat bumped to 7
+ * Changed to short rules format
+ * Rewrote description
[ gregor herrmann ]
* debian/watch: use dist-based URL.
@@ -10,7 +21,7 @@
[ Nathan Handler ]
* debian/watch: Update to ignore development releases.
- -- gregor herrmann <gregoa at debian.org> Wed, 06 Aug 2008 18:23:43 -0300
+ -- Jonathan Yu <frequency at cpan.org> Thu, 30 Jul 2009 12:04:43 -0400
libmarc-xml-perl (0.88-1) unstable; urgency=low
Modified: trunk/libmarc-xml-perl/debian/compat
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libmarc-xml-perl/debian/compat?rev=41024&op=diff
==============================================================================
--- trunk/libmarc-xml-perl/debian/compat (original)
+++ trunk/libmarc-xml-perl/debian/compat Thu Jul 30 20:16:42 2009
@@ -1,1 +1,1 @@
-5
+7
Modified: trunk/libmarc-xml-perl/debian/control
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libmarc-xml-perl/debian/control?rev=41024&op=diff
==============================================================================
--- trunk/libmarc-xml-perl/debian/control (original)
+++ trunk/libmarc-xml-perl/debian/control Thu Jul 30 20:16:42 2009
@@ -1,31 +1,23 @@
Source: libmarc-xml-perl
Section: perl
Priority: optional
-Build-Depends: debhelper (>= 5.0.0), quilt
-Build-Depends-Indep: perl (>= 5.8.8-7), libxml-sax-perl (>= 0.12),
- libmarc-charset-perl (>= 0.98), libmarc-record-perl (>= 2)
+Build-Depends: debhelper (>= 7)
+Build-Depends-Indep: perl (>= 5.8.8-7), libmarc-charset-perl (>= 0.98),
+ libxml-sax-perl (>= 0.12), libmarc-record-perl (>= 2)
Maintainer: Debian Perl Group <pkg-perl-maintainers at lists.alioth.debian.org>
Uploaders: Vincent Danjean <vdanjean at debian.org>,
- Damyan Ivanov <dmn at debian.org>
-Standards-Version: 3.7.3
+ Damyan Ivanov <dmn at debian.org>, Jonathan Yu <frequency at cpan.org>
+Standards-Version: 3.8.2
Homepage: http://search.cpan.org/dist/MARC-XML/
Vcs-Svn: svn://svn.debian.org/pkg-perl/trunk/libmarc-xml-perl/
Vcs-Browser: http://svn.debian.org/viewsvn/pkg-perl/trunk/libmarc-xml-perl/
Package: libmarc-xml-perl
Architecture: all
-Depends: ${perl:Depends}, ${misc:Depends}, libxml-sax-perl (>= 0.12),
- libmarc-charset-perl (>= 0.98), libmarc-record-perl (>= 2)
+Depends: ${perl:Depends}, ${misc:Depends}, libmarc-charset-perl (>= 0.98),
+ libxml-sax-perl (>= 0.12), libmarc-record-perl (>= 2)
Description: Perl library to access MARC data encoded as XML
- The MARC-XML distribution is an extension to the MARC-Record distribution for
- working with MARC21 data that is encoded as XML. The XML encoding used is the
- MARC21slim schema supplied by the Library of Congress. More information may be
- obtained here: http://www.loc.gov/standards/marcxml/
- .
- Once you install the MARC-XML distribution you will most likely not use it
- directly, but will have an additional file format available to you when you
- use MARC::Batch.
- .
- This version of MARC-XML supersedes an the versions ending with 0.25 which
- were used with the MARC.pm framework. MARC-XML now uses MARC::Record
- exclusively.
+ MARC::XML is an extension to the Marc::Record distribution for working with
+ MARC21 data that is encoded as XML. The XML encoding used is the MARC21slim
+ schema supplied by the Library of Congress. Installing MARC-XML automatically
+ adds support for the additional file format for use with MARC::Batch.
Modified: trunk/libmarc-xml-perl/debian/rules
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libmarc-xml-perl/debian/rules?rev=41024&op=diff
==============================================================================
--- trunk/libmarc-xml-perl/debian/rules (original)
+++ trunk/libmarc-xml-perl/debian/rules Thu Jul 30 20:16:42 2009
@@ -1,80 +1,4 @@
#!/usr/bin/make -f
-# This debian/rules file is provided as a template for normal perl
-# packages. It was created by Marc Brockschmidt <marc at dch-faq.de> for
-# the Debian Perl Group (http://pkg-perl.alioth.debian.org/) but may
-# be used freely wherever it is useful.
-# Uncomment this to turn on verbose mode.
-#export DH_VERBOSE=1
-
-# If set to a true value then MakeMaker's prompt function will
-# always return the default without waiting for user input.
-export PERL_MM_USE_DEFAULT=1
-
-PACKAGE=$(shell dh_listpackages)
-
-ifndef PERL
-PERL = /usr/bin/perl
-endif
-
-TMP =$(CURDIR)/debian/$(PACKAGE)
-
-build: build-stamp
-build-stamp:
- dh_testdir
-
- # As this is a architecture independent package, we are not
- # supposed to install stuff to /usr/lib. MakeMaker creates
- # the dirs, we prevent this by setting the INSTALLVENDORARCH
- # and VENDORARCHEXP environment variables.
-
- # Add commands to compile the package here
- $(PERL) Makefile.PL INSTALLDIRS=vendor \
- INSTALLVENDORARCH=/usr/share/perl5/ \
- VENDORARCHEXP=/usr/share/perl5/
- $(MAKE)
- $(MAKE) test
-
- touch $@
-
-clean:
- dh_testdir
- dh_testroot
-
- dh_clean build-stamp install-stamp
-
- # Add commands to clean up after the build process here
- [ ! -f Makefile ] || $(MAKE) realclean
-
-install: install-stamp
-install-stamp: build-stamp
- dh_testdir
- dh_testroot
- dh_clean -k
-
- # Add commands to install the package into debian/$PACKAGE_NAME here
- $(MAKE) install DESTDIR=$(TMP) PREFIX=/usr
-
- touch $@
-
-binary-arch:
-# We have nothing to do here for an architecture-independent package
-
-binary-indep: build install
- dh_testdir
- dh_testroot
- dh_installdocs
- dh_installchangelogs Changes
- dh_perl
- dh_compress
- dh_fixperms
- dh_installdeb
- dh_gencontrol
- dh_md5sums
- dh_builddeb
-
-source diff:
- @echo >&2 'source and diff are obsolete - use dpkg-source -b'; false
-
-binary: binary-indep binary-arch
-.PHONY: build clean binary-indep binary-arch binary
+%:
+ dh $@
Modified: trunk/libmarc-xml-perl/lib/MARC/File/SAX.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libmarc-xml-perl/lib/MARC/File/SAX.pm?rev=41024&op=diff
==============================================================================
--- trunk/libmarc-xml-perl/lib/MARC/File/SAX.pm (original)
+++ trunk/libmarc-xml-perl/lib/MARC/File/SAX.pm Thu Jul 30 20:16:42 2009
@@ -13,76 +13,117 @@
use XML::SAX;
use base qw( XML::SAX::Base );
use Data::Dumper;
+use MARC::Record;
use MARC::Charset qw(utf8_to_marc8);
+
+=head2 new()
+
+Create the handler.
+
+=cut
+
+sub new {
+ my $class = shift;
+ return bless {records => []}, ref($class) || $class;
+}
+
+=head2 records()
+
+Get all the MARC::Records that were parsed out of the XML.
+
+=cut
+
+sub records {
+ return shift->{records};
+}
+
+=head2 record()
+
+In some contexts you might only expect there to be one record parsed. This
+is a shorthand for getting it.
+
+=cut
+
+sub record {
+ return shift->{records}[0];
+}
sub start_element {
my ( $self, $element ) = @_;
- my $name = $element->{ Name };
- if ( $name eq 'leader' ) {
- $self->{ tag } = 'LDR';
+ my $name = $element->{ LocalName };
+ if ( $name eq 'record' ) {
+ $self->{ record } = MARC::Record->new();
+ } elsif ( $name eq 'leader' ) {
+ $self->{ tag } = 'LDR';
} elsif ( $name eq 'controlfield' ) {
- $self->{ tag } = $element->{ Attributes }{ '{}tag' }{ Value };
+ $self->{ tag } = $element->{ Attributes }{ '{}tag' }{ Value };
} elsif ( $name eq 'datafield' ) {
- $self->{ tag } = $element->{ Attributes }{ '{}tag' }{ Value };
- $self->{ i1 } = $element->{ Attributes }{ '{}ind1' }{ Value };
- $self->{ i2 } = $element->{ Attributes }{ '{}ind2' }{ Value };
+ $self->{ tag } = $element->{ Attributes }{ '{}tag' }{ Value };
+ $self->{ i1 } = $element->{ Attributes }{ '{}ind1' }{ Value };
+ $self->{ i2 } = $element->{ Attributes }{ '{}ind2' }{ Value };
} elsif ( $name eq 'subfield' ) {
- $self->{ subcode } = $element->{ Attributes }{ '{}code' }{ Value };
+ $self->{ subcode } = $element->{ Attributes }{ '{}code' }{ Value };
}
}
sub end_element {
my ( $self, $element ) = @_;
- my $name = $element->{ Name };
+ my $name = $element->{ LocalName };
if ( $name eq 'subfield' ) {
- push @{ $self->{ subfields } }, $self->{ subcode };
-
- if ($self->{ transcode }) {
- push @{ $self->{ subfields } }, utf8_to_marc8($self->{ chars });
- } else {
- push @{ $self->{ subfields } }, $self->{ chars } ;
- }
+ push @{ $self->{ subfields } }, $self->{ subcode };
- $self->{ chars } = '';
- $self->{ subcode } = '';
+ if ($self->{ transcode }) {
+ push @{ $self->{ subfields } }, utf8_to_marc8($self->{ chars });
+ } else {
+ push @{ $self->{ subfields } }, $self->{ chars } ;
+ }
+
+ $self->{ chars } = '';
+ $self->{ subcode } = '';
} elsif ( $name eq 'controlfield' ) {
- $self->{ record }->append_fields(
- MARC::Field->new( $self->{ tag }, $self->{ chars } )
- );
- $self->{ chars } = '';
- $self->{ tag } = '';
+ $self->{ record }->append_fields(
+ MARC::Field->new( $self->{ tag }, $self->{ chars } )
+ );
+ $self->{ chars } = '';
+ $self->{ tag } = '';
} elsif ( $name eq 'datafield' ) {
- $self->{ record }->append_fields(
- MARC::Field->new(
- $self->{ tag },
- $self->{ i1 },
- $self->{ i2 },
- @{ $self->{ subfields } }
- )
- );
- $self->{ tag } = '';
- $self->{ i1 } = '';
- $self->{ i2 } = '';
- $self->{ subfields } = [];
- $self->{ chars } = '';
+ $self->{ record }->append_fields(
+ MARC::Field->new(
+ $self->{ tag },
+ $self->{ i1 },
+ $self->{ i2 },
+ @{ $self->{ subfields } }
+ )
+ );
+ $self->{ tag } = '';
+ $self->{ i1 } = '';
+ $self->{ i2 } = '';
+ $self->{ subfields } = [];
+ $self->{ chars } = '';
} elsif ( $name eq 'leader' ) {
- my $ldr = $self->{ chars };
- $self->{ transcode }++
- if (substr($ldr,9,1) eq 'a' and $self->{toMARC8});
-
- substr($ldr,9,1,' ') if ($self->{ transcode });
- $self->{ record }->leader( $ldr );
- $self->{ chars } = '';
- $self->{ tag } = '';
+ my $ldr = $self->{ chars };
+
+ $self->{ transcode }++
+ if (substr($ldr,9,1) eq 'a' and $self->{toMARC8});
+
+ substr($ldr,9,1,' ') if ($self->{ transcode });
+
+ $self->{ record }->leader( $ldr );
+ $self->{ chars } = '';
+ $self->{ tag } = '';
+ } elsif ( $name eq 'record' ) {
+ push(@{ $self->{ records } }, $self->{ record });
+ undef $self->{ record };
}
-
}
sub characters {
my ( $self, $chars ) = @_;
- if ( ( exists $self->{ subcode } and $self->{ subcode } ne '') or ( $self->{ tag } and
- ( $self->{ tag } eq 'LDR' or $self->{ tag } < 10 ) ) ) {
- $self->{ chars } .= $chars->{ Data };
+ if (
+ ( exists $self->{ subcode } && $self->{ subcode } ne '')
+ || ( $self->{ tag } && ( $self->{ tag } eq 'LDR' || $self->{ tag } < 10 ))
+ ) {
+ $self->{ chars } .= $chars->{ Data };
}
}
Modified: trunk/libmarc-xml-perl/lib/MARC/File/XML.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libmarc-xml-perl/lib/MARC/File/XML.pm?rev=41024&op=diff
==============================================================================
--- trunk/libmarc-xml-perl/lib/MARC/File/XML.pm (original)
+++ trunk/libmarc-xml-perl/lib/MARC/File/XML.pm Thu Jul 30 20:16:42 2009
@@ -14,22 +14,16 @@
use Carp qw( croak );
use Encode ();
-$VERSION = '0.88';
-
-my $handler = MARC::File::SAX->new();
+$VERSION = '0.91';
my $factory = XML::SAX::ParserFactory->new();
$factory->require_feature(Namespaces);
-my $parser = $factory->parser( Handler => $handler, ProtocolEncoding => 'UTF-8' );
-
sub import {
- my $class = shift;
- %_load_args = @_;
- $_load_args{ DefaultEncoding } ||= 'UTF-8';
- $_load_args{ RecordFormat } ||= 'USMARC';
-
- $parser = $factory->parser( Handler => $handler, ProtocolEncoding => $_load_args{DefaultEncoding} );
+ my $class = shift;
+ %_load_args = @_;
+ $_load_args{ DefaultEncoding } ||= 'UTF-8';
+ $_load_args{ RecordFormat } ||= 'USMARC';
}
=head1 NAME
@@ -105,12 +99,12 @@
=cut
sub default_record_format {
- my $self = shift;
- my $format = shift;
-
- $_load_args{RecordFormat} = $format if ($format);
-
- return $_load_args{RecordFormat};
+ my $self = shift;
+ my $format = shift;
+
+ $_load_args{RecordFormat} = $format if ($format);
+
+ return $_load_args{RecordFormat};
}
@@ -227,7 +221,7 @@
}
## print the XML header if we haven't already
if ( ! $self->{ header } ) {
- $enc ||= $self->{ encoding } || $_load_args{DefaultEncoding};
+ $enc ||= $self->{ encoding } || $_load_args{DefaultEncoding};
$self->{ fh }->print( header( $enc ) );
$self->{ header } = 1;
}
@@ -313,7 +307,7 @@
sub record {
my $record = shift;
my $format = shift;
- my $without_header = shift;
+ my $include_full_record_header = shift;
my $enc = shift;
$format ||= $_load_args{RecordFormat};
@@ -326,16 +320,18 @@
if ($original_encoding ne 'a' && lc($format) !~ /^unimarc/o) {
# If not, we'll make it so
$_transcode++;
+ substr($ldr,9,1,'a');
+ $record->leader( $ldr );
}
my @xml = ();
- if ($without_header) {
+ if ($include_full_record_header) {
push @xml, <<HEADER
<?xml version="1.0" encoding="$enc"?>
<record
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim.xsd"
+ xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"
xmlns="http://www.loc.gov/MARC21/slim">
HEADER
@@ -346,17 +342,17 @@
push( @xml, " <leader>" . escape( $record->leader ) . "</leader>" );
foreach my $field ( $record->fields() ) {
- my $tag = $field->tag();
+ my ($tag) = escape( $field->tag() );
if ( $field->is_control_field() ) {
my $data = $field->data;
push( @xml, qq( <controlfield tag="$tag">) .
escape( ($_transcode ? marc8_to_utf8($data) : $data) ). qq(</controlfield>) );
} else {
- my $i1 = $field->indicator( 1 );
- my $i2 = $field->indicator( 2 );
+ my ($i1) = escape( $field->indicator( 1 ) );
+ my ($i2) = escape( $field->indicator( 2 ) );
push( @xml, qq( <datafield tag="$tag" ind1="$i1" ind2="$i2">) );
foreach my $subfield ( $field->subfields() ) {
- my ( $code, $data ) = @$subfield;
+ my ( $code, $data ) = ( escape( $$subfield[0] ), $$subfield[1] );
push( @xml, qq( <subfield code="$code">).
escape( ($_transcode ? marc8_to_utf8($data) : $data) ).qq(</subfield>) );
}
@@ -422,7 +418,6 @@
=cut
sub decode {
-
my $text;
my $location = '';
my $self = shift;
@@ -430,34 +425,35 @@
## see MARC::File::USMARC::decode for explanation of what's going on
## here
if ( ref($self) =~ /^MARC::File/ ) {
- $location = 'in record '.$self->{recnum};
- $text = shift;
+ $location = 'in record '.$self->{recnum};
+ $text = shift;
} else {
- $location = 'in record 1';
- $text = $self=~/MARC::File/ ? shift : $self;
+ $location = 'in record 1';
+ $text = $self=~/MARC::File/ ? shift : $self;
}
my $enc = shift || $_load_args{BinaryEncoding};
my $format = shift || $_load_args{RecordFormat};
- $parser->{ tagStack } = [];
- $parser->{ subfields } = [];
- $parser->{ Handler }{ record } = MARC::Record->new();
+ my $handler = MARC::File::SAX->new();
+ my $parser = $factory->parser(
+ Handler => $handler,
+ ProtocolEncoding => $_load_args{DefaultEncoding}
+ );
$parser->{ Handler }{ toMARC8 } = decideMARC8Binary($format,$enc);
$parser->parse_string( $text );
- return( $parser->{ Handler }{ record } );
-
+ return( $handler->record() );
}
sub decideMARC8Binary {
- my $format = shift;
- my $enc = shift;
-
- return 0 if (defined($format) && lc($format) =~ /^unimarc/o);
- return 0 if (defined($enc) && lc($enc) =~ /^utf-?8/o);
- return 1;
+ my $format = shift;
+ my $enc = shift;
+
+ return 0 if (defined($format) && lc($format) =~ /^unimarc/o);
+ return 0 if (defined($enc) && lc($enc) =~ /^utf-?8/o);
+ return 1;
}
@@ -473,7 +469,7 @@
sub encode {
my $record = shift;
my $format = shift || $_load_args{RecordFormat};
- my $without_header = shift;
+ my $without_collection_header = shift;
my $enc = shift || $_load_args{DefaultEncoding};
if (lc($format) =~ /^unimarc/o) {
@@ -481,29 +477,33 @@
}
my @xml = ();
- push( @xml, header( $enc ) ) unless ($without_header);
- push( @xml, record( $record, $format, $without_header, $enc ) );
- push( @xml, footer() ) unless ($without_header);
+ push( @xml, header( $enc ) ) unless ($without_collection_header);
+ # verbose, but naming the header output flags this way to avoid
+ # the potential confusion identified in CPAN bug #34082
+ # http://rt.cpan.org/Public/Bug/Display.html?id=34082
+ my $include_full_record_header = ($without_collection_header) ? 1 : 0;
+ push( @xml, record( $record, $format, $include_full_record_header, $enc ) );
+ push( @xml, footer() ) unless ($without_collection_header);
return( join( "\n", @xml ) );
}
sub _unimarc_encoding {
- my $f = shift;
- my $r = shift;
-
- my $pos = 26;
- $pos = 13 if (lc($f) eq 'unimarcauth');
-
- my $enc = substr( $r->subfield(100 => 'a'), $pos, 2 );
-
- if ($enc eq '01' || $enc eq '03') {
- return 'ISO-8859-1';
- } elsif ($enc eq '50') {
- return 'UTF-8';
- } else {
- die "Unsupported UNIMARC character encoding [$enc] for XML output for $f; 100$a -> " . $r->subfield(100 => 'a');
- }
+ my $f = shift;
+ my $r = shift;
+
+ my $pos = 26;
+ $pos = 13 if (lc($f) eq 'unimarcauth');
+
+ my $enc = substr( $r->subfield(100 => 'a'), $pos, 2 );
+
+ if ($enc eq '01' || $enc eq '03') {
+ return 'ISO-8859-1';
+ } elsif ($enc eq '50') {
+ return 'UTF-8';
+ } else {
+ die "Unsupported UNIMARC character encoding [$enc] for XML output for $f; 100\$a -> " . $r->subfield(100 => 'a');
+ }
}
=head1 TODO
Added: trunk/libmarc-xml-perl/t/escape.mrc
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libmarc-xml-perl/t/escape.mrc?rev=41024&op=file
==============================================================================
--- trunk/libmarc-xml-perl/t/escape.mrc (added)
+++ trunk/libmarc-xml-perl/t/escape.mrc Thu Jul 30 20:16:42 2009
@@ -1,0 +1,1 @@
+00727nam 2200205 a 4500001001100000005001700011008004100028035002000069050002200089100002200111245005200133250002600185260004200211300002000253650005600273650005600329650005600385949007400441596000600515
03-0016458
19971103184734.0
970701s1997 oru u000 0 eng u
a(Sirsi) a351664
00aML270.2b.A6 1997
1 aAnthony, James R.
00aFrench baroque music from Beaujoyeulx to Rameau
aRev. and expanded ed.
aPortland, OR :bAmadeus Press,c1997.
a586 p. :bmusic
0aMusic<Francey16th centuryxHistory and criticism.
0aMusiczFrancey17th centuryxHistory and criticism.
0aMusiczFrancey18th centuryxHistory and criticism.
aML 270.2 A6 1997wLCi30007006841505rYtBOOKSlHUNT-CIRCmHUNTINGTON
a1
Modified: trunk/libmarc-xml-perl/t/escape.t
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libmarc-xml-perl/t/escape.t?rev=41024&op=diff
==============================================================================
--- trunk/libmarc-xml-perl/t/escape.t (original)
+++ trunk/libmarc-xml-perl/t/escape.t Thu Jul 30 20:16:42 2009
@@ -1,6 +1,6 @@
use strict;
use warnings;
-use Test::More tests => 10;
+use Test::More tests => 14;
# test escaping of < > and & for XML
@@ -12,17 +12,36 @@
use_ok( 'MARC::Record' );
use_ok( 'MARC::Field' );
+use_ok( 'MARC::Batch' );
-my $r = MARC::Record->new();
-isa_ok( $r, 'MARC::Record' );
+my $r1 = MARC::Record->new();
+isa_ok( $r1, 'MARC::Record' );
-$r->leader( '&xyz<123>' );
-$r->append_fields(
+$r1->leader( '&xyz<123>' );
+$r1->append_fields(
MARC::Field->new( '005', '&abc<def>' ),
MARC::Field->new( '245', 0, 1, a => 'Foo&Bar<Baz>' )
);
-my $xml = $r->as_xml();
-like( $xml, qr/&xyz<123>/, 'escaped leader' );
-like( $xml, qr/&abc<def>/, 'escape control field' );
-like( $xml, qr/Foo&Bar<Baz>/, 'escaped field' );
+my $xml1 = $r1->as_xml();
+like( $xml1, qr/&xyz<123>/, 'escaped leader' );
+like( $xml1, qr/&abc<def>/, 'escape control field' );
+like( $xml1, qr/Foo&Bar<Baz>/, 'escaped field' );
+
+# check escaping of subfield labels
+my $b = MARC::Batch->new( 'USMARC', 't/escape.mrc' );
+my $r2 = $b->next();
+is($r2->subfield('650', '<'), 'France', 'read subfield $< parsed from ISO2709 blob');
+my $xml2 = $r2->as_xml();
+my $r3;
+SKIP: {
+ eval { $r3 = MARC::Record->new_from_xml($xml2); };
+ if ($@) {
+ fail('failed to parse MARCXML generated from record containing a subfield $<');
+ skip 'no point in checking further', 1;
+ } else {
+ is($r3->subfield('650', '<'), 'France', 'read subfield $< parsed from MARCXML');
+ is_deeply($r2, $r3, 'record with subfield $< the same parsed from ISO2709 or MARCXML');
+ }
+}
+
More information about the Pkg-perl-cvs-commits
mailing list