r41024 - in /trunk/libmarc-xml-perl: Changes MANIFEST META.yml README debian/README.source debian/changelog debian/compat debian/control debian/rules lib/MARC/File/SAX.pm lib/MARC/File/XML.pm t/escape.mrc t/escape.t

jawnsy-guest at users.alioth.debian.org jawnsy-guest at users.alioth.debian.org
Thu Jul 30 20:16:49 UTC 2009


Author: jawnsy-guest
Date: Thu Jul 30 20:16:42 2009
New Revision: 41024

URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=41024
Log:
* New upstream release
  + Apply the leader modification for USMARC to signal UTF-8 encoding
  + Escape '<', '>' and '&' used as indicator values and subfield labels
  + Clarify names of header output switches (RT#34082)
  + MARC::File::SAX can also be used to build multiple records now
* Standards-Version 3.8.2
* Depend on debhelper >= 7, compat bumped to 7
* Changed to short rules format
* Rewrote description

Added:
    trunk/libmarc-xml-perl/t/escape.mrc
Removed:
    trunk/libmarc-xml-perl/debian/README.source
Modified:
    trunk/libmarc-xml-perl/Changes
    trunk/libmarc-xml-perl/MANIFEST
    trunk/libmarc-xml-perl/META.yml
    trunk/libmarc-xml-perl/README
    trunk/libmarc-xml-perl/debian/changelog
    trunk/libmarc-xml-perl/debian/compat
    trunk/libmarc-xml-perl/debian/control
    trunk/libmarc-xml-perl/debian/rules
    trunk/libmarc-xml-perl/lib/MARC/File/SAX.pm
    trunk/libmarc-xml-perl/lib/MARC/File/XML.pm
    trunk/libmarc-xml-perl/t/escape.t

Modified: trunk/libmarc-xml-perl/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libmarc-xml-perl/Changes?rev=41024&op=diff
==============================================================================
--- trunk/libmarc-xml-perl/Changes (original)
+++ trunk/libmarc-xml-perl/Changes Thu Jul 30 20:16:42 2009
@@ -1,4 +1,22 @@
 Revision history for Perl extension MARC-XML
+
+0.91 Tue Jul 28 2009
+      - Nixing the stray space (always been there) in the schemaLocation
+        attribute for single-record output
+      - Properly apply the leader modification for USMARC to signal UTF-8
+        encoding.
+      - Escape '<', '>', and '&' used as indicator values and subfield labels 
+        during XML output (Bill Erickson)
+      - CPAN RT#34082: clarify names of header output switches
+
+0.90 Fri Dec 14 2007
+      - modifications to MARC::File::SAX to use LocalName rather than Name
+        Name can contain a namespace prefix and cause parsing to fail
+        Should be ok to rely on LocalName since the parser factory is
+        requiring Namespace support?
+      - MARC::File::SAX also can build up multiple records now, for use 
+        in other SAX contexts like Net::OAI::Harvester. This required
+        a few changes in MARC::File::XML as well.
 
 0.88 Wed Nov 28 2007
       - String test for subfield code to avoid dropping $0 (Galen Charlton)

Modified: trunk/libmarc-xml-perl/MANIFEST
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libmarc-xml-perl/MANIFEST?rev=41024&op=diff
==============================================================================
--- trunk/libmarc-xml-perl/MANIFEST (original)
+++ trunk/libmarc-xml-perl/MANIFEST Thu Jul 30 20:16:42 2009
@@ -1,19 +1,20 @@
-Changes
-Makefile.PL
-MANIFEST
-README
 bin/entity-escape-xml
 bin/marc2xml
 bin/xml2marc
+Changes
+lib/MARC/File/SAX.pm
 lib/MARC/File/XML.pm
-lib/MARC/File/SAX.pm
-t/subfield0.t
-t/subfield0.xml
+Makefile.PL
+MANIFEST
+META.yml
+README
 t/batch.t
 t/batch.xml
 t/encode.t
+t/escape.mrc
 t/escape.t
 t/namespace.t
 t/namespace.xml
 t/record.dat
-META.yml
+t/subfield0.t
+t/subfield0.xml

Modified: trunk/libmarc-xml-perl/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libmarc-xml-perl/META.yml?rev=41024&op=diff
==============================================================================
--- trunk/libmarc-xml-perl/META.yml (original)
+++ trunk/libmarc-xml-perl/META.yml Thu Jul 30 20:16:42 2009
@@ -1,16 +1,24 @@
 --- #YAML:1.0
-name:                MARC-XML
-version:             0.88
-abstract:            ~
-license:             ~
-generated_by:        ExtUtils::MakeMaker version 6.36
-distribution_type:   module
-requires:     
-    MARC::Charset:                 0.98
-    MARC::Record:                  2
-    XML::SAX:                      0.12
-meta-spec:
-    url:     http://module-build.sourceforge.net/META-spec-v1.2.html
-    version: 1.2
+name:               MARC-XML
+version:            0.91
+abstract:           ~
 author:
     - Ed Summers <ehs at pobox.com>
+license:            unknown
+distribution_type:  module
+configure_requires:
+    ExtUtils::MakeMaker:  0
+build_requires:
+    ExtUtils::MakeMaker:  0
+requires:
+    MARC::Charset:  0.98
+    MARC::Record:   2
+    XML::SAX:       0.12
+no_index:
+    directory:
+        - t
+        - inc
+generated_by:       ExtUtils::MakeMaker version 6.52
+meta-spec:
+    url:      http://module-build.sourceforge.net/META-spec-v1.4.html
+    version:  1.4

Modified: trunk/libmarc-xml-perl/README
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libmarc-xml-perl/README?rev=41024&op=diff
==============================================================================
--- trunk/libmarc-xml-perl/README (original)
+++ trunk/libmarc-xml-perl/README Thu Jul 30 20:16:42 2009
@@ -25,8 +25,6 @@
 
 COPYRIGHT AND LICENCE
 
-Copyright (C) 2003,2004 Ed Summers
-
 This library is free software; you can redistribute it and/or modify
 it under the same terms as Perl itself. 
 

Modified: trunk/libmarc-xml-perl/debian/changelog
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libmarc-xml-perl/debian/changelog?rev=41024&op=diff
==============================================================================
--- trunk/libmarc-xml-perl/debian/changelog (original)
+++ trunk/libmarc-xml-perl/debian/changelog Thu Jul 30 20:16:42 2009
@@ -1,4 +1,15 @@
-libmarc-xml-perl (0.88-2) UNRELEASED; urgency=low
+libmarc-xml-perl (0.91-1) UNRELEASED; urgency=low
+
+  [ Jonathan Yu ]
+  * New upstream release
+    + Apply the leader modification for USMARC to signal UTF-8 encoding
+    + Escape '<', '>' and '&' used as indicator values and subfield labels
+    + Clarify names of header output switches (RT#34082)
+    + MARC::File::SAX can also be used to build multiple records now
+  * Standards-Version 3.8.2
+  * Depend on debhelper >= 7, compat bumped to 7
+  * Changed to short rules format
+  * Rewrote description
 
   [ gregor herrmann ]
   * debian/watch: use dist-based URL.
@@ -10,7 +21,7 @@
   [ Nathan Handler ]
   * debian/watch: Update to ignore development releases.
 
- -- gregor herrmann <gregoa at debian.org>  Wed, 06 Aug 2008 18:23:43 -0300
+ -- Jonathan Yu <frequency at cpan.org>  Thu, 30 Jul 2009 12:04:43 -0400
 
 libmarc-xml-perl (0.88-1) unstable; urgency=low
 

Modified: trunk/libmarc-xml-perl/debian/compat
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libmarc-xml-perl/debian/compat?rev=41024&op=diff
==============================================================================
--- trunk/libmarc-xml-perl/debian/compat (original)
+++ trunk/libmarc-xml-perl/debian/compat Thu Jul 30 20:16:42 2009
@@ -1,1 +1,1 @@
-5
+7

Modified: trunk/libmarc-xml-perl/debian/control
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libmarc-xml-perl/debian/control?rev=41024&op=diff
==============================================================================
--- trunk/libmarc-xml-perl/debian/control (original)
+++ trunk/libmarc-xml-perl/debian/control Thu Jul 30 20:16:42 2009
@@ -1,31 +1,23 @@
 Source: libmarc-xml-perl
 Section: perl
 Priority: optional
-Build-Depends: debhelper (>= 5.0.0), quilt
-Build-Depends-Indep: perl (>= 5.8.8-7), libxml-sax-perl (>= 0.12), 
- libmarc-charset-perl (>= 0.98), libmarc-record-perl (>= 2)
+Build-Depends: debhelper (>= 7)
+Build-Depends-Indep: perl (>= 5.8.8-7), libmarc-charset-perl (>= 0.98),
+ libxml-sax-perl (>= 0.12), libmarc-record-perl (>= 2)
 Maintainer: Debian Perl Group <pkg-perl-maintainers at lists.alioth.debian.org>
 Uploaders: Vincent Danjean <vdanjean at debian.org>,
- Damyan Ivanov <dmn at debian.org>
-Standards-Version: 3.7.3
+ Damyan Ivanov <dmn at debian.org>, Jonathan Yu <frequency at cpan.org>
+Standards-Version: 3.8.2
 Homepage: http://search.cpan.org/dist/MARC-XML/
 Vcs-Svn: svn://svn.debian.org/pkg-perl/trunk/libmarc-xml-perl/
 Vcs-Browser: http://svn.debian.org/viewsvn/pkg-perl/trunk/libmarc-xml-perl/
 
 Package: libmarc-xml-perl
 Architecture: all
-Depends: ${perl:Depends}, ${misc:Depends}, libxml-sax-perl (>= 0.12),
- libmarc-charset-perl (>= 0.98), libmarc-record-perl (>= 2)
+Depends: ${perl:Depends}, ${misc:Depends}, libmarc-charset-perl (>= 0.98),
+ libxml-sax-perl (>= 0.12), libmarc-record-perl (>= 2)
 Description: Perl library to access MARC data encoded as XML
- The MARC-XML distribution is an extension to the MARC-Record distribution for
- working with MARC21 data that is encoded as XML. The XML encoding used is the
- MARC21slim schema supplied by the Library of Congress. More information may be
- obtained here: http://www.loc.gov/standards/marcxml/
- .
- Once you install the MARC-XML distribution you will most likely not use it
- directly, but will have an additional file format available to you when you
- use MARC::Batch.
- .
- This version of MARC-XML supersedes an the versions ending with 0.25 which
- were used with the MARC.pm framework. MARC-XML now uses MARC::Record
- exclusively.
+ MARC::XML is an extension to the Marc::Record distribution for working with
+ MARC21 data that is encoded as XML. The XML encoding used is the MARC21slim
+ schema supplied by the Library of Congress. Installing MARC-XML automatically
+ adds support for the additional file format for use with MARC::Batch.

Modified: trunk/libmarc-xml-perl/debian/rules
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libmarc-xml-perl/debian/rules?rev=41024&op=diff
==============================================================================
--- trunk/libmarc-xml-perl/debian/rules (original)
+++ trunk/libmarc-xml-perl/debian/rules Thu Jul 30 20:16:42 2009
@@ -1,80 +1,4 @@
 #!/usr/bin/make -f
-# This debian/rules file is provided as a template for normal perl
-# packages. It was created by Marc Brockschmidt <marc at dch-faq.de> for
-# the Debian Perl Group (http://pkg-perl.alioth.debian.org/) but may
-# be used freely wherever it is useful.
 
-# Uncomment this to turn on verbose mode.
-#export DH_VERBOSE=1
-
-# If set to a true value then MakeMaker's prompt function will
-# always return the default without waiting for user input.
-export PERL_MM_USE_DEFAULT=1
-
-PACKAGE=$(shell dh_listpackages)
-
-ifndef PERL
-PERL = /usr/bin/perl
-endif
-
-TMP     =$(CURDIR)/debian/$(PACKAGE)
-
-build: build-stamp
-build-stamp:
-	dh_testdir
-	
-	# As this is a architecture independent package, we are not
-	# supposed to install stuff to /usr/lib. MakeMaker creates
-	# the dirs, we prevent this by setting the INSTALLVENDORARCH
-	# and VENDORARCHEXP environment variables.
-	
-	# Add commands to compile the package here
-	$(PERL) Makefile.PL INSTALLDIRS=vendor \
-		INSTALLVENDORARCH=/usr/share/perl5/ \
-		VENDORARCHEXP=/usr/share/perl5/
-	$(MAKE)
-	$(MAKE) test
-	
-	touch $@
-
-clean:
-	dh_testdir
-	dh_testroot
-	
-	dh_clean build-stamp install-stamp
-	
-	# Add commands to clean up after the build process here
-	[ ! -f Makefile ] || $(MAKE) realclean
-
-install: install-stamp
-install-stamp: build-stamp
-	dh_testdir
-	dh_testroot
-	dh_clean -k
-	
-	# Add commands to install the package into debian/$PACKAGE_NAME here
-	$(MAKE) install DESTDIR=$(TMP) PREFIX=/usr
-	
-	touch $@
-
-binary-arch:
-# We have nothing to do here for an architecture-independent package
-
-binary-indep: build install
-	dh_testdir
-	dh_testroot
-	dh_installdocs
-	dh_installchangelogs Changes
-	dh_perl
-	dh_compress
-	dh_fixperms
-	dh_installdeb
-	dh_gencontrol
-	dh_md5sums
-	dh_builddeb
-
-source diff:
-	@echo >&2 'source and diff are obsolete - use dpkg-source -b'; false
-
-binary: binary-indep binary-arch
-.PHONY: build clean binary-indep binary-arch binary
+%:
+	dh $@

Modified: trunk/libmarc-xml-perl/lib/MARC/File/SAX.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libmarc-xml-perl/lib/MARC/File/SAX.pm?rev=41024&op=diff
==============================================================================
--- trunk/libmarc-xml-perl/lib/MARC/File/SAX.pm (original)
+++ trunk/libmarc-xml-perl/lib/MARC/File/SAX.pm Thu Jul 30 20:16:42 2009
@@ -13,76 +13,117 @@
 use XML::SAX;
 use base qw( XML::SAX::Base );
 use Data::Dumper;
+use MARC::Record;
 use MARC::Charset qw(utf8_to_marc8);
+
+=head2 new()
+
+Create the handler.
+
+=cut
+
+sub new {
+    my $class = shift;
+    return bless {records => []}, ref($class) || $class;
+}
+
+=head2 records()
+
+Get all the MARC::Records that were parsed out of the XML.
+
+=cut
+
+sub records {
+    return shift->{records};
+}
+
+=head2 record()
+
+In some contexts you might only expect there to be one record parsed. This
+is a shorthand for getting it.
+
+=cut 
+
+sub record {
+    return shift->{records}[0];
+}
 
 sub start_element {
     my ( $self, $element ) = @_;
-    my $name = $element->{ Name };
-    if ( $name eq 'leader' ) { 
-	$self->{ tag } = 'LDR';
+    my $name = $element->{ LocalName };
+    if ( $name eq 'record' ) {
+        $self->{ record } = MARC::Record->new();
+    } elsif ( $name eq 'leader' ) { 
+        $self->{ tag } = 'LDR';
     } elsif ( $name eq 'controlfield' ) {
-	$self->{ tag } = $element->{ Attributes }{ '{}tag' }{ Value };
+        $self->{ tag } = $element->{ Attributes }{ '{}tag' }{ Value };
     } elsif ( $name eq 'datafield' ) { 
-	$self->{ tag } = $element->{ Attributes }{ '{}tag' }{ Value };
-	$self->{ i1 } = $element->{ Attributes }{ '{}ind1' }{ Value };
-	$self->{ i2 } = $element->{ Attributes }{ '{}ind2' }{ Value };
+        $self->{ tag } = $element->{ Attributes }{ '{}tag' }{ Value };
+        $self->{ i1 } = $element->{ Attributes }{ '{}ind1' }{ Value };
+        $self->{ i2 } = $element->{ Attributes }{ '{}ind2' }{ Value };
     } elsif ( $name eq 'subfield' ) { 
-	$self->{ subcode } = $element->{ Attributes }{ '{}code' }{ Value };
+        $self->{ subcode } = $element->{ Attributes }{ '{}code' }{ Value };
     }
 }
 
 sub end_element { 
     my ( $self, $element ) = @_;
-    my $name = $element->{ Name };
+    my $name = $element->{ LocalName };
     if ( $name eq 'subfield' ) { 
-	push @{ $self->{ subfields } }, $self->{ subcode };
-	
-	if ($self->{ transcode }) {
-           push @{ $self->{ subfields } }, utf8_to_marc8($self->{ chars });
-	} else {
-           push @{ $self->{ subfields } }, $self->{ chars } ;
-	}
+        push @{ $self->{ subfields } }, $self->{ subcode };
 
-	$self->{ chars } = '';
-	$self->{ subcode } = '';
+        if ($self->{ transcode }) {
+            push @{ $self->{ subfields } }, utf8_to_marc8($self->{ chars });
+        } else {
+            push @{ $self->{ subfields } }, $self->{ chars } ;
+        }
+
+        $self->{ chars } = '';
+        $self->{ subcode } = '';
     } elsif ( $name eq 'controlfield' ) { 
-	$self->{ record }->append_fields(
-	    MARC::Field->new( $self->{ tag }, $self->{ chars } )
-	);
-	$self->{ chars } = '';
-	$self->{ tag } = '';
+        $self->{ record }->append_fields(
+            MARC::Field->new( $self->{ tag }, $self->{ chars } )
+        );
+        $self->{ chars } = '';
+        $self->{ tag } = '';
     } elsif ( $name eq 'datafield' ) { 
-	$self->{ record }->append_fields( 
-	    MARC::Field->new( 
-		$self->{ tag }, 
-		$self->{ i1 }, 
-		$self->{ i2 },
-		@{ $self->{ subfields } }
-	    )
-	);
-	$self->{ tag } = '';
-	$self->{ i1 } = '';
-	$self->{ i2 } = '';
-	$self->{ subfields } = [];
-	$self->{ chars } = '';
+        $self->{ record }->append_fields( 
+            MARC::Field->new( 
+                $self->{ tag }, 
+                $self->{ i1 }, 
+                $self->{ i2 },
+                @{ $self->{ subfields } }
+            )
+        );
+        $self->{ tag } = '';
+        $self->{ i1 } = '';
+        $self->{ i2 } = '';
+        $self->{ subfields } = [];
+        $self->{ chars } = '';
     } elsif ( $name eq 'leader' ) { 
-	my $ldr = $self->{ chars };
-	$self->{ transcode }++
-		if (substr($ldr,9,1) eq 'a' and $self->{toMARC8});
-	
-	substr($ldr,9,1,' ') if ($self->{ transcode });
-	$self->{ record }->leader( $ldr );
-	$self->{ chars } = '';
-	$self->{ tag } = '';
+        my $ldr = $self->{ chars };
+
+        $self->{ transcode }++
+            if (substr($ldr,9,1) eq 'a' and $self->{toMARC8});
+
+        substr($ldr,9,1,' ') if ($self->{ transcode });
+
+        $self->{ record }->leader( $ldr );
+        $self->{ chars } = '';
+        $self->{ tag } = '';
+    } elsif ( $name eq 'record' ) {
+        push(@{ $self->{ records } }, $self->{ record });
+        undef $self->{ record };
     }
-
 }
 
 sub characters {
     my ( $self, $chars ) = @_;
-    if ( ( exists $self->{ subcode } and $self->{ subcode } ne '') or ( $self->{ tag } and 
-	( $self->{ tag } eq 'LDR' or $self->{ tag } < 10 ) ) ) { 
-	$self->{ chars } .= $chars->{ Data };
+    if (
+        ( exists $self->{ subcode } && $self->{ subcode } ne '')
+        || ( $self->{ tag } && ( $self->{ tag } eq 'LDR' || $self->{ tag } < 10 ))
+    ) { 
+        $self->{ chars } .= $chars->{ Data };
     } 
 }
 

Modified: trunk/libmarc-xml-perl/lib/MARC/File/XML.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libmarc-xml-perl/lib/MARC/File/XML.pm?rev=41024&op=diff
==============================================================================
--- trunk/libmarc-xml-perl/lib/MARC/File/XML.pm (original)
+++ trunk/libmarc-xml-perl/lib/MARC/File/XML.pm Thu Jul 30 20:16:42 2009
@@ -14,22 +14,16 @@
 use Carp qw( croak );
 use Encode ();
 
-$VERSION = '0.88';
-
-my $handler = MARC::File::SAX->new();
+$VERSION = '0.91';
 
 my $factory = XML::SAX::ParserFactory->new();
 $factory->require_feature(Namespaces);
 
-my $parser = $factory->parser( Handler => $handler, ProtocolEncoding => 'UTF-8' );
-
 sub import {
-	my $class = shift;
-	%_load_args = @_;
-	$_load_args{ DefaultEncoding } ||= 'UTF-8';
-	$_load_args{ RecordFormat } ||= 'USMARC';
-
- 	$parser = $factory->parser( Handler => $handler, ProtocolEncoding => $_load_args{DefaultEncoding} );
+    my $class = shift;
+    %_load_args = @_;
+    $_load_args{ DefaultEncoding } ||= 'UTF-8';
+    $_load_args{ RecordFormat } ||= 'USMARC';
 }
 
 =head1 NAME
@@ -105,12 +99,12 @@
 =cut
 
 sub default_record_format {
-	my $self = shift;
-	my $format = shift;
-
-	$_load_args{RecordFormat} = $format if ($format);
-
-	return $_load_args{RecordFormat};
+    my $self = shift;
+    my $format = shift;
+
+    $_load_args{RecordFormat} = $format if ($format);
+
+    return $_load_args{RecordFormat};
 }
 
 
@@ -227,7 +221,7 @@
     }
     ## print the XML header if we haven't already
     if ( ! $self->{ header } ) { 
-    	$enc ||= $self->{ encoding } || $_load_args{DefaultEncoding};
+        $enc ||= $self->{ encoding } || $_load_args{DefaultEncoding};
         $self->{ fh }->print( header( $enc ) );
         $self->{ header } = 1;
     } 
@@ -313,7 +307,7 @@
 sub record {
     my $record = shift;
     my $format = shift;
-    my $without_header = shift;
+    my $include_full_record_header = shift;
     my $enc = shift;
 
     $format ||= $_load_args{RecordFormat};
@@ -326,16 +320,18 @@
     if ($original_encoding ne 'a' && lc($format) !~ /^unimarc/o) {
         # If not, we'll make it so
         $_transcode++;
+        substr($ldr,9,1,'a');
+        $record->leader( $ldr );
     }
 
     my @xml = ();
 
-    if ($without_header) {
+    if ($include_full_record_header) {
         push @xml, <<HEADER
 <?xml version="1.0" encoding="$enc"?>
 <record
     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-    xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim.xsd"
+    xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"
     xmlns="http://www.loc.gov/MARC21/slim">
 HEADER
 
@@ -346,17 +342,17 @@
     push( @xml, "  <leader>" . escape( $record->leader ) . "</leader>" );
 
     foreach my $field ( $record->fields() ) {
-        my $tag = $field->tag();
+        my ($tag) = escape( $field->tag() );
         if ( $field->is_control_field() ) { 
             my $data = $field->data;
             push( @xml, qq(  <controlfield tag="$tag">) .
                     escape( ($_transcode ? marc8_to_utf8($data) : $data) ). qq(</controlfield>) );
         } else {
-            my $i1 = $field->indicator( 1 );
-            my $i2 = $field->indicator( 2 );
+            my ($i1) = escape( $field->indicator( 1 ) );
+            my ($i2) = escape( $field->indicator( 2 ) );
             push( @xml, qq(  <datafield tag="$tag" ind1="$i1" ind2="$i2">) );
             foreach my $subfield ( $field->subfields() ) { 
-                my ( $code, $data ) = @$subfield;
+                my ( $code, $data ) = ( escape( $$subfield[0] ), $$subfield[1] );
                 push( @xml, qq(    <subfield code="$code">).
                         escape( ($_transcode ? marc8_to_utf8($data) : $data) ).qq(</subfield>) );
             }
@@ -422,7 +418,6 @@
 =cut
 
 sub decode { 
-
     my $text; 
     my $location = '';
     my $self = shift;
@@ -430,34 +425,35 @@
     ## see MARC::File::USMARC::decode for explanation of what's going on
     ## here
     if ( ref($self) =~ /^MARC::File/ ) {
-	$location = 'in record '.$self->{recnum};
-	$text = shift;
+        $location = 'in record '.$self->{recnum};
+        $text = shift;
     } else {
-	$location = 'in record 1';
-	$text = $self=~/MARC::File/ ? shift : $self;
+        $location = 'in record 1';
+        $text = $self=~/MARC::File/ ? shift : $self;
     }
 
     my $enc = shift || $_load_args{BinaryEncoding};
     my $format = shift || $_load_args{RecordFormat};
 
-    $parser->{ tagStack } = [];
-    $parser->{ subfields } = [];
-    $parser->{ Handler }{ record } = MARC::Record->new();
+    my $handler = MARC::File::SAX->new();
+    my $parser = $factory->parser(
+        Handler => $handler, 
+        ProtocolEncoding => $_load_args{DefaultEncoding}
+    );
     $parser->{ Handler }{ toMARC8 } = decideMARC8Binary($format,$enc);
 
     $parser->parse_string( $text );
 
-    return( $parser->{ Handler }{ record } );
-    
+    return( $handler->record() );
 }
 
 sub decideMARC8Binary {
-	my $format = shift;
-	my $enc = shift;
-
-	return 0 if (defined($format) && lc($format) =~ /^unimarc/o);
-	return 0 if (defined($enc) && lc($enc) =~ /^utf-?8/o);
-	return 1;
+    my $format = shift;
+    my $enc = shift;
+
+    return 0 if (defined($format) && lc($format) =~ /^unimarc/o);
+    return 0 if (defined($enc) && lc($enc) =~ /^utf-?8/o);
+    return 1;
 }
 
 
@@ -473,7 +469,7 @@
 sub encode {
     my $record = shift;
     my $format = shift || $_load_args{RecordFormat};
-    my $without_header = shift;
+    my $without_collection_header = shift;
     my $enc = shift || $_load_args{DefaultEncoding};
 
     if (lc($format) =~ /^unimarc/o) {
@@ -481,29 +477,33 @@
     }
 
     my @xml = ();
-    push( @xml, header( $enc ) ) unless ($without_header);
-    push( @xml, record( $record, $format, $without_header, $enc ) );
-    push( @xml, footer() ) unless ($without_header);
+    push( @xml, header( $enc ) ) unless ($without_collection_header);
+    # verbose, but naming the header output flags this way to avoid
+    # the potential confusion identified in CPAN bug #34082
+    # http://rt.cpan.org/Public/Bug/Display.html?id=34082
+    my $include_full_record_header = ($without_collection_header) ? 1 : 0;
+    push( @xml, record( $record, $format, $include_full_record_header, $enc ) );
+    push( @xml, footer() ) unless ($without_collection_header);
 
     return( join( "\n", @xml ) );
 }
 
 sub _unimarc_encoding {
-	my $f = shift;
-	my $r = shift;
-
-	my $pos = 26;
-	$pos = 13 if (lc($f) eq 'unimarcauth');
-
-	my $enc = substr( $r->subfield(100 => 'a'), $pos, 2 );
-
-	if ($enc eq '01' || $enc eq '03') {
-		return 'ISO-8859-1';
-	} elsif ($enc eq '50') {
-		return 'UTF-8';
-	} else {
-		die "Unsupported UNIMARC character encoding [$enc] for XML output for $f; 100$a -> " . $r->subfield(100 => 'a');
-	}
+    my $f = shift;
+    my $r = shift;
+
+    my $pos = 26;
+    $pos = 13 if (lc($f) eq 'unimarcauth');
+
+    my $enc = substr( $r->subfield(100 => 'a'), $pos, 2 );
+
+    if ($enc eq '01' || $enc eq '03') {
+        return 'ISO-8859-1';
+    } elsif ($enc eq '50') {
+        return 'UTF-8';
+    } else {
+        die "Unsupported UNIMARC character encoding [$enc] for XML output for $f; 100\$a -> " . $r->subfield(100 => 'a');
+    }
 }
 
 =head1 TODO

Added: trunk/libmarc-xml-perl/t/escape.mrc
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libmarc-xml-perl/t/escape.mrc?rev=41024&op=file
==============================================================================
--- trunk/libmarc-xml-perl/t/escape.mrc (added)
+++ trunk/libmarc-xml-perl/t/escape.mrc Thu Jul 30 20:16:42 2009
@@ -1,0 +1,1 @@
+00727nam  2200205 a 4500001001100000005001700011008004100028035002000069050002200089100002200111245005200133250002600185260004200211300002000253650005600273650005600329650005600385949007400441596000600515
03-0016458
19971103184734.0
970701s1997    oru          u000 0 eng u
  a(Sirsi) a351664
00aML270.2b.A6 1997
1 aAnthony, James R.
00aFrench baroque music from Beaujoyeulx to Rameau
  aRev. and expanded ed.
  aPortland, OR :bAmadeus Press,c1997.
  a586 p. :bmusic
 0aMusic<Francey16th centuryxHistory and criticism.
 0aMusiczFrancey17th centuryxHistory and criticism.
 0aMusiczFrancey18th centuryxHistory and criticism.
  aML 270.2 A6 1997wLCi30007006841505rYtBOOKSlHUNT-CIRCmHUNTINGTON
  a1



Modified: trunk/libmarc-xml-perl/t/escape.t
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libmarc-xml-perl/t/escape.t?rev=41024&op=diff
==============================================================================
--- trunk/libmarc-xml-perl/t/escape.t (original)
+++ trunk/libmarc-xml-perl/t/escape.t Thu Jul 30 20:16:42 2009
@@ -1,6 +1,6 @@
 use strict;
 use warnings;
-use Test::More tests => 10; 
+use Test::More tests => 14; 
 
 # test escaping of < > and & for XML
 
@@ -12,17 +12,36 @@
 
 use_ok( 'MARC::Record' );
 use_ok( 'MARC::Field' );
+use_ok( 'MARC::Batch' );
 
-my $r = MARC::Record->new();
-isa_ok( $r, 'MARC::Record' );
+my $r1 = MARC::Record->new();
+isa_ok( $r1, 'MARC::Record' );
 
-$r->leader( '&xyz<123>' );
-$r->append_fields(
+$r1->leader( '&xyz<123>' );
+$r1->append_fields(
     MARC::Field->new( '005', '&abc<def>' ),
     MARC::Field->new( '245', 0, 1, a => 'Foo&Bar<Baz>' )
 );
 
-my $xml = $r->as_xml();
-like( $xml, qr/&amp;xyz&lt;123&gt;/, 'escaped leader' );
-like( $xml, qr/&amp;abc&lt;def&gt;/, 'escape control field' );
-like( $xml, qr/Foo&amp;Bar&lt;Baz&gt;/, 'escaped field' );
+my $xml1 = $r1->as_xml();
+like( $xml1, qr/&amp;xyz&lt;123&gt;/, 'escaped leader' );
+like( $xml1, qr/&amp;abc&lt;def&gt;/, 'escape control field' );
+like( $xml1, qr/Foo&amp;Bar&lt;Baz&gt;/, 'escaped field' );
+
+# check escaping of subfield labels
+my $b = MARC::Batch->new( 'USMARC', 't/escape.mrc' );
+my $r2 = $b->next();
+is($r2->subfield('650', '<'), 'France', 'read subfield $< parsed from ISO2709 blob');
+my $xml2 = $r2->as_xml();
+my $r3;
+SKIP: {
+    eval { $r3 = MARC::Record->new_from_xml($xml2); };
+    if ($@) {
+        fail('failed to parse MARCXML generated from record containing a subfield $<');
+	skip 'no point in checking further', 1;
+    } else {
+        is($r3->subfield('650', '<'), 'France', 'read subfield $< parsed from MARCXML');
+	is_deeply($r2, $r3, 'record with subfield $< the same parsed from ISO2709 or MARCXML');
+    }
+}
+




More information about the Pkg-perl-cvs-commits mailing list