#!/usr/bin/perl -w

use strict;
use warnings;

##################
#
# bib.pl - parseBib function
#
# convert HIP v2/v3 XML output into a Perl data structure
#
# Knocked together by Dave Pattern
# If you spot any bugs, please let me know!
#
#############
#
# version 0.01 - last updated 22/Apr/2006
#
#########
#
# (cc) 2005
#
# http://creativecommons.org/licenses/by-nc-sa/2.5/
#
#####

use LWP::UserAgent;
use XML::Simple;
use Data::Dumper;
use Encode;
# use HTML::Entities qw(:DEFAULT encode_entities_numeric);
# use HTML::Entities::Numbered;

$Data::Dumper::Indent   = 2;
$Data::Dumper::Sortkeys = 1;

my $proxy = '';
# $proxy = 'http://leed-cache-2.server.ntli.net:8080';

# BIB URL TO PARSE...
my $url = 'http://webcat.hud.ac.uk/ipac20/ipac.jsp?session=TS45720408K82.6491&profile=cls&source=~!horizon&view=items&uri=full=3100001~!26231~!54&ri=9&aspect=subtab33&menu=search&ipp=20&spp=20&staffonly=&term=Fran%C3%A7ois&index=.GW&uindex=&aspect=subtab33&menu=search&ri=9';

# Strip any "#focus" fragment and make sure the XML view is requested.
$url =~ s/\#focus//g;
if ( $url !~ /&GetXML=true/ ) { $url .= '&GetXML=true' }

my $ua = LWP::UserAgent->new;
$ua->timeout(20);
if ($proxy) { $ua->proxy( ['http'], $proxy ) }

my $response = $ua->get($url);
unless ( $response->is_success ) { die $response->status_line }

my $content = $response->content;
my $info    = parseBib( \$content );

# DUMP THE DATA STRUCTURE FOR REFERENCE...
_dumpToFile( './dump_output.txt', $info );

# Report a parse problem instead of silently concatenating undef below.
if ( $info->{error} ) {
    warn 'parseBib: ' . $info->{error} . "\n";
}
print "TITLE = " . ( defined $info->{title} ? $info->{title} : '' ) . "\n";


# parseBib( $xmlText | \$xmlText )
#
# Parses the XML text with XML::Simple (ForceArray => 1) and returns a
# hash ref built from it.  Keys written by this sub:
#
#   error / errorText / errorType   set on XML parse failure ('fatal'),
#                                   or when no bib number is found
#                                   ('warning')
#   title, titleCount, titles
#   authorCount, authors
#   subjectCount, subjects
#   publisherCount, publishers      (values passed through convert())
#   publishDateCount, publishDates
#   callCount, calls, call{1..n}
#   urlCount, urls, url{n}
#   isbn, bib
#   bibHeaderCount, bibHeaders, bibContents{header}
#   itemCount, itemHeaderCount, itemHeaders, itemNumbers,
#   itemDetails, itemFixedDetails
#   serialCount, serialCopyDetailCount, serialHeaders, serialCopyNotes,
#   serialCopyKeys, serialCopyLocations, serialDetails,
#   serialCopyRunLabels, serialCopyRunDetails, serialCopyRunNotes
#
# Side effect: writes a Data::Dumper dump of the parsed XML to
# ./dump_input.txt (best effort; a failure only produces a warning).
sub parseBib {
    my $content = shift;
    if ( ref($content) eq 'SCALAR' ) { $content = $$content }

    my %ret = ();

    # USE EVAL JUST IN CASE WE GET AN XML PARSING ERROR...
    my $xml = eval { XMLin( $content, ForceArray => 1 ) };
    if ($@) {
        $ret{error}     = 'XML parse failed';
        $ret{errorText} = $@;
        $ret{errorType} = 'fatal';
        return ( \%ret );
    }

    # DUMP THE XML STRUCTURE FOR REFERENCE...
    _dumpToFile( './dump_input.txt', $xml );

    my $bibResults = _dig( $xml, fullnonmarc => 0, searchresults => 0 );
    my $bibRow     = _dig( $bibResults, results => 0, row => 0 );

    # ITEM TITLE(S)
    {
        my @all = _dataTexts( _dig( $bibRow, TITLE => 0, 'data' ) );

        # The first title is stored even when blank; an empty XML element
        # (a hash ref from XML::Simple) is normalised to ''.
        if (@all) { $ret{title} = $all[0] }

        my @titles = grep { $_ } @all;
        if (@titles) {
            $ret{titleCount} = scalar(@titles);
            $ret{titles}     = \@titles;
        }
    }

    # AUTHOR(S)
    _storeList( \%ret, 'authorCount', 'authors',
        _dataTexts( _dig( $bibRow, AUTHOR => 0, 'data' ) ) );

    # SUBJECT(S)
    _storeList( \%ret, 'subjectCount', 'subjects',
        _dataTexts( _dig( $bibRow, SUBJECT => 0, 'data' ) ) );

    # PUBLISHER(S)
    {
        my @publishers = ();
        foreach my $text ( _dataTexts( _dig( $bibRow, PUBLISHER => 0, 'data' ) ) ) {
            next unless $text;
            push @publishers, convert($text);
        }
        _storeList( \%ret, 'publisherCount', 'publishers', @publishers );
    }

    # PUBLISH DATE(S)
    _storeList( \%ret, 'publishDateCount', 'publishDates',
        _dataTexts( _dig( $bibRow, PUBDATE => 0, 'data' ) ) );

    # CLASS/CALL NUMBER(S)
    {
        # Blank entries are kept here so positions line up with the XML.
        my @calls = _dataTexts( _dig( $bibRow, CALL => 0, 'data' ) );

        my $position = 0;
        foreach my $text (@calls) {
            $position++;
            $ret{call}{$position} = $text;
        }

        if (@calls) {
            $ret{callCount} = scalar(@calls);
            $ret{calls}     = \@calls;
        }
        else {
            $ret{callCount} = 0;
        }
    }

    # 856 URL(S)
    {
        my @all  = _dataTexts( _dig( $bibRow, URL856 => 0, 'data' ) );
        my @urls = ();

        my $position = 0;
        foreach my $text (@all) {
            $position++;
            next unless $text;
            push @urls, $text;
            $ret{url}{$position} = $text;
        }

        if (@urls) {
            # urlCount is the number of <data> entries, blank ones included.
            $ret{urlCount} = scalar(@all);
            $ret{urls}     = \@urls;
        }
        else {
            $ret{urlCount} = 0;
        }
    }

    # ISBN
    $ret{isbn} = _dig( $bibRow, isbn => 0 ) || '';

    # BIB NUMBER
    $ret{bib} = _dig( $bibRow, key => 0 ) || '';

    # BIB HEADERS & CONTENT
    {
        my $cols    = _asList( _dig( $bibResults, header => 0, 'col' ) );
        my @headers = ();
        my %seen    = ();

        foreach my $index ( 0 .. $#{$cols} ) {
            my $header = _dig( $cols->[$index], label => 0 ) || '';

            # Repeated labels get a " {n}" suffix so each has its own key.
            $seen{$header}++;
            if ( $seen{$header} > 1 ) {
                $header .= ' {' . $seen{$header} . '}';
            }
            push @headers, $header;

            my @values = grep { $_ }
                _dataTexts( _dig( $bibRow, cell => $index, 'data' ) );
            $ret{bibContents}{$header} = \@values;
        }

        $ret{bibHeaderCount} = scalar( @{$cols} );
        $ret{bibHeaders}     = \@headers;
    }

    $ret{itemCount} = 0;

    # ITEM INFO
    {
        my $items     = _dig( $xml, items => 0, searchresults => 0 );
        my $cols      = _asList( _dig( $items, header => 0, 'col' ) );
        my $headCount = scalar( @{$cols} );

        my @headers = ();
        foreach my $col ( @{$cols} ) {
            my $label = _cleanText( _dig( $col, label => 0 ) );
            if ($label) { push @headers, $label }
        }

        if (@headers) {
            # The count is the number of columns, blank labels included.
            $ret{itemHeaderCount} = $headCount;
            $ret{itemHeaders}     = \@headers;
        }
        else {
            $ret{itemHeaderCount} = 0;
        }

        my $rows = _asList( _dig( $items, results => 0, 'row' ) );
        $ret{itemCount} = scalar( @{$rows} );

        my @itemNumbers  = ();
        my @fixedDetails = ();
        my @itemDetails  = ();

        foreach my $row ( @{$rows} ) {
            push @itemNumbers, ( _dig( $row, key => 0 ) || '' );

            my %fixed = ();
            foreach my $field (
                qw( RESTRICTIONS MIDSPINE AVAILABLETHRU
                    TEMPORARYLOCATION AVAILABILITYDATE )
              )
            {
                $fixed{$field} =
                    _dig( $row, $field, 0, data => 0, text => 0 ) || '';
            }
            push @fixedDetails, \%fixed;

            # One value per header column, blank when the cell is missing.
            my @cells = ();
            foreach my $index ( 0 .. $headCount - 1 ) {
                push @cells,
                    _cleanText( _dig( $row, cell => $index, data => 0, text => 0 ) );
            }
            push @itemDetails, \@cells;
        }

        if (@fixedDetails) { $ret{itemFixedDetails} = \@fixedDetails }
        if (@itemDetails)  { $ret{itemDetails}      = \@itemDetails }
        if (@itemNumbers)  { $ret{itemNumbers}      = \@itemNumbers }
    }

    # SERIALS INFO
    {
        my $summary   = _dig( $xml, subscriptionsummary => 0 );
        my $cols      = _asList( _dig( $summary, header => 0, 'col' ) );
        my $headCount = scalar( @{$cols} );

        my @headers = ();
        foreach my $col ( @{$cols} ) {
            push @headers, ( _dig( $col, label => 0 ) || '' );
        }
        if (@headers) { $ret{serialHeaders} = \@headers }

        my $serials     = _asList( _dig( $summary, 'serial' ) );
        my $serialCount = scalar( @{$serials} );

        $ret{serialCount} = $serialCount;
        if ($serialCount) { $ret{serialCopyDetailCount} = $serialCount }

        $ret{serialCopyNotes} = 0;

        my @copyKeys    = ();
        my @copyLocs    = ();
        my @copyDetails = ();
        my @notesTop    = ();
        my @textsTop    = ();
        my @labelsTop   = ();

        foreach my $serial ( @{$serials} ) {
            push @copyKeys, ( _dig( $serial, copykey  => 0 ) || '' );
            push @copyLocs, ( _dig( $serial, location => 0 ) || '' );

            my @copy = ();
            foreach my $index ( 0 .. $headCount - 1 ) {
                push @copy,
                    _cleanText(
                        _dig( $serial, copy => 0, cell => $index, data => 0, text => 0 )
                    );
            }

            my $runs      = _asList( _dig( $serial, runlist => 0, 'run' ) );
            my @runNotes  = ();
            my @runTexts  = ();
            my @runLabels = ();

            foreach my $run ( @{$runs} ) {
                push @runLabels, ( _dig( $run, runlabel => 0 ) || '' );

                my $rundata = _asList( _dig( $run, data => 0, 'rundata' ) );
                my @notes   = ();
                my @texts   = ();
                foreach my $entry ( @{$rundata} ) {
                    push @texts, _cleanText( _dig( $entry, text => 0 ) );
                    push @notes, _cleanText( _dig( $entry, note => 0 ) );
                }

                push @runNotes, \@notes;
                push @runTexts, \@texts;

                # NOTE: set whenever a run has any rundata entries, even if
                # every note is blank - existing behaviour, kept as is.
                if (@notes) { $ret{serialCopyNotes} = 1 }
            }

            push @notesTop,    \@runNotes;
            push @textsTop,    \@runTexts;
            push @labelsTop,   \@runLabels;
            push @copyDetails, \@copy;
        }

        if (@notesTop)    { $ret{serialCopyRunNotes}   = \@notesTop }
        if (@textsTop)    { $ret{serialCopyRunDetails} = \@textsTop }
        if (@labelsTop)   { $ret{serialCopyRunLabels}  = \@labelsTop }
        if (@copyKeys)    { $ret{serialCopyKeys}       = \@copyKeys }
        if (@copyLocs)    { $ret{serialCopyLocations}  = \@copyLocs }
        if (@copyDetails) { $ret{serialDetails}        = \@copyDetails }
    }

    # CHECK WE'VE FOUND A BIB!
    unless ( $ret{bib} ) {
        $ret{error}     = 'no bib number found';
        $ret{errorType} = 'warning';
    }

    return ( \%ret );
}

# convert( $str ) - returns $str encoded as UTF-8 octets.
sub convert {
    my $str = shift;
    return ( Encode::encode_utf8($str) );
}

# _dig( $node, @path ) - walks a nested structure without autovivifying.
# @path alternates hash key, array index, hash key, ... starting with a
# hash key.  Returns undef as soon as a level is missing or is not the
# expected kind of reference.
sub _dig {
    my ( $node, @path ) = @_;

    my $wantHash = 1;
    foreach my $step (@path) {
        if ($wantHash) {
            $node = ref($node) eq 'HASH' ? $node->{$step} : undef;
        }
        else {
            $node = ref($node) eq 'ARRAY' ? $node->[$step] : undef;
        }
        last unless defined $node;
        $wantHash = !$wantHash;
    }

    return $node;
}

# _asList( $value ) - returns $value if it is an array ref, else a new
# empty array ref.
sub _asList {
    my $value = shift;
    return ref($value) eq 'ARRAY' ? $value : [];
}

# _cleanText( $value ) - returns '' for false values and for hash refs
# (how an empty XML element comes back from XMLin); otherwise $value.
sub _cleanText {
    my $value = shift;
    return '' if !$value || ref($value) eq 'HASH';
    return $value;
}

# _dataTexts( $data ) - given an array ref of <data> nodes, returns one
# cleaned {text}[0] value per node, in order ('' where blank/missing).
sub _dataTexts {
    my $data = _asList(shift);
    return map { _cleanText( _dig( $_, text => 0 ) ) } @{$data};
}

# _storeList( \%ret, $countKey, $listKey, @values ) - stores the non-blank
# @values under $listKey and their number under $countKey; when none are
# left only $countKey is set, to 0.
sub _storeList {
    my ( $ret, $countKey, $listKey, @values ) = @_;

    my @kept = grep { $_ } @values;
    if (@kept) {
        $ret->{$countKey} = scalar(@kept);
        $ret->{$listKey}  = \@kept;
    }
    else {
        $ret->{$countKey} = 0;
    }
    return;
}

# _dumpToFile( $path, $data ) - writes Dumper($data) to $path.  Debug aid
# only: any I/O failure is reported with warn and otherwise ignored.
sub _dumpToFile {
    my ( $path, $data ) = @_;

    my $fh;
    unless ( open( $fh, '>', $path ) ) {
        warn "Cannot write $path: $!\n";
        return;
    }
    print {$fh} Dumper($data);
    close($fh) or warn "Cannot close $path: $!\n";
    return;
}