#!/usr/bin/perl -w

use strict;
use warnings;

##################
#
#  search.pl - parseSearch function
#
#  convert HIP v2/v3 XML output from a keyword search into a Perl data structure
#
#  Knocked together by Dave Pattern
#  If you spot any bugs, please let me know!
#
#############
#
#  version 0.01 - last updated 17/May/2006
#
#########
#
#  (cc) 2005
#
#  http://creativecommons.org/licenses/by-nc-sa/2.5/
#
#####

##################
#
#  THINGS TO BE AWARE OF!
#
#  If a keyword search finds just a single result, then HIP will automatically
#  return the relevant full bib page.
#
#  If that happens, then the parseSearch function will include the following {error}:
#
#      search returned a full bib page
#
#  ..and {bib} will contain the bib number
#
#  You should check every parseSearch return to see if the above has occurred.  If
#  it has, then you could consider sending the XML output through the parseBib
#  function so that you can fake a single result page.
#
#############

use LWP::UserAgent;
use XML::Simple;
use Data::Dumper;

$Data::Dumper::Indent   = 2;
$Data::Dumper::Sortkeys = 1;

### ENTER ANY HTTP PROXY INFO YOU NEED HERE...

my $proxy = '';
# $proxy = 'http://leed-cache-2.server.ntli.net:8080';

my $maxResults = 5;

### EXAMPLE SEARCH URLS

# my $url = 'http://webcat.hud.ac.uk/ipac20/ipac.jsp?session=11478H9977A05.1159&menu=search&aspect=subtab33&npp=10&ipp=20&spp=20&profile=cls&ri=8&source=%7E%21horizon&index=.GW&term=amber+spyglass&aspect=subtab33';
my $url = 'http://webcat.hud.ac.uk/ipac20/ipac.jsp?session=11478H9977A05.1159&menu=search&aspect=subtab33&npp=10&ipp=20&spp=20&profile=cls&ri=9&source=%7E%21horizon&index=.GW&term=Pullman%2C+Philip&aspect=subtab33';

### REPLACE npp VALUE with $maxResults

$url =~ s/npp=(\d\d*)/npp=$maxResults/;

### TIDY UP URL AND ADD XML PARAMETER

$url =~ s/\#focus//g;
if ( $url !~ /&GetXML=true/ ) { $url .= '&GetXML=true' }

my $ua = LWP::UserAgent->new;
$ua->timeout(20);
if ($proxy) { $ua->proxy( ['http'], $proxy ) }

### FETCH PAGE

my $response = $ua->get($url);
unless ( $response->is_success ) { die $response->status_line }

my $content = $response->content;
my $info    = parseSearch( \$content );

### DUMP THE DATA STRUCTURE FOR REFERENCE...

_dumpToFile( './dump_output.txt', $info );

# totalHitCount is absent when the XML could not be parsed at all, so fall
# back to 0 rather than printing an undefined value.
my $hitCount = defined $info->{totalHitCount} ? $info->{totalHitCount} : 0;

print "Search completed - got ";
print $hitCount;
print " hits\n\n";

sleep(2);

if ( $info->{error} ) {
    print "The following error message was returned by the parser...\n\n\t";
    print $info->{error};
    if ( $info->{bib} ) {
        print "\n\tBIB# " . $info->{bib};
    }
    print "\n\n";
    exit;
}

### EXAMPLE TO SHOW A POSSIBLE SORTING BY TITLE...

my @titles   = @{ $info->{title} };
my @sortKeys = ();
foreach my $title (@titles) {
    push @sortKeys, _titleSortKey($title);
}

# Sort the result offsets by normalised title; ties keep their original
# (numeric) result order.
my @order = sort { $sortKeys[$a] cmp $sortKeys[$b] || $a <=> $b } 0 .. $#titles;

foreach my $offset (@order) {
    print "\nTITLE: " . $info->{title}[$offset] . "\n";

    my @authors = @{ $info->{authors}->[$offset] };
    print "AUTHOR(S): " . join( ' / ', @authors ) . "\n";

    if ( $info->{isbns}[$offset] ) {
        print "ISBN: " . $info->{isbns}[$offset] . "\n";
    }
}


# parseSearch( $content )
#
# Convert the XML returned by a HIP keyword search into a hash reference.
#
#   $content - the XML text, or a reference to a scalar holding it
#
# Returns a hash reference.  On an XML parse failure it contains only
# {error}, {errorText} and {errorType} ('fatal').  If the XML is a full bib
# page (HIP does this for single-hit searches) it contains {bib}, {error},
# {errorType} ('warning') and zeroed {returnCount} / {totalHitCount}.
# Otherwise it contains:
#
#   totalHitCount    - hit count reported in the XML (0 if absent)
#   returnCount      - number of result rows found in the XML
#   miscHeaders      - array ref of column header labels
#   miscHeaderCount  - number of column headers
#   title            - array ref: first title text of each row
#   titles, authors, calls, publishers, publishDates, urls
#                    - array refs holding, per row, an array ref of text values
#   miscDetails      - array ref holding, per row, one text value per header
#   bibs, isbns      - array refs: bib key / ISBN of each row
#   bib              - '' (no full bib page was returned)
#
# Missing or empty values are always returned as '' (never undef).
#
# Side effect: the parsed XML structure is dumped to ./dump_input.txt.
sub parseSearch {
    my $content = shift;
    if ( ref($content) eq 'SCALAR' ) { $content = $$content }

    my %ret = ();

    # USE EVAL JUST IN CASE WE GET AN XML PARSING ERROR...
    # The trailing "1" lets us detect failure from eval's return value
    # instead of relying solely on $@.
    my $xml;
    my $parsed = eval { $xml = XMLin( $content, ForceArray => 1 ); 1 };
    if ( !$parsed ) {
        $ret{error}     = 'XML parse failed';
        $ret{errorText} = $@;
        $ret{errorType} = 'fatal';
        return ( \%ret );
    }

    ### DUMP THE XML STRUCTURE FOR REFERENCE...
    _dumpToFile( './dump_input.txt', $xml );

    ### FIRST, WE NEED TO CHECK THAT THE SEARCH DIDN'T GET JUST A SINGLE
    ### RESULT (WHICH RETURNS A FULL BIB PAGE)
    $ret{bib} = $xml->{fullnonmarc}[0]->{searchresults}[0]->{results}[0]->{row}[0]->{key}[0] || '';

    if ( $ret{bib} ) {
        $ret{error}         = 'search returned a full bib page';
        $ret{errorType}     = 'warning';
        $ret{returnCount}   = 0;
        $ret{totalHitCount} = 0;
        return ( \%ret );
    }

    ### OKAY, WE EITHER GOT ZERO OR TWO OR MORE RESULTS...

    # Every remaining lookup hangs off this node, so resolve it once.
    my $search = $xml->{summary}[0]->{searchresults}[0];
    if ( ref($search) ne 'HASH' ) { $search = {} }

    # NUMBER OF RESULTS
    $ret{totalHitCount} = $search->{hitcount}[0] || 0;

    # HEADERS
    my $cols    = $search->{header}[0]->{col} || [];
    my @headers = ();
    foreach my $col (@$cols) {
        push @headers, $col->{label}[0];
    }
    my $headerCount = scalar(@headers);

    $ret{miscHeaders}     = \@headers;
    $ret{miscHeaderCount} = $headerCount;

    # PARSE RESULTS
    my $rows = $search->{results}[0]->{row} || [];

    my ( @allAuthors, @allCalls, @allTitles, @title, @allISBNs );
    my ( @allBibs, @allPubs, @allMisc, @allUrls, @allPubDates );

    foreach my $row (@$rows) {
        push @allAuthors,  _fieldTexts( $row, 'AUTHOR' );
        push @allCalls,    _fieldTexts( $row, 'CALL' );
        push @allPubs,     _fieldTexts( $row, 'PUBLISHER' );
        push @allPubDates, _fieldTexts( $row, 'PUBDATE' );
        push @allUrls,     _fieldTexts( $row, 'URL856' );

        # TITLE(S) - {titles} keeps them all, {title} just the first one
        my $rowTitles = _fieldTexts( $row, 'TITLE' );
        push @allTitles, $rowTitles;
        push @title, ( @$rowTitles ? $rowTitles->[0] : '' );

        # MISC HEADER DATA - one cell per header column
        my @cells = ();
        foreach my $i ( 0 .. $headerCount - 1 ) {
            push @cells, _cleanText( $row->{cell}[$i]->{data}[0]->{text}[0] );
        }
        push @allMisc, \@cells;

        # BIB KEY AND ISBN
        push @allBibs,  _cleanText( $row->{key}[0] );
        push @allISBNs, _cleanText( $row->{isbn}[0] );
    }

    $ret{authors}      = \@allAuthors;
    $ret{bibs}         = \@allBibs;
    $ret{urls}         = \@allUrls;
    $ret{calls}        = \@allCalls;
    $ret{isbns}        = \@allISBNs;
    $ret{titles}       = \@allTitles;
    $ret{title}        = \@title;
    $ret{publishers}   = \@allPubs;
    $ret{publishDates} = \@allPubDates;
    $ret{miscDetails}  = \@allMisc;

    $ret{returnCount} = scalar(@$rows);

    return ( \%ret );
}


# Return an array ref holding the cleaned text of every <data> entry stored
# under the named field (AUTHOR, TITLE, ...) of one result row.
sub _fieldTexts {
    my ( $row, $field ) = @_;

    my $data  = $row->{$field}[0]->{data} || [];
    my @texts = ();
    foreach my $item (@$data) {
        push @texts, _cleanText( $item->{text}[0] );
    }
    return \@texts;
}


# Normalise one parsed value: undef becomes '', and so does a hash ref
# (which is what XML::Simple hands back for an empty element).
sub _cleanText {
    my ($value) = @_;

    return '' unless defined $value;
    return '' if ref($value) eq 'HASH';
    return $value;
}


# Build the key used to sort titles: lower-cased, leading article removed,
# and all non-word characters stripped.
sub _titleSortKey {
    my ($title) = @_;

    my $key = lc( defined $title ? $title : '' );
    $key =~ s/^the //;
    $key =~ s/^an //;
    $key =~ s/^a //;
    $key =~ s/\W//g;
    return $key;
}


# Write a Data::Dumper rendering of $data to $path.  The dump is only a
# debugging aid, so a failure is reported with a warning rather than dying.
# Returns 1 on success, 0 on failure.
sub _dumpToFile {
    my ( $path, $data ) = @_;

    my $fh;
    unless ( open( $fh, '>', $path ) ) {
        warn "could not open $path for writing: $!\n";
        return 0;
    }
    print {$fh} Dumper($data);
    unless ( close($fh) ) {
        warn "could not close $path: $!\n";
        return 0;
    }
    return 1;
}