HIP subject keywords

#!/usr/bin/perl -w

use strict;
use warnings;
use DBI;    # NOTE(review): nothing visible in this script calls DBI; kept because the original loads it

####
#
# (cc) 2005 http://creativecommons.org/licenses/by-nc-sa/2.5/
#
# Create a "tag" list of subject keywords to link to your OPAC
#
# You'll need to tweak the first section of variables to suit:
#
#   $threshold  --> the minimum number of total matches for inclusion in the HTML output
#   $spacing    --> a character string to insert after each tag
#
#   $opacUrl    --> the link to your OPAC (ideally using the Subject Alphabetical index)
#
#   $outputFile --> file to output the HTML to
#
#
# You'll also need a "config.txt" file to define the boundaries between each text size
#
# See the following web pages for more info:
#
#   http://www.daveyp.com/blog/index.php/archives/47/
#   http://www.daveyp.com/blog/index.php/archives/48/
#
#
# And finally, you'll need a list of your keywords & number of matching bibs - put this
# in a file called "subjects.txt" and it needs to be in a format similar to this:
#
#   1234   Germany history
#   2745   Java
#    912   World War II
#
# ...where the first value is the number of matching bibs, then some whitespace, then
# the subject keyword.
#
#
# Each line of "config.txt" is read as three whitespace-separated fields:
#
#   <count> <size> <colour>
#
# A subject is given the last tier whose <count> is smaller than
# (total matches - $threshold); the tier number becomes the CSS class "sN".
#
# NOTE(review): the published copy of this script had its HTML markup and its
# <IN> readline operators stripped.  The readline calls are restored as they
# must have been; the page header, the per-tier CSS and the link markup are a
# reconstruction from the otherwise unused $opacUrl / $css / $list / @size /
# @colour values - confirm against the pages listed above.
#
####

my $threshold  = 200;
my $spacing    = ' ...';
my $opacUrl    = qq(http://webcat.hud.ac.uk/ipac20/ipac.jsp?index=SUBJECT&term=);
my $outputFile = 'subjects2.html';

my $prev         = '';
my @size         = ( );
my @check        = ( );
my @colour       = ( );
my %subjectCount = ( );
my %real         = ( );
my %list         = ( );

# ---- read the size tiers (stored 1-based, in file order) ----

open( my $config_fh, '<', 'config.txt' ) or die "unable to open config.txt: $!";

my $range = 0;

while ( my $line = <$config_fh> ) {

    # split ' ' ignores leading whitespace and treats any run of blanks/tabs
    # (and the trailing CR/LF) as a single separator
    my ( $count, $size, $colour ) = split ' ', $line;

    next unless defined $count;    # blank line

    unless ( defined $size && $count =~ /\A\d+(?:\.\d+)?\z/ ) {
        warn "config.txt line $.: expected '<count> <size> <colour>', skipped\n";
        next;
    }

    $range++;
    $check[$range]  = $count;
    $size[$range]   = $size;
    $colour[$range] = $colour;
}

close( $config_fh );

# ---- read and total the subject counts ----

open( my $subjects_fh, '<', 'subjects.txt' ) or die "unable to open subjects.txt: $!";

while ( my $line = <$subjects_fh> ) {

    # "<count> <whitespace> <subject>", surrounding whitespace ignored;
    # lines without a leading number or without a subject are skipped
    next unless $line =~ /\A\s*(\d+)\s+(\S.*?)\s*\z/;

    my ( $count, $subject ) = ( $1, $2 );

    next if $count < 1;

    # $processed is the grouping key; it is currently the subject itself
    my $processed = $subject;

    $real{$processed}          = $subject;
    $subjectCount{$processed} += $count;

    # every variant that was folded into this key, separated by " / "
    $list{$processed} = defined $list{$processed}
                      ? $list{$processed} . " / " . $subject
                      : $subject;
}

close( $subjects_fh );

# ---- write the HTML page ----

open( my $out_fh, '>', $outputFile ) or die "unable to write $outputFile: $!";

print {$out_fh} qq(<html>\n<head>\n<title>HIP subject keywords</title>\n);
print {$out_fh} qq(<style type="text/css">\n);

# One rule per tier.  The size and colour are emitted exactly as written in
# config.txt, so they must already be valid CSS values - TODO confirm.
for my $tier ( 1 .. $range ) {
    my $colour_rule = defined $colour[$tier] ? " color: $colour[$tier];" : '';
    printf {$out_fh} "a.s%d { font-size: %s;%s }\n", $tier, $size[$tier], $colour_rule;
}

print {$out_fh} qq(</style>\n</head>\n<body>\n);

for my $v ( sort keys %subjectCount ) {

    next if $subjectCount{$v} < $threshold;

    my $x = $subjectCount{$v} - $threshold;

    # the last tier whose boundary is exceeded wins; tier 1 is the fallback
    my $css = 1;
    for my $tier ( 1 .. $range ) {
        $css = $tier if $x > $check[$tier];
    }
    $css = 's' . $css;

    if ( length( $list{$v} ) > 500 ) {
        $list{$v} = substr( $list{$v}, 0, 500 ) . '...';
    }

    # Track the current initial letter.  The original had a commented-out
    # print at this point (its markup is lost) to separate letter groups.
    my $initial = substr( lc( $real{$v} ), 0, 1 );
    if ( $initial ne $prev ) {
        $prev = $initial;
    }

    my $href  = escape_html( $opacUrl . escape_uri( $real{$v} ) );
    my $title = escape_html( $list{$v} );
    my $text  = escape_html( $real{$v} );

    # $spacing is emitted as-is so it may contain markup
    print {$out_fh} qq(<a href="$href" class="$css" title="$title">$text</a>$spacing\n);
}

print {$out_fh} qq(</body>\n</html>\n);

# buffered write errors only surface at close
close( $out_fh ) or die "unable to finish writing $outputFile: $!";

# Return $text with the characters that are special in HTML text and
# double-quoted attribute values replaced by entities.
sub escape_html {
    my ($text) = @_;

    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;

    return $text;
}

# Return $text percent-encoded for use as a URL query value: every character
# except ASCII letters, digits and "-", "_", ".", "~" becomes %XX.
# The input is handled as bytes, since the files are read without an
# encoding layer.
sub escape_uri {
    my ($text) = @_;

    $text =~ s/([^A-Za-z0-9_.~-])/sprintf( '%%%02X', ord $1 )/ge;

    return $text;
}