#!/usr/bin/perl -w
use strict;
use DBI;
####
#
# (cc) 2005 http://creativecommons.org/licenses/by-nc-sa/2.5/
#
# Create a "tag" list of subject keywords to link to your OPAC
#
# You'll need to tweak the first section of variables to suit:
#
# $threshold --> the minimum number of total matches for inclusion in the HTML output
# $spacing --> a character string to insert after each tag
#
# $opacUrl --> the link to your OPAC (ideally using the Subject Alphabetical index)
#
# $outputFile --> file to output the HTML to
#
#
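# You'll also need a "config.txt" file to define the boundaries between each text size.
# Each line of that file holds three values separated by spaces or tabs: a count
# boundary (compared against the number of matches above $threshold), then the
# size, then the colour.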
#
# See the following web pages for more info:
#
# http://www.daveyp.com/blog/index.php/archives/47/
# http://www.daveyp.com/blog/index.php/archives/48/
#
#
# And finally, you'll need a list of your keywords & number of matching bibs - put this
# in a file called "subjects.txt" and it needs to be in a format similar to this:
#
# 1234 Germany history
# 2745 Java
# 912 World War II
#
# ...where the first value is the number of matching bibs, then some whitespace, then
# the subject keyword.
#
#
####
# ---- Settings to adjust for your own site ----
my $threshold  = 200;                 # subjects with fewer total matches are left out of the output
my $spacing    = q( ...);             # text written after every tag
my $opacUrl    = 'http://webcat.hud.ac.uk/ipac20/ipac.jsp?index=SUBJECT&term=';   # OPAC search link prefix
my $outputFile = q(subjects2.html);   # file the HTML is written to

# ---- Working storage ----
my $prev = '';                        # lower-cased first character of the last subject written

# @size, @check and @colour are filled from config.txt (1-based, one slot per line);
# @row is not referenced by the visible code.
my( @row, @size, @check, @colour );

# All three hashes are keyed by subject:
#   %subjectCount - total number of matches
#   %real         - subject text as written to the output
#   %list         - " / "-joined list of the subject strings that were merged
my( %subjectCount, %real, %list );
# Load the text-size boundaries from config.txt.
#
# Every non-blank line is split on whitespace into three fields - count, size
# and colour - which are stored at the same 1-based index in @check, @size and
# @colour.  When the loop finishes, $range is the number of entries loaded.
open( my $configFh, '<', 'config.txt' ) || die "unable to open config.txt: $!";
my $range = 0;
while( my $entry = <$configFh> )
{
    $entry =~ s/[\r\n]//g;

    # split ' ' ignores leading whitespace and treats any run of spaces/tabs
    # as a single separator, so no manual whitespace squeezing is needed.
    my( $count, $size, $colour ) = split( ' ', $entry );
    next unless defined $count;    # blank line - nothing to record

    $range++;
    $size[$range]   = $size;
    $check[$range]  = $count;
    $colour[$range] = $colour;
}
close( $configFh );
# Read subjects.txt and total up the matches for each subject keyword.
#
# Each line is expected to be "<count> <subject>": the number of matching
# bibs, some whitespace, then the subject text (which may itself contain
# spaces).  Lines that are blank, have no subject, or whose count is not a
# whole number of at least 1 are skipped.
#
# Fills three hashes, all keyed by the subject:
#   %subjectCount - running total of the counts
#   %real         - the subject text itself
#   %list         - " / "-joined list of every subject string seen for the key
open( my $subjectsFh, '<', 'subjects.txt' ) || die "unable to open subjects.txt: $!";
while( my $line = <$subjectsFh> )
{
    $line =~ s/^\s+//;
    $line =~ s/\s+$//;

    # Greedy \s+ consumes all the whitespace between the count and the
    # subject; the limit of 2 keeps any spaces inside the subject intact.
    my( $count, $subject ) = split( /\s+/, $line, 2 );

    next unless defined $subject;
    next unless $count =~ /^\d+$/ && $count >= 1;

    # Key used for grouping; currently identical to the subject text.
    my $processed = $subject;
    $real{$processed} = $subject;
    $subjectCount{$processed} += $count;

    # exists (not truthiness) so that a subject such as "0" is still joined correctly
    if( exists $list{$processed} ) { $list{$processed} .= " / ".$subject; }
    else { $list{$processed} = $subject; }
}
close( $subjectsFh );
# Write the tag list to $outputFile as an HTML page.
#
# Subjects are taken in sorted key order; any whose total is below $threshold
# is skipped.  Each remaining subject becomes a link to the OPAC ($opacUrl
# followed by the subject), carrying a class of "s<N>" and a title listing the
# merged subject strings, followed by $spacing.
#
# NOTE(review): the link and page markup was reconstructed from the values the
# script computes ($opacUrl, $css, $list{...}); adjust it to your page template.
# NOTE(review): the "s<N>" classes presumably match rules in a stylesheet
# supplied separately - confirm.

# Escape text for use inside HTML element content or a double-quoted attribute.
sub escapeHtml
{
    my( $text ) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Percent-encode everything except unreserved URL characters.
sub escapeUrlTerm
{
    my( $text ) = @_;
    $text =~ s/([^A-Za-z0-9_.~-])/sprintf( '%%%02X', ord( $1 ) )/ge;
    return $text;
}

open( my $outputFh, '>', $outputFile ) || die "unable to open $outputFile: $!";
print $outputFh qq(<html>\n<head><title>HIP subject keywords</title></head>\n<body>\n);
foreach my $v ( sort keys %subjectCount )
{
    next if $subjectCount{$v} < $threshold;

    # Choose the last config entry whose boundary is exceeded by the number
    # of matches above the threshold; entry 1 is the default.
    my $x = $subjectCount{$v} - $threshold;
    my $css = 1;
    foreach my $level ( 1 .. $range )
    {
        if( $x > $check[$level] ) { $css = $level }
    }
    $css = 's'.$css;

    # Keep the title text to a manageable length.
    if( length( $list{$v} ) > 500 ) { $list{$v} = substr($list{$v}, 0, 500).'...' }

    # Track the first letter of the current subject (a separator between
    # letters could be printed here).
    if( substr( lc($real{$v}), 0, 1 ) ne $prev )
    {
        $prev = substr( lc($real{$v}), 0, 1 );
        # print OUTPUT "\n";
    }

    my $href  = escapeHtml( $opacUrl.escapeUrlTerm( $real{$v} ) );
    my $title = escapeHtml( $list{$v} );
    my $label = escapeHtml( $real{$v} );
    print $outputFh qq(<a href="$href" class="$css" title="$title">$label</a>$spacing\n);
}
print $outputFh qq(</body>\n</html>\n);

# Buffered write errors only surface at close, so check it.
close( $outputFh ) || die "unable to close $outputFile: $!";