#! /usr/bin/perl -w package Search; $VERSION = '1.0'; use strict; use FileHandle; use Exporter; use SafeIO; our @ISA = qw(Exporter); our @EXPORT=qw(Fetch Update Delete); # # indexing back end # assumes db and pagename have been vetted for filesystem # ------------------------------------ my $forwardindex = ".forward"; my $reverseindex = ".reverse"; # ------------------------------------ # functions for pointers to sorted arrays sub binarychop { my ($records, $target) = @_; my $bottom = 0; my $top = $#$records; my $i = int(($top + $bottom) / 2); #print "? $target "; while ($top > $bottom) { my $o = $records->[$i] cmp $target; if($o == -1) { $bottom = $i + 1; # $records->[$i] < $target } elsif ($o == 0) { #print "$i\n"; return $i; } elsif ($o == 1) { $top = $i - 1; # $records->[$i] > $target } $i = int(($top + $bottom) / 2); } if ((defined $records->[$i]) && ($records->[$i] eq $target)) { #print "$i\n"; return $i; } #print "!\n"; return -1; } sub remove { my ($records, $target) = @_; my $i = &binarychop ($$records, $target); #print "- $target"; if ($i > -1) { #print "\n"; splice( @$$records, $i, 1 ); return 1; } #print "!\n"; return 0; } sub add { my ($records, $target) = @_; #print "+ $target"; if(&binarychop ($$records, $target) < 0) { push @$$records, $target; my @tmp = sort (@$$records); $$records = \@tmp; #print "\n"; return 1; } #print "!\n"; return 0; } # ------------------------------------ sub validate { my ($pagename) = @_; #print length $pagename; defined $pagename or return undef; (length($pagename)<127) or return undef; $pagename =~ s/[\\\;\:\&\|\`\(\)\[\]\{\}\%\$\<\>]//g; return $pagename; } # fetch list of pages that contain a link to pagename sub Fetch { my ($db, $pagename) = @_; defined($pagename = &validate($pagename)) or return [ ]; my $reverse = ReadFile( $db.$pagename.$reverseindex, [ ] ); return $reverse; } # delete pagename's entry sub Delete { my ($db, $pagename) = @_; defined($pagename = &validate($pagename)) or return [ ]; ObtainLock( $db.".updatelock" ); # fetch list of pages pagename links to my $forward = ReadFile( $db.$pagename.$forwardindex, [ ] ); # ..it no longer links to them, so delete its entry from their reverse indices foreach my $page ( @$forward ) { defined($page = &validate($page)) or next; &ensuredir( $db, $page ); my $record = ReadFile( $db.$page.$reverseindex, [ ] ); if( &remove(\$record, $pagename ) ) { WriteFile( $db.$page.$reverseindex, $record ); } } #..and delete the page entries themselves -f $db.$pagename.$forwardindex and unlink $db.$pagename.$forwardindex; -f $db.$pagename.$reverseindex and unlink $db.$pagename.$reverseindex; ReleaseLock( $db.".updatelock" ); } sub ensuredir { my ($db, $pagename) = @_; my $result = 0; if ($pagename =~ /^(.+)\//) { $result = $1; if ( ! -d $db.$1 ) { mkdir $db.$1 or ErrorExit ("Couldn't create directory $1.\n"); } } return $result; } sub Add { my ($db, $pagename, $entries) = @_; my @reverse = ( ); ObtainLock( $db.".updatelock" ); { my @tmp = sort @$entries; $entries = \@tmp; } if (my $base = &ensuredir($db, $pagename)) { @reverse = ( $base ); &add(\$entries, $base); } # $entries contains forward index. Write it, and update other pages' reverse indices. WriteFile( $db.$pagename.$forwardindex, $entries ); foreach my $page ( @$entries ) { defined($page = &validate($page)) or next; &ensuredir($db, $page); my $record = ReadFile( $db.$page.$reverseindex, [ ] ); if( &add(\$record, $pagename ) ) { WriteFile( $db.$page.$reverseindex, $record ); } } my $recs = ReadFile( $db.$pagename.$reverseindex, \@reverse ); if ($#reverse == 0) { &add( \$recs, $reverse[0] ); } WriteFile( $db.$pagename.$reverseindex, $recs ); ReleaseLock( $db.".updatelock" ); } sub Update { my ($db, $pagename, $entries) = @_; defined($pagename = &validate($pagename)) or return [ ]; # do my entries already exist? if ( -f $db.$pagename.$forwardindex and -f $db.$pagename.$reverseindex ) { ObtainLock( $db.".updatelock" ); my @entries = sort @$entries; $entries = \@entries; if (my $base = &ensuredir($db, $pagename)) { &add(\$entries, $base); } if ( -d $db.$pagename ) { # each page is considered to be a backlink of all its subpages my @subpages = glob( $db.$pagename."/*".$forwardindex ); foreach my $subpage(@subpages) { $subpage =~ /^$db\/(.+)$forwardindex/ or next; $subpage = $1; add (\$entries, $subpage); } } # fetch my old forward index my $oldentries = ReadFile( $db.$pagename.$forwardindex, [ ] ); # generate intersections my @added = ( ); my @removed = ( ); my $i = 0; my $j = 0; while ( ( $i <= $#$entries ) && ( $j <= $#$oldentries ) ) { my $diff = ( $entries->[$i] cmp $oldentries->[$j] ); if($diff == 0) { # no diff $i++; $j++; } elsif ($diff == -1) { # new < old => entry added #print "added: $entries->[$i]\n"; push @added, $entries->[$i++]; } else { # old < new => entry removed #print "removed: $oldentries->[$j]\n"; push @removed, $oldentries->[$j++]; } } # finish up while ( $i <= $#$entries ) { #print "added: $entries->[$i]\n"; push @added, $entries->[$i++]; } while ( $j <= $#$oldentries ) { #print "removed: $oldentries->[$j]\n"; push @removed, $oldentries->[$j++]; } # carry out updates foreach my $page ( @added ) { defined($page = &validate($page)) or next; &ensuredir( $db, $page ); my $record = ReadFile( $db.$page.$reverseindex, [ ] ); if( &add(\$record, $pagename ) ) { WriteFile( $db.$page.$reverseindex, $record ); } } foreach my $page ( @removed ) { defined($page = &validate($page)) or next; &ensuredir( $db, $page ); my $record = ReadFile( $db.$page.$reverseindex, [ ] ); if( &remove(\$record, $pagename ) ) { &ensuredir( $db, $page ); WriteFile( $db.$page.$reverseindex, $record ); } } # write the new forward index; reverse index still valid WriteFile( $db.$pagename.$forwardindex, $entries ); ReleaseLock( $db.".updatelock" ); } else { &Add( $db, $pagename, $entries ); } } # ------------------------------------ 1;