#! /usr/bin/perl -w
use strict;
use CGI;
use PageFetch;
# hard options
my $maxiter = 16000; # maximum number of tags to resolve before giving up
my $maxlen = 64000; # maximum length of generated string
# defaults for mutable options
my %options =
(
'spaces' => 1, # put spaces between elements
'debug' => 0 # generate debug trace
);
my %includes = ( );
# bnf syntax
# name ::= |-separated-list of list of name or string or "string"
# root tag is bnf
# implemented as hash of array of array
my %tags = ( );
my $validtag = qw/(?:(?:(?:<)[A-Za-z0-9_-]+?(?:>))|(?:[A-Za-z0-9_-]+))/;
sub parseBnfLine
{
my ($line) = @_;
$line =~ /^ *($validtag) *::= *(.*) *$/ or return;
my $name = $1;
$line = $2;
if($name eq 'option')
{
if($line =~ /([A-Za-z_]+) *= *([^ ]+)/)
{
$options{$1} = $2;
}
}
my $sequences = [ ];
my $curr_sequence = [ ];
while(length $line > 0)
{
$line =~ s/^ *//; # strip leading spaces
if($line =~ /^(\".*?[^\\]\")(.*)/)
{
my $symbol = $1;
$line = $2;
$symbol =~ s/\\(.)/$1/g;
push @$curr_sequence, $symbol;
}
else
{
my $symbol = $line;
if($line =~ /^([^ ]+) (.*)$/)
{
$symbol = $1;
$line = $2;
}
else
{
$line = '';
}
$symbol =~ s/\\(.)/$1/g;
if($symbol eq "|")
{
if($#$curr_sequence >= 0)
{
push @$sequences, $curr_sequence;
$curr_sequence = [ ];
}
}
else
{
push @$curr_sequence, $symbol;
}
}
}
if($#$curr_sequence >= 0)
{
push @$sequences, $curr_sequence;
}
$tags{$name} = $sequences;
}
sub dumpBNF
{
foreach my $k (keys %tags)
{
print "$k ::= ";
my $i = -1;
while(++$i < $#{$tags{$k}})
{
my $seq = $tags{$k}->[$i];
foreach my $item(@$seq)
{
my $titem = $item;
if($titem =~ / /)
{
$titem = '"' . $titem . '"';
}
print "$titem ";
}
print "| ";
}
{
my $seq = $tags{$k}->[$#{$tags{$k}}];
foreach my $item(@$seq)
{
my $titem = $item;
if($titem =~ / /)
{
$titem = '"' . $titem . '"';
}
print "$titem ";
}
}
print "
\n";
}
print "
\n";
}
sub resolveConcats
{
my ($tag) = @_;
while($tag =~ /^($validtag)\#\#($validtag)(.*?)$/)
{
my $root = $1;
my $ccat = $2;
my $rest = '';
if(defined($3))
{
$rest = $3;
}
$options{'debug'} and print 'Concatenation: '. $tag .
#' (' . $root . ',' . $ccat . ',' . $rest . ')' .
' --> ';
if(
defined($tags{$ccat}) &&
($#{$tags{$ccat}} == 0) &&
($#{$tags{$ccat}->[0]} == 0) &&
($tags{$ccat}->[0][0] =~/^$validtag$/)
)
{
$tag = $root . '_' . $tags{$ccat}->[0][0] . $rest;
}
else
{
$tag = $root . '_' . $ccat . $rest;
}
$options{'debug'} and print $tag;
$options{'debug'} and print "
\n";
}
return $tag;
}
sub generateString
{
my $result = "";
my @unresolved = ( "bnf" );
my $iter = 0;
while($#unresolved >= 0)
{
my $tag = shift @unresolved;
my $silent = '';
my $iterresult = [ ];
my @postassign = ( );
# --------
if($tag =~ /^<($validtag(?:\#\#$validtag)*)::=($validtag(?:\#\#$validtag)*)>$/)
{
$silent = resolveConcats($1);
$tag = $2;
}
elsif($tag =~ /^<($validtag(?:\#\#$validtag)*)::=("[^"]*")>$/)
{
$silent = resolveConcats($1);
$tag = $2;
}
$tag =~ /^".*"$/ or $tag = resolveConcats($tag);
while($tag =~ /^($validtag){($validtag(?:\#\#$validtag)*)}(.*)$/)
{
push @postassign, $2;
$tag = $1;
defined($3) and $tag = $tag . $3;
}
# --------
if (($iter++ > $maxiter) || (length($result) > $maxlen))
{
return $result . "
(...)";
}
$options{'debug'} and print "Iteration $iter: ";
if( $tags{$tag} )
{
# select a possible sequence at random
my $index = int( rand( $#{$tags{$tag}} + 1 ) );
defined $index or $index = 0;
# add the items in the sequence to the unresolved list
if($options{'debug'})
{
print "Lookup: $tag ::=";
foreach my $item(@{${$tags{$tag}}[$index]})
{
print " $item";
}
print "
\n";
}
foreach my $item(reverse @{${$tags{$tag}}[$index]})
{
if($silent eq '')
{
unshift @unresolved, $item;
}
unshift @$iterresult, $item;
}
}
else
{
my $ttag = $tag;
$ttag =~ s/^"(.*)"$/$1/;
if($silent eq '')
{
if( ((length($result)>0) && (length($ttag)>0) && ($options{'spaces'})) )
{
$result .= " ";
}
$result .= $ttag;
}
push @$iterresult, $tag;
}
if($silent ne '')
{
if($options{'debug'})
{
print 'Silent assign: (' . $silent . '::=' ;
foreach my $item(@$iterresult )
{
print $item;
}
print ")
\n";
}
$tags{$silent} = [ $iterresult ];
}
# else
{
foreach my $item(@postassign)
{
if($options{'debug'})
{
print 'Postfix assign: {' . $item . '::=' ;
foreach my $i(@$iterresult )
{
print $i;
}
print "}
\n";
}
$tags{$item} = [ $iterresult ];
}
}
}
return $result;
}
sub WikiLink
{
my ($name) = @_;
return '' . $name . '';
}
sub main()
{
srand();
my $seed = int(rand()*(1<<31));
my $q = new CGI;
print $q->header, $q->start_html("BNF generator");
my $s = $q->param('seed');
if(defined($s) && ($s =~ /([0-9-]+)/))
{
$seed = $1;
}
print ''."\n";
srand($seed);
if(defined($q->param('page')))
{
my @text = split(/[\r\n]/, FetchPageText('/home/sham/root/wiki/data',
$q->param('page')));
while($#text>-1)
{
my $item = shift @text;
$item =~ /[^ ]/ or next;
$options{'include'} = 0;
&parseBnfLine($item);
if($options{'include'} && !defined($includes{$options{'include'}}))
{
my @newtext = split(/[\r\n]/,
FetchPageText('/home/sham/root/wiki/data', $options{'include'}));
@text = (@newtext, @text);
$includes{$options{'include'}} = 1;
}
}
if(defined($q->param('debug')))
{
$options{'debug'} = 1;
}
if(defined $tags{"bnf"}) # success
{
$options{'debug'} and print 'Here, in no particular order, are the rules I found:
';
$options{'debug'} and &dumpBNF();
$options{'debug'} and print '