#! /usr/bin/perl -w
# Standalone version of generator generator
# http://www.toothycat.net/wiki/wiki.pl?MoonShadow/GeneratorGenerator
# Usage: bnf.pl page=/path/to/file [debug=1] [seed=random-seed]
use strict;
use CGI; # for parsing command line parameters
use FileHandle;
sub FetchPageText
{
my ($filename) = @_;
(-f $filename) or die( "$filename not found.\n");
my $fh = new FileHandle( '< ' . $filename );
my $text = join('', <$fh>);
$fh->close;
return $text;
}
# defaults for mutable options
my %options =
(
'spaces' => 1, # put spaces between elements
'debug' => 0 # generate debug trace
);
my %includes = ( );
# bnf syntax
# name ::= |-separated-list of list of name or string or "string"
# root tag is bnf
# implemented as hash of array of array
my %tags = ( );
my $validtag = qw/(?:(?:(?:\<)[A-Za-z0-9_-]+?(?:\>))|(?:[A-Za-z0-9_-]+))/;
my $commandchar = '!';
# commandchar must be a character that's not legal at the start of tag names (i.e. not present in $validtag)
sub parseBnfLine
{
my ($line) = @_;
$line =~ /^ *($validtag) *::= *(.*) *$/ or return;
my $name = $1;
$line = $2;
if($name eq 'option')
{
if($line =~ /([A-Za-z_]+) *= *([^ ]+)/)
{
$options{$1} = $2;
}
}
my $sequences = [ ];
my $curr_sequence = [ ];
while(length $line > 0)
{
$line =~ s/^ *//; # strip leading spaces
if($line =~ /^(\".*?[^\\]\")(.*)/)
{
# we have a quoted string as the next symbol
my $symbol = $1;
$line = $2;
$symbol =~ s/\\(.)/$1/g;
push @$curr_sequence, $symbol;
}
else
{
my $symbol = $line;
if($line =~ /^([^ ]+) (.*)$/)
{
$symbol = $1;
$line = $2;
}
else
{
$line = '';
}
$symbol =~ s/\\(.)/$1/g;
if($symbol eq "|")
{
if($#$curr_sequence >= 0)
{
push @$sequences, $curr_sequence;
$curr_sequence = [ ];
}
}
else
{
if($symbol =~ /^$commandchar/)
{
# they're starting a symbol with an unquoted command char
# we can't have that: quote it
# we know there's no bad effects to this because commandchar
# is not legal in tag names
$symbol = '"' . $symbol . '"';
}
push @$curr_sequence, $symbol;
}
}
}
if($#$curr_sequence >= 0)
{
push @$sequences, $curr_sequence;
}
$tags{$name} = $sequences;
}
sub dumpBNF
{
foreach my $k (keys %tags)
{
print "$k ::= ";
my $i = -1;
while(++$i < $#{$tags{$k}})
{
my $seq = $tags{$k}->[$i];
foreach my $item(@$seq)
{
my $titem = $item;
if($titem =~ / /)
{
$titem = '"' . $titem . '"';
}
print "$titem ";
}
print "| ";
}
{
my $seq = $tags{$k}->[$#{$tags{$k}}];
foreach my $item(@$seq)
{
my $titem = $item;
if($titem =~ / /)
{
$titem = '"' . $titem . '"';
}
print "$titem ";
}
}
# print "
\n";
print "\n";
}
# print "
\n";
print "\n";
}
sub resolveConcats
{
my ($tag) = @_;
while($tag =~ /^($validtag)\#\#($validtag)(.*?)$/)
{
my $root = $1;
my $ccat = $2;
my $rest = '';
if(defined($3))
{
$rest = $3;
}
#$options{'debug'} and print 'Concatenation: '. $tag .
$options{'debug'} and print 'Concatenation: '. $tag .
#' (' . $root . ',' . $ccat . ',' . $rest . ')' .
' --> ';
if(
defined($tags{$ccat}) &&
($#{$tags{$ccat}} == 0) &&
($#{$tags{$ccat}->[0]} == 0) &&
($tags{$ccat}->[0][0] =~/^$validtag$/)
)
{
$tag = $root . '_' . $tags{$ccat}->[0][0] . $rest;
}
else
{
$tag = $root . '_' . $ccat . $rest;
}
$options{'debug'} and print $tag."\n";
}
return $tag;
}
sub generateString
{
my @results = ( '' );
my @unresolved = ( "bnf" );
my $iter = 0;
while($#unresolved >= 0)
{
my $tag = shift @unresolved;
my $silent = '';
my $iterresult = [ ];
my @postassign = ( );
# -------- note silent assigns and postfix assigns
if($tag =~ /^\<($validtag(?:\#\#$validtag)*)::=($validtag(?:\#\#$validtag)*)\>$/)
{
# silent assign
$silent = resolveConcats($1);
$tag = $2;
}
elsif($tag =~ /^\<($validtag(?:\#\#$validtag)*)::=("[^"]*")\>$/)
{
# quoted silent assign
$silent = resolveConcats($1);
$tag = $2;
}
elsif($tag =~ /^\<\<($validtag(?:\#\#$validtag)*)::=($validtag(?:\#\#$validtag)*)\>\>$/)
{
# deep silent assign
$options{'debug'} and print "Start deep calculation of $2 to assign to $1\n";
my $ctag = $commandchar . 'deepname' . ' ' . $1;
unshift @unresolved, $ctag;
unshift @results, '';
$tag = $2;
}
$tag =~ /^".*"$/ or $tag = resolveConcats($tag);
# do we have any postfix assigns?
while($tag =~ /^($validtag){($validtag(?:\#\#$validtag)*)}(.*)$/ or $tag =~ /^($validtag){{($validtag(?:\#\#$validtag)*)}}(.*)$/)
{
if ($tag =~ /^($validtag){($validtag(?:\#\#$validtag)*)}(.*)$/)
{
# shallow postfix assign
push @postassign, $2;
$tag = $1;
defined($3) and $tag = $tag . $3;
}
elsif ($tag =~ /^($validtag){{($validtag(?:\#\#$validtag)*)}}(.*)$/)
{
# deep postfix assign
# because we only want to output to one result at once, convert this
# from foo{{bar}} to <> bar
$options{'debug'} and print "Start deep calculation of $1 to assign to $2\n";
unshift @unresolved, $2; # prepend a bar
my $ctag = $commandchar . 'deepname' . ' ' . $2;
unshift @unresolved, $ctag; # then prepend the command to assign to bar
unshift @results, '';
$tag = $1;
defined($3) and $tag = $tag . $3;
}
}
# -------- Resolve the tag
$iter++;
$options{'debug'} and print "Iteration $iter: ";
if ( $tag =~ /^($commandchar)/ )
{
# this is a command
#print "Found command '$tag'! ";
if ( $tag =~ /^($commandchar)deepname (.*)$/ )
{
# we've completed a deep assign
my $deepassignvar = shift(@results);
# all the tags come out of a deep assign flattened into a string,
# so we have to wrap them twice in []s to get the required array of arrays
$tags{$2} = [ [ $deepassignvar ] ];
if($options{'debug'})
{
print 'Deep assign: (' . $2 . '::=' . $deepassignvar . ")\n";
}
}
}
elsif( $tags{$tag} )
{
# this is a nonterminal:
# select a possible sequence at random
my $index = int( rand( $#{$tags{$tag}} + 1 ) );
defined $index or $index = 0;
# add the items in the sequence to the unresolved list
if($options{'debug'})
{
print "Lookup: $tag ::=";
foreach my $item(@{${$tags{$tag}}[$index]})
{
print " $item";
}
print "\n";
}
foreach my $item(reverse @{${$tags{$tag}}[$index]})
{
if($silent eq '')
{
unshift @unresolved, $item;
}
unshift @$iterresult, $item;
}
}
else
{
# this is a terminal
my $ttag = $tag;
$ttag =~ s/^"(.*)"$/$1/;
if($silent eq '')
{
if( ((length($results[0])>0) && (length($ttag)>0) && ($options{'spaces'})) )
{
$results[0] .= " ";
}
$results[0] .= $ttag;
}
push @$iterresult, $tag;
}
if($silent ne '')
{
# we're inside a silent assign
if($options{'debug'})
{
print 'Silent assign: (' . $silent . '::=' ;
foreach my $item(@$iterresult )
{
print $item;
}
print ")\n";
}
$tags{$silent} = [ $iterresult ];
}
# else
{
# we're not inside a silent assign: perform any postfix assigns
foreach my $item(@postassign)
{
if($options{'debug'})
{
print 'Postfix assign: {' . $item . '::=' ;
foreach my $i(@$iterresult )
{
print $i;
}
print "}\n";
}
$tags{$item} = [ $iterresult ];
}
}
}
return $results[0];
}
sub main()
{
srand();
my $seed = int(rand()*(1<<31));
my $q = new CGI;
my $s = $q->param('seed');
if(defined($s) && ($s =~ /([0-9-]+)/))
{
$seed = $1;
}
print STDERR ''."\n";
srand($seed);
if(defined($q->param('page')))
{
my @text = split(/[\r\n]/, FetchPageText($q->param('page')));
while($#text>-1)
{
my $item = shift @text;
$item =~ /[^ ]/ or next;
$options{'include'} = 0;
&parseBnfLine($item);
if($options{'include'} && !defined($includes{$options{'include'}}))
{
my @newtext = split(/[\r\n]/,
FetchPageText($options{'include'}));
@text = (@newtext, @text);
$includes{$options{'include'}} = 1;
}
}
if(defined($q->param('debug')))
{
$options{'debug'} = 1;
}
if(defined $tags{"bnf"}) # success
{
$options{'debug'} and print 'Here, in no particular order, are the rules I found:'."\n";
$options{'debug'} and &dumpBNF();
$options{'debug'} and print "\n";
$options{'debug'} and print 'This is what I did with them:'."\n";
my $result = &generateString();
$options{'debug'} and print "\n";
$options{'debug'} and print '..and here\'s what I got:'."\n";
print $result;
}
else
{
print 'No "bnf" tag was found in the file.';
print "Usage: bnf.pl page=/path/to/file [debug=1] [seed=random-seed]\n";
}
}
else
{
print "Usage: bnf.pl page=/path/to/file [debug=1] [seed=random-seed]\n";
}
}
&main();