#! /usr/bin/perl -w # Standalone version of generator generator # http://www.toothycat.net/wiki/wiki.pl?MoonShadow/GeneratorGenerator # Usage: bnf.pl page=/path/to/file [debug=1] [seed=random-seed] use strict; use CGI; # for parsing command line parameters use FileHandle; sub FetchPageText { my ($filename) = @_; (-f $filename) or die( "$filename not found.\n"); my $fh = new FileHandle( '< ' . $filename ); my $text = join('', <$fh>); $fh->close; return $text; } # defaults for mutable options my %options = ( 'spaces' => 1, # put spaces between elements 'debug' => 0 # generate debug trace ); my %includes = ( ); # bnf syntax # name ::= |-separated-list of list of name or string or "string" # root tag is bnf # implemented as hash of array of array my %tags = ( ); my $validtag = qw/(?:(?:(?:\<)[A-Za-z0-9_-]+?(?:\>))|(?:[A-Za-z0-9_-]+))/; my $commandchar = '!'; # commandchar must be a character that's not legal at the start of tag names (i.e. not present in $validtag) sub parseBnfLine { my ($line) = @_; $line =~ /^ *($validtag) *::= *(.*) *$/ or return; my $name = $1; $line = $2; if($name eq 'option') { if($line =~ /([A-Za-z_]+) *= *([^ ]+)/) { $options{$1} = $2; } } my $sequences = [ ]; my $curr_sequence = [ ]; while(length $line > 0) { $line =~ s/^ *//; # strip leading spaces if($line =~ /^(\".*?[^\\]\")(.*)/) { # we have a quoted string as the next symbol my $symbol = $1; $line = $2; $symbol =~ s/\\(.)/$1/g; push @$curr_sequence, $symbol; } else { my $symbol = $line; if($line =~ /^([^ ]+) (.*)$/) { $symbol = $1; $line = $2; } else { $line = ''; } $symbol =~ s/\\(.)/$1/g; if($symbol eq "|") { if($#$curr_sequence >= 0) { push @$sequences, $curr_sequence; $curr_sequence = [ ]; } } else { if($symbol =~ /^$commandchar/) { # they're starting a symbol with an unquoted command char # we can't have that: quote it # we know there's no bad effects to this because commandchar # is not legal in tag names $symbol = '"' . $symbol . '"'; } push @$curr_sequence, $symbol; } } } if($#$curr_sequence >= 0) { push @$sequences, $curr_sequence; } $tags{$name} = $sequences; } sub dumpBNF { foreach my $k (keys %tags) { print "$k ::= "; my $i = -1; while(++$i < $#{$tags{$k}}) { my $seq = $tags{$k}->[$i]; foreach my $item(@$seq) { my $titem = $item; if($titem =~ / /) { $titem = '"' . $titem . '"'; } print "$titem "; } print "| "; } { my $seq = $tags{$k}->[$#{$tags{$k}}]; foreach my $item(@$seq) { my $titem = $item; if($titem =~ / /) { $titem = '"' . $titem . '"'; } print "$titem "; } } # print "
\n"; print "\n"; } # print "
\n"; print "\n"; } sub resolveConcats { my ($tag) = @_; while($tag =~ /^($validtag)\#\#($validtag)(.*?)$/) { my $root = $1; my $ccat = $2; my $rest = ''; if(defined($3)) { $rest = $3; } #$options{'debug'} and print 'Concatenation: '. $tag . $options{'debug'} and print 'Concatenation: '. $tag . #' (' . $root . ',' . $ccat . ',' . $rest . ')' . ' --> '; if( defined($tags{$ccat}) && ($#{$tags{$ccat}} == 0) && ($#{$tags{$ccat}->[0]} == 0) && ($tags{$ccat}->[0][0] =~/^$validtag$/) ) { $tag = $root . '_' . $tags{$ccat}->[0][0] . $rest; } else { $tag = $root . '_' . $ccat . $rest; } $options{'debug'} and print $tag."\n"; } return $tag; } sub generateString { my @results = ( '' ); my @unresolved = ( "bnf" ); my $iter = 0; while($#unresolved >= 0) { my $tag = shift @unresolved; my $silent = ''; my $iterresult = [ ]; my @postassign = ( ); # -------- note silent assigns and postfix assigns if($tag =~ /^\<($validtag(?:\#\#$validtag)*)::=($validtag(?:\#\#$validtag)*)\>$/) { # silent assign $silent = resolveConcats($1); $tag = $2; } elsif($tag =~ /^\<($validtag(?:\#\#$validtag)*)::=("[^"]*")\>$/) { # quoted silent assign $silent = resolveConcats($1); $tag = $2; } elsif($tag =~ /^\<\<($validtag(?:\#\#$validtag)*)::=($validtag(?:\#\#$validtag)*)\>\>$/) { # deep silent assign $options{'debug'} and print "Start deep calculation of $2 to assign to $1\n"; my $ctag = $commandchar . 'deepname' . ' ' . $1; unshift @unresolved, $ctag; unshift @results, ''; $tag = $2; } $tag =~ /^".*"$/ or $tag = resolveConcats($tag); # do we have any postfix assigns? while($tag =~ /^($validtag){($validtag(?:\#\#$validtag)*)}(.*)$/ or $tag =~ /^($validtag){{($validtag(?:\#\#$validtag)*)}}(.*)$/) { if ($tag =~ /^($validtag){($validtag(?:\#\#$validtag)*)}(.*)$/) { # shallow postfix assign push @postassign, $2; $tag = $1; defined($3) and $tag = $tag . $3; } elsif ($tag =~ /^($validtag){{($validtag(?:\#\#$validtag)*)}}(.*)$/) { # deep postfix assign # because we only want to output to one result at once, convert this # from foo{{bar}} to <> bar $options{'debug'} and print "Start deep calculation of $1 to assign to $2\n"; unshift @unresolved, $2; # prepend a bar my $ctag = $commandchar . 'deepname' . ' ' . $2; unshift @unresolved, $ctag; # then prepend the command to assign to bar unshift @results, ''; $tag = $1; defined($3) and $tag = $tag . $3; } } # -------- Resolve the tag $iter++; $options{'debug'} and print "Iteration $iter: "; if ( $tag =~ /^($commandchar)/ ) { # this is a command #print "Found command '$tag'! "; if ( $tag =~ /^($commandchar)deepname (.*)$/ ) { # we've completed a deep assign my $deepassignvar = shift(@results); # all the tags come out of a deep assign flattened into a string, # so we have to wrap them twice in []s to get the required array of arrays $tags{$2} = [ [ $deepassignvar ] ]; if($options{'debug'}) { print 'Deep assign: (' . $2 . '::=' . $deepassignvar . ")\n"; } } } elsif( $tags{$tag} ) { # this is a nonterminal: # select a possible sequence at random my $index = int( rand( $#{$tags{$tag}} + 1 ) ); defined $index or $index = 0; # add the items in the sequence to the unresolved list if($options{'debug'}) { print "Lookup: $tag ::="; foreach my $item(@{${$tags{$tag}}[$index]}) { print " $item"; } print "\n"; } foreach my $item(reverse @{${$tags{$tag}}[$index]}) { if($silent eq '') { unshift @unresolved, $item; } unshift @$iterresult, $item; } } else { # this is a terminal my $ttag = $tag; $ttag =~ s/^"(.*)"$/$1/; if($silent eq '') { if( ((length($results[0])>0) && (length($ttag)>0) && ($options{'spaces'})) ) { $results[0] .= " "; } $results[0] .= $ttag; } push @$iterresult, $tag; } if($silent ne '') { # we're inside a silent assign if($options{'debug'}) { print 'Silent assign: (' . $silent . '::=' ; foreach my $item(@$iterresult ) { print $item; } print ")\n"; } $tags{$silent} = [ $iterresult ]; } # else { # we're not inside a silent assign: perform any postfix assigns foreach my $item(@postassign) { if($options{'debug'}) { print 'Postfix assign: {' . $item . '::=' ; foreach my $i(@$iterresult ) { print $i; } print "}\n"; } $tags{$item} = [ $iterresult ]; } } } return $results[0]; } sub main() { srand(); my $seed = int(rand()*(1<<31)); my $q = new CGI; my $s = $q->param('seed'); if(defined($s) && ($s =~ /([0-9-]+)/)) { $seed = $1; } print STDERR ''."\n"; srand($seed); if(defined($q->param('page'))) { my @text = split(/[\r\n]/, FetchPageText($q->param('page'))); while($#text>-1) { my $item = shift @text; $item =~ /[^ ]/ or next; $options{'include'} = 0; &parseBnfLine($item); if($options{'include'} && !defined($includes{$options{'include'}})) { my @newtext = split(/[\r\n]/, FetchPageText($options{'include'})); @text = (@newtext, @text); $includes{$options{'include'}} = 1; } } if(defined($q->param('debug'))) { $options{'debug'} = 1; } if(defined $tags{"bnf"}) # success { $options{'debug'} and print 'Here, in no particular order, are the rules I found:'."\n"; $options{'debug'} and &dumpBNF(); $options{'debug'} and print "\n"; $options{'debug'} and print 'This is what I did with them:'."\n"; my $result = &generateString(); $options{'debug'} and print "\n"; $options{'debug'} and print '..and here\'s what I got:'."\n"; print $result; } else { print 'No "bnf" tag was found in the file.'; print "Usage: bnf.pl page=/path/to/file [debug=1] [seed=random-seed]\n"; } } else { print "Usage: bnf.pl page=/path/to/file [debug=1] [seed=random-seed]\n"; } } &main();