|
RegExp.grm
|
// FILE. . . . . /home/hak/hlt/src/hlt/language/jaccapps/regx/sources/RegExp.grm // EDIT BY . . . Hassan Ait-Kaci // ON MACHINE. . Hp-Dv7 // STARTED ON. . Thu Oct 18 17:16:36 2012
|
This is a grammar for defining regular expressions and simplifying
them. It implements an interactive regular expression parser and
normalizer. One enters a regular expression, or a definition
associating a regular expression with an identifier, and it is parsed
and, if syntactically correct, it is normalized and its normal form
is printed back.
[See also the grammar source files]
|
%package hlt.regx
%import java.util.HashMap
%import hlt.language.util.*
%import hlt.language.tools.Misc
%access public
%start Session
%root RegularExpression
%root Definition
%token IDENTIFIER NUMBER // symbols and numbers are of least prec.
%right '|' // choice is infix right assoc. of lesser prec. than concat.
%right '.' // concat. is infix right assoc. of lesser prec. than unary ops.
%left '?' '+' '*' '_' '^' // all unary operators are left assoc. of same highest prec.
%{
// Prints an empty line on standard output.
void p ()
  {
    // Printing the empty string followed by a line break yields a blank line.
    p("");
  }
// Prints s followed by a line break on standard output.
void p (String s)
{
System.out.println(s);
}
// Prints the list of inputs accepted by the session, framed by blank
// lines, and then calls Tokenizer.prompt().
void help ()
  {
    // One entry per output line; an empty entry produces a blank line.
    String[] lines =
      {
        "",
        "Enter one of the following ending with a ';':",
        "",
        "- a regular expression to normalize - e.g., '(a|b).()'",
        "- a definition: 'Variable = expression' - e.g., 'Foo = a|b'",
        "- 'syntax' to have a syntax summary",
        "- 'trace' to toggle tracing",
        "- 'help' to print these lines",
        "- 'quit' to quit",
        ""
      };

    for (int i = 0; i < lines.length; i++)
      p(lines[i]);

    Tokenizer.prompt();
  }
// Prints a summary of the regular expression syntax (operators, their
// precedence and associativity), and then calls Tokenizer.prompt().
void syntax ()
  {
    // One entry per output line; an empty entry produces a blank line.
    String[] lines =
      {
        "",
        "REGULAR EXPRESSION SYNTAX SUMMARY:",
        "----------------------------------",
        "",
        "CATEGORY KIND: DESCRIPTION - EXAMPLE",
        "-------- ----- ----------- - -------",
        "Empty symbol: ()",
        "Alphabet symbol: lowercase-start identifier - a, foo, bAR",
        "Variable symbol: capitalized identifier - A, Foo, BAR",
        "Choice bin-op: infix '|' (X or Y) - a | b",
        "Concat bin-op: infix '.' (X then Y) - a . b",
        "Option mon-op: postfix '?' (zero or one) - a?",
        "Plus mon-op: postfix '+' (one or more) - a+",
        "Star mon-op: postfix '*' (zero or more) - a*",
        "Power mon-op: postfix '^n' (exactly n) - a^2",
        "Bounded range mon-op: postfix '_m^n' ('|' of m to n) - a_1^3",
        "Unbounded range mon-op: postfix '_n~' (at least n) - a_2~",
        "",
        "'|' has less precedence than '.'",
        "'.' has less precedence than '?', '+', '*', '_', or '^'",
        "'|' and '.' associate to the right",
        "'?', '+', '*', '_', and '^' associate to the left",
        "use parentheses to enforce precedence",
        ""
      };

    for (int i = 0; i < lines.length; i++)
      p(lines[i]);

    Tokenizer.prompt();
  }
| The definition store. It associates a RegExp E with a RegExpSymbol s, which is henceforth interpreted as E wherever it occurs, including in E itself! |
// Raw map: keys are the objects returned by RegExpSymbol.get(name),
// values are the RegExp objects bound to them by a Definition.
HashMap defs = new HashMap();
%}
%nodeprefix ""
%nodesuffix ""
%nodeclass public Definition
  {
    // Message attached to this node.
    private String message;

    // Returns the message attached to this node (null until one is set).
    public String getMessage ()
      {
        return this.message;
      }

    // Attaches msg to this node, replacing any previous message.
    public void setMessage (String msg)
      {
        this.message = msg;
      }
  }
%nodeclass public RegularExpression
  {
    // Regular expression object attached to this node.
    private RegExp expression;

    // Returns the expression attached to this node (null until one is set).
    public RegExp getExpression ()
      {
        return this.expression;
      }

    // Attaches exp to this node, replacing any previous expression.
    public void setExpression (RegExp exp)
      {
        this.expression = exp;
      }
  }
%%
/* A session is a sequence of actions, optionally ended by an exit command. */
Session
: Actions Exit_opt
;
/* A possibly empty, left-recursive list of actions. */
Actions
: /* empty */
| Actions Action
;
| An action is a RegularExpression, a Definition, one of the commands 'help', 'syntax', or 'trace', or an error, each followed by a semicolon. |
/* One interactive step. Every alternative ends by prompting for the next
   input, either directly or through help() and syntax(). */
Action
  : RegularExpression
      {
        RegExp exp = $1.getExpression();
        // The copy is taken before normalizing so that ORIG shows the
        // expression as entered; presumably normalize() may rewrite exp
        // in place -- to be confirmed against RegExp.
        RegExp cpy = exp.deepCopy();
        RegExp nrm = exp.normalize();
        p(" "+Misc.repeat(30,'='));
        p("ORIG: "+cpy);
        p(" "+Misc.repeat(30,'-'));
        p("NORM: "+nrm);
        p(" "+Misc.repeat(30,'='));
        Tokenizer.prompt();
      }
    ';'
  | Definition
      {
        // The message was prepared by the Definition rule.
        p($1.getMessage());
        Tokenizer.prompt();
      }
    ';'
  | 'help'
      {
        // help() prompts by itself.
        help();
      }
    ';'
  | 'syntax'
      {
        // syntax() prompts by itself.
        syntax();
      }
    ';'
  | 'trace'
      {
        p("*** Toggling trace ...");
        RegExp.toggleTrace();
        // Prompt again like every other action; this call was missing, so
        // no prompt was shown after toggling the trace.
        Tokenizer.prompt();
      }
    ';'
  | error
      {
        errorManager().reportErrors(true);
        Tokenizer.prompt();
      } ';'
  ;
| A $RegularExpression$ is one of the following forms: |
RegularExpression
:
| The empty $RegularExpression$ is a $RegularExpression$. |
'(' ')'
    {
      $$.setExpression(RegExp.EMPTY);
    }
|
| An $IDENTIFIER$ is a $RegularExpression$. |
IDENTIFIER
    {
      // A defined identifier stands for its definition; an undefined one
      // stands for its own symbol.
      // NOTE(review): the stored definition object itself is reused here,
      // not a copy -- confirm that sharing it between expressions is intended.
      String name = $1.svalue();
      RegExp bound = (RegExp)defs.get(RegExpSymbol.get(name));
      $$.setExpression(bound != null ? bound : RegExpSymbol.get(name));
    }
|
| A choice is a $RegularExpression$. |
RegularExpression '|' RegularExpression
    {
      RegExp left = $1.getExpression();
      RegExp right = $3.getExpression();
      $$.setExpression(new RegExpChoice(left,right));
    }
|
| A sequence is a $RegularExpression$. |
RegularExpression '.' RegularExpression
    {
      RegExp head = $1.getExpression();
      RegExp tail = $3.getExpression();
      $$.setExpression(new RegExpConcat(head,tail));
    }
|
| An option is a $RegularExpression$. |
RegularExpression '?'
    {
      RegExp operand = $1.getExpression();
      $$.setExpression(new RegExpOption(operand));
    }
|
| A non-empty infinite iteration is a $RegularExpression$. |
RegularExpression '+'
    {
      RegExp operand = $1.getExpression();
      $$.setExpression(new RegExpPlus(operand));
    }
|
| A possibly empty infinite iteration is a $RegularExpression$. |
RegularExpression '*'
    {
      RegExp operand = $1.getExpression();
      $$.setExpression(new RegExpStar(operand));
    }
|
| A finite power is a $RegularExpression$. |
RegularExpression '^' NUMBER
    {
      // The numeric token value is truncated to an int.
      RegExp base = $1.getExpression();
      int power = (int)$3.nvalue();
      $$.setExpression(new RegExpPower(base,power));
    }
|
| A finite power range is a $RegularExpression$. |
RegularExpression '_' NUMBER '^' NUMBER
    {
      // Both numeric token values are truncated to ints.
      RegExp base = $1.getExpression();
      int lower = (int)$3.nvalue();
      int upper = (int)$5.nvalue();
      $$.setExpression(new RegExpPowerRange(base,lower,upper));
    }
|
| An infinite power range is a $RegularExpression$. |
RegularExpression '_' NUMBER '~'
    {
      // Only a lower bound is given; the two-argument constructor is used.
      RegExp base = $1.getExpression();
      int lower = (int)$3.nvalue();
      $$.setExpression(new RegExpPowerRange(base,lower));
    }
|
| A parenthesized $RegularExpression$ is a $RegularExpression$. |
'(' RegularExpression ')'
    {
      // Parentheses only group: the inner expression is passed up unchanged.
      RegExp inner = $2.getExpression();
      $$.setExpression(inner);
    }
;
| A $Definition$ stores the value of a $RegularExpression$ as an identifier. |
Definition
  : IDENTIFIER '=' RegularExpression
      {
        // Bind the symbol of the identifier to the parsed expression in
        // the definition store, replacing any earlier binding, and keep a
        // message on the node for the Action rule to print.
        String id = $1.svalue();
        RegExp body = $3.getExpression();
        defs.put(RegExpSymbol.get(id),body);
        $$.setMessage("Defined: "+id+" = "+body);
        $$.setSvalue(id);
      }
  ;
| Action for quitting the session. |
Exit
: 'quit'
{
p("Bye bye!...");
// Terminates the JVM with status 0; this action precedes the semicolon in the rule.
System.exit(0);
} ';'
;
/* The exit command may be omitted. */
Exit_opt
: /* empty */
| Exit
;
%%
This file was generated on Fri Oct 19 10:18:52 PDT 2012 from file RegExp.grm
by the hlt.language.tools.Hilite Java tool written by Hassan Aït-Kaci