Skip to content
Snippets Groups Projects
Commit d2f396ea authored by Wilke Pierre's avatar Wilke Pierre
Browse files

Add types to non-terminals in alpaga + Add tests for lexer

parent 3d26f178
No related branches found
No related tags found
No related merge requests found
......@@ -11,7 +11,7 @@ type rule = { rule_nt: nonterm;
}
type grammar = { tokens: (tokent * string option) list;
nonterms: nonterm list;
nonterms: (nonterm * string option) list;
rules: rule list;
mlcode: string option;
axiom: nonterm option
......
......@@ -13,9 +13,8 @@ rule token = parse
| "/*" { comment_multiline lexbuf }
| '\n' { Lexing.new_line lexbuf; EOL }
| '{' { action 0 "" lexbuf }
| '<' { ttype "" lexbuf }
| "->" { ARROW }
| ">" { GT }
| "<" { LT }
| "axiom" { AXIOM }
| "tokens" { TOK }
| "non-terminals" { NT }
......@@ -29,8 +28,12 @@ rule token = parse
and action level s = parse
| '}' { if level = 0 then CODE s else action (level-1) (s ^ "}") lexbuf }
| '{' { action (level + 1) (s ^ "{") lexbuf }
| _ as x { if x == '\n' then Lexing.new_line lexbuf;
| _ as x { if x = '\n' then Lexing.new_line lexbuf;
action level (Printf.sprintf "%s%c" s x) lexbuf }
and ttype s = parse
| '>' { TTYPE s }
| _ as x { if x = '\n' then Lexing.new_line lexbuf;
ttype (Printf.sprintf "%s%c" s x) lexbuf }
and comment = parse
| '\n' { Lexing.new_line lexbuf; token lexbuf }
| _ { comment lexbuf }
......
......@@ -26,7 +26,7 @@ let parse_grammar file : grammar * nonterm =
List.fold_left
(fun (undef, used) prod ->
let undef =
if not (List.mem prod (List.map fst gram.tokens) || List.mem prod gram.nonterms || List.mem prod undef)
if not (List.mem_assoc prod gram.tokens || List.mem_assoc prod gram.nonterms || List.mem prod undef)
then prod::undef
else undef in
let used =
......@@ -46,7 +46,7 @@ let parse_grammar file : grammar * nonterm =
| Some axiom ->
(* Warn if some non terminals are never seen on the right hand side of a rule. *)
let unused_nts = List.filter (fun nt -> not (List.mem nt used_strings) && Some nt <> gram.axiom) gram.nonterms in
let unused_nts = List.filter (fun nt -> not (List.mem nt used_strings) && Some nt <> gram.axiom) (List.map fst gram.nonterms) in
if unused_nts <> [] then Printf.printf "The following non-terminals are declared but never appear on the right hand-side of a rule:\n%a\n" print_list unused_nts;
(* Warn if some tokens are never seen on the right hand side of a rule. *)
......@@ -59,5 +59,5 @@ let parse_grammar file : grammar * nonterm =
match Hashtbl.find_opt h r.rule_nt with
| None -> Hashtbl.add h r.rule_nt [r]
| Some lp -> Hashtbl.replace h r.rule_nt (lp@[r]) ) (gram.rules);
let rules = List.concat (List.map (fun n -> hashget_def h n []) gram.nonterms) in
let rules = List.concat (List.map (fun n -> hashget_def h n []) (List.map fst gram.nonterms)) in
{ gram with rules = rules }, axiom
......@@ -4,9 +4,10 @@
%}
%token EOF EOL TOK NT RULES ARROW AXIOM LT GT
%token EOF EOL TOK NT RULES ARROW AXIOM //LT GT
%token<string> IDENTIFIER
%token<string> CODE
%token<string> TTYPE
%start main
%type <Grammar.grammar> main
......@@ -16,7 +17,7 @@
main:
| AXIOM IDENTIFIER EOL main { let r = $4 in {r with axiom = Some $2 }}
| TOK list_tokens EOL main { let r = $4 in {r with tokens = r.tokens @ $2} }
| NT list_ident EOL main { let r = $4 in {r with nonterms = r.nonterms @ $2} }
| NT list_nts EOL main { let r = $4 in {r with nonterms = r.nonterms @ $2} }
| CODE main { let r = $2 in { r with mlcode = Some ($1) }}
| RULES EOL rules EOF { { tokens = []; nonterms = []; axiom = None;
rules = $3; mlcode = None } }
......@@ -24,7 +25,7 @@
;
typed_tokens:
| IDENTIFIER LT IDENTIFIER GT { ($1, Some $3) }
| IDENTIFIER TTYPE { ($1, Some $2) }
| IDENTIFIER { ($1, None)}
;
......@@ -35,6 +36,20 @@
;
typed_nts:
| IDENTIFIER TTYPE { ($1, Some $2) }
| IDENTIFIER { ($1, None)}
;
list_nts:
| typed_nts list_nts { $1 :: $2}
| { [] }
;
list_ident:
| IDENTIFIER list_ident { $1 :: $2}
| { [] }
......
......@@ -25,8 +25,11 @@ let default_action (pl: string list) : string =
let resolve_vars s =
Str.global_replace (Str.regexp "\\$\\([0-9]+\\)") "p\\1" s
let make_nt (table: string*string -> lltype list) (toks,nts,rules) oc n () =
Printf.fprintf oc "and parse_%s tokens () =\n" n;
let make_nt (table: string*string -> lltype list) (toks,nts,rules) oc (n,ot) () =
let t = match ot with | None -> "_" | Some t -> t in
let type_annot = Printf.sprintf ": ((%s) * ((token*Lexing.position option) list)) res" t
in
Printf.fprintf oc "and parse_%s (tokens: ((token*Lexing.position option) list)) () %s=\n" n type_annot;
Printf.fprintf oc " begin match tokens with\n";
List.iteri
(fun i t ->
......@@ -41,48 +44,37 @@ let make_nt (table: string*string -> lltype list) (toks,nts,rules) oc n () =
then Printf.fprintf oc " eat_%s tokens >>= fun (p%d, tokens) ->\n" t (i + 1)
else Printf.fprintf oc " parse_%s tokens () >>= fun (p%d, tokens) ->\n" t (i+1))
pl;
Printf.fprintf oc "\n" ;
Printf.fprintf oc " let res =\n" ;
(match act with
| Some act -> Printf.fprintf oc " %s\n" (resolve_vars act)
| _ ->
Printf.fprintf oc " %s\n" (resolve_vars (default_action pl))
);
Printf.fprintf oc " in OK (res, tokens)\n" ;
Printf.fprintf oc "end\n";
let act = match act with Some act -> act | _ -> default_action pl in
Printf.fprintf oc " let res = %s in\n" (resolve_vars act);
Printf.fprintf oc " OK (res, tokens)\n" ;
Printf.fprintf oc " end\n";
)
toks;
let expected = List.filter (fun t -> List.length (table (n,t)) > 0) toks in
Printf.fprintf oc " | tokens -> \n";
Printf.fprintf oc " let got,lexpos = match tokens with [] -> \"EOF\",None | (symbol, lexpos) :: _ -> (string_of_symbol symbol, lexpos) in Error (\n";
Printf.fprintf oc " (match lexpos with \n";
Printf.fprintf oc " | Some lexpos -> Printf.sprintf \"At %%s, error while parsing %s\\n\" (string_of_position lexpos) \n" n;
Printf.fprintf oc " | tokens ->\n";
Printf.fprintf oc " let got,lexpos =\n match tokens with\n [] -> \"EOF\",None\n | (symbol, lexpos) :: _ -> (string_of_symbol symbol, lexpos)\n in Error (\n";
Printf.fprintf oc " (match lexpos with\n";
Printf.fprintf oc " | Some lexpos -> Printf.sprintf \"At %%s, error while parsing %s\\n\" (string_of_position lexpos)\n" n;
Printf.fprintf oc " | None -> Printf.sprintf \"Error while parsing %s\\n\" )^\n" n;
Printf.fprintf oc " Printf.sprintf \"Expected one of \"^\n";
begin
match expected with
[] -> Printf.fprintf oc "Printf.sprintf \"{}\" ^\n"
| a::r ->
List.iteri (fun i t ->
Printf.fprintf oc "Printf.sprintf \"%s %%s\" (string_of_symbol default_%s)^\n" (if i = 0 then "{" else ",") t;
) (a::r);
Printf.fprintf oc "Printf.sprintf \"}\" ^ \n"
end;
Printf.fprintf oc " Printf.sprintf \" but got '%%s' instead.\\n\" got\n ^ ";
Printf.fprintf oc " Printf.sprintf \" '%%s' \\n\" (String.concat \",\" (List.map (fun (x, _) -> string_of_symbol x) (List.take 10 tokens)))\n";
Printf.fprintf oc " Printf.sprintf \"Expected one of {%%s}\"\n";
Printf.fprintf oc " (String.concat \", \" (List.map string_of_symbol [%s])) ^\n"
(String.concat ";" (List.map (fun s -> "default_"^s) expected)) ;
Printf.fprintf oc " Printf.sprintf \" but got '%%s' instead.\\n\" got\n ^ ";
Printf.fprintf oc " Printf.sprintf \" '%%s' \\n\" (String.concat \",\" (List.map (fun (x, _) -> string_of_symbol x) (List.take 10 tokens)))\n";
Printf.fprintf oc " )";
Printf.fprintf oc "\n end\n\n"
let make_parser (table: string*string -> lltype list)
(toks,nts,rules,mlcode)
(typ: (tokent * string) list)
(nttyp: (nonterm * string) list)
oc () =
Stdlib.Option.iter (fun mlcode -> Printf.fprintf oc "\n\n%s\n\n" mlcode) mlcode;
List.iter (fun t ->
begin match List.assoc_opt t typ with
| Some ty ->
begin
Printf.fprintf oc "let is_%s = function \n" t;
Printf.fprintf oc "let is_%s = function\n" t;
Printf.fprintf oc " | %s _ -> true\n" t;
Printf.fprintf oc " | _ -> false\n";
......@@ -96,7 +88,7 @@ let make_parser (table: string*string -> lltype list)
)
end
| None -> begin
Printf.fprintf oc "let is_%s = function \n" t;
Printf.fprintf oc "let is_%s = function\n" t;
Printf.fprintf oc " | %s -> true\n" t;
Printf.fprintf oc " | _ -> false\n";
Printf.fprintf oc "let default_%s = %s\n" t t
......@@ -104,23 +96,23 @@ let make_parser (table: string*string -> lltype list)
end;
) toks;
List.iter (fun t ->
Printf.fprintf oc "let eat_%s = function \n" t;
Printf.fprintf oc "let eat_%s = function\n" t;
begin match List.assoc_opt t typ with
| Some _ -> Printf.fprintf oc "| (%s(x),_) :: rtokens -> OK (x, rtokens)\n" t
| None -> Printf.fprintf oc "| (%s,_) :: rtokens -> OK ((), rtokens)\n" t
end;
Printf.fprintf oc "| (x,Some pos) :: _ -> Error (Printf.sprintf \"At position %%s, expected %%s, got %%s.\\n\"";
Printf.fprintf oc " (string_of_position pos)";
Printf.fprintf oc " (string_of_symbol default_%s)" t;
Printf.fprintf oc " (string_of_symbol x))";
Printf.fprintf oc " | (x,None) :: _ -> Error (Printf.sprintf \"Expected %%s, got %%s.\\n\"";
Printf.fprintf oc " (string_of_symbol default_%s)" t;
Printf.fprintf oc " (string_of_symbol x))";
Printf.fprintf oc "| (x,Some pos) :: _ -> Error (Printf.sprintf \"At position %%s, expected %%s, got %%s.\\n\"\n";
Printf.fprintf oc " (string_of_position pos)\n";
Printf.fprintf oc " (string_of_symbol default_%s)\n" t;
Printf.fprintf oc " (string_of_symbol x))\n";
Printf.fprintf oc " | (x,None) :: _ -> Error (Printf.sprintf \"Expected %%s, got %%s.\\n\"\n";
Printf.fprintf oc " (string_of_symbol default_%s)\n" t;
Printf.fprintf oc " (string_of_symbol x))\n";
Printf.fprintf oc " | _ -> Error (Printf.sprintf \"Expected %%s, got EOF.\\n\" (string_of_symbol default_%s))\n" t;
) toks;
Printf.fprintf oc "let rec ____unused = () \n";
List.iter (fun n -> make_nt table (toks,nts,rules) oc n ()) nts
Printf.fprintf oc "let rec ____unused = ()\n";
List.iter (fun n -> make_nt table (toks,nts,rules) oc (n, List.assoc_opt n nttyp) ()) nts
let nts_ordered start (toks,nts,rules) =
let nts =
......@@ -185,6 +177,11 @@ let _ =
| None -> None
| Some typ -> Some (t,typ)
) gram.tokens)
(List.filter_map (fun (t,o) ->
match o with
| None -> None
| Some typ -> Some (t,typ)
) gram.nonterms)
oc ();
close_out oc
)
......@@ -2,7 +2,7 @@ tokens SYM_EOF SYM_IDENTIFIER<string> SYM_INTEGER<int> SYM_PLUS SYM_MINUS SYM_AS
tokens SYM_LPARENTHESIS SYM_RPARENTHESIS SYM_LBRACE SYM_RBRACE
tokens SYM_ASSIGN SYM_SEMICOLON SYM_RETURN SYM_IF SYM_WHILE SYM_ELSE SYM_COMMA SYM_PRINT
tokens SYM_EQUALITY SYM_NOTEQ SYM_LT SYM_LEQ SYM_GT SYM_GEQ
non-terminals S INSTR INSTRS LINSTRS ELSE EXPR FACTOR
non-terminals S INSTR<tree> INSTRS<tree list> LINSTRS ELSE EXPR FACTOR
non-terminals LPARAMS REST_PARAMS
non-terminals IDENTIFIER INTEGER
non-terminals FUNDEF FUNDEFS
......
......@@ -4,6 +4,33 @@ open Batteries
open Utils
open Symbols
let nfa_accepts (n: nfa) (w: char list) : bool =
let rec trav vis s =
if Set.mem s vis then vis
else let en = List.filter_map (fun (oa, n) -> if oa = None then Some n else None) (n.nfa_step s) in
List.fold_left trav (Set.add s vis) en in
let ec s = trav Set.empty s in
let ecs ls = Set.fold (fun q -> Set.union (ec q)) ls Set.empty in
let rec walk (q: int set) (w: char list) =
let q = ecs q in
match w with
| [] -> Set.exists (fun q -> List.mem q (List.map fst n.nfa_final)) q
| c::w ->
let q' =
Set.fold Set.union (Set.map (fun q ->
(List.filter_map
(fun (cso,q') ->
match cso with
| None -> None
| Some cs -> if Set.mem c cs then Some q' else None
)
(n.nfa_step q)) |> Set.of_list
) q) Set.empty
in walk q' w in
walk (Set.of_list n.nfa_initial) w
let () =
let regexp_list = [
(keyword_regexp "while", fun s -> Some (SYM_WHILE));
......@@ -110,5 +137,54 @@ let () =
let table = Hashtbl.create 10 in
build_dfa_table table n (dfa_initial_state n);
expect_set_set "dfa states" (Hashtbl.keys table |> Set.of_enum) (Set.of_list [Set.of_list [1;2;3]; Set.of_list [2;4]; Set.of_list [2]])
expect_set_set "dfa states" (Hashtbl.keys table |> Set.of_enum) (Set.of_list [Set.of_list [1;2;3]; Set.of_list [2;4]; Set.of_list [2]]);
let expect_nfa_accepts n s b =
let r = nfa_accepts n (char_list_of_string s) in
if r = b
then Printf.printf "[OK] nfa_accepts %s = %b\n" s r
else Printf.printf "[KO] nfa_accepts %s = %b\n" s r
in
Printf.printf "*** NFA n1 : 'hello'\n";
let n1, f1 = nfa_of_regexp (keyword_regexp "hello") 1 (fun _ -> None) in
expect_nfa_accepts n1 "hello" true;
expect_nfa_accepts n1 "bonjour" false;
Printf.printf "*** NFA n2 : 'bonjour'\n";
let n2, f2 = nfa_of_regexp (keyword_regexp "bonjour") f1 (fun _ -> None) in
expect_nfa_accepts n2 "hello" false;
expect_nfa_accepts n2 "bonjour" true;
Printf.printf "*** NFA n3 : n1 | n2\n";
let n3 = alt_nfa n1 n2 in
expect_nfa_accepts n3 "hello" true;
expect_nfa_accepts n3 "bonjour" true;
expect_nfa_accepts n2 "buongiorno" false;
Printf.printf "*** NFA n4 : n1 . n2 \n";
let n4 = cat_nfa n1 n2 in
expect_nfa_accepts n4 "hello" false;
expect_nfa_accepts n4 "bonjour" false;
expect_nfa_accepts n4 "hellobonjour" true;
expect_nfa_accepts n4 "bonjourhello" false;
Printf.printf "*** NFA n5 : n1* \n";
let n5 = star_nfa n1 (fun _ -> None) in
expect_nfa_accepts n5 "" true;
expect_nfa_accepts n5 "hello" true;
expect_nfa_accepts n5 "hellohello" true;
expect_nfa_accepts n5 "hellobonjour" false;
Printf.printf "*** NFA n6 : n3* \n";
let n6 = star_nfa n3 (fun _ -> None) in
expect_nfa_accepts n6 "" true;
expect_nfa_accepts n6 "hello" true;
expect_nfa_accepts n6 "hellohello" true;
expect_nfa_accepts n6 "hellobonjour" true;
expect_nfa_accepts n6 "hellobonjourhello" true;
expect_nfa_accepts n6 "bonjourbonjourbonjourhello" true;
expect_nfa_accepts n6 "bonjlo" false;
ignore f2
......@@ -188,7 +188,10 @@ class CommandExecutor(Thread):
icon_ko,
self.filename,
r['compstep'], err)
elif r["compstep"] == "Parsing":
compstep_td = """
<td class="good">{}</td>
""".format(icon_ok)
elif r["compstep"] == "Lexing":
expect_lex_file_name = self.filename + ".expect_lexer"
out_lex_file_name = self.filename[:-2] + ".lex"
......@@ -321,7 +324,8 @@ def main():
exec_thread = CommandExecutor(fname, cmd,
args.args, args.make_expect,
# 1 colonne pour le lexer
len(args.passes) + 1)
# 1 colonne pour le parser
len(args.passes) + 2)
threads.append(exec_thread)
exec_thread.start()
......@@ -400,7 +404,7 @@ def main():
res_html.write("""
<table class="w3-table w3-striped w3-responsive">
<tr><th>File</th>""")
for pass_name in ["Lexer"] + args.passes:
for pass_name in ["Lexer","Parser"] + args.passes:
res_html.write("<th style='transform: rotate(180deg); writing-mode: vertical-rl;'>{}</th>\n".format(pass_name))
res_html.write("""
</tr>
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment