-
Wilke Pierre authoredWilke Pierre authored
ml_parser_generator.ml 7.84 KiB
open Grammar_parser
open Batteries
open Ll_parser
open List_utils
open Grammar
let int_of_lltype = function
| First i
| Follow i -> i
let nth_rule (toks,nts,rules) a =
(List.nth rules (int_of_lltype a - 1))
let rec make_list l =
match l with
[] -> "[]"
| i::r -> Printf.sprintf "$%d :: %s" (i+1) (make_list r)
(* Return the list of elements of the rule. *)
let default_action (pl: string list) : string =
(* make_list (List.mapi (fun i e -> i) pl) *)
"()"
let resolve_vars s =
Str.global_replace (Str.regexp "\\$\\([0-9]+\\)") "p\\1" s
let make_nt (table: string*string -> lltype list) (toks,nts,rules) oc (n,ot) () =
let t = match ot with | None -> "_" | Some t -> t in
let type_annot = Printf.sprintf ": ((%s) * ((token*Lexing.position option) list)) res" t
in
Printf.fprintf oc "and parse_%s (tokens: ((token*Lexing.position option) list)) () %s=\n" n type_annot;
Printf.fprintf oc " begin match tokens with\n";
List.iteri
(fun i t ->
let rules = List.map (fun a -> nth_rule (toks,nts,rules) a) (table (n,t)) in
match rules with
[] -> ()
| {rule_prods = pl; rule_action = act}::_ ->
Printf.fprintf oc " | (symbol, _) :: _ when is_%s symbol -> begin\n" t;
List.iteri
(fun i t ->
if List.mem t toks
then Printf.fprintf oc " eat_%s tokens >>= fun (p%d, tokens) ->\n" t (i + 1)
else Printf.fprintf oc " parse_%s tokens () >>= fun (p%d, tokens) ->\n" t (i+1))
pl;
let act = match act with Some act -> act | _ -> default_action pl in
Printf.fprintf oc " let res = %s in\n" (resolve_vars act);
Printf.fprintf oc " OK (res, tokens)\n" ;
Printf.fprintf oc " end\n";
)
toks;
let expected = List.filter (fun t -> List.length (table (n,t)) > 0) toks in
Printf.fprintf oc " | tokens ->\n";
Printf.fprintf oc " let got,lexpos =\n match tokens with\n [] -> \"EOF\",None\n | (symbol, lexpos) :: _ -> (string_of_symbol symbol, lexpos)\n in Error (\n";
Printf.fprintf oc " (match lexpos with\n";
Printf.fprintf oc " | Some lexpos -> Printf.sprintf \"At %%s, error while parsing %s\\n\" (string_of_position lexpos)\n" n;
Printf.fprintf oc " | None -> Printf.sprintf \"Error while parsing %s\\n\" )^\n" n;
Printf.fprintf oc " Printf.sprintf \"Expected one of {%%s}\"\n";
Printf.fprintf oc " (String.concat \", \" (List.map string_of_symbol [%s])) ^\n"
(String.concat ";" (List.map (fun s -> "default_"^s) expected)) ;
Printf.fprintf oc " Printf.sprintf \" but got '%%s' instead.\\n\" got\n ^ ";
Printf.fprintf oc " Printf.sprintf \" '%%s' \\n\" (String.concat \",\" (List.map (fun (x, _) -> string_of_symbol x) (List.take 10 tokens)))\n";
Printf.fprintf oc " )";
Printf.fprintf oc "\n end\n\n"
let make_parser (table: string*string -> lltype list)
(toks,nts,rules,mlcode)
(typ: (tokent * string) list)
(nttyp: (nonterm * string) list)
oc () =
Stdlib.Option.iter (fun mlcode -> Printf.fprintf oc "\n\n%s\n\n" mlcode) mlcode;
List.iter (fun t ->
begin match List.assoc_opt t typ with
| Some ty ->
begin
Printf.fprintf oc "let is_%s = function\n" t;
Printf.fprintf oc " | %s _ -> true\n" t;
Printf.fprintf oc " | _ -> false\n";
Printf.fprintf oc "let default_%s = %s %s\n" t t
(match ty with
"string" -> "\"\""
| "int" -> "0"
| "bool" -> "false"
| "char" -> "'a'"
| _ -> failwith (Printf.sprintf "Don't know how to generate a default value of type %s" ty)
)
end
| None -> begin
Printf.fprintf oc "let is_%s = function\n" t;
Printf.fprintf oc " | %s -> true\n" t;
Printf.fprintf oc " | _ -> false\n";
Printf.fprintf oc "let default_%s = %s\n" t t
end
end;
) toks;
List.iter (fun t ->
Printf.fprintf oc "let eat_%s = function\n" t;
begin match List.assoc_opt t typ with
| Some _ -> Printf.fprintf oc "| (%s(x),_) :: rtokens -> OK (x, rtokens)\n" t
| None -> Printf.fprintf oc "| (%s,_) :: rtokens -> OK ((), rtokens)\n" t
end;
Printf.fprintf oc "| (x,Some pos) :: _ -> Error (Printf.sprintf \"At position %%s, expected %%s, got %%s.\\n\"\n";
Printf.fprintf oc " (string_of_position pos)\n";
Printf.fprintf oc " (string_of_symbol default_%s)\n" t;
Printf.fprintf oc " (string_of_symbol x))\n";
Printf.fprintf oc " | (x,None) :: _ -> Error (Printf.sprintf \"Expected %%s, got %%s.\\n\"\n";
Printf.fprintf oc " (string_of_symbol default_%s)\n" t;
Printf.fprintf oc " (string_of_symbol x))\n";
Printf.fprintf oc " | _ -> Error (Printf.sprintf \"Expected %%s, got EOF.\\n\" (string_of_symbol default_%s))\n" t;
) toks;
Printf.fprintf oc "let rec ____unused = ()\n";
List.iter (fun n -> make_nt table (toks,nts,rules) oc (n, List.assoc_opt n nttyp) ()) nts
let nts_ordered start (toks,nts,rules) =
let nts =
let rec aux acc nt =
if List.mem nt acc then acc
else let acc = nt::acc in
let rules : (string list * string) list =
filter_map (fun {rule_nt; rule_prods; rule_action} ->
if rule_nt = nt
then Some (rule_prods, match rule_action with Some a -> a | None -> "")
else None) rules
in
List.fold_left (fun acc (rule,act) ->
List.fold_left (fun acc tokornt ->
if List.mem tokornt toks then acc else aux acc tokornt
) acc rule
) acc rules
in List.rev (aux [] start)
in
let rules =
List.concat (List.map (fun nt -> List.filter (fun r -> r.rule_nt = nt) rules) nts)
in (nts,rules)
let _ =
let grammar_file = ref None in
let table_file = ref None in
let parser_ml_file = ref None in
Arg.parse
[("-g", Arg.String (fun s -> grammar_file := Some s), "Input grammar file (.g)");
("-t", Arg.String (fun s -> table_file := Some s), "Where to output tables (.html)");
("-pml", Arg.String (fun s -> parser_ml_file := Some s), "Where to output the parser code (.ml)");
] print_endline "Usage: ";
match !grammar_file with
| None -> failwith "Please specify a grammar file using '-g <grammar.g>'"
| Some gramfile ->
let gram, axiom = parse_grammar gramfile in
let (toks, nts, rules, mlcode) =
(gram.tokens, gram.nonterms, gram.rules, gram.mlcode) in
let toks = List.map fst gram.tokens in
let (nts, rules) = nts_ordered axiom (toks,nts,rules) in
iter_nullnt (toks, nts, rules) ();
iter_first (toks, nts, rules) ();
iter_follownt (toks, nts, rules) ();
fill_lltable (toks, nts, rules) ();
let has_conflict = check_conflicts (toks, nts, rules) () in
if has_conflict
then Printf.fprintf stderr "Warning! There is a conflict in your grammar. Check the prediction table for more details.\n";
(match !table_file with
| Some tfile -> let oc = open_out tfile in
print_html (toks, nts, rules) (Format.formatter_of_out_channel oc) ();
close_out oc
| None -> ());
(match !parser_ml_file with
| None -> Printf.fprintf stderr "Please specify where I should write the generated parser code using '-pml <generated_parser.ml>'"
| Some mlfile ->
let oc = open_out mlfile in
make_parser (fun (n,t) -> hashget_def lltable (n,t) [])
(toks, nts, rules, mlcode)
(List.filter_map (fun (t,o) ->
match o with
| None -> None
| Some typ -> Some (t,typ)
) gram.tokens)
(List.filter_map (fun (t,o) ->
match o with
| None -> None
| Some typ -> Some (t,typ)
) gram.nonterms)
oc ();
close_out oc
)