Skip to content
Snippets Groups Projects
ml_parser_generator.ml 7.84 KiB
open Grammar_parser
open Batteries
open Ll_parser
open List_utils
open Grammar


let int_of_lltype = function
  | First i
  | Follow i -> i

let nth_rule (toks,nts,rules) a =
  (List.nth rules (int_of_lltype a - 1))

let rec make_list l =
  match l with
    [] -> "[]"
  | i::r -> Printf.sprintf "$%d :: %s" (i+1) (make_list r)

(* Return the list of elements of the rule. *)
let default_action (pl: string list) : string =
  (* make_list (List.mapi (fun i e -> i) pl) *)
  "()"

let resolve_vars s =
  Str.global_replace (Str.regexp "\\$\\([0-9]+\\)") "p\\1" s

let make_nt (table: string*string -> lltype list) (toks,nts,rules) oc (n,ot) () =
  let t = match ot with | None -> "_" | Some t -> t in
  let type_annot = Printf.sprintf ": ((%s) * ((token*Lexing.position option) list)) res" t
  in
  Printf.fprintf oc "and parse_%s (tokens: ((token*Lexing.position option) list)) () %s=\n" n type_annot;
  Printf.fprintf oc " begin match tokens with\n";
  List.iteri
    (fun i t ->
       let rules = List.map (fun a -> nth_rule (toks,nts,rules) a) (table (n,t)) in
       match rules with
         [] -> ()
       | {rule_prods = pl; rule_action = act}::_ ->
         Printf.fprintf oc " | (symbol, _) :: _ when is_%s symbol -> begin\n" t;
         List.iteri
           (fun i t ->
              if List.mem t toks
              then Printf.fprintf oc "    eat_%s tokens >>= fun (p%d, tokens) ->\n" t (i + 1)
              else Printf.fprintf oc "    parse_%s tokens ()  >>= fun (p%d, tokens) ->\n" t (i+1))
           pl;
         let act = match act with Some act -> act | _ -> default_action pl in
         Printf.fprintf oc "    let res = %s in\n" (resolve_vars act);
         Printf.fprintf oc "    OK (res, tokens)\n" ;
         Printf.fprintf oc "  end\n";
    )
    toks;
  let expected = List.filter (fun t -> List.length (table (n,t)) > 0) toks in
  Printf.fprintf oc "  | tokens ->\n";
  Printf.fprintf oc " let got,lexpos =\n   match tokens with\n   [] -> \"EOF\",None\n     | (symbol, lexpos) :: _ -> (string_of_symbol symbol, lexpos)\n   in Error (\n";
  Printf.fprintf oc "      (match lexpos with\n";
  Printf.fprintf oc "      | Some lexpos -> Printf.sprintf \"At %%s, error while parsing %s\\n\" (string_of_position lexpos)\n" n;
  Printf.fprintf oc "      | None -> Printf.sprintf \"Error while parsing %s\\n\" )^\n" n;
  Printf.fprintf oc "      Printf.sprintf \"Expected one of {%%s}\"\n";
  Printf.fprintf oc "        (String.concat \", \" (List.map string_of_symbol [%s])) ^\n"
    (String.concat ";" (List.map (fun s -> "default_"^s) expected)) ;
  Printf.fprintf oc "      Printf.sprintf \" but got '%%s' instead.\\n\" got\n ^ ";
  Printf.fprintf oc "      Printf.sprintf \" '%%s' \\n\" (String.concat \",\" (List.map (fun (x, _) -> string_of_symbol x) (List.take 10 tokens)))\n";
  Printf.fprintf oc "  )";
  Printf.fprintf oc "\n  end\n\n"

let make_parser  (table: string*string -> lltype list)
    (toks,nts,rules,mlcode)
    (typ: (tokent * string) list)
    (nttyp: (nonterm * string) list)
    oc () =
  Stdlib.Option.iter (fun mlcode -> Printf.fprintf oc "\n\n%s\n\n" mlcode) mlcode;
  List.iter (fun t ->
      begin match List.assoc_opt t typ with
        | Some ty ->
          begin
            Printf.fprintf oc "let is_%s = function\n" t;
            Printf.fprintf oc " | %s _ -> true\n" t;
            Printf.fprintf oc " | _ -> false\n";

            Printf.fprintf oc "let default_%s = %s %s\n" t t
                       (match ty with
                          "string" -> "\"\""
                        | "int" -> "0"
                        | "bool" -> "false"
                        | "char" -> "'a'"
                        | _ -> failwith (Printf.sprintf "Don't know how to generate a default value of type %s" ty)
                )
          end
        | None -> begin
            Printf.fprintf oc "let is_%s = function\n" t;
            Printf.fprintf oc " | %s -> true\n" t;
            Printf.fprintf oc " | _ -> false\n";
            Printf.fprintf oc "let default_%s = %s\n" t t
          end
      end;
    ) toks;
  List.iter (fun t ->
      Printf.fprintf oc "let eat_%s = function\n" t;
      begin match List.assoc_opt t typ with
        | Some _ -> Printf.fprintf oc "| (%s(x),_) :: rtokens -> OK (x, rtokens)\n" t
        | None -> Printf.fprintf oc "| (%s,_) :: rtokens -> OK ((), rtokens)\n" t
      end;
      Printf.fprintf oc "|   (x,Some pos) :: _ -> Error (Printf.sprintf \"At position %%s, expected %%s, got %%s.\\n\"\n";
      Printf.fprintf oc "    (string_of_position pos)\n";
      Printf.fprintf oc "    (string_of_symbol default_%s)\n" t;
      Printf.fprintf oc "    (string_of_symbol x))\n";
      Printf.fprintf oc "    | (x,None) :: _ -> Error (Printf.sprintf \"Expected %%s, got %%s.\\n\"\n";
      Printf.fprintf oc "    (string_of_symbol default_%s)\n" t;
      Printf.fprintf oc "    (string_of_symbol x))\n";
      Printf.fprintf oc "    | _ -> Error  (Printf.sprintf \"Expected %%s, got EOF.\\n\" (string_of_symbol default_%s))\n" t;

    ) toks;
  Printf.fprintf oc "let rec ____unused = ()\n";
  List.iter (fun n -> make_nt table (toks,nts,rules) oc (n, List.assoc_opt n nttyp) ()) nts

let nts_ordered start (toks,nts,rules) =
  let nts =
    let rec aux acc nt =
      if List.mem nt acc then acc
      else let acc = nt::acc in
        let rules : (string list * string) list =
          filter_map (fun {rule_nt; rule_prods; rule_action} ->
              if rule_nt = nt
              then Some (rule_prods, match rule_action with Some a -> a | None -> "")
              else None) rules
        in
        List.fold_left (fun acc (rule,act) ->
            List.fold_left (fun acc tokornt ->
                if List.mem tokornt toks then acc else aux acc tokornt
              ) acc rule
          ) acc rules
    in List.rev (aux [] start)
  in
  let rules =
    List.concat (List.map (fun nt -> List.filter (fun r -> r.rule_nt = nt) rules) nts)
  in (nts,rules)

let _ =
  let grammar_file = ref None in
  let table_file = ref None in
  let parser_ml_file = ref None in

  Arg.parse
    [("-g", Arg.String (fun s -> grammar_file := Some s), "Input grammar file (.g)");
     ("-t", Arg.String (fun s -> table_file := Some s), "Where to output tables (.html)");
     ("-pml", Arg.String (fun s -> parser_ml_file := Some s), "Where to output the parser code (.ml)");
    ] print_endline "Usage: ";
  match !grammar_file with
  | None -> failwith "Please specify a grammar file using '-g <grammar.g>'"
  | Some gramfile ->
    let gram, axiom = parse_grammar gramfile in
    let (toks, nts, rules, mlcode) =
      (gram.tokens, gram.nonterms, gram.rules, gram.mlcode) in
    let toks = List.map fst gram.tokens in
    let (nts, rules) = nts_ordered axiom (toks,nts,rules) in
    iter_nullnt (toks, nts, rules) ();
    iter_first (toks, nts, rules) ();
    iter_follownt (toks, nts, rules) ();
    fill_lltable (toks, nts, rules) ();
    let has_conflict = check_conflicts (toks, nts, rules) () in
    if has_conflict
    then Printf.fprintf stderr "Warning! There is a conflict in your grammar. Check the prediction table for more details.\n";
    (match !table_file with
     | Some tfile -> let oc = open_out tfile in
       print_html (toks, nts, rules) (Format.formatter_of_out_channel oc) ();
       close_out oc
     | None -> ());
    (match !parser_ml_file with
     | None -> Printf.fprintf stderr "Please specify where I should write the generated parser code using '-pml <generated_parser.ml>'"
     | Some mlfile ->
       let oc = open_out mlfile in
       make_parser (fun (n,t) -> hashget_def lltable (n,t) [])
         (toks, nts, rules, mlcode)
         (List.filter_map (fun (t,o) ->
              match o with
              | None -> None
              | Some typ -> Some (t,typ)
            ) gram.tokens)
         (List.filter_map (fun (t,o) ->
              match o with
              | None -> None
              | Some typ -> Some (t,typ)
            ) gram.nonterms)
         oc ();
       close_out oc
    )