Skip to content
Snippets Groups Projects
ml_parser_generator.ml 7.27 KiB
Newer Older
open Grammar_parser
open Batteries
open Ll_parser
open List_utils
open Grammar


let int_of_lltype = function
  | First i
  | Follow i -> i

let nth_rule (toks,nts,rules) a =
  (List.nth rules (int_of_lltype a - 1))

let rec make_list l =
  match l with
    [] -> "[]"
  | i::r -> Printf.sprintf "$%d :: %s" (i+1) (make_list r)

(* Return the list of elements of the rule. *)
let default_action (pl: string list) : string =
  (* make_list (List.mapi (fun i e -> i) pl) *)
  "()"

let resolve_vars s =
  Str.global_replace (Str.regexp "\\$\\([0-9]+\\)") "p\\1" s

let make_nt (table: string*string -> lltype list) (toks,nts,rules) oc n () =
  Printf.fprintf oc "and parse_%s tokens () =\n" n;
  Printf.fprintf oc " begin match tokens with\n";
  List.iteri
    (fun i t ->
       let rules = List.map (fun a -> nth_rule (toks,nts,rules) a) (table (n,t)) in
       match rules with
         [] -> ()
       | {rule_prods = pl; rule_action = act}::_ ->
         Printf.fprintf oc " | (symbol, _) :: _ when is_%s symbol -> begin\n" t;
         List.iteri
           (fun i t ->
              if List.mem t toks
              then Printf.fprintf oc "    eat_%s tokens >>= fun (p%d, tokens) ->\n" t (i + 1)
              else Printf.fprintf oc "    parse_%s tokens ()  >>= fun (p%d, tokens) ->\n" t (i+1))
           pl;
         Printf.fprintf oc "\n" ;
         Printf.fprintf oc " let res =\n" ;
         (match act with
          | Some act -> Printf.fprintf oc "    %s\n" (resolve_vars act)
          | _ ->
            Printf.fprintf oc "    %s\n" (resolve_vars (default_action pl))
         );
         Printf.fprintf oc " in OK (res, tokens)\n" ;
         Printf.fprintf oc "end\n";
    )
    toks;
  let expected = List.filter (fun t -> List.length (table (n,t)) > 0) toks in
  Printf.fprintf oc "  | tokens -> \n";
  Printf.fprintf oc " let got,lexpos = match tokens with [] -> \"EOF\",None | (symbol, lexpos) :: _ -> (string_of_symbol symbol, lexpos) in Error (\n";
  Printf.fprintf oc "      (match lexpos with \n";
  Printf.fprintf oc "      | Some lexpos -> Printf.sprintf \"At %%s, error while parsing %s\\n\" (string_of_position lexpos) \n" n;
  Printf.fprintf oc "      | None -> Printf.sprintf \"Error while parsing %s\\n\" )^\n" n;
  Printf.fprintf oc "  Printf.sprintf \"Expected one of \"^\n";
  begin
    match expected with
      [] -> Printf.fprintf oc "Printf.sprintf \"{}\" ^\n"
    | a::r -> 
      List.iteri (fun i t ->
          Printf.fprintf oc "Printf.sprintf \"%s %%s\" (string_of_symbol default_%s)^\n" (if i = 0 then "{" else ",") t;
        ) (a::r);
      Printf.fprintf oc "Printf.sprintf \"}\" ^ \n"
  end;
  Printf.fprintf oc "  Printf.sprintf \" but got '%%s' instead.\\n\" got\n";
  Printf.fprintf oc "  )";
  Printf.fprintf oc "\n  end\n\n"

let make_parser  (table: string*string -> lltype list)
    (toks,nts,rules,mlcode)
    (typ: (tokent * string) list)
    oc () =
  Stdlib.Option.iter (fun mlcode -> Printf.fprintf oc "\n\n%s\n\n" mlcode) mlcode;
  List.iter (fun t ->
      begin match List.assoc_opt t typ with
        | Some ty ->
          begin
            Printf.fprintf oc "let is_%s = function \n" t;
            Printf.fprintf oc " | %s _ -> true\n" t;
            Printf.fprintf oc " | _ -> false\n";

            Printf.fprintf oc "let default_%s = %s %s\n" t t
                       (match ty with
                          "string" -> "\"\""
                        | "int" -> "0"
                        | "bool" -> "false"
                        | _ -> failwith (Printf.sprintf "Don't know how to generate a default value of type %s" ty)
                )
          end
        | None -> begin
            Printf.fprintf oc "let is_%s = function \n" t;
            Printf.fprintf oc " | %s -> true\n" t;
            Printf.fprintf oc " | _ -> false\n";
            Printf.fprintf oc "let default_%s = %s\n" t t
          end
      end;
    ) toks;
  List.iter (fun t ->
      Printf.fprintf oc "let eat_%s = function \n" t;
      begin match List.assoc_opt t typ with
        | Some _ -> Printf.fprintf oc "| (%s(x),_) :: rtokens -> OK (x, rtokens)\n" t
        | None -> Printf.fprintf oc "| (%s,_) :: rtokens -> OK ((), rtokens)\n" t
      end;
      Printf.fprintf oc "|   (x,Some pos) :: _ -> Error (Printf.sprintf \"At position %%s, expected %%s, got %%s.\\n\"";
      Printf.fprintf oc "    (string_of_position pos)";
      Printf.fprintf oc "    (string_of_symbol default_%s)" t;
      Printf.fprintf oc "    (string_of_symbol x))";
      Printf.fprintf oc "    | (x,None) :: _ -> Error (Printf.sprintf \"Expected %%s, got %%s.\\n\"";
      Printf.fprintf oc "    (string_of_symbol default_%s)" t;
      Printf.fprintf oc "    (string_of_symbol x))";
      Printf.fprintf oc "    | _ -> Error  (Printf.sprintf \"Expected %%s, got EOF.\\n\" (string_of_symbol default_%s))\n" t;

    ) toks;
  Printf.fprintf oc "let rec ____unused = () \n";
  List.iter (fun n -> make_nt table (toks,nts,rules) oc n ()) nts

let nts_ordered start (toks,nts,rules) =
  let nts =
    let rec aux acc nt =
      if List.mem nt acc then acc
      else let acc = nt::acc in
        let rules : (string list * string) list =
          filter_map (fun {rule_nt; rule_prods; rule_action} ->
              if rule_nt = nt
              then Some (rule_prods, match rule_action with Some a -> a | None -> "")
              else None) rules
        in
        List.fold_left (fun acc (rule,act) ->
            List.fold_left (fun acc tokornt ->
                if List.mem tokornt toks then acc else aux acc tokornt
              ) acc rule
          ) acc rules
    in List.rev (aux [] start)
  in
  let rules =
    List.concat (List.map (fun nt -> List.filter (fun r -> r.rule_nt = nt) rules) nts)
  in (nts,rules)

let _ =
  let grammar_file = ref None in
  let table_file = ref None in
  let parser_ml_file = ref None in

  Arg.parse
    [("-g", Arg.String (fun s -> grammar_file := Some s), "Input grammar file (.g)");
     ("-t", Arg.String (fun s -> table_file := Some s), "Where to output tables (.html)");
     ("-pml", Arg.String (fun s -> parser_ml_file := Some s), "Where to output the parser code (.ml)");
    ] print_endline "Usage: ";
  match !grammar_file with
  | None -> failwith "Please specify a grammar file using '-g <grammar.g>'"
  | Some gramfile ->
    let gram, axiom = parse_grammar gramfile in
    let (toks, nts, rules, mlcode) =
      (gram.tokens, gram.nonterms, gram.rules, gram.mlcode) in
    let toks = List.map fst gram.tokens in
    let (nts, rules) = nts_ordered axiom (toks,nts,rules) in
    iter_nullnt (toks, nts, rules) ();
    iter_first (toks, nts, rules) ();
    iter_follownt (toks, nts, rules) ();
    fill_lltable (toks, nts, rules) ();
    (match !table_file with
     | Some tfile -> let oc = open_out tfile in
       print_html (toks, nts, rules) (Format.formatter_of_out_channel oc) ();
       close_out oc
     | None -> ());
    (match !parser_ml_file with
     | None -> Printf.fprintf stderr "Please specify where I should write the generated parser code using '-pml <generated_parser.ml>'"
     | Some mlfile ->
       let oc = open_out mlfile in
       make_parser (fun (n,t) -> hashget_def lltable (n,t) [])
         (toks, nts, rules, mlcode)
         (List.filter_map (fun (t,o) ->
              match o with
              | None -> None
              | Some typ -> Some (t,typ)
            ) gram.tokens)
         oc ();
       close_out oc
    )