Commit 8819df69 authored by Armillon Damien's avatar Armillon Damien
Browse files

work mokay

parent 8a87987a
......@@ -69,35 +69,35 @@ let list_regexp : (regexp * (string -> token option)) list =
(keyword_regexp "while", fun s -> Some (SYM_WHILE));
(keyword_regexp "int", fun s -> Some (SYM_INT));
(* begin TODO *)
(Eps, fun s -> Some (SYM_VOID));
(Eps, fun s -> Some (SYM_CHAR));
(Eps, fun s -> Some (SYM_IF));
(Eps, fun s -> Some (SYM_ELSE));
(Eps, fun s -> Some (SYM_RETURN));
(Eps, fun s -> Some (SYM_PRINT));
(Eps, fun s -> Some (SYM_STRUCT));
(Eps, fun s -> Some (SYM_POINT));
(Eps, fun s -> Some (SYM_PLUS));
(Eps, fun s -> Some (SYM_MINUS));
(Eps, fun s -> Some (SYM_ASTERISK));
(Eps, fun s -> Some (SYM_DIV));
(Eps, fun s -> Some (SYM_MOD));
(Eps, fun s -> Some (SYM_LBRACE));
(Eps, fun s -> Some (SYM_RBRACE));
(Eps, fun s -> Some (SYM_LBRACKET));
(Eps, fun s -> Some (SYM_RBRACKET));
(Eps, fun s -> Some (SYM_LPARENTHESIS));
(Eps, fun s -> Some (SYM_RPARENTHESIS));
(Eps, fun s -> Some (SYM_SEMICOLON));
(Eps, fun s -> Some (SYM_COMMA));
(Eps, fun s -> Some (SYM_ASSIGN));
(Eps, fun s -> Some (SYM_EQUALITY));
(Eps, fun s -> Some (SYM_NOTEQ));
(Eps, fun s -> Some (SYM_LT));
(Eps, fun s -> Some (SYM_GT));
(Eps, fun s -> Some (SYM_LEQ));
(Eps, fun s -> Some (SYM_GEQ));
(Eps, fun s -> Some (SYM_IDENTIFIER s));
(keyword_regexp "void", fun s -> Some (SYM_VOID));
(keyword_regexp "char", fun s -> Some (SYM_CHAR));
(keyword_regexp "if", fun s -> Some (SYM_IF));
(keyword_regexp "else", fun s -> Some (SYM_ELSE));
(keyword_regexp "return", fun s -> Some (SYM_RETURN));
(keyword_regexp "print", fun s -> Some (SYM_PRINT));
(keyword_regexp "struct", fun s -> Some (SYM_STRUCT));
(keyword_regexp ".", fun s -> Some (SYM_POINT));
(keyword_regexp "+", fun s -> Some (SYM_PLUS));
(keyword_regexp "-", fun s -> Some (SYM_MINUS));
(keyword_regexp "*", fun s -> Some (SYM_ASTERISK));
(keyword_regexp "/", fun s -> Some (SYM_DIV));
(keyword_regexp "%", fun s -> Some (SYM_MOD));
(keyword_regexp "{", fun s -> Some (SYM_LBRACE));
(keyword_regexp "}", fun s -> Some (SYM_RBRACE));
(keyword_regexp "[", fun s -> Some (SYM_LBRACKET));
(keyword_regexp "]", fun s -> Some (SYM_RBRACKET));
(keyword_regexp "(", fun s -> Some (SYM_LPARENTHESIS));
(keyword_regexp ")", fun s -> Some (SYM_RPARENTHESIS));
(keyword_regexp ";", fun s -> Some (SYM_SEMICOLON));
(keyword_regexp ",", fun s -> Some (SYM_COMMA));
(keyword_regexp "=", fun s -> Some (SYM_ASSIGN));
(keyword_regexp "==", fun s -> Some (SYM_EQUALITY));
(keyword_regexp "!=", fun s -> Some (SYM_NOTEQ));
(keyword_regexp "<", fun s -> Some (SYM_LT));
(keyword_regexp ">", fun s -> Some (SYM_GT));
(keyword_regexp "<=", fun s -> Some (SYM_LEQ));
(keyword_regexp ">=", fun s -> Some (SYM_GEQ));
(Cat (letter_regexp,Star(identifier_material)), fun s -> Some (SYM_IDENTIFIER s));
(* end TODO *)
(Cat(keyword_regexp "//",
Cat(Star (char_range (List.filter (fun c -> c <> '\n') alphabet)),
......@@ -144,5 +144,4 @@ let list_regexp : (regexp * (string -> token option)) list =
(char_range (char_list_of_string " \t\n"), fun s -> None);
(plus digit_regexp, fun s -> Some (SYM_INTEGER (int_of_string s)));
(Eps, fun s -> Some (SYM_EOF))
]
]
\ No newline at end of file
......@@ -45,19 +45,41 @@ let empty_nfa =
(* Concaténation de NFAs. *)
let cat_nfa n1 n2 =
(* TODO *)
empty_nfa
let n1_final_state = List.map (fst) n1.nfa_final in
let new_nfa_step = fun q ->
if List.mem q n2.nfa_states then n2.nfa_step q
else if List.mem q n1_final_state then List.map (fun q' -> (None,q')) n2.nfa_initial @ n1.nfa_step q
else n1.nfa_step q in
{
nfa_states = n1.nfa_states @ n2.nfa_states;
nfa_initial = n1.nfa_initial;
nfa_final = n2.nfa_final;
nfa_step = new_nfa_step;
}
(* Alternatives de NFAs *)
let alt_nfa n1 n2 =
(* TODO *)
empty_nfa
{
nfa_states = n1.nfa_states @ n2.nfa_states;
nfa_initial = n1.nfa_initial @ n2.nfa_initial;
nfa_final = n1.nfa_final @ n2.nfa_final;
nfa_step = fun q ->
if List.mem q n1.nfa_states then n1.nfa_step q
else n2.nfa_step q
}
(* Répétition de NFAs *)
(* t est de type [string -> token option] *)
let star_nfa n t =
(* TODO *)
empty_nfa
let final_states = List.map (fst) n.nfa_final in
{
nfa_states = n.nfa_states;
nfa_initial = n.nfa_initial @ final_states;
nfa_final = n.nfa_final (* List.map (fun q -> (q,t)) final_states *);
nfa_step = fun q -> if List.mem q final_states
then List.map (fun q' -> (None,q')) n.nfa_initial @ n.nfa_step q
else n.nfa_step q;
}
(* [nfa_of_regexp r freshstate t] construit un NFA qui reconnaît le même langage
......@@ -77,8 +99,14 @@ let rec nfa_of_regexp r freshstate t =
nfa_final = [freshstate + 1, t];
nfa_step = fun q -> if q = freshstate then [(Some c, freshstate + 1)] else []
}, freshstate + 2
(* TODO *)
| _ -> empty_nfa, freshstate
| Alt (r1, r2) -> let (nfa1, freshstate1) = nfa_of_regexp r1 freshstate t in
let (nfa2, freshstate2) = nfa_of_regexp r2 freshstate1 t in
(alt_nfa nfa1 nfa2, freshstate2)
| Cat (r1, r2) -> let (nfa1, freshstate1) = nfa_of_regexp r1 freshstate t in
let (nfa2, freshstate2) = nfa_of_regexp r2 freshstate1 t in
(cat_nfa nfa1 nfa2, freshstate2)
| Star r -> let (nfa1, freshstate1) = nfa_of_regexp r freshstate t in
(star_nfa nfa1 t , freshstate1)
(* Deterministic Finite Automaton (DFA) *)
......@@ -119,21 +147,24 @@ let epsilon_closure (n: nfa) (s: nfa_state) : nfa_state set =
(* La fonction [traversal visited s] effectue un parcours de l'automate en
partant de l'état [s], et en suivant uniquement les epsilon-transitions. *)
let rec traversal (visited: nfa_state set) (s: nfa_state) : nfa_state set =
(* TODO *)
visited
let new_visited = Set.add s visited in
let eps_transition = List.filter_map (
fun (step, state) -> if (step = None && not (Set.mem state visited)) then Some state else None
) (n.nfa_step s) in
List.fold_left (
fun accumulator transition -> Set.union accumulator (traversal accumulator transition)
) new_visited eps_transition
in
traversal Set.empty s
(* [epsilon_closure_set n ls] calcule l'union des epsilon-fermeture de chacun
des états du NFA [n] dans l'ensemble [ls]. *)
let epsilon_closure_set (n: nfa) (ls: nfa_state set) : nfa_state set =
(* TODO *)
ls
Set.fold (fun state acc-> Set.union acc (epsilon_closure n state)) ls Set.empty
(* [dfa_initial_state n] calcule l'état initial de l'automate déterminisé. *)
let dfa_initial_state (n: nfa) : dfa_state =
(* TODO *)
Set.empty
epsilon_closure_set n (Set.of_list n.nfa_initial)
(* Construction de la table de transitions de l'automate DFA. *)
......@@ -179,6 +210,7 @@ let assoc_merge_vals (l : ('a * 'b) list) : ('a * 'b set) list =
| None -> (k, Set.singleton v)::acc
| Some vl -> (k, Set.add v vl)::List.remove_assoc k acc
) [] l
let print_set = fun s -> Set.iter (Printf.printf "%d ") s; Printf.printf("\n")
let rec build_dfa_table (table: (dfa_state, (char * dfa_state) list) Hashtbl.t)
(n: nfa)
......@@ -189,8 +221,14 @@ let rec build_dfa_table (table: (dfa_state, (char * dfa_state) list) Hashtbl.t)
(* [transitions] contient les transitions du DFA construites
* à partir des transitions du NFA comme décrit auparavant *)
let transitions : (char * dfa_state) list =
(* TODO *)
[]
let nfa_transitions = List.flatten (
Set.elements (
Set.map (n.nfa_step) ds
)) in
let non_eps_transitions = assoc_throw_none nfa_transitions in
let distributed_transitions = assoc_distribute_key non_eps_transitions in
let merged_transitions = assoc_merge_vals distributed_transitions in
List.map (fun (c,states) -> (c,epsilon_closure_set n states)) merged_transitions
in
Hashtbl.replace table ds transitions;
List.iter (build_dfa_table table n) (List.map snd transitions)
......@@ -225,24 +263,53 @@ let priority t =
(* [min_priority l] renvoie le token de [l] qui a la plus petite priorité, ou
[None] si la liste [l] est vide. *)
let min_priority (l: token list) : token option =
(* TODO *)
None
match l with
| [] -> None
| q1::r -> Some (List.fold_left
(fun current_best current -> if priority current > priority current_best then current_best else current)
q1 r)
(* [dfa_final_states n dfa_states] renvoie la liste des états finaux du DFA,
(* [dfa_final_states n dfa_states] renvoie la liste des états finaux du DFA,
accompagnés du token qu'ils reconnaissent. *)
let dfa_final_states (n: nfa) (dfa_states: dfa_state list) :
(dfa_state * (string -> token option)) list =
(* TODO *)
[]
let extended_final = Hashtbl.create (List.length n.nfa_final) in
List.iter (fun (final,_) -> Hashtbl.add extended_final final Set.empty) n.nfa_final;
Set.iter (fun state ->
let closure = epsilon_closure_set n (Set.singleton state) in
let final_in_closure = List.filter_map (fun (final,_) -> let bite = Set.mem final closure in if bite then Printf.printf "%d et %d\n" final state; if bite then Some final else None) n.nfa_final in
List.iter (fun final -> Hashtbl.replace extended_final final (Set.union (Set.singleton state) (Hashtbl.find extended_final final) )) final_in_closure
) (Set.of_list n.nfa_states);
(* print_set (Hashtbl.find extended_final 20); *)
List.map (fun state_set ->
let final_states =
Set.filter_map (
fun state -> List.find_opt (fun nfa_final -> fst nfa_final = state) n.nfa_final
) state_set in
let (final_sates_names,final_sates_functions) = Set.fold (
fun (name,f) (acc_name,acc_fun) -> (Set.union (Set.singleton name) acc_name ,f::acc_fun)
) final_states (Set.empty,[]) in
let return_function = fun s -> min_priority (List.filter_map (fun f -> f s) final_sates_functions) in
(* let a = (Set.fold (fun final acc -> Set.union (Hashtbl.find extended_final final) acc ) final_sates_names Set.empty) in *)
(* avant juste final_sates_names (avec epsilon_closure_set c'était mieux) *)
(* print_set final_sates_names;
print_set a;
Printf.printf "Ouais :\n"; *)
(epsilon_closure_set n final_sates_names,return_function)
) dfa_states
(* Construction de la relation de transition du DFA. *)
let (let*) = Option.bind
(* [make_dfa_step table] construit la fonction de transition du DFA, où [table]
est la table générée par [build_dfa_table], définie ci-dessus. *)
let make_dfa_step (table: (dfa_state, (char * dfa_state) list) Hashtbl.t) =
fun (q: dfa_state) (a: char) ->
(* TODO *)
None
fun (q: dfa_state) (a: char) ->
let* possible_steps = Hashtbl.find_option table q in
let* (_,next_step) = List.find_opt (fun step -> fst step = a) possible_steps in
Some next_step
(* Finalement, on assemble tous ces morceaux pour construire l'automate. La
fonction [dfa_of_nfa n] vous est grâcieusement offerte. *)
......@@ -304,13 +371,26 @@ type lexer_result =
bien d'une erreur lexicale).
*)
let to_lexer_result (d:dfa) (q: dfa_state) (w: char list): lexer_result =
let result = List.find_opt (fun (state,_) -> print_set state; Set.equal q state) d.dfa_final in
match result with
| None -> LRerror
| Some (_,f) -> Printf.printf "a"; match f (string_of_char_list w) with
| None -> LRskip
| Some token -> LRtoken token
let tokenize_one (d : dfa) (w: char list) : lexer_result * char list =
let rec recognize (q: dfa_state) (w: char list)
(current_token: char list) (last_accepted: lexer_result * char list)
(current_word: char list) (last_accepted: lexer_result * char list)
: lexer_result * char list =
(* TODO *)
last_accepted
match w with
| [] -> last_accepted
| lettre::r ->
match d.dfa_step q lettre with
| None -> last_accepted
| Some q' -> match to_lexer_result d q' w with
| LRerror -> recognize q' r (current_word @ [lettre]) last_accepted
| lr -> recognize q' r (current_word @ [lettre]) (lr, r)
in
recognize d.dfa_initial w [] (LRerror, w)
......@@ -460,6 +540,9 @@ let dfa_of_list_regexp l =
let tokenize_list_regexp l s =
let d = dfa_of_list_regexp l in
let oc = open_out "/tmp/dfa.dot" in
dfa_to_dot oc d alphabet;
close_out oc;
let tokens, leftover = tokenize_all d (char_list_of_string s) in
if leftover <> []
then Error (Printf.sprintf "Lexer failed to recognize string starting with '%s'\n"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment