Commit f756b069 authored by Armillon Damien

Lexer OK

parent 8819df69
@@ -108,7 +108,7 @@ let list_regexp : (regexp * (string -> token option)) list =
         Star (Alt (
             char_range (List.filter (fun c -> c <> '*') alphabet),
             Cat (Star(char_regexp '*'),
-                plus(char_range (List.filter (fun c -> c <> '/' && c <> '*') alphabet)))
+                plus(char_range (List.filter (fun c -> c <> '/') alphabet)))
           )),
         keyword_regexp "*/")),
     fun s -> None);
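
A note on the hunk above: the body of a /* ... */ comment is built as "any non-'*' character, or a run of '*' followed by at least one more character", and the new version only forbids '/' after the stars, so text ending in '*' (a comment closed by "**/") is now covered before keyword_regexp "*/" is appended. The sketch below only illustrates that shape; the re type and the small Brzozowski-derivative matcher are hypothetical stand-ins, not the project's char_range/plus/regexp API.

(* Hypothetical miniature regexp type and matcher (Brzozowski derivatives),
   used only to illustrate the comment-body shape from the diff above. *)
type re =
  | Eps                        (* empty word *)
  | Charset of (char -> bool)  (* one character satisfying the predicate *)
  | Cat of re * re
  | Alt of re * re
  | Star of re

let fail = Charset (fun _ -> false)   (* matches nothing *)

let rec nullable = function
  | Eps -> true
  | Charset _ -> false
  | Cat (a, b) -> nullable a && nullable b
  | Alt (a, b) -> nullable a || nullable b
  | Star _ -> true

(* Derivative of a regexp with respect to character [c]. *)
let rec deriv c = function
  | Eps -> fail
  | Charset p -> if p c then Eps else fail
  | Cat (a, b) ->
      let d = Cat (deriv c a, b) in
      if nullable a then Alt (d, deriv c b) else d
  | Alt (a, b) -> Alt (deriv c a, deriv c b)
  | Star a -> Cat (deriv c a, Star a)

let matches r s =
  let r = ref r in
  String.iter (fun c -> r := deriv c !r) s;
  nullable !r

let plus r = Cat (r, Star r)

(* Comment body as in the new line of the diff: any non-'*' character, or a
   run of '*' followed by at least one character that is not '/'. *)
let comment_body =
  Star (Alt (Charset (fun c -> c <> '*'),
             Cat (Star (Charset (( = ) '*')),
                  plus (Charset (fun c -> c <> '/')))))

let () =
  (* both the old and the new character class accept ordinary comment text *)
  assert (matches comment_body "some text * with stars");
  (* the relaxed class also accepts text ending in '*', so a closing "**/"
     can be completed by the keyword_regexp "*/" appended afterwards *)
  assert (matches comment_body "text ending in a star *")

Note that the relaxed class also lets the body run across an interior "*/"; how that interacts with the lexer's longest-match rule is outside this sketch.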
......
@@ -42,17 +42,18 @@ let empty_nfa =
     nfa_final = [];
     nfa_step = fun q -> [];
   }
+let print_set = fun s -> Set.iter (Printf.printf "%d ") s; Printf.printf("\n")
 (* Concatenation of NFAs. *)
 let cat_nfa n1 n2 =
   let n1_final_state = List.map (fst) n1.nfa_final in
   let new_nfa_step = fun q ->
-    if List.mem q n2.nfa_states then n2.nfa_step q
-    else if List.mem q n1_final_state then List.map (fun q' -> (None,q')) n2.nfa_initial @ n1.nfa_step q
+    if List.mem q n1_final_state then List.map (fun q' -> (None,q')) n2.nfa_initial @ n1.nfa_step q
+    else if List.mem q n2.nfa_states then n2.nfa_step q
     else n1.nfa_step q in
   {
-    nfa_states = n1.nfa_states @ n2.nfa_states;
-    nfa_initial = n1.nfa_initial;
+    nfa_states = n1.nfa_states @ n2.nfa_states(*(List.filter (fun state -> not (List.mem state n2.nfa_initial)) n2.nfa_states)*);
+    nfa_initial = n1.nfa_initial;
     nfa_final = n2.nfa_final;
     nfa_step = new_nfa_step;
   }
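
For reference, cat_nfa links every accepting state of n1 to every initial state of n2 with an epsilon transition, keeps n1's entry points and n2's exits; testing n1's final states before membership in n2.nfa_states, as the new code does, only changes behaviour when a state number is shared between the two automata. Below is a small self-contained sketch of the same construction, with a hypothetical mini_nfa record mirroring the field names above (int states, None standing for epsilon), plus a tiny check of the epsilon edge.

(* Hypothetical miniature of the nfa record used in the diff above. *)
type 'tok mini_nfa = {
  nfa_states  : int list;
  nfa_initial : int list;
  nfa_final   : (int * 'tok) list;                 (* accepting state, token info *)
  nfa_step    : int -> (char option * int) list;   (* None = epsilon edge *)
}

(* Concatenation: epsilon edges from n1's accepting states to n2's initial
   states; n1 keeps the entry points, n2 keeps the exits. *)
let cat_mini n1 n2 =
  let n1_finals = List.map fst n1.nfa_final in
  { nfa_states  = n1.nfa_states @ n2.nfa_states;
    nfa_initial = n1.nfa_initial;
    nfa_final   = n2.nfa_final;
    nfa_step    = (fun q ->
      if List.mem q n1_finals
      then List.map (fun q' -> (None, q')) n2.nfa_initial @ n1.nfa_step q
      else if List.mem q n2.nfa_states then n2.nfa_step q
      else n1.nfa_step q) }

let () =
  (* 1 --'a'--> 2 concatenated with 3 --'b'--> 4 gives an epsilon edge 2 --> 3 *)
  let single lo hi c tok =
    { nfa_states = [lo; hi]; nfa_initial = [lo]; nfa_final = [(hi, tok)];
      nfa_step = (fun q -> if q = lo then [(Some c, hi)] else []) } in
  let n = cat_mini (single 1 2 'a' "A") (single 3 4 'b' "B") in
  assert (List.mem (None, 3) (n.nfa_step 2));
  assert (n.nfa_final = [(4, "B")])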
@@ -74,8 +75,8 @@ let star_nfa n t =
   let final_states = List.map (fst) n.nfa_final in
   {
     nfa_states = n.nfa_states;
-    nfa_initial = n.nfa_initial @ final_states;
-    nfa_final = n.nfa_final (* List.map (fun q -> (q,t)) final_states *);
+    nfa_initial = final_states;
+    nfa_final = List.map (fun q -> (q,t)) final_states ;
     nfa_step = fun q -> if List.mem q final_states
       then List.map (fun q' -> (None,q')) n.nfa_initial @ n.nfa_step q
       else n.nfa_step q;
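
In the new star_nfa, the accepting states of n become both the entry and the exit points of the starred automaton (so zero repetitions are accepted), they carry the token t, and they gain epsilon edges back to n's original initial states for further rounds. A self-contained sketch, again with a hypothetical mini_nfa record mirroring the field names above (an assumption of this snippet):

type 'tok mini_nfa = {
  nfa_states  : int list;
  nfa_initial : int list;
  nfa_final   : (int * 'tok) list;
  nfa_step    : int -> (char option * int) list;   (* None = epsilon edge *)
}

let star_mini n t =
  let finals = List.map fst n.nfa_final in
  { nfa_states  = n.nfa_states;
    nfa_initial = finals;                             (* entry = old accepting states *)
    nfa_final   = List.map (fun q -> (q, t)) finals;  (* ... which now carry token t *)
    nfa_step    = (fun q ->
      if List.mem q finals
      then List.map (fun q' -> (None, q')) n.nfa_initial @ n.nfa_step q
      else n.nfa_step q) }

let () =
  (* 1 --'a'--> 2, starred: state 2 is both initial and accepting, with an
     epsilon edge back to state 1 for further repetitions *)
  let n = { nfa_states = [1; 2]; nfa_initial = [1]; nfa_final = [(2, "A")];
            nfa_step = (fun q -> if q = 1 then [(Some 'a', 2)] else []) } in
  let s = star_mini n "A*" in
  assert (s.nfa_initial = [2]);
  assert (List.mem (None, 1) (s.nfa_step 2))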
@@ -210,7 +211,6 @@ let assoc_merge_vals (l : ('a * 'b) list) : ('a * 'b set) list =
       | None -> (k, Set.singleton v)::acc
       | Some vl -> (k, Set.add v vl)::List.remove_assoc k acc
     ) [] l
-let print_set = fun s -> Set.iter (Printf.printf "%d ") s; Printf.printf("\n")
 let rec build_dfa_table (table: (dfa_state, (char * dfa_state) list) Hashtbl.t)
     (n: nfa)
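
assoc_merge_vals groups an association list by key, collecting every value bound to the same key into one set, e.g. [(1,'a'); (2,'b'); (1,'c')] becomes [(1, {a,c}); (2, {b})]. A self-contained sketch of the same idea, using a plain list of values instead of the project's Set module (an assumption made only to keep the snippet standalone):

let assoc_merge_vals_sketch (l : ('a * 'b) list) : ('a * 'b list) list =
  List.fold_left (fun acc (k, v) ->
      match List.assoc_opt k acc with
      | None -> (k, [v]) :: acc
      | Some vl -> (k, v :: vl) :: List.remove_assoc k acc)
    [] l

let () =
  (* every value ends up grouped under its key: 1 -> [c; a], 2 -> [b] *)
  assert (List.assoc 1 (assoc_merge_vals_sketch [(1, 'a'); (2, 'b'); (1, 'c')]) = ['c'; 'a'])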
@@ -273,29 +273,18 @@ let min_priority (l: token list) : token option =
    accompanied by the token they recognize. *)
 let dfa_final_states (n: nfa) (dfa_states: dfa_state list) :
   (dfa_state * (string -> token option)) list =
-  let extended_final = Hashtbl.create (List.length n.nfa_final) in
-  List.iter (fun (final,_) -> Hashtbl.add extended_final final Set.empty) n.nfa_final;
-  Set.iter (fun state ->
-      let closure = epsilon_closure_set n (Set.singleton state) in
-      let final_in_closure = List.filter_map (fun (final,_) -> let bite = Set.mem final closure in if bite then Printf.printf "%d et %d\n" final state; if bite then Some final else None) n.nfa_final in
-      List.iter (fun final -> Hashtbl.replace extended_final final (Set.union (Set.singleton state) (Hashtbl.find extended_final final) )) final_in_closure
-    ) (Set.of_list n.nfa_states);
-  (* print_set (Hashtbl.find extended_final 20); *)
-  List.map (fun state_set ->
+  List.filter_map (fun state_set ->
       let final_states =
         Set.filter_map (
           fun state -> List.find_opt (fun nfa_final -> fst nfa_final = state) n.nfa_final
         ) state_set in
       if not (Set.is_empty final_states) then
         let (final_sates_names,final_sates_functions) = Set.fold (
             fun (name,f) (acc_name,acc_fun) -> (Set.union (Set.singleton name) acc_name ,f::acc_fun)
           ) final_states (Set.empty,[]) in
         let return_function = fun s -> min_priority (List.filter_map (fun f -> f s) final_sates_functions) in
-        (* let a = (Set.fold (fun final acc -> Set.union (Hashtbl.find extended_final final) acc ) final_sates_names Set.empty) in *)
-        (* previously just final_sates_names (with epsilon_closure_set it was better) *)
-        (* print_set final_sates_names;
-           print_set a;
-           Printf.printf "Ouais :\n"; *)
-        (epsilon_closure_set n final_sates_names,return_function)
+        Some (state_set,return_function)
       else None
     ) dfa_states
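
After this change, a DFA state (a set of NFA states) is accepting exactly when it contains an accepting NFA state, and its token builder tries the builder of every such state and keeps the best candidate via min_priority; the removed epsilon-closure bookkeeping is presumably redundant once the subset construction itself works on epsilon-closed sets. A minimal sketch of that rule, with dfa_state represented as a plain int list and min_priority passed in as a parameter (both assumptions of this snippet, not the project's types):

let dfa_finals_sketch
    (nfa_final : (int * (string -> 'tok option)) list)
    (min_priority : 'tok list -> 'tok option)
    (dfa_states : int list list) : (int list * (string -> 'tok option)) list =
  List.filter_map (fun state_set ->
      (* builders of every accepting NFA state contained in this DFA state *)
      let builders =
        List.filter_map
          (fun (q, f) -> if List.mem q state_set then Some f else None)
          nfa_final
      in
      match builders with
      | [] -> None     (* no accepting NFA state inside: not a final DFA state *)
      | _ ->
        Some (state_set,
              fun s -> min_priority (List.filter_map (fun f -> f s) builders)))
    dfa_states

let () =
  (* with finals 2 and 4, the DFA state {1;2;4} accepts and consults both
     builders, while {1;3} is dropped *)
  let finals = [ (2, fun s -> Some ("ID:" ^ s)); (4, fun _ -> Some "KW") ] in
  let first_or_none = function [] -> None | x :: _ -> Some x in
  match dfa_finals_sketch finals first_or_none [ [1; 2; 4]; [1; 3] ] with
  | [ (accepting, build) ] ->
    assert (accepting = [1; 2; 4]);
    assert (build "foo" = Some "ID:foo")
  | _ -> assert false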
@@ -372,13 +361,14 @@
  *)
 let to_lexer_result (d:dfa) (q: dfa_state) (w: char list): lexer_result =
-  let result = List.find_opt (fun (state,_) -> print_set state; Set.equal q state) d.dfa_final in
+  let result = List.find_opt (fun (state,_) -> Set.equal q state) d.dfa_final in
   match result with
   | None -> LRerror
-  | Some (_,f) -> Printf.printf "a"; match f (string_of_char_list w) with
+  | Some (_,f) -> match f (string_of_char_list w) with
     | None -> LRskip
     | Some token -> LRtoken token

 let tokenize_one (d : dfa) (w: char list) : lexer_result * char list =
   let rec recognize (q: dfa_state) (w: char list)
       (current_word: char list) (last_accepted: lexer_result * char list)
@@ -387,12 +377,12 @@ let tokenize_one (d : dfa) (w: char list) : lexer_result * char list =
     | [] -> last_accepted
     | lettre::r ->
       match d.dfa_step q lettre with
-      | None -> last_accepted
-      | Some q' -> match to_lexer_result d q' w with
-        | LRerror -> recognize q' r (current_word @ [lettre]) last_accepted
-        | lr -> recognize q' r (current_word @ [lettre]) (lr, r)
+      | None -> (match (fst last_accepted) with | LRtoken(t) -> (match t with | Symbols.SYM_DIV -> Printf.printf "%s\n\n" (string_of_char_list w); last_accepted | _ -> last_accepted) | _ -> last_accepted)
+      | Some q' -> let new_current_word = current_word @ [lettre] in match to_lexer_result d q' new_current_word with
+        | LRerror -> recognize q' r (new_current_word) last_accepted
+        | lr -> recognize q' r (new_current_word) (lr, r)
   in
-  recognize d.dfa_initial w [] (LRerror, w)
+  recognize d.dfa_initial w [] (LRtoken (Symbols.SYM_EOF), w)
 (* The function [tokenize_all d w] repeats the application of [tokenize_one] as long as
    the end of the file (token [SYM_EOF]) has not been reached. Once again,
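
The recognize loop above implements maximal munch: it walks the DFA, remembers the last accepting prefix, and falls back to it when the automaton gets stuck; the key fix is that acceptance is now tested on the word read so far (new_current_word) rather than on the whole remaining input w. A minimal self-contained sketch of that loop, collapsing the LRtoken/LRskip/LRerror distinction into a plain option and using hypothetical step/accept functions (names and types are assumptions of this snippet):

let tokenize_one_sketch
    (step : int -> char -> int option)            (* DFA transition *)
    (accept : int -> char list -> 'res option)    (* plays the role of to_lexer_result *)
    (initial : int) (input : char list) : ('res * char list) option =
  let rec recognize q rest current_word last_accepted =
    match rest with
    | [] -> last_accepted
    | c :: r ->
      (match step q c with
       | None -> last_accepted                     (* stuck: keep the best prefix so far *)
       | Some q' ->
         let current_word = current_word @ [c] in
         (* acceptance is tested on the word read so far, as in the fixed code *)
         (match accept q' current_word with
          | None -> recognize q' r current_word last_accepted
          | Some res -> recognize q' r current_word (Some (res, r))))
  in
  recognize initial input [] None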
@@ -536,6 +526,9 @@ let nfa_of_list_regexp l =
 let dfa_of_list_regexp l =
   let n = nfa_of_list_regexp l in
+  let oc = open_out "/tmp/nfa.dot" in
+  nfa_to_dot oc n;
+  close_out oc;
   dfa_of_nfa n

 let tokenize_list_regexp l s =
......
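For debugging, the NFA now dumped to /tmp/nfa.dot can be rendered with Graphviz, e.g. dot -Tpdf /tmp/nfa.dot -o /tmp/nfa.pdf, assuming nfa_to_dot emits standard dot syntax as the file name suggests.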