diff --git a/.gitignore b/.gitignore index 512be27d3196e8ab747a7153009bc0d80be949cf..18688d3d73c77f7fd791dacec3f54b205e563a59 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,7 @@ src/_build alpaga/_build grammar.html src/generated_parser.ml +src/config.ml **/*.native alpaga/ml_parser_generator alpaga/alpaga diff --git a/Sujet.pdf b/Sujet.pdf index 9c9948eab943c7c556b18536d7ab8620e30dba72..ec1cc6c137929c9d2144d2d22239f70c628ab468 100644 Binary files a/Sujet.pdf and b/Sujet.pdf differ diff --git a/src/Makefile b/src/Makefile index 3be2372f51ee4697b1e354d1ff921e7d63177f63..fdeb556f81a55237e477b61c4abb435a5cdb2faf 100644 --- a/src/Makefile +++ b/src/Makefile @@ -13,6 +13,10 @@ all: $(TG) $(TG): $(SRC) ocamlbuild -cflags -warn-error,"+a-26" -use-ocamlfind $(TG) +test_lexer: $(SRC) + ocamlbuild -use-ocamlfind test_lexer.native + ./test_lexer.native + clean: rm -rf _build rm -f config.ml main.native diff --git a/src/e_regexp.ml b/src/e_regexp.ml index 4a43143b98e31754a974dd8f3db3fc1b0850b262..53ad47274840914e55992e7827f4552917a81744 100644 --- a/src/e_regexp.ml +++ b/src/e_regexp.ml @@ -46,7 +46,7 @@ let rec string_of_regexp r = | Charset c -> Printf.sprintf "[%s]" (string_of_char_list (Set.to_list c)) | Alt (r1,r2) -> Printf.sprintf "(%s)|(%s)" (string_of_regexp r1) (string_of_regexp r2) - | Cat (r1,r2) -> Printf.sprintf "(%s)(%s)" + | Cat (r1,r2) -> Printf.sprintf "(%s).(%s)" (string_of_regexp r1) (string_of_regexp r2) | Star r -> Printf.sprintf "(%s)*" (string_of_regexp r) diff --git a/src/lexer_generator.ml b/src/lexer_generator.ml index 00311754535022c355d1cce9c2c7aabef5b556c8..5e4f0d3dbf678dec7d53cb186a1fa0aa7baaf61d 100644 --- a/src/lexer_generator.ml +++ b/src/lexer_generator.ml @@ -332,6 +332,30 @@ let rec tokenize_all (d: dfa) (w: char list) : (token list * char list) = (* Fonctions d'affichage - Utile pour déboguer *) + +let char_list_to_char_ranges s = + let rec recognize_range (cl: int list) l opt_c n = + match cl with + | [] -> (match opt_c with + None -> l + | Some c -> l @ [(c,n)] + ) + | c::r -> (match opt_c with + | None -> recognize_range r l (Some c) 0 + | Some c' -> + if c' + n + 1 = c + then recognize_range r l (Some c') (n + 1) + else recognize_range r ((c',n)::l) (Some c) 0 + ) + in + let l = recognize_range (List.sort Stdlib.compare (List.map Char.code s)) [] None 0 in + List.fold_left (fun acc (c,n) -> + if n = 0 + then Printf.sprintf "%c%s" (Char.chr c) acc + else Printf.sprintf "%c-%c%s" (Char.chr c) (Char.chr (c + n)) acc + ) "" l + + (* Affichage d'un NFA *) let nfa_to_string (n : nfa) : string = Printf.sprintf "===== NFA\nStates : %s\nInitial states : %s\nFinal states : %s\n%s" @@ -348,6 +372,24 @@ let nfa_to_string (n : nfa) : string = ) l) ) n.nfa_states)) +let nfa_to_dot oc (n : nfa) : unit = + Printf.fprintf oc "digraph {\n"; + List.iter (fun n -> Printf.fprintf oc "N%d [shape=\"house\" color=\"red\"]\n" n) (n.nfa_initial); + List.iter (fun (q,t) -> + Printf.fprintf oc "N%d [shape=\"rectangle\", label=\"%s\"]\n" + q (match t "0" with | Some s -> string_of_symbol s | None -> "" )) n.nfa_final; + List.iter (fun q -> + List.iter (fun (cso, q') -> + match cso with + | None -> + Printf.fprintf oc "N%d -> N%d [label=\"[epsilon]\"]\n" q q' + | Some cs -> + Printf.fprintf oc "N%d -> N%d [label=\"[%s]\"]\n" q q' (char_list_to_char_ranges (Set.to_list cs)) + ) (n.nfa_step q); + ) n.nfa_states; + Printf.fprintf oc "}\n" + + (* Affichage d'un DFA *) let dfa_to_string (n : dfa) (alphabet: char list): string = Printf.sprintf "===== DFA\nStates : %s\nInitial state : %s\nFinal states : [%s]\n%s" @@ -373,28 +415,6 @@ let dfa_to_string (n : dfa) (alphabet: char list): string = bien en copiant le code DOT dans un convertisseur en ligne (par exemple : http://proto.informatics.jax.org/prototypes/dot2svg/). *) -let char_list_to_char_ranges s = - let rec recognize_range (cl: int list) l opt_c n = - match cl with - | [] -> (match opt_c with - None -> l - | Some c -> l @ [(c,n)] - ) - | c::r -> (match opt_c with - | None -> recognize_range r l (Some c) 0 - | Some c' -> - if c' + n + 1 = c - then recognize_range r l (Some c') (n + 1) - else recognize_range r ((c',n)::l) (Some c) 0 - ) - in - let l = recognize_range (List.sort Stdlib.compare (List.map Char.code s)) [] None 0 in - List.fold_left (fun acc (c,n) -> - if n = 0 - then Printf.sprintf "%c%s" (Char.chr c) acc - else Printf.sprintf "%c-%c%s" (Char.chr c) (Char.chr (c + n)) acc - ) "" l - let dfa_to_dot oc (n : dfa) (cl: char list): unit = Printf.fprintf oc "digraph {\n"; Printf.fprintf oc "N%s [shape=\"house\" color=\"red\"]\n" (string_of_int_set n.dfa_initial);