From 7e8b34aafa97e8ba5fbb486e08d75e9467480571 Mon Sep 17 00:00:00 2001
From: Pierre Wilke <pierre.wilke@centralesupelec.fr>
Date: Fri, 6 Mar 2020 18:16:02 +0100
Subject: [PATCH] Meilleur affichage des automates du lexer.

---
 src/Makefile           |  2 ++
 src/e_regexp.ml        | 24 +++++++++++++-----------
 src/lexer_generator.ml | 17 +++++++++++------
 3 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/src/Makefile b/src/Makefile
index d90147d..2221741 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -17,6 +17,8 @@ $(TG): $(SRC)
 test_lexer: $(SRC)
 	ocamlbuild -use-ocamlfind test_lexer.native
 	./test_lexer.native
+	dot -Tsvg /tmp/dfa.dot -o /tmp/dfa.svg
+	dot -Tsvg /tmp/nfa.dot -o /tmp/nfa.svg
 
 clean:
 	rm -rf _build
diff --git a/src/e_regexp.ml b/src/e_regexp.ml
index 403fa09..a59afbd 100644
--- a/src/e_regexp.ml
+++ b/src/e_regexp.ml
@@ -50,15 +50,18 @@ let rec string_of_regexp r =
                      (string_of_regexp r1) (string_of_regexp r2)
   | Star r -> Printf.sprintf "(%s)*" (string_of_regexp r)
 
+
+let lowercase_letters = "abcdefghijklmnopqrstuvwxyz"
+let uppercase_letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+let digits = "0123456789"
+let other_characters = "?!=<>_ :;,{}()[]^`-+*/%@\n\t\x00.\"\'\\|~#$&"
+let alphabet = char_list_of_string (lowercase_letters ^ uppercase_letters ^ digits ^ other_characters)
+let letter_regexp = char_range (char_list_of_string (uppercase_letters ^ lowercase_letters))
+let digit_regexp = char_range (char_list_of_string digits)
+let identifier_material = char_range (char_list_of_string (uppercase_letters ^ lowercase_letters ^ digits ^ "_"))
+
 (* La liste des expressions régulières permettant d'identifier les tokens du langage E *)
 let list_regexp =
-  let lowercase_letters = "abcdefghijklmnopqrstuvwxyz" in
-  let uppercase_letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" in
-  let digits = "0123456789" in
-  let other_characters = "?!=<>_ ;,{}()[]-+*/%\n\t" in
-  let alphabet = char_list_of_string (lowercase_letters ^ uppercase_letters ^ digits ^ other_characters) in
-  let letter_regexp = char_range (char_list_of_string (uppercase_letters ^ lowercase_letters)) in
-  let digit_regexp = char_range (char_list_of_string digits) in
   let keyword_regexp s = str_regexp (char_list_of_string s) in
   [
     (keyword_regexp "while",    fun s -> Some (SYM_WHILE));
@@ -114,12 +117,13 @@ let list_regexp =
        | exception Invalid_argument _ -> Some (SYM_CHARACTER 'a')
     );
     (Cat (char_regexp '\'', Cat (char_regexp '\\',
-          Cat (char_range (char_list_of_string "\\tn0"),
+          Cat (char_range (char_list_of_string "\\tn0'"),
                char_regexp '\''))),
      fun s -> match String.get s 2 with
          | '\\' -> Some (SYM_CHARACTER '\\')
          | 'n' -> Some (SYM_CHARACTER '\n')
          | 't' -> Some (SYM_CHARACTER '\t')
+         | '\'' -> Some (SYM_CHARACTER '\'')
          | '0' -> Some (SYM_CHARACTER 'a')
          | _ -> None
          | exception _ -> Some (SYM_CHARACTER 'a')
@@ -133,9 +137,7 @@ let list_regexp =
             ),
                char_regexp '"')),
      fun s -> Some (SYM_STRING (Stdlib.Scanf.unescaped (String.slice ~first:1 ~last:(-1) s))));
-    (char_regexp ' ', fun s -> None);
-    (char_regexp '\n', fun s -> None);
-    (char_regexp '\t', fun s -> None);
+    (char_range (char_list_of_string " \t\n"), fun s -> None);
     (plus digit_regexp, fun s -> Some (SYM_INTEGER (int_of_string s)));
     (Eps, fun s -> Some (SYM_EOF))
   ]
diff --git a/src/lexer_generator.ml b/src/lexer_generator.ml
index fe942dc..bd37e84 100644
--- a/src/lexer_generator.ml
+++ b/src/lexer_generator.ml
@@ -338,7 +338,7 @@ let char_list_to_char_ranges s =
     match cl with
     | [] -> (match opt_c with
           None -> l
-        | Some c -> l @ [(c,n)]
+        | Some c -> (c,n)::l
       )
     | c::r -> (match opt_c with
         | None -> recognize_range r l (Some c) 0
@@ -350,12 +350,17 @@ let char_list_to_char_ranges s =
   in
   let l = recognize_range (List.sort Stdlib.compare (List.map Char.code s)) [] None 0 in
   let escape_char c =
-    if c = '"'
-    then "\\\"" else Printf.sprintf "%c" c in
+    if c = '"' then "\\\""
+    else if c = '\\' then "\\\\"
+    else if c = '\x00' then "\\\\0"
+    else if c = '\t' then "\\\\t"
+    else if c = '\n' then "\\\\n"
+    else Printf.sprintf "%c" c in
   List.fold_left (fun acc (c,n) ->
-      if n = 0
-      then Printf.sprintf "%s%s" (escape_char (Char.chr c)) acc
-      else Printf.sprintf "%s-%s%s" (escape_char (Char.chr c))
+      match n with
+      | 0 -> Printf.sprintf "%s%s" (escape_char (Char.chr c)) acc
+      | 1 -> Printf.sprintf "%s%s%s" (escape_char (Char.chr c)) (c + 1 |> Char.chr |> escape_char) acc
+      | _ -> Printf.sprintf "%s-%s%s" (escape_char (Char.chr c))
           (escape_char (Char.chr (c + n))) acc
     ) "" l
 
-- 
GitLab