diff --git a/src/Makefile b/src/Makefile index d90147d46204cbbc892890ff40cab1ae34baf963..2221741279a2c50bdc4b5903a195d3ad0128036e 100644 --- a/src/Makefile +++ b/src/Makefile @@ -17,6 +17,8 @@ $(TG): $(SRC) test_lexer: $(SRC) ocamlbuild -use-ocamlfind test_lexer.native ./test_lexer.native + dot -Tsvg /tmp/dfa.dot -o /tmp/dfa.svg + dot -Tsvg /tmp/nfa.dot -o /tmp/nfa.svg clean: rm -rf _build diff --git a/src/e_regexp.ml b/src/e_regexp.ml index 403fa09172a452cea444cee746ff0bfc1924c033..a59afbdd9e1ea78d1724d3fad657c8a69efa2695 100644 --- a/src/e_regexp.ml +++ b/src/e_regexp.ml @@ -50,15 +50,18 @@ let rec string_of_regexp r = (string_of_regexp r1) (string_of_regexp r2) | Star r -> Printf.sprintf "(%s)*" (string_of_regexp r) + +let lowercase_letters = "abcdefghijklmnopqrstuvwxyz" +let uppercase_letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +let digits = "0123456789" +let other_characters = "?!=<>_ :;,{}()[]^`-+*/%@\n\t\x00.\"\'\\|~#$&" +let alphabet = char_list_of_string (lowercase_letters ^ uppercase_letters ^ digits ^ other_characters) +let letter_regexp = char_range (char_list_of_string (uppercase_letters ^ lowercase_letters)) +let digit_regexp = char_range (char_list_of_string digits) +let identifier_material = char_range (char_list_of_string (uppercase_letters ^ lowercase_letters ^ digits ^ "_")) + (* La liste des expressions régulières permettant d'identifier les tokens du langage E *) let list_regexp = - let lowercase_letters = "abcdefghijklmnopqrstuvwxyz" in - let uppercase_letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" in - let digits = "0123456789" in - let other_characters = "?!=<>_ ;,{}()[]-+*/%\n\t" in - let alphabet = char_list_of_string (lowercase_letters ^ uppercase_letters ^ digits ^ other_characters) in - let letter_regexp = char_range (char_list_of_string (uppercase_letters ^ lowercase_letters)) in - let digit_regexp = char_range (char_list_of_string digits) in let keyword_regexp s = str_regexp (char_list_of_string s) in [ (keyword_regexp "while", fun s -> Some (SYM_WHILE)); @@ -114,12 +117,13 @@ let list_regexp = | exception Invalid_argument _ -> Some (SYM_CHARACTER 'a') ); (Cat (char_regexp '\'', Cat (char_regexp '\\', - Cat (char_range (char_list_of_string "\\tn0"), + Cat (char_range (char_list_of_string "\\tn0'"), char_regexp '\''))), fun s -> match String.get s 2 with | '\\' -> Some (SYM_CHARACTER '\\') | 'n' -> Some (SYM_CHARACTER '\n') | 't' -> Some (SYM_CHARACTER '\t') + | '\'' -> Some (SYM_CHARACTER '\'') | '0' -> Some (SYM_CHARACTER 'a') | _ -> None | exception _ -> Some (SYM_CHARACTER 'a') @@ -133,9 +137,7 @@ let list_regexp = ), char_regexp '"')), fun s -> Some (SYM_STRING (Stdlib.Scanf.unescaped (String.slice ~first:1 ~last:(-1) s)))); - (char_regexp ' ', fun s -> None); - (char_regexp '\n', fun s -> None); - (char_regexp '\t', fun s -> None); + (char_range (char_list_of_string " \t\n"), fun s -> None); (plus digit_regexp, fun s -> Some (SYM_INTEGER (int_of_string s))); (Eps, fun s -> Some (SYM_EOF)) ] diff --git a/src/lexer_generator.ml b/src/lexer_generator.ml index fe942dc3935411c9ef3ae876e7861da75081eef6..bd37e8489758ca694c62dbb7fa5063f2fa7c57b5 100644 --- a/src/lexer_generator.ml +++ b/src/lexer_generator.ml @@ -338,7 +338,7 @@ let char_list_to_char_ranges s = match cl with | [] -> (match opt_c with None -> l - | Some c -> l @ [(c,n)] + | Some c -> (c,n)::l ) | c::r -> (match opt_c with | None -> recognize_range r l (Some c) 0 @@ -350,12 +350,17 @@ let char_list_to_char_ranges s = in let l = recognize_range (List.sort Stdlib.compare (List.map Char.code s)) [] None 0 in let escape_char c = - if c = '"' - then "\\\"" else Printf.sprintf "%c" c in + if c = '"' then "\\\"" + else if c = '\\' then "\\\\" + else if c = '\x00' then "\\\\0" + else if c = '\t' then "\\\\t" + else if c = '\n' then "\\\\n" + else Printf.sprintf "%c" c in List.fold_left (fun acc (c,n) -> - if n = 0 - then Printf.sprintf "%s%s" (escape_char (Char.chr c)) acc - else Printf.sprintf "%s-%s%s" (escape_char (Char.chr c)) + match n with + | 0 -> Printf.sprintf "%s%s" (escape_char (Char.chr c)) acc + | 1 -> Printf.sprintf "%s%s%s" (escape_char (Char.chr c)) (c + 1 |> Char.chr |> escape_char) acc + | _ -> Printf.sprintf "%s-%s%s" (escape_char (Char.chr c)) (escape_char (Char.chr (c + n))) acc ) "" l