open Batteries open Symbols open Utils (* Expressions régulières *) (* Nous modélisons les expressions régulières avec le type suivant. Une expressions régulière est soit : - [Eps] qui dénote l'expressions vide. - [Charset cs] dénote l'expression régulière qui matche l'ensemble des caractères de l'ensemble [cs]. - [Cat(r1,r2)] dénote la concaténation de [r1] et [r2] : reconnaît les mots [uv] tels que [u] appartient à [r1] et [v] appartient à [r2]. - [Alt(r1,r2)] dénote un choix entre [r1] et [r2] : reconnaît les mots reconnus par [r1] et les mots reconnus par [r2]. - [Star r] dénote la répétition 0, 1 ou plusieurs fois de l'expression [r]. *) type 'a set = 'a Set.t type regexp = | Eps | Charset of char set | Cat of regexp * regexp | Alt of regexp * regexp | Star of regexp (* [char_regexp c] reconnaît le caractère [c] uniquement. *) let char_regexp c = Charset (Set.singleton c) (* [char_range l] reconnaît l'ensemble des caractères de [l]. *) let char_range (l: char list) = Charset (Set.of_list l) (* [str_regexp s] reconnaît la chaîne de caractère [s]. *) let str_regexp (s: char list) = List.fold_right (fun c reg -> Cat(Charset (Set.singleton c), reg)) s Eps (* [plus r] reconnaît 1 fois ou plus l'expression [r]. *) let plus r = Cat(r,Star r) (* Fonction d'affichage. Peut être utile pour déboguer. *) let rec string_of_regexp r = match r with Eps -> "Eps" | Charset c -> Printf.sprintf "[%s]" (string_of_char_list (Set.to_list c)) | Alt (r1,r2) -> Printf.sprintf "(%s)|(%s)" (string_of_regexp r1) (string_of_regexp r2) | Cat (r1,r2) -> Printf.sprintf "(%s).(%s)" (string_of_regexp r1) (string_of_regexp r2) | Star r -> Printf.sprintf "(%s)*" (string_of_regexp r) (* La liste des expressions régulières permettant d'identifier les tokens du langage E *) let list_regexp = let lowercase_letters = "abcdefghijklmnopqrstuvwxyz" in let uppercase_letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" in let digits = "0123456789" in let other_characters = "?!=<>_ ;,{}()[]-+*/%\n\t" in let alphabet = char_list_of_string (lowercase_letters ^ uppercase_letters ^ digits ^ other_characters) in let letter_regexp = char_range (char_list_of_string (uppercase_letters ^ lowercase_letters)) in let digit_regexp = char_range (char_list_of_string digits) in let keyword_regexp s = str_regexp (char_list_of_string s) in [ (keyword_regexp "while", fun s -> Some (SYM_WHILE)); (keyword_regexp "int", fun s -> Some (SYM_INT)); (* begin TODO *) (Eps, fun s -> Some (SYM_VOID)); (Eps, fun s -> Some (SYM_CHAR)); (Eps, fun s -> Some (SYM_IF)); (Eps, fun s -> Some (SYM_ELSE)); (Eps, fun s -> Some (SYM_RETURN)); (Eps, fun s -> Some (SYM_PRINT)); (Eps, fun s -> Some (SYM_STRUCT)); (Eps, fun s -> Some (SYM_POINT)); (Eps, fun s -> Some (SYM_PLUS)); (Eps, fun s -> Some (SYM_MINUS)); (Eps, fun s -> Some (SYM_ASTERISK)); (Eps, fun s -> Some (SYM_DIV)); (Eps, fun s -> Some (SYM_MOD)); (Eps, fun s -> Some (SYM_LBRACE)); (Eps, fun s -> Some (SYM_RBRACE)); (Eps, fun s -> Some (SYM_LBRACKET)); (Eps, fun s -> Some (SYM_RBRACKET)); (Eps, fun s -> Some (SYM_LPARENTHESIS)); (Eps, fun s -> Some (SYM_RPARENTHESIS)); (Eps, fun s -> Some (SYM_SEMICOLON)); (Eps, fun s -> Some (SYM_COMMA)); (Eps, fun s -> Some (SYM_ASSIGN)); (Eps, fun s -> Some (SYM_EQUALITY)); (Eps, fun s -> Some (SYM_NOTEQ)); (Eps, fun s -> Some (SYM_LT)); (Eps, fun s -> Some (SYM_GT)); (Eps, fun s -> Some (SYM_LEQ)); (Eps, fun s -> Some (SYM_GEQ)); (Eps, fun s -> Some (SYM_IDENTIFIER s)); (* end TODO *) (Cat(keyword_regexp "//", Cat(Star (char_range (List.filter (fun c -> c <> '\n') alphabet)), Alt (char_regexp '\n', Eps))), fun s -> None); (Cat(keyword_regexp "/*", Cat( Star (Alt (char_range (List.filter (fun c -> c <> '*') alphabet), Cat (char_regexp '*', char_range (List.filter (fun c -> c <> '/') alphabet)))), keyword_regexp "*/")), fun s -> None); (Cat (char_regexp '\'', Cat (char_range (List.filter (fun c -> c <> '\'' && c <> '\\') alphabet), char_regexp '\'')), fun s -> match String.get s 1 with | a -> Some (SYM_CHARACTER a) | exception Invalid_argument _ -> Some (SYM_CHARACTER 'a') ); (Cat (char_regexp '\'', Cat (char_regexp '\\', Cat (char_range (char_list_of_string "\\tn0"), char_regexp '\''))), fun s -> match String.get s 2 with | '\\' -> Some (SYM_CHARACTER '\\') | 'n' -> Some (SYM_CHARACTER '\n') | 't' -> Some (SYM_CHARACTER '\t') | '0' -> Some (SYM_CHARACTER 'a') | _ -> None | exception _ -> Some (SYM_CHARACTER 'a') ); (Cat (char_regexp '"', Cat (Star ( Alt ( char_range (List.filter (fun c -> c <> '"' && c <> '\\') alphabet), Cat (char_regexp '\\', char_range (char_list_of_string "tn0\\\"")) ) ), char_regexp '"')), fun s -> Some (SYM_STRING (Stdlib.Scanf.unescaped (String.slice ~first:1 ~last:(-1) s)))); (char_regexp ' ', fun s -> None); (char_regexp '\n', fun s -> None); (char_regexp '\t', fun s -> None); (plus digit_regexp, fun s -> Some (SYM_INTEGER (int_of_string s))); (Eps, fun s -> Some (SYM_EOF)) ]