Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • theo.putegnat/infosec-ecomp
  • damien.armillon/infosec-ecomp
  • mouhamed.sougou/infosec-ecomp
  • cidre-public/compilation/infosec-ecomp
4 results
Show changes
(menhir
(modules yaccparser)
(flags (--unused-tokens))
; unused-tokens
)
(ocamllex lexer)
(executables
(names main test_lexer)
(libraries
batteries
yojson
lwt
websocket
websocket-lwt-unix
menhirLib
)
(flags (:standard -warn-error -A -w -27 -w -33 -w -9 -w -39))
)
(lang dune 2.9)
(using menhir 2.1)
open Batteries
open Symbols
open Utils
(* Expressions régulières *)
(* Nous modélisons les expressions régulières avec le type suivant.
Une expressions régulière est soit :
- [Eps] qui dénote l'expressions vide.
- [Charset cs] dénote l'expression régulière qui matche l'ensemble
des caractères de l'ensemble [cs].
- [Cat(r1,r2)] dénote la concaténation de [r1] et [r2] : reconnaît les mots
[uv] tels que [u] appartient à [r1] et [v] appartient à [r2].
- [Alt(r1,r2)] dénote un choix entre [r1] et [r2] : reconnaît les mots reconnus
par [r1] et les mots reconnus par [r2].
- [Star r] dénote la répétition 0, 1 ou plusieurs fois de l'expression [r].
*)
type 'a set = 'a Set.t
type regexp =
| Eps
| Charset of char set
| Cat of regexp * regexp
| Alt of regexp * regexp
| Star of regexp
(* [char_regexp c] reconnaît le caractère [c] uniquement. *)
let char_regexp c = Charset (Set.singleton c)
(* [char_range l] reconnaît l'ensemble des caractères de [l]. *)
let char_range (l: char list) =
Charset (Set.of_list l)
(* [str_regexp s] reconnaît la chaîne de caractère [s]. *)
let str_regexp (s: char list) =
List.fold_right (fun c reg -> Cat(Charset (Set.singleton c), reg)) s Eps
(* [plus r] reconnaît 1 fois ou plus l'expression [r]. *)
let plus r = Cat(r,Star r)
(* Fonction d'affichage. Peut être utile pour déboguer. *)
let rec string_of_regexp r =
match r with
Eps -> "Eps"
| Charset c -> Printf.sprintf "[%s]" (string_of_char_list (Set.to_list c))
| Alt (r1,r2) -> Printf.sprintf "(%s)|(%s)"
(string_of_regexp r1) (string_of_regexp r2)
| Cat (r1,r2) -> Printf.sprintf "(%s).(%s)"
(string_of_regexp r1) (string_of_regexp r2)
| Star r -> Printf.sprintf "(%s)*" (string_of_regexp r)
let lowercase_letters = "abcdefghijklmnopqrstuvwxyz"
let uppercase_letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
let digits = "0123456789"
let other_characters = "?!=<>_ :;,{}()[]^`-+*/%@\n\t\x00.\"\'\\|~#$&"
(* L'opérateur ^ dénote la concaténation des chaînes de caractères. *)
let alphabet = char_list_of_string (lowercase_letters ^ uppercase_letters ^ digits ^ other_characters)
let letter_regexp = char_range (char_list_of_string (uppercase_letters ^ lowercase_letters))
let digit_regexp = char_range (char_list_of_string digits)
let identifier_material = char_range (char_list_of_string (uppercase_letters ^ lowercase_letters ^ digits ^ "_"))
let keyword_regexp s = str_regexp (char_list_of_string s)
(* La liste des expressions régulières permettant d'identifier les tokens du langage E *)
let list_regexp : (regexp * (string -> token option)) list =
[
(keyword_regexp "while", fun _ -> Some (SYM_WHILE));
(keyword_regexp "int", fun _ -> Some (SYM_INT));
(* begin TODO *)
(Eps, fun _ -> Some (SYM_VOID));
(Eps, fun _ -> Some (SYM_CHAR));
(Eps, fun _ -> Some (SYM_IF));
(Eps, fun _ -> Some (SYM_ELSE));
(Eps, fun _ -> Some (SYM_RETURN));
(Eps, fun _ -> Some (SYM_PRINT));
(Eps, fun _ -> Some (SYM_STRUCT));
(Eps, fun _ -> Some (SYM_POINT));
(Eps, fun _ -> Some (SYM_PLUS));
(Eps, fun _ -> Some (SYM_MINUS));
(Eps, fun _ -> Some (SYM_ASTERISK));
(Eps, fun _ -> Some (SYM_DIV));
(Eps, fun _ -> Some (SYM_MOD));
(Eps, fun _ -> Some (SYM_LBRACE));
(Eps, fun _ -> Some (SYM_RBRACE));
(Eps, fun _ -> Some (SYM_LBRACKET));
(Eps, fun _ -> Some (SYM_RBRACKET));
(Eps, fun _ -> Some (SYM_LPARENTHESIS));
(Eps, fun _ -> Some (SYM_RPARENTHESIS));
(Eps, fun _ -> Some (SYM_SEMICOLON));
(Eps, fun _ -> Some (SYM_COMMA));
(Eps, fun _ -> Some (SYM_ASSIGN));
(Eps, fun _ -> Some (SYM_EQUALITY));
(Eps, fun _ -> Some (SYM_NOTEQ));
(Eps, fun _ -> Some (SYM_LT));
(Eps, fun _ -> Some (SYM_GT));
(Eps, fun _ -> Some (SYM_LEQ));
(Eps, fun _ -> Some (SYM_GEQ));
(Eps, fun s -> Some (SYM_IDENTIFIER s));
(* end TODO *)
(Cat(keyword_regexp "//",
Cat(Star (char_range (List.filter (fun c -> c <> '\n') alphabet)),
Alt (char_regexp '\n', Eps))),
fun _ -> None);
(Cat(keyword_regexp "/*",
Cat(
Cat (Star (Alt (
char_range (List.filter (fun c -> c <> '*') alphabet),
Cat (Star(char_regexp '*'),
plus(char_range (List.filter (fun c -> c <> '/' && c <> '*') alphabet)))
)), Star (char_range ['*'])),
keyword_regexp "*/")),
fun _ -> None);
(Cat (char_regexp '\'',
Cat (char_range (List.filter (fun c -> c <> '\'' && c <> '\\') alphabet),
char_regexp '\'')),
fun s ->
match String.get s 1 with
| a -> Some (SYM_CHARACTER a)
| exception Invalid_argument _ -> Some (SYM_CHARACTER 'a')
);
(Cat (char_regexp '\'', Cat (char_regexp '\\',
Cat (char_range (char_list_of_string "\\tn0'"),
char_regexp '\''))),
fun s -> match String.get s 2 with
| '\\' -> Some (SYM_CHARACTER '\\')
| 'n' -> Some (SYM_CHARACTER '\n')
| 't' -> Some (SYM_CHARACTER '\t')
| '\'' -> Some (SYM_CHARACTER '\'')
| '0' -> Some (SYM_CHARACTER 'a')
| _ -> None
| exception _ -> Some (SYM_CHARACTER 'a')
);
(Cat (char_regexp '"',
Cat (Star (
Alt (
char_range (List.filter (fun c -> c <> '"' && c <> '\\') alphabet),
Cat (char_regexp '\\', char_range (char_list_of_string "tn0\\\""))
)
),
char_regexp '"')),
fun s -> Some (SYM_STRING (Stdlib.Scanf.unescaped (String.slice ~first:1 ~last:(-1) s))));
(char_range (char_list_of_string " \t\n"), fun _ -> None);
(plus digit_regexp, fun s -> Some (SYM_INTEGER (int_of_string s)));
(Eps, fun _ -> Some (SYM_EOF))
]
open Ast
open Batteries
open Prog
open Utils
type binop = Eadd | Emul | Emod | Exor | Ediv | Esub (* binary operations *)
| Eclt | Ecle | Ecgt | Ecge | Eceq | Ecne (* comparisons *)
......
open Ast
open Elang
open Prog
open Report
open Options
open Batteries
open Elang_print
open Utils
let tag_is_binop =
......@@ -54,6 +58,7 @@ let rec make_eexpr_of_ast (a: tree) : expr res =
let rec make_einstr_of_ast (a: tree) : instr res =
let res =
match a with
(* TODO *)
| _ -> Error (Printf.sprintf "Unacceptable ast in make_einstr_of_ast %s"
(string_of_ast a))
in
......@@ -81,9 +86,18 @@ let make_fundef_of_ast (a: tree) : (string * efun) res =
let make_eprog_of_ast (a: tree) : eprog res =
match a with
| Node (Tlistglobdef, [a]) ->
make_fundef_of_ast a >>= fun (fname, efun) ->
OK [(fname, Gfun efun)]
| Node (Tlistglobdef, l) ->
list_map_res (fun a -> make_fundef_of_ast a >>= fun (fname, efun) -> OK (fname, Gfun efun)) l
| _ ->
Error (Printf.sprintf "make_fundef_of_ast: Expected a Tlistglobdef, got %s."
(string_of_ast a))
let pass_elang ast =
match make_eprog_of_ast ast with
| Error msg ->
record_compile_result ~error:(Some msg) "Elang";
Error msg
| OK ep ->
dump !e_dump dump_e ep (fun file () ->
add_to_report "e" "E" (Code (file_contents file))); OK ep
open Elang
open Batteries
open BatList
open Prog
open Utils
open Builtins
open Utils
let binop_bool_to_int f x y = if f x y then 1 else 0
......@@ -24,36 +21,35 @@ let eval_unop (u: unop) : int -> int =
let rec eval_eexpr st (e : expr) : int res =
Error "eval_eexpr not implemented yet."
(* [eval_einstr oc st ins] evaluates the instruction [ins] in starting state
[st].
(* [eval_einstr oc st ins] évalue l'instrution [ins] en partant de l'état [st].
The parameter [oc], unused for now, is an output channel in which functions
like "print" will write their output, when we add them.
Le paramètre [oc] est un "output channel", dans lequel la fonction "print"
écrit sa sortie, au moyen de l'instruction [Format.fprintf].
This function returns [(ret, st')] :
Cette fonction renvoie [(ret, st')] :
- [ret] is an [int option]. [Some v] should be returned when a return
instruction is met. [None] means that execution should continue.
- [ret] est de type [int option]. [Some v] doit être renvoyé lorsqu'une
instruction [return] est évaluée. [None] signifie qu'aucun [return] n'a eu
lieu et que l'exécution doit continuer.
- [st'] is the updated state.
*)
- [st'] est l'état mis à jour. *)
let rec eval_einstr oc (st: int state) (ins: instr) :
(int option * int state) res =
Error "eval_einstr not implemented yet."
(* [eval_efun oc st f fname vargs] evaluates function [f] (whose name is
[fname]) starting in state [st], with arguments given in [vargs].
(* [eval_efun oc st f fname vargs] évalue la fonction [f] (dont le nom est
[fname]) en partant de l'état [st], avec les arguments [vargs].
This returns a pair (ret, st') with the same meaning as for [eval_einstr].
*)
Cette fonction renvoie un couple (ret, st') avec la même signification que
pour [eval_einstr]. *)
let eval_efun oc (st: int state) ({ funargs; funbody}: efun)
(fname: string) (vargs: int list)
: (int option * int state) res =
(* A function's environment (mapping from local variables to values) is local
and a function call should not modify the caller's variables. Hence, we
save the caller's environment in [env_save], call the function in a clean
environment with only its arguments set, and restore the caller's
environment. *)
(* L'environnement d'une fonction (mapping des variables locales vers leurs
valeurs) est local et un appel de fonction ne devrait pas modifier les
variables de l'appelant. Donc, on sauvegarde l'environnement de l'appelant
dans [env_save], on appelle la fonction dans un environnement propre (Avec
seulement ses arguments), puis on restore l'environnement de l'appelant. *)
let env_save = Hashtbl.copy st.env in
let env = Hashtbl.create 17 in
match List.iter2 (fun a v -> Hashtbl.replace env a v) funargs vargs with
......@@ -66,26 +62,28 @@ let eval_efun oc (st: int state) ({ funargs; funbody}: efun)
fname (List.length vargs) (List.length funargs)
)
(* [eval_eprog oc ep memsize params] evaluates a complete program [ep], with
(* [eval_eprog oc ep memsize params] évalue un programme complet [ep], avec les
arguments [params].
The [memsize] parameter gives the size of the memory this program will be run
with. This is not useful for now (our programs do not use memory), but it
will when we add memory allocation to our programs.
Le paramètre [memsize] donne la taille de la mémoire dont ce programme va
disposer. Ce n'est pas utile tout de suite (nos programmes n'utilisent pas de
mémoire), mais ça le sera lorsqu'on ajoutera de l'allocation dynamique dans
nos programmes.
Renvoie:
- [OK (Some v)] lorsque l'évaluation de la fonction a lieu sans problèmes et renvoie une valeur [v].
- [OK None] lorsque l'évaluation de la fonction termine sans renvoyer de valeur.
Returns:
- [OK (Some v)] when the function evaluation went without problems and
resulted in integer value [v].
- [OK None] when the function evaluation finished without returning a value.
- [Error msg] when an error has occured.
*)
- [Error msg] lorsqu'une erreur survient.
*)
let eval_eprog oc (ep: eprog) (memsize: int) (params: int list)
: int option res =
let st = init_state memsize in
find_function ep "main" >>= fun f ->
(* trim the parameter list to only take as many as required by the function.
*)
(* ne garde que le nombre nécessaire de paramètres pour la fonction "main". *)
let n = List.length f.funargs in
let params = take n params in
eval_efun oc st f "main" params >>= fun (v, st) ->
eval_efun oc st f "main" params >>= fun (v, _) ->
OK v
{
open Symbols
exception SyntaxError of string
exception Eof
}
let digit = ['0'-'9']
let letter = ['a'-'z' 'A'-'Z']
let id = letter (digit|letter|'_')*
rule token = parse
| [' ' '\t' '\r'] { token lexbuf }
| '\n' { Lexing.new_line lexbuf; token lexbuf }
| ("0x" ['0'-'9''a'-'f''A'-'F']+) as i { SYM_INTEGER (int_of_string i) }
| ['0'-'9']+ as i { SYM_INTEGER (int_of_string i) }
| '+' { SYM_PLUS }
| "->" { SYM_ARROW }
| "&&" { SYM_BOOL_AND }
| "||" { SYM_BOOL_OR }
| "!" { SYM_BOOL_NOT }
| '&' { SYM_BITWISE_AND }
| '~' { SYM_BIT_NOT }
| '-' { SYM_MINUS }
| '*' { SYM_ASTERISK }
| "//" { single_line_comment lexbuf }
| "/*" { multi_line_comment lexbuf }
| '/' { SYM_DIV }
| '.' { SYM_POINT }
| "void" { SYM_VOID }
| "char" { SYM_CHAR }
| "int" { SYM_INT }
| "print" { SYM_PRINT }
| "struct" { SYM_STRUCT }
| "if" { SYM_IF }
| "else" { SYM_ELSE }
| "alloc" { SYM_ALLOC }
| "==" { SYM_EQUALITY }
| "=" { SYM_ASSIGN }
| "while" { SYM_WHILE }
| "return" { SYM_RETURN }
| id as s { SYM_IDENTIFIER s}
| '{' { SYM_LBRACE }
| '}' { SYM_RBRACE }
| '(' { SYM_LPARENTHESIS }
| ')' { SYM_RPARENTHESIS }
| '[' { SYM_LBRACKET }
| ']' { SYM_RBRACKET }
| ';' { SYM_SEMICOLON }
| ',' { SYM_COMMA }
| ">=" { SYM_GEQ }
| "<=" { SYM_LEQ }
| '>' { SYM_GT }
| '<' { SYM_LT }
| "!=" { SYM_NOTEQ }
| '^' { SYM_XOR }
| '%' { SYM_MOD }
| '\'' { parse_char lexbuf }
| '"' { read_string (Buffer.create 17) lexbuf }
| eof { SYM_EOF }
| _ as x { failwith (Printf.sprintf "unexpected char '%c' at %s \n" x (string_of_position (Lexing.lexeme_start_p lexbuf)))}
and parse_char = parse
| "\\n'" { SYM_CHARACTER '\n' }
| "\\0'" { SYM_CHARACTER (char_of_int 0) }
| "\\r'" { SYM_CHARACTER '\r' }
| "\\t'" { SYM_CHARACTER '\t' }
| _ as c '\'' { SYM_CHARACTER c}
| _ as x { failwith (Printf.sprintf "unexpected char literal '%c'\n" x) }
and read_string buf =
parse
| '"' { SYM_STRING (Buffer.contents buf) }
| '\\' '/' { Buffer.add_char buf '/'; read_string buf lexbuf }
| '\\' '\\' { Buffer.add_char buf '\\'; read_string buf lexbuf }
| '\\' 'b' { Buffer.add_char buf '\b'; read_string buf lexbuf }
| '\\' 'f' { Buffer.add_char buf '\012'; read_string buf lexbuf }
| '\\' 'n' { Buffer.add_char buf '\n'; read_string buf lexbuf }
| '\\' 'r' { Buffer.add_char buf '\r'; read_string buf lexbuf }
| '\\' 't' { Buffer.add_char buf '\t'; read_string buf lexbuf }
| [^ '"' '\\']+
{ Buffer.add_string buf (Lexing.lexeme lexbuf);
read_string buf lexbuf
}
| _ { raise (SyntaxError ("Illegal string character: " ^ Lexing.lexeme lexbuf)) }
| eof { raise (SyntaxError ("String is not terminated")) }
and single_line_comment = parse
| ['\n' '\r'] { Lexing.new_line lexbuf; token lexbuf }
| _ { single_line_comment lexbuf }
and multi_line_comment = parse
| "*/" { token lexbuf }
| '\n' { Lexing.new_line lexbuf; multi_line_comment lexbuf }
| _ {multi_line_comment lexbuf}
......@@ -2,80 +2,10 @@ open Batteries
open BatList
open Symbols
open Utils
open E_regexp
type 'a set = 'a Set.t
let remove_dups l : 'a list =
List.fold_left (fun acc elt -> if List.mem elt acc then acc else elt::acc) [] l
let rec take n l =
if n = 0 then []
else match l with
| [] -> []
| a::r -> a::take (n-1) r
let char_list_of_string l : char list =
String.to_list l
let string_of_char_list cl =
String.of_list cl
let string_of_char_set s =
string_of_char_list (Set.to_list s)
let string_of_int_list l =
Printf.sprintf "%s" (String.concat "_" (List.map string_of_int l))
let string_of_int_set s =
string_of_int_list (Set.to_list s)
(* Expressions régulières *)
(* Nous modélisons les expressions régulières avec le type suivant.
Une expressions régulière est soit :
- [Eps] qui dénote l'expressions vide.
- [Charset cs] dénote l'expression régulière qui matche l'ensemble
des caractères de l'ensemble [cs].
- [Cat(r1,r2)] dénote la concaténation de [r1] et [r2] : reconnaît les mots
[uv] tels que [u] appartient à [r1] et [v] appartient à [r2].
- [Alt(r1,r2)] dénote un choix entre [r1] et [r2] : reconnaît les mots reconnus
par [r1] et les mots reconnus par [r2].
- [Star r] dénote la répétition 0, 1 ou plusieurs fois de l'expression [r].
*)
type regexp =
| Eps
| Charset of char set
| Cat of regexp * regexp
| Alt of regexp * regexp
| Star of regexp
(* [char_regexp c] reconnaît le caractère [c] uniquement. *)
let char_regexp c = Charset (Set.singleton c)
(* [char_range l] reconnaît l'ensemble des caractères de [l]. *)
let char_range (l: char list) =
Charset (Set.of_list l)
(* [str_regexp s] reconnaît la chaîne de caractère [s]. *)
let str_regexp (s: char list) =
List.fold_right (fun c reg -> Cat(Charset (Set.singleton c), reg)) s Eps
(* [plus r] reconnaît 1 fois ou plus l'expression [r]. *)
let plus r = Cat(r,Star r)
(* Fonction d'affichage. Peut être utile pour déboguer. *)
let rec string_of_regexp r =
match r with
Eps -> "Eps"
| Charset c -> Printf.sprintf "[%s]" (string_of_char_list (Set.to_list c))
| Alt (r1,r2) -> Printf.sprintf "(%s)|(%s)"
(string_of_regexp r1) (string_of_regexp r2)
| Cat (r1,r2) -> Printf.sprintf "(%s)(%s)"
(string_of_regexp r1) (string_of_regexp r2)
| Star r -> Printf.sprintf "(%s)*" (string_of_regexp r)
(* Non-deterministic Finite Automata (NFA) *)
(* Les états d'un NFA [nfa_state] sont des entiers.
......@@ -115,33 +45,40 @@ let empty_nfa =
(* Concaténation de NFAs. *)
let cat_nfa n1 n2 =
(* TODO *)
empty_nfa
(* Alternatives de NFAs *)
let alt_nfa n1 n2 =
(* TODO *)
empty_nfa
(* Répétition de NFAs *)
(* t est de type [string -> token option] *)
let star_nfa n t =
(* TODO *)
empty_nfa
(* [nfa_of_regexp r firststate t] construit un NFA qui reconnaît le même langage
(* [nfa_of_regexp r freshstate t] construit un NFA qui reconnaît le même langage
que l'expression régulière [r].
Pour ce faire, vous allez devoir générer des noms d'états pour vos automates.
Le paramètre [firststate]
[freshstate] correspond à un entier pour lequel il n'y a pas encore d'état dans
le nfa. Il suffit d'incrémenter [freshstate] pour obtenir de nouveaux états non utilisés.
[t] est une fonction du type [string -> token option] utile pour les états finaux.
*)
let rec nfa_of_regexp r firststate t =
let rec nfa_of_regexp r freshstate t =
match r with
| Eps -> { nfa_states = [firststate];
nfa_initial = [firststate];
nfa_final = [(firststate,t)];
nfa_step = fun q -> []}, firststate + 1
| Charset c -> { nfa_states = [firststate; firststate + 1];
nfa_initial = [firststate];
nfa_final = [firststate + 1, t];
nfa_step = fun q -> if q = firststate then [(Some c, firststate + 1)] else []
}, firststate + 2
| _ -> empty_nfa, firststate
| Eps -> { nfa_states = [freshstate];
nfa_initial = [freshstate];
nfa_final = [(freshstate,t)];
nfa_step = fun q -> []}, freshstate + 1
| Charset c -> { nfa_states = [freshstate; freshstate + 1];
nfa_initial = [freshstate];
nfa_final = [freshstate + 1, t];
nfa_step = fun q -> if q = freshstate then [(Some c, freshstate + 1)] else []
}, freshstate + 2
(* TODO *)
| _ -> empty_nfa, freshstate
(* Deterministic Finite Automaton (DFA) *)
......@@ -182,6 +119,7 @@ let epsilon_closure (n: nfa) (s: nfa_state) : nfa_state set =
(* La fonction [traversal visited s] effectue un parcours de l'automate en
partant de l'état [s], et en suivant uniquement les epsilon-transitions. *)
let rec traversal (visited: nfa_state set) (s: nfa_state) : nfa_state set =
(* TODO *)
visited
in
traversal Set.empty s
......@@ -189,10 +127,12 @@ let epsilon_closure (n: nfa) (s: nfa_state) : nfa_state set =
(* [epsilon_closure_set n ls] calcule l'union des epsilon-fermeture de chacun
des états du NFA [n] dans l'ensemble [ls]. *)
let epsilon_closure_set (n: nfa) (ls: nfa_state set) : nfa_state set =
(* TODO *)
ls
(* [dfa_initial_state n] calcule l'état initial de l'automate déterminisé. *)
let dfa_initial_state (n: nfa) : dfa_state =
(* TODO *)
Set.empty
(* Construction de la table de transitions de l'automate DFA. *)
......@@ -212,7 +152,7 @@ let dfa_initial_state (n: nfa) : dfa_state =
[(c1,q1);(c1,q2);...;(c1,qn);(c2,q'1);...(c2,q'm)] ->
[(c1,{q1,q2,...,qn});(c2,{q'1,...,q'm})] : [assoc_merge_vals]
- on applique la epsilon-fermeture sur tous les états:
[(c1,q1);...;(cn,qn)] -> [(c1, eps(q1)); ...; (cn, eps(qn))] :
[(c1,{q1,q2,...,qn});...;(cn,{qn}])] -> [(c1, eps({q1,q2,...,qn})); ...; (cn, eps({qn}))] :
[epsilon_closure_set]
On obtient alors l'ensemble des transitions depuis l'état [q] dans
......@@ -228,6 +168,11 @@ let assoc_throw_none (l : ('a option * 'b) list) : ('a * 'b) list =
| Some x -> Some (x,n)
) l
let assoc_distribute_key (l : ('a set * 'b) list) : ('a * 'b) list =
List.fold_left (fun (acc : ('a * 'b) list) (k, v) ->
Set.fold (fun c acc -> (c, v)::acc) k acc)
[] l
let assoc_merge_vals (l : ('a * 'b) list) : ('a * 'b set) list =
List.fold_left (fun (acc : ('a * 'b set) list) (k, v) ->
match List.assoc_opt k acc with
......@@ -235,19 +180,16 @@ let assoc_merge_vals (l : ('a * 'b) list) : ('a * 'b set) list =
| Some vl -> (k, Set.add v vl)::List.remove_assoc k acc
) [] l
let assoc_distribute_key (l : ('a set * 'b) list) : ('a * 'b) list =
List.fold_left (fun (acc : ('a * 'b) list) (k, v) ->
Set.fold (fun c acc -> (c, v)::acc) k acc)
[] l
let rec build_dfa_table (table: (dfa_state, (char * dfa_state) list) Hashtbl.t)
(n: nfa)
(ds: dfa_state) : unit =
match Hashtbl.find_option table ds with
| Some _ -> ()
| None ->
(* [transitions] contient les transitions du DFA construites
* à partir des transitions du NFA comme décrit auparavant *)
let transitions : (char * dfa_state) list =
(* TODO *)
[]
in
Hashtbl.replace table ds transitions;
......@@ -283,12 +225,14 @@ let priority t =
(* [min_priority l] renvoie le token de [l] qui a la plus petite priorité, ou
[None] si la liste [l] est vide. *)
let min_priority (l: token list) : token option =
(* TODO *)
None
(* [dfa_final_states n dfa_states] renvoie la liste des états finaux du DFA,
accompagnés du token qu'ils reconnaissent. *)
let dfa_final_states (n: nfa) (dfa_states: dfa_state list) :
(dfa_state * (string -> token option)) list =
(* TODO *)
[]
(* Construction de la relation de transition du DFA. *)
......@@ -297,6 +241,7 @@ let dfa_final_states (n: nfa) (dfa_states: dfa_state list) :
est la table générée par [build_dfa_table], définie ci-dessus. *)
let make_dfa_step (table: (dfa_state, (char * dfa_state) list) Hashtbl.t) =
fun (q: dfa_state) (a: char) ->
(* TODO *)
None
(* Finalement, on assemble tous ces morceaux pour construire l'automate. La
......@@ -387,6 +332,39 @@ let rec tokenize_all (d: dfa) (w: char list) : (token list * char list) =
(* Fonctions d'affichage - Utile pour déboguer *)
let char_list_to_char_ranges s =
let rec recognize_range (cl: int list) l opt_c n =
match cl with
| [] -> (match opt_c with
None -> l
| Some c -> (c,n)::l
)
| c::r -> (match opt_c with
| None -> recognize_range r l (Some c) 0
| Some c' ->
if c' + n + 1 = c
then recognize_range r l (Some c') (n + 1)
else recognize_range r ((c',n)::l) (Some c) 0
)
in
let l = recognize_range (List.sort Stdlib.compare (List.map Char.code s)) [] None 0 in
let escape_char c =
if c = '"' then "\\\""
else if c = '\\' then "\\\\"
else if c = '\x00' then "\\\\0"
else if c = '\t' then "\\\\t"
else if c = '\n' then "\\\\n"
else Printf.sprintf "%c" c in
List.fold_left (fun acc (c,n) ->
match n with
| 0 -> Printf.sprintf "%s%s" (escape_char (Char.chr c)) acc
| 1 -> Printf.sprintf "%s%s%s" (escape_char (Char.chr c)) (c + 1 |> Char.chr |> escape_char) acc
| _ -> Printf.sprintf "%s-%s%s" (escape_char (Char.chr c))
(escape_char (Char.chr (c + n))) acc
) "" l
(* Affichage d'un NFA *)
let nfa_to_string (n : nfa) : string =
Printf.sprintf "===== NFA\nStates : %s\nInitial states : %s\nFinal states : %s\n%s"
......@@ -403,6 +381,24 @@ let nfa_to_string (n : nfa) : string =
) l)
) n.nfa_states))
let nfa_to_dot oc (n : nfa) : unit =
Printf.fprintf oc "digraph {\n";
List.iter (fun n -> Printf.fprintf oc "N%d [shape=\"house\" color=\"red\"]\n" n) (n.nfa_initial);
List.iter (fun (q,t) ->
Printf.fprintf oc "N%d [shape=\"rectangle\", label=\"%s\"]\n"
q (match t "0" with | Some s -> string_of_symbol s | None -> "" )) n.nfa_final;
List.iter (fun q ->
List.iter (fun (cso, q') ->
match cso with
| None ->
Printf.fprintf oc "N%d -> N%d [label=\"[epsilon]\"]\n" q q'
| Some cs ->
Printf.fprintf oc "N%d -> N%d [label=\"[%s]\"]\n" q q' (char_list_to_char_ranges (Set.to_list cs))
) (n.nfa_step q);
) n.nfa_states;
Printf.fprintf oc "}\n"
(* Affichage d'un DFA *)
let dfa_to_string (n : dfa) (alphabet: char list): string =
Printf.sprintf "===== DFA\nStates : %s\nInitial state : %s\nFinal states : [%s]\n%s"
......@@ -428,28 +424,6 @@ let dfa_to_string (n : dfa) (alphabet: char list): string =
bien en copiant le code DOT dans un convertisseur en ligne (par exemple :
http://proto.informatics.jax.org/prototypes/dot2svg/). *)
let char_list_to_char_ranges s =
let rec recognize_range (cl: int list) l opt_c n =
match cl with
| [] -> (match opt_c with
None -> l
| Some c -> l @ [(c,n)]
)
| c::r -> (match opt_c with
| None -> recognize_range r l (Some c) 0
| Some c' ->
if c' + n + 1 = c
then recognize_range r l (Some c') (n + 1)
else recognize_range r ((c',n)::l) (Some c) 0
)
in
let l = recognize_range (List.sort Stdlib.compare (List.map Char.code s)) [] None 0 in
List.fold_left (fun acc (c,n) ->
if n = 0
then Printf.sprintf "%c%s" (Char.chr c) acc
else Printf.sprintf "%c-%c%s" (Char.chr c) (Char.chr (c + n)) acc
) "" l
let dfa_to_dot oc (n : dfa) (cl: char list): unit =
Printf.fprintf oc "digraph {\n";
Printf.fprintf oc "N%s [shape=\"house\" color=\"red\"]\n" (string_of_int_set n.dfa_initial);
......@@ -473,99 +447,6 @@ let dfa_to_dot oc (n : dfa) (cl: char list): unit =
) n.dfa_states;
Printf.fprintf oc "}\n"
let alts l =
match l with
[] -> Eps
| a::r -> List.fold_left (fun acc r -> Alt(acc,r)) a l
let list_regexp =
let lowercase_letters = "abcdefghijklmnopqrstuvwxyz" in
let uppercase_letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" in
let digits = "0123456789" in
let other_characters = "?!=<>_ ;,{}()[]-+*/%\n\t" in
let alphabet = char_list_of_string (lowercase_letters ^ uppercase_letters ^ digits ^ other_characters) in
let letter_regexp = char_range (char_list_of_string (uppercase_letters ^ lowercase_letters)) in
let digit_regexp = char_range (char_list_of_string digits) in
let keyword_regexp s = str_regexp (char_list_of_string s) in
[
(keyword_regexp "while", fun s -> Some (SYM_WHILE));
(keyword_regexp "int", fun s -> Some (SYM_INT));
(keyword_regexp "void", fun s -> Some (SYM_VOID));
(keyword_regexp "char", fun s -> Some (SYM_CHAR));
(keyword_regexp "if", fun s -> Some (SYM_IF));
(keyword_regexp "else", fun s -> Some (SYM_ELSE));
(keyword_regexp "return", fun s -> Some (SYM_RETURN));
(keyword_regexp "print", fun s -> Some (SYM_PRINT));
(keyword_regexp "struct", fun s -> Some (SYM_STRUCT));
(keyword_regexp ".", fun s -> Some (SYM_POINT));
(keyword_regexp "+", fun s -> Some (SYM_PLUS));
(keyword_regexp "-", fun s -> Some (SYM_MINUS));
(keyword_regexp "*", fun s -> Some (SYM_ASTERISK));
(keyword_regexp "/", fun s -> Some (SYM_DIV));
(keyword_regexp "%", fun s -> Some (SYM_MOD));
(keyword_regexp "{", fun s -> Some (SYM_LBRACE));
(keyword_regexp "}", fun s -> Some (SYM_RBRACE));
(keyword_regexp "[", fun s -> Some (SYM_LBRACKET));
(keyword_regexp "]", fun s -> Some (SYM_RBRACKET));
(keyword_regexp "(", fun s -> Some (SYM_LPARENTHESIS));
(keyword_regexp ")", fun s -> Some (SYM_RPARENTHESIS));
(keyword_regexp ";", fun s -> Some (SYM_SEMICOLON));
(keyword_regexp ",", fun s -> Some (SYM_COMMA));
(keyword_regexp "=", fun s -> Some (SYM_ASSIGN));
(keyword_regexp "==", fun s -> Some (SYM_EQUALITY));
(keyword_regexp "!=", fun s -> Some (SYM_NOTEQ));
(keyword_regexp "<", fun s -> Some (SYM_LT));
(keyword_regexp ">", fun s -> Some (SYM_GT));
(keyword_regexp "<=", fun s -> Some (SYM_LEQ));
(keyword_regexp ">=", fun s -> Some (SYM_GEQ));
(Cat(letter_regexp,
Star(Alt(letter_regexp,
Alt(digit_regexp,
Charset (Set.singleton '_'))))),
fun s -> Some (SYM_IDENTIFIER s));
(Cat(keyword_regexp "//",
Cat(Star (char_range (List.filter (fun c -> c <> '\n') alphabet)),
Alt (char_regexp '\n', Eps))),
fun s -> None);
(Cat(keyword_regexp "/*",
Cat(
Star (Alt (char_range (List.filter (fun c -> c <> '*') alphabet),
Cat (char_regexp '*',
char_range (List.filter (fun c -> c <> '/') alphabet)))),
keyword_regexp "*/")),
fun s -> None);
(Cat (char_regexp '\'',
Cat (char_range (List.filter (fun c -> c <> '\'' && c <> '\\') alphabet),
char_regexp '\'')),
fun s -> Some (SYM_CHARACTER (String.get s 1)));
(Cat (char_regexp '\'', Cat (char_regexp '\\',
Cat (char_range (char_list_of_string "\\tn0"),
char_regexp '\''))),
fun s -> match String.get s 2 with
| '\\' -> Some (SYM_CHARACTER '\\')
| 'n' -> Some (SYM_CHARACTER '\n')
| 't' -> Some (SYM_CHARACTER '\t')
| '0' -> Some (SYM_CHARACTER '\x00')
| _ -> None
);
(Cat (char_regexp '"',
Cat (Star (
Alt (
char_range (List.filter (fun c -> c <> '"' && c <> '\\') alphabet),
Cat (char_regexp '\\', char_range (char_list_of_string "tn0\\\""))
)
),
char_regexp '"')),
fun s -> Some (SYM_STRING (Stdlib.Scanf.unescaped (String.slice ~first:1 ~last:(-1) s))));
(char_regexp ' ', fun s -> None);
(char_regexp '\n', fun s -> None);
(char_regexp '\t', fun s -> None);
(plus digit_regexp, fun s -> Some (SYM_INTEGER (int_of_string s)));
(Eps, fun s -> Some (SYM_EOF))
]
let nfa_of_list_regexp l =
let (n, fs) = List.fold_left (fun (nfa, fs) (r,t) ->
let n,fs = nfa_of_regexp r fs t in
......
......@@ -5,6 +5,10 @@ open Batteries
open BatList
open Prog
open Utils
open Report
open Linear_print
open Report
open Options
let dse_instr (ins: rtl_instr) live =
[ins]
......@@ -19,6 +23,9 @@ let dse_fun live {linearfunargs; linearfunbody; linearfuninfo; } =
let dse_prog p live =
if !Options.no_linear_dse
then p
else
List.map (fun (fname,gdef) ->
match gdef with
Gfun f ->
......@@ -26,3 +33,12 @@ let dse_prog p live =
let f = dse_fun live f in
(fname, Gfun f)
) p
let pass_linear_dse linear lives =
let linear = dse_prog linear lives in
record_compile_result "DSE";
dump (!linear_dump >*> fun s -> s ^ "1")
(fun oc -> dump_linear_prog oc (Some lives)) linear
(fun file () -> add_to_report "linear-after-dse" "Linear after DSE"
(Code (file_contents file)));
OK linear
......@@ -3,7 +3,10 @@ open Rtl
open Linear
open Prog
open Utils
open Report
open Linear_print
open Options
open Linear_liveness
let succs_of_rtl_instr (i: rtl_instr) =
match i with
......@@ -38,6 +41,7 @@ let linear_of_rtl_fun
({ rtlfunargs; rtlfunbody; rtlfunentry; rtlfuninfo }: rtl_fun) =
let block_order = sort_blocks rtlfunbody rtlfunentry in
let linearinstrs =
Rjmp rtlfunentry ::
List.fold_left (fun l n ->
match Hashtbl.find_option rtlfunbody n with
| None -> l
......@@ -54,3 +58,10 @@ let linear_of_rtl_gdef = function
let linear_of_rtl r =
assoc_map linear_of_rtl_gdef r
let pass_linearize rtl =
let linear = linear_of_rtl rtl in
let lives = liveness_linear_prog linear in
dump !linear_dump (fun oc -> dump_linear_prog oc (Some lives)) linear
(fun file () -> add_to_report "linear" "Linear" (Code (file_contents file)));
OK (linear, lives)
......@@ -5,11 +5,75 @@ open Utils
open Linear
open Rtl
let gen_live (i: rtl_instr) =
match i with
| Rbinop (b, rd, rs1, rs2) -> Set.of_list [rs1; rs2]
| Rprint rs
| Runop (_, _, rs) -> Set.singleton rs
| Rconst (_, _) -> Set.empty
| Rbranch (_, rs1, rs2, _) -> Set.of_list [rs1; rs2]
| Rjmp _ -> Set.empty
| Rmov (_, rs) -> Set.singleton rs
| Rret r -> Set.singleton r
| Rlabel _ -> Set.empty
let kill_live (i: rtl_instr) =
match i with
| Rbinop (_, rd, _, _)
| Runop (_, rd,_)
| Rconst (rd, _)
| Rmov (rd,_) -> Set.singleton rd
| Rbranch (_, _, _, _)
| Rprint _
| Rret _
| Rjmp _
| Rlabel _ -> Set.empty
let liveness_instrs linearfunbody =
(Hashtbl.create 0, Hashtbl.create 0)
let linear_succs (ins: rtl_instr) i labels =
match ins with
| Rbranch(_, _, _, s1) -> [Hashtbl.find_default labels s1 0; i+1]
| Rjmp s -> [Hashtbl.find_default labels s 0]
| Rret r -> []
| _ -> [i+1]
let setup_labels insl =
let labels = Hashtbl.create 17 in
List.iteri (fun i ins ->
match ins with
| Rlabel l -> Hashtbl.replace labels l i
| _ -> ()
) insl;
labels
let add_changes h k v =
let orig = Hashtbl.find_default h k Set.empty in
Hashtbl.replace h k v;
not (Set.equal v orig)
let iter_liveness insl live_in live_out labels =
List.fold_lefti (fun changed i ins ->
let gl = gen_live ins in
let kl = kill_live ins in
let oi = Hashtbl.find_default live_out i Set.empty in
let newin = Set.union gl (Set.diff oi kl) in
let changed = add_changes live_in i newin || changed in
let succs = linear_succs ins i labels in
let j = List.fold_left (fun j succ ->
Set.union j (Hashtbl.find_default live_in succ Set.empty)
) Set.empty succs in
add_changes live_out i j || changed
) false insl
let liveness_instrs insns =
let live_in = Hashtbl.create 17 in
let live_out = Hashtbl.create 17 in
let labels = setup_labels insns in
let rec aux () =
if iter_liveness insns live_in live_out labels
then aux ()
else (live_in, live_out) in
aux ()
let liveness_linear_prog lp =
let lives = Hashtbl.create 17 in
......
......@@ -53,10 +53,10 @@ let reg_ret = reg_a0
type ltl_instr =
LAddi of ltl_reg * ltl_reg * int
| LSubi of ltl_reg * ltl_reg * int
| LStore of ltl_reg * int * ltl_reg * int (* LStore(rd, rofs, rs, sz) : store
| LStore of ltl_reg * int * ltl_reg * mem_access_size (* LStore(rd, rofs, rs, sz) : store
value in [rs] on [sz] bytes at
address [rd+rofs] *)
| LLoad of ltl_reg * ltl_reg * int * int (* LLoad(rd, rs, rofs, sz) : load
| LLoad of ltl_reg * ltl_reg * int * mem_access_size (* LLoad(rd, rs, rofs, sz) : load
value at address [rs+rofs] on [sz]
bytes in register [rd]. *)
| LMov of ltl_reg * ltl_reg (* LMov(rd, rs) : move value of [rs] into [rd].
......
......@@ -78,9 +78,9 @@ let rec json_summary j =
| `List l -> `List (List.map json_summary l)
| _ -> j
let trace_regs st =
Hashtbl.fold (fun r v acc ->
Array.fold_lefti (fun acc r v ->
(string_of_reg r, `Int v) :: acc
) st.regs []
) [] st.regs
|> fun l -> `Assoc l
let make_trace ip (st: ltl_state) out () =
......@@ -177,10 +177,10 @@ let debugger_message progname breaks state st prog rstop client : unit Lwt.t =
) funinfo))
]) :: acc
) !st.funs [] |> fun funboundaries ->
Hashtbl.fold (fun ip ins acc ->
Array.fold_lefti (fun acc ip ins ->
(Format.fprintf Format.str_formatter "%a" dump_ltl_instr ins);
(string_of_int ip, `String (Format.flush_str_formatter ())) :: acc
) !st.code [] |> fun code ->
) [] !st.code |> fun code ->
`List ( [`Assoc [("progname", `String progname);
("params", `List (List.map (fun x -> `Int x) params))];
`Assoc [("funboundaries", `List funboundaries)];
......@@ -214,11 +214,11 @@ let debug_ltl_prog progname lp memsize params : unit=
let st = ref (init_state memsize lp params) in
let state = ref (None) in
let breaks = ref [] in
let ctx = Conduit_lwt_unix.default_ctx in
let ctx = Lazy.force Conduit_lwt_unix.default_ctx in
let uri = "http://localhost:8080" in
let (pstop, rstop) = Lwt.task () in
let server () =
Resolver_lwt.resolve_uri (Uri.of_string uri) Resolver_lwt_unix.system >>= fun endp ->
Resolver_lwt.resolve_uri ~uri:(Uri.of_string uri) Resolver_lwt_unix.system >>= fun endp ->
Conduit_lwt_unix.endp_to_server ~ctx endp >>= fun server ->
Websocket_lwt_unix.establish_server
~ctx ~mode:server
......
......@@ -7,6 +7,8 @@ open Prog
open Utils
open Regalloc
open Linear_liveness
open Report
open Options
(* list of registers used to store arguments. [a0-a7] *)
let arg_registers =
......@@ -18,12 +20,12 @@ let arg_registers =
*)
let make_push r =
[LSubi(reg_sp, reg_sp, !Archi.wordsize);
LStore(reg_sp, 0, r, !Archi.wordsize)]
[LSubi(reg_sp, reg_sp, (Archi.wordsize ()));
LStore(reg_sp, 0, r, (archi_mas ()))]
let make_pop r =
[LLoad(r, reg_sp, 0, !Archi.wordsize);
LAddi(reg_sp, reg_sp, !Archi.wordsize)]
[LLoad(r, reg_sp, 0, (archi_mas ()));
LAddi(reg_sp, reg_sp, (Archi.wordsize ()))]
let make_sp_sub v =
[LSubi(reg_sp, reg_sp, v)]
......@@ -31,37 +33,395 @@ let make_sp_sub v =
let make_sp_add v =
[LAddi(reg_sp, reg_sp, v)]
(* Moving between locations. [src] and [dst] are locations. [make_loc_mov src
dst] generates instructions so that the value in [src] ends up in [dst],
where [src] and [dst] can be registers [Reg r] or stack offsets [Stk o].
*)
let make_loc_mov src dst =
match src, dst with
| Stk osrc , Stk odst ->
let rtmp = reg_tmp1 in
[LLoad(rtmp, reg_fp, (Archi.wordsize ()) * osrc, (archi_mas ()));
LStore(reg_fp, (Archi.wordsize ()) * odst, rtmp, (archi_mas ()))]
| Stk osrc, Reg rdst ->
[LLoad(rdst, reg_fp, (Archi.wordsize ()) * osrc, (archi_mas ()))]
| Reg rsrc, Stk ofst ->
[LStore(reg_fp, (Archi.wordsize ()) * ofst, rsrc, (archi_mas ()))]
| Reg rsrc, Reg rdst ->
[LMov(rdst,rsrc)]
(* load_loc tmp allocation r = (l, r'). Loads the equivalent of RTL register r
in a LTL register r'. tmpis used if necessary. *)
let load_loc tmp allocation r =
match Hashtbl.find_option allocation r with
| None ->
Error (Format.sprintf "Unable to allocate RTL register r%d." r)
| Some (Stk o) -> OK ([LLoad(tmp, reg_fp, (Archi.wordsize ()) * o, (archi_mas ()))], tmp)
| Some (Reg r) -> OK ([], r)
(* store_loc tmp allocation r = (l, r'). I want to write in RTL register r.
Tells me that I just have to write to LTL register r' and execute l. *)
let store_loc tmp allocation r =
match Hashtbl.find_option allocation r with
| None ->
Error (Format.sprintf "Unable to allocate RTL register r%d." r)
| Some (Stk o) -> OK ([LStore(reg_fp, (Archi.wordsize ()) * o, tmp, (archi_mas ()))], tmp)
| Some (Reg r) -> OK ([], r)
let ltl_prog_of_linear_with_alloc_order alloc_order lp =
let prog = List.map (function
(fname, Gfun f) ->
(fname, Gfun {
ltlfunargs = 0;
ltlfunbody = [];
ltlfuninfo = [];
ltlregalloc = [];
}
)) lp in
prog
(* saves registers in [to_save] on the stack at offsets [fp + 8 * o, fp + 8 * (o
- 1), fp + 8 * (o - 2)...]. Returns:
- an association list [(reg,ofs)] (meaning register reg is saved at [fp+ofs])
- the list of store instructions - the next offset to be written. *)
let save_caller_save to_save ofs =
List.fold_left (fun (instrs, arg_saved, ofs) reg ->
(instrs @ [LStore(reg_fp, (Archi.wordsize ()) * ofs, reg, (archi_mas ()))],
(reg,ofs)::arg_saved, ofs - 1)
) ([], [], ofs) to_save
(* Given a list [(reg,ofs)], loads [fp+ofs] into [reg]. *)
let restore_caller_save arg_saved =
List.map
(fun (reg, ofs) -> LLoad(reg, reg_fp, (Archi.wordsize ()) * ofs, (archi_mas ())))
arg_saved
let alloc_order_st = [
reg_s1; reg_s2; reg_s3; reg_s4; reg_s5;
reg_s6; reg_s7; reg_s8; reg_s9; reg_s10; reg_s11;
reg_t2; reg_t3; reg_t4; reg_t5; reg_t6;
]
let num_parameters_passed_on_stack regs =
let r = List.length regs - number_of_arguments_passed_in_registers in
Stdlib.max 0 r
let alloc_order_ts = [
reg_t2; reg_t3; reg_t4; reg_t5; reg_t6;
(* Given a list or RTL registers [rargs], we want to load their values in LTL
argument registers a0-7. But while writing these registers, we may overwrite
the value of some registers before we actually read them.
For example if [r1 -> a1] and [r2 -> a0], and we want to load [r1] in [a0]
and [r2] in [a1] (because a function call f(r1,r2) occurs), the following
would happen :
mv a0, a1
mv a1, a0
But the value in [a1] will not be the value that was originally in RTL reg
[r2].
Hence, we keep track of the registers like [a1] that are going to be written
before being read, and those will be saved on the stack.
*)
let overwritten_args rargs allocation =
(* [ltl_args] contains the locations of RTL args after allocation. *)
list_map_res (fun r -> match Hashtbl.find_option allocation r with
| None -> Error (Format.sprintf
"overwritten_args: Couldn't allocate register r%d."
r)
| Some loc -> OK loc
) rargs >>= fun ltl_args ->
let (overwritten, read_overwritten) =
List.fold_lefti (fun (overwritten, read_overwritten) i (src: loc) ->
(* [overwritten] contains the list of registers that have been written
to.
[read_overwritten] contains the list of registers that have been read
after being written to. *)
let read_overwritten =
match src with
| Reg rs -> if Set.mem rs overwritten
then Set.add rs read_overwritten
else read_overwritten
| Stk _ -> read_overwritten
in
let overwritten =
if i < number_of_arguments_passed_in_registers
then Set.add (starting_arg_register + i) overwritten
else overwritten
in (overwritten, read_overwritten)
) (Set.empty,Set.empty) ltl_args in
OK read_overwritten
(* [pass_parameters rargs allocation arg_saved ofs] generates code to pass
parameters in RTL registers rargs. [allocation] maps RTL registers to LTL
locations, [arg_saved] contains saved registers, and [ofs] says where,
relative to reg_fp we may save more registers if needed. *)
let pass_parameters rargs allocation arg_saved =
(* LTL locations corresponding to RTL arguments. *)
list_map_res (fun r -> match Hashtbl.find_option allocation r with
| None ->
Error (Format.sprintf
"pass_parameters: Couldn't allocate register r%d." r)
| Some loc -> OK loc
) rargs >>= fun ltl_args ->
(* Relocation of arguments may be necessary if, e.g. a1 must be passed as
first argument (in a0) and a0 must be passed as second argument (in a1). In
that situation, a temporary must be used. These registers (a0 and a1) would
have been saved before on the stack and the relocation information is
available in arg_saved. *)
let reloc_loc overwritten loc =
match loc with
| Stk o -> OK loc
| Reg r -> if List.mem r overwritten
then match List.assoc_opt r arg_saved with
| None -> Error (Format.sprintf "Register %s has been overwritten, \
yet it has not been saved."
(print_reg r))
| Some newloc -> OK (Stk newloc)
else OK loc
in
(* Iterates over the list of LTL arguments. Generates 4 things:
- [overwritten] is the set of registers that are overwritten during
parameter passing. If a register has been overwritten, then we use its copy
on the stack; otherwise we can use it directly.
- [instrs] is a list of instructions for the arguments passed in registers.
- [pushes] is a list of push pseudo-instructions for every additional
argument. The two lists are built separately so that we can build [pushes]
backwards so that e.g. the 9th argument is at the top of the stack at the
end, and e.g. the 15th at higher addresses.
- [npush] is the number of arguments that were pushed to the stack. *)
List.fold_lefti (fun acc i (src: loc) ->
acc >>= fun (overwritten, instrs, pushes, npush) ->
reloc_loc overwritten src >>= fun src ->
let (overwritten, l,pushes, npush) =
if i < number_of_arguments_passed_in_registers
then let rd = starting_arg_register + i in
begin match src with
| Reg rs -> (rd::overwritten, [LMov(rd, rs)],[], npush)
| Stk o -> (rd::overwritten,
[LLoad(rd, reg_fp, (Archi.wordsize ()) * o, (archi_mas ()))],
[], npush)
end else
begin match src with
| Reg rs -> (overwritten, [], make_push rs@pushes, npush+1)
| Stk o -> (overwritten, [],
LLoad(reg_tmp1, reg_fp, (Archi.wordsize ()) * o, (archi_mas ()))
::make_push reg_tmp1 @ pushes,
npush+1)
end
in
OK (overwritten, instrs@l, pushes, npush)
) (OK ([], [], [], 0)) ltl_args >>=
fun (overwritten, instrs, pushes, npush) ->
OK (instrs@pushes, npush)
let written_rtl_regs_instr (i: rtl_instr) =
match i with
| Rbinop (_, rd, _, _)
| Runop (_, rd, _)
| Rconst (rd, _)
| Rmov (rd, _) -> Set.singleton rd
| Rprint _
| Rret _
| Rlabel _
| Rbranch (_, _, _, _)
| Rjmp _ -> Set.empty
let read_rtl_regs_instr (i: rtl_instr) =
match i with
| Rbinop (_, _, rs1, rs2)
| Rbranch (_, rs1, rs2, _) -> Set.of_list [rs1; rs2]
| Rprint rs
| Runop (_, _, rs)
| Rmov (_, rs)
| Rret rs -> Set.singleton rs
| Rlabel _
| Rconst (_, _)
| Rjmp _ -> Set.empty
let read_rtl_regs (l: rtl_instr list) =
List.fold_left (fun acc i -> Set.union acc (read_rtl_regs_instr i))
Set.empty l
let written_rtl_regs (l: rtl_instr list) =
List.fold_left (fun acc i -> Set.union acc (written_rtl_regs_instr i))
Set.empty l
let rtl_to_ltl_registers allocation l =
Set.filter_map (fun rtlreg ->
match Hashtbl.find_option allocation rtlreg with
| Some (Stk ofs) -> None
| None -> None
| Some (Reg r) -> Some r) l
(* Take the RTL registers used by RTL instructions in [l], apply allocation to
them. This gives a list of machine registers used by the LTL function. We
need to add also the registers that will be used for argument passing. *)
let written_ltl_regs fname l allocation =
written_rtl_regs l |> rtl_to_ltl_registers allocation
let caller_save live_out allocation rargs =
let live_after = live_out in
let live_after_ltl = live_after |> rtl_to_ltl_registers allocation in
overwritten_args rargs allocation >>= fun overwritten_args_tosave ->
let l = Set.union live_after_ltl overwritten_args_tosave in
OK (Set.intersect l (Set.of_list (arg_registers @ reg_tmp)))
(* This generates LTL instructions for a given Linear/RTL instruction. In most
cases, the transformation amounts to 'loading' RTL registers in LTL locations
and emitting the straightforward corresponding LTL instructions. This uses
load_loc and store_loc, described above, a lot. The most interesting case is
call instructions. Indeed, in that case, we emit code for saving and
restoring caller-save registers before and after the call, respectively. The
registers to be saved are computed as the set of Risc-V registers marked as
caller-save (a0-a7,t0-t6) intersected with the registers that are read in the
code of the caller. (The rationale being, if we don't read this variable,
then we don't need its value to be preserved across function calls.) These
registers are saved at [fp - 8 * (curstackslot + 1)] *)
let ltl_instrs_of_linear_instr fname live_out allocation
numspilled epilogue_label ins =
let res =
match ins with
| Rbinop (b, rd, rs1, rs2) ->
load_loc reg_tmp1 allocation rs1 >>= fun (l1, r1) ->
load_loc reg_tmp2 allocation rs2 >>= fun (l2, r2) ->
store_loc reg_tmp1 allocation rd >>= fun (ld, rd) ->
OK (l1 @ l2 @ LBinop(b, rd, r1, r2) :: ld)
| Runop (u, rd, rs) ->
load_loc reg_tmp1 allocation rs >>= fun (l1,r1) ->
store_loc reg_tmp1 allocation rd >>= fun (ld, rd) ->
OK (l1 @ LUnop(u, rd, r1) :: ld)
| Rconst (rd, i) ->
store_loc reg_tmp1 allocation rd >>= fun (ld, rd) ->
OK (LConst(rd, i)::ld)
| Rbranch (cmp, rs1, rs2, s1) ->
load_loc reg_tmp1 allocation rs1 >>= fun (l1, r1) ->
load_loc reg_tmp2 allocation rs2 >>= fun (l2, r2) ->
OK (l1 @ l2 @ [LBranch(cmp, r1, r2, Format.sprintf "%s_%d" fname s1)])
| Rjmp s -> OK [LJmp (Format.sprintf "%s_%d" fname s)]
| Rmov (rd, rs) ->
load_loc reg_tmp1 allocation rs >>= fun (ls, rs) ->
store_loc reg_tmp1 allocation rd >>= fun (ld, rd) ->
OK (ls @ LMov(rd, rs) :: ld)
| Rprint r ->
let (save_a_regs, arg_saved, ofs) =
save_caller_save
(range 32)
(- (numspilled+1)) in
let parameter_passing =
match Hashtbl.find_option allocation r with
| None -> Error (Format.sprintf "Could not find allocation for register %d\n" r)
| Some (Reg rs) -> OK [LMov(reg_a0, rs)]
| Some (Stk o) -> OK [LLoad(reg_a0, reg_fp, (Archi.wordsize ()) * o, (archi_mas ()))]
in
parameter_passing >>= fun parameter_passing ->
OK (LComment "Saving a0-a7,t0-t6" :: save_a_regs @
LAddi(reg_sp, reg_s0, (Archi.wordsize ()) * (ofs + 1)) ::
parameter_passing @
LCall "print" ::
LComment "Restoring a0-a7,t0-t6" :: restore_caller_save arg_saved)
| Rret r ->
load_loc reg_tmp1 allocation r >>= fun (l,r) ->
OK (l @ [LMov (reg_ret, r) ; LJmp epilogue_label])
| Rlabel l -> OK [LLabel (Format.sprintf "%s_%d" fname l)]
in
res >>= fun l ->
OK (LComment (Format.asprintf "#<span style=\"background: pink;\"><b>Linear instr</b>: %a #</span>" (Rtl_print.dump_rtl_instr fname (None, None) ~endl:"") ins)::l)
(** Retrieves the location of the n-th argument (in the callee). The first 8 are
passed in a0-a7, the next are passed on the stack. *)
let retrieve_nth_arg n numcalleesave =
let thr = number_of_arguments_passed_in_registers in
if n < thr
then Reg (n + starting_arg_register)
else Stk ((numcalleesave + n-thr))
(* This function creates a LTL function out of a Linear function. In addition to
using machine registers instead of ideal registers, it deals with saving and
restoring callee-save registers. The function prologue consists of saving
these callee-save registers, making the frame pointer to sp, and saving space
on the stack for the variables that would reside on the stack (i.e. spilled -
they go from [s0] to [s0+curstackslot] (curstackslot is negative, and
returned by the register allocation)). The function epilogue restores sp to
the value stored in s0 (fp), restore all the callee-save registers and jumps
back to [ra]. *)
let ltl_fun_of_linear_fun linprog
(({ linearfunargs; linearfunbody; linearfuninfo }): linear_fun) fname
(live_in,live_out) (allocation, numspilled) =
List.iteri (fun i pr ->
Hashtbl.replace allocation pr (retrieve_nth_arg i 0)
) linearfunargs;
let written_regs = Set.add reg_ra
(Set.add reg_fp
(written_ltl_regs fname linearfunbody allocation)) in
let callee_saved_regs =
Set.intersect (Set.of_list callee_saved) written_regs in
List.iteri (fun i pr ->
Hashtbl.replace allocation pr
(retrieve_nth_arg i (Set.cardinal callee_saved_regs))
) linearfunargs;
let max_label =
List.fold_left (fun acc i ->
match i with
Rlabel l -> Stdlib.max l acc
| _ -> acc)
0 linearfunbody in
let epilogue_label = Format.sprintf "%s_%d" fname (max_label + 1) in
let prologue =
List.concat (List.map make_push (Set.to_list callee_saved_regs)) @
LMov (reg_fp, reg_sp) ::
make_sp_sub (numspilled * (Archi.wordsize ())) @
[LComment "end prologue"] in
let epilogue = LLabel epilogue_label ::
LMov(reg_sp, reg_fp) ::
List.concat (List.map make_pop
(List.rev (Set.to_list callee_saved_regs))) @
[LJmpr reg_ra] in
list_map_resi (fun i ->
ltl_instrs_of_linear_instr fname (Hashtbl.find_default live_out i Set.empty)
allocation numspilled epilogue_label) linearfunbody
>>= fun l ->
let instrs = List.concat l
in
OK {
ltlfunargs = List.length linearfunargs;
ltlfunbody = prologue @ instrs @ epilogue;
ltlfuninfo = linearfuninfo;
ltlregalloc = Hashtbl.bindings allocation;
}
let allocable_registers = Set.of_list [
reg_s1; reg_s2; reg_s3; reg_s4; reg_s5;
reg_s6; reg_s7; reg_s8; reg_s9; reg_s10; reg_s11;
reg_t2; reg_t3; reg_t4; reg_t5; reg_t6;
]
let ltl_prog_of_linear lp () =
let alloc_order =
if !Options.alloc_order_st then alloc_order_st else alloc_order_ts in
ltl_prog_of_linear_with_alloc_order alloc_order lp
let ltl_prog_of_linear lp =
let lives = liveness_linear_prog lp in
let allocations = regalloc lp lives allocable_registers in
let prog = list_map_res (function
(fname, Gfun f) ->
let f_alloc =
match Hashtbl.find_option allocations fname with
| None -> (Hashtbl.create 0, 0)
| Some (rig, allocation, next_stack_slot) -> (allocation, - next_stack_slot - 1)
in
let f_lives =
match Hashtbl.find_option lives fname with
| None -> (Hashtbl.create 0, Hashtbl.create 0)
| Some x -> x
in
ltl_fun_of_linear_fun lp f fname f_lives f_alloc >>= fun f ->
OK (fname, Gfun f)
) lp in
prog
let pass_ltl_gen linear =
match ltl_prog_of_linear linear with
| Error msg -> record_compile_result ~error:(Some msg) "LTL"; Error msg
| OK ltl ->
record_compile_result "LTL";
dump !ltl_dump dump_ltl_prog ltl
(fun file () -> add_to_report "ltl" "LTL" (Code (file_contents file)));
OK ltl
......@@ -83,9 +83,9 @@ let dump_ltl_instr oc (i: ltl_instr) =
Format.fprintf oc "%s <- %s %s"
(print_reg rd) (print_unop u) (print_reg rs)
| LStore(rt, i, rs, sz) ->
Format.fprintf oc "%s{%d}[%d] <- %s" (print_reg rt) sz i (print_reg rs)
Format.fprintf oc "%s%s[%d] <- %s" (print_reg rt) (string_of_mem_access_size sz) i (print_reg rs)
| LLoad(rd, rt, i, sz) ->
Format.fprintf oc "%s <- %s{%d}[%d]" (print_reg rd) (print_reg rt) sz i
Format.fprintf oc "%s <- %s%s[%d]" (print_reg rd) (print_reg rt) (string_of_mem_access_size sz) i
| LMov(rd, rs) ->
Format.fprintf oc "%s <- %s" (print_reg rd) (print_reg rs)
| LLabel l ->
......@@ -93,7 +93,7 @@ let dump_ltl_instr oc (i: ltl_instr) =
| LJmp l -> Format.fprintf oc "j %s" l
| LJmpr r -> Format.fprintf oc "jmpr %s" (print_reg r)
| LConst (rd, i) -> Format.fprintf oc "%s <- %d" (print_reg rd) i
| LComment l -> Format.fprintf oc "; %s" l
| LComment l -> Format.fprintf oc "<span style=\"color: gray;\">; %s</span>" l
| LBranch(cmp, rs1, rs2, s) ->
Format.fprintf oc "%s(%s,%s) ? j %s"
(print_cmpop cmp) (print_reg rs1) (print_reg rs2) s
......@@ -107,15 +107,16 @@ let dump_ltl_instr_list fname oc l =
dump_ltl_instr oc ins;
Format.fprintf oc "\n") l
let dump_allocation oc fname alloc =
Format.fprintf oc "// In function %s\n" fname;
List.iter (fun (linr,ltlloc) ->
Format.fprintf oc "// LinReg %d allocated to %s\n" linr (print_loc ltlloc)
) alloc
let dump_ltl_fun oc fname lf =
dump_allocation oc fname lf.ltlregalloc;
Format.fprintf oc "%s:\n" fname;
dump_ltl_instr_list fname oc lf.ltlfunbody
let dump_ltl_prog oc lp =
dump_prog dump_ltl_fun oc lp
let dump_allocation fname alloc =
Format.printf "In function %s\n" fname;
Hashtbl.iter (fun linr ltlloc ->
Format.printf "LinReg %d allocated to %s\n" linr (print_loc ltlloc)
) alloc
......@@ -41,29 +41,27 @@ type fun_location = {
}
type ltl_state = {
code: (int, ltl_instr) Hashtbl.t;
code: ltl_instr Array.t;
funs: (string, fun_location) Hashtbl.t;
regs: (ltl_reg, int) Hashtbl.t;
regs: int array;
labels : (string, int) Hashtbl.t;
mem: Mem.t;
numstep: int ref;
}
(* Finds the position of a label in the code. *)
let find_label (code: (int, ltl_instr) Hashtbl.t) (l: string) =
Hashtbl.fold (fun k v acc ->
match acc with
| OK x -> acc
| Error _ -> if v = LLabel l then OK k else acc
) code (Error (Format.sprintf "Label %s not found." l))
let find_label (labels: (string, int) Hashtbl.t) (l: string) =
match Hashtbl.find_option labels l with
| Some ip -> OK ip
| None -> Error (Format.sprintf "Label %s not found." l)
(* For most instructions, the next instruction to execute is the one at [ip +
1]. *)
let next ip = OK (Some (ip + 1))
(* Helper function to get value of register [r] in state [st]. *)
let get_reg st r =
Hashtbl.find_option st.regs r >>
(Format.sprintf "uninitialized register %s\n" (print_reg r))
let get_reg st r f =
f (Array.get st.regs r)
(* Execution of one LTL instruction.
......@@ -79,99 +77,112 @@ let get_reg st r =
let exec_ltl_instr oc ip st : (int option) res =
let open Utils in
match Hashtbl.find_option st.code ip with
| None -> Error (Format.sprintf
"Could not find next instruction to execute at ip=%d \
[in exec_ltl_instr]" ip)
| Some i ->
match i with
| LAddi(rd, rs, i) ->
get_reg st rs $ fun vs ->
Hashtbl.replace st.regs rd (vs + i);
next ip
| LSubi(rd, rs, i) ->
get_reg st rs $ fun vs ->
Hashtbl.replace st.regs rd (vs - i);
match Array.get st.code ip with
| exception Invalid_argument _ -> Error (Format.sprintf
"Could not find next instruction to execute at ip=%d \
[in exec_ltl_instr]" ip)
| LAddi(rd, rs, i) ->
get_reg st rs $ fun vs ->
Array.set st.regs rd (vs + i);
next ip
| LSubi(rd, rs, i) ->
get_reg st rs $ fun vs ->
Array.set st.regs rd (vs - i);
next ip
| LBinop(b, rd, rs1, rs2) ->
get_reg st rs1 $ fun vs1 ->
get_reg st rs2 $ fun vs2 ->
Array.set st.regs rd (eval_binop b vs1 vs2);
next ip
| LBinop(b, rd, rs1, rs2) ->
get_reg st rs1 $ fun vs1 ->
get_reg st rs2 $ fun vs2 ->
Hashtbl.replace st.regs rd (eval_binop b vs1 vs2);
next ip
| LUnop(u, rd, rs) ->
| LUnop(u, rd, rs) ->
get_reg st rs $ fun vs ->
Array.set st.regs rd (eval_unop u vs);
next ip
| LStore(rt, i, rs, sz) ->
get_reg st rt $ fun vt ->
get_reg st rs $ fun vs ->
Hashtbl.replace st.regs rd (eval_unop u vs);
Mem.write_bytes st.mem (vt + i) (split_bytes (size_of_mas sz) vs) >>= fun _ ->
next ip
| LStore(rt, i, rs, sz) ->
get_reg st rt $ fun vt ->
get_reg st rs $ fun vs ->
Mem.write_bytes st.mem (vt + i) (split_bytes sz vs) >>= fun _ ->
next ip
| LLoad(rd, rt, i, sz) ->
get_reg st rt $ fun vt ->
Mem.read_bytes_as_int st.mem (vt + i) sz >>= fun (v) ->
Hashtbl.replace st.regs rd v;
| LLoad(rd, rt, i, sz) ->
get_reg st rt $ fun vt ->
Mem.read_bytes_as_int st.mem (vt + i) (size_of_mas sz) >>= fun (v) ->
Array.set st.regs rd v;
next ip
| LMov(rd, rs) ->
get_reg st rs $ fun vs ->
Array.set st.regs rd vs;
next ip
| LLabel l -> next ip
| LJmp l -> find_label st.labels l >>= fun n -> OK (Some n)
| LJmpr r ->
get_reg st reg_ra $ fun ra ->
OK (Some ra)
| LConst (rd, i) -> Array.set st.regs rd i; next ip
| LComment _ -> next ip
| LBranch(cmp, rs1, rs2, s) ->
get_reg st rs1 $ fun vs1 ->
get_reg st rs2 $ fun vs2 ->
let b = eval_rtl_cmp cmp vs1 vs2 in
if b
then find_label st.labels s >>= fun n -> OK (Some n)
else next ip
| LCall callee_name ->
begin match Hashtbl.find_option st.funs callee_name with
Some {funstart} ->
Array.set st.regs reg_ra (ip+1);
OK (Some funstart)
| None ->
do_builtin oc st.mem callee_name
(list_ints_desc number_of_arguments_passed_in_registers |> List.rev |>
List.map (fun i -> i + starting_arg_register) |>
List.map (fun i -> Array.get st.regs i)) >>=
fun v ->
begin match v with
| None -> ()
| Some v -> Array.set st.regs reg_ret v
end;
next ip
| LMov(rd, rs) ->
get_reg st rs $ fun vs ->
Hashtbl.replace st.regs rd vs;
next ip
| LLabel l -> next ip
| LJmp l -> find_label st.code l >>= fun n -> OK (Some n)
| LJmpr r ->
get_reg st reg_ra $ fun ra -> OK (Some ra)
| LConst (rd, i) -> Hashtbl.replace st.regs rd i; next ip
| LComment _ -> next ip
| LBranch(cmp, rs1, rs2, s) ->
get_reg st rs1 $ fun vs1 ->
get_reg st rs2 $ fun vs2 ->
let b = eval_rtl_cmp cmp vs1 vs2 in
if b
then find_label st.code s >>= fun n -> OK (Some n)
else next ip
| LCall callee_name ->
begin match Hashtbl.find_option st.funs callee_name with
Some {funstart} ->
Hashtbl.replace st.regs reg_ra (ip+1);
OK (Some funstart)
| None ->
do_builtin oc st.mem callee_name
(list_ints_desc number_of_arguments_passed_in_registers |> List.rev |>
List.map (fun i -> i + starting_arg_register) |>
List.map (fun i -> Hashtbl.find_default st.regs i 0)) >>=
fun v ->
begin match v with
| None -> ()
| Some v -> Hashtbl.replace st.regs reg_ret v
end;
next ip
end
| LHalt -> OK None
end
| LHalt -> OK None
(* Initialize regs [0,n[ *)
let rec init_regs n =
let regs = Hashtbl.create n in
range n |> List.iter (fun n -> Hashtbl.replace regs n 0);
let regs = Array.init n (fun _ -> 0) in
regs
let init_state memsize lp params =
let code : (int, ltl_instr) Hashtbl.t = Hashtbl.create 17 in
let lp = (("__halt", Gfun {ltlfunargs = 0;
ltlfunbody = [LHalt];
ltlfuninfo = [];
ltlregalloc = [];
})::lp) in
let codesize = List.fold_left (fun sz (name, def) ->
match def with
| Gfun f ->
sz + List.length f.ltlfunbody
) 0 lp in
let code : ltl_instr Array.t = Array.init codesize (fun _ -> LHalt) in
let funs : (string, fun_location) Hashtbl.t = Hashtbl.create 17 in
let labels : (string, int) Hashtbl.t = Hashtbl.create 17 in
let mem = Mem.init memsize in
let regs = init_regs 32 in
let sp = memsize - !Archi.wordsize in
Hashtbl.replace regs reg_sp sp;
Hashtbl.replace regs reg_fp sp;
Hashtbl.replace regs reg_ra 0;
let sp = memsize in
Array.set regs reg_sp sp;
Array.set regs reg_fp sp;
Array.set regs reg_ra 0;
let codesize = List.fold_left (fun ofs (name, def) ->
match def with
| Gfun f ->
let funstart = ofs in
let funend = List.fold_left (fun ofs ins ->
Hashtbl.replace code ofs ins;
code.(ofs) <- ins;
(* write dummy instruction in memory. *)
Mem.write_char mem ofs 0x90 >>! fun _ ->
begin match ins with
| LLabel l -> Hashtbl.replace labels l ofs
| _ -> ()
end;
ofs + 1
) ofs f.ltlfunbody in
Hashtbl.replace funs name {
......@@ -180,32 +191,31 @@ let init_state memsize lp params =
funregalloc = f.ltlregalloc
};
funend
) 0 (("__halt", Gfun {ltlfunargs = 0;
ltlfunbody = [LHalt];
ltlfuninfo = [];
ltlregalloc = [];
})::lp)
) 0 lp
in
let codesize = (codesize / 8 + 1) * 8 in
Hashtbl.replace regs reg_gp codesize;
Array.set regs reg_gp codesize;
(* write arguments, relative to sp *)
List.iteri (fun i p ->
if i >= number_of_arguments_passed_in_registers
then begin
let sp = Hashtbl.find regs reg_sp - !Archi.wordsize in
Hashtbl.replace regs reg_sp sp;
Mem.write_bytes mem sp (split_bytes !Archi.wordsize p) >>!
let sp = Array.get regs reg_sp - (Archi.wordsize ()) in
Array.set regs reg_sp sp;
Mem.write_bytes mem sp (split_bytes (Archi.wordsize ()) p) >>!
ignore
end else
begin
Hashtbl.replace regs (starting_arg_register + i) p
Array.set regs (starting_arg_register + i) p
end
) params;
let mem_next = ref (codesize + 8) in
Mem.write_bytes mem codesize (split_bytes !Archi.wordsize !mem_next) >>!
fun _ -> { code; funs; mem ; regs ; numstep = ref 0}
Mem.write_bytes mem codesize (split_bytes (Archi.wordsize ()) !mem_next) >>!
fun _ ->
Printf.eprintf "numlabels = %d\n" (Hashtbl.length labels);
Printf.eprintf "labels = %s\n" (Hashtbl.keys labels |> List.of_enum |> String.concat ", ");
{ code; funs; mem ; labels; regs ; numstep = ref 0}
let rec exec_ltl_at oc ip st =
......@@ -220,5 +230,5 @@ let exec_ltl_prog oc lp memsize params : int option res =
| None -> Error (Format.sprintf "Could not find function main.")
| Some {funstart} ->
exec_ltl_at oc funstart st >>= fun st ->
OK (Hashtbl.find_option st.regs reg_ret)
OK (Some (Array.get st.regs reg_ret))
......@@ -35,10 +35,7 @@ open Archi
open Report
open Options
open Lexer_generator
let tokenize file =
Lexer_generator.tokenize_file file >>= fun tokens ->
OK (List.map (fun tok -> (tok, None)) tokens)
open Tokenize
let speclist =
[
......@@ -64,6 +61,15 @@ let speclist =
("-riscv-run", Arg.Set riscv_run, "Run RISC-V program.");
("-no-dump", Arg.Set no_dump, "Do not dump anything but the .s file");
("-no-dot", Arg.Set no_dot, "Do not call dot on CFG dumps (default false)");
("-clever-regalloc", Arg.Unit (fun () -> naive_regalloc := false), "Use the graph coloring algorithm for register allocation.");
("-naive-regalloc", Arg.Unit (fun () -> naive_regalloc := true),
"Use the naive algorithm for register allocation (all pseudo-registers go on the stack).");
("-no-cfg-constprop", Arg.Set no_cfg_constprop, "Disable CFG constprop");
("-no-cfg-dae", Arg.Set no_cfg_dae, "Disable CFG Dead Assign Elimination");
("-no-cfg-ne", Arg.Set no_cfg_ne, "Disable CFG Nop Elimination");
("-no-linear-dse", Arg.Set no_linear_dse, "Disable Linear Dead Store Elimination");
("-rig-dump", Arg.String (fun s -> rig_dump := Some s),
"Path to output the register interference graph");
("-all-run", Arg.Unit (fun () ->
e_run := true;
cfg_run := true;
......@@ -81,116 +87,17 @@ let speclist =
("-m32", Arg.Unit (fun _ -> Archi.archi := A32), "32bit mode");
("-f", Arg.String (fun s -> input_file := Some s), "file to compile");
("-alloc-order-ts", Arg.Unit (fun _ -> Options.alloc_order_st := false), "Allocate t regs before s regs");
("-json", Arg.Set output_json, "Output JSON summary");
("-json", Arg.String (fun s -> output_json := s), "Output JSON summary");
("-nostart", Arg.Set nostart, "Don't output _start code.");
("-nostats", Arg.Set nostats, "Don't output stats.");
("-nomul", Arg.Unit (fun _ -> has_mul := false), "Target architecture without mul instruction.");
("-lex-hand", Arg.Unit (fun _ -> Options.handwritten_lexer := true), "Use handwritten lexer generator");
("-lex-auto", Arg.Unit (fun _ -> Options.handwritten_lexer := false), "Use OCamlLex lexer");
("-linux", Arg.Unit (fun _ -> target := Linux), "emit linux syscalls");
("-xv6", Arg.Unit (fun _ -> target := Xv6), "emit xv6 syscalls");
("--", Arg.Rest (fun p -> params := int_of_string p::!params), "Run parameters.")
]
type run_result = {
step: string;
retval: int option;
output: string;
error: string option;
}
type compile_result = {
step: string;
error: string option;
data: Yojson.t
}
type result = RunRes of run_result
| CompRes of compile_result
let results = ref []
let run step flag eval p =
if flag then begin
begin match eval Format.str_formatter p !heapsize !params with
| OK v ->
let output = Format.flush_str_formatter () in
results := !results @ [RunRes { step ; retval = v; output; error = None}];
add_to_report step ("Run " ^ step) (
Paragraph
(
Printf.sprintf "With parameters : [%s]<br>\n" (String.concat"," (List.map string_of_int !params))
^ Printf.sprintf "Mem size : %d bytes<br>\n" !heapsize
^ Printf.sprintf "Return value : %s<br>\n" (match v with | Some v -> string_of_int v | _ -> "none")
^ Printf.sprintf "Output : <pre style=\"padding: 1em; background-color: #ccc;\">%s</pre>\n" output
)
)
| Error msg ->
let output = Format.flush_str_formatter () in
results := !results @ [RunRes { step ; retval = None; output; error = Some msg}];
add_to_report step ("Run " ^ step) (
Paragraph
(
Printf.sprintf "With parameters : [%s]<br>\n" (String.concat"," (List.map string_of_int !params))
^ Printf.sprintf "Mem size : %d bytes<br>\n" !heapsize
^ Printf.sprintf "Return value : none<br>\n"
^ Printf.sprintf "Output : <pre style=\"padding: 1em; background-color: #ccc;\">%s</pre>\n" output
^ Printf.sprintf "Error : <pre style=\"padding: 1em; background-color: #fcc;\">\n%s</pre>\n" msg
)
)
end
end
let record_compile_result ?error:(error=None) ?data:(data=[]) step =
let data = if not !Options.nostats then `List data else `Null in
results := !results @ [CompRes { step; error; data}]
let dump file dumpf p additional_command =
begin match file with
| None -> ()
| Some file ->
let oc, close =
if file = "-"
then (Format.std_formatter, fun _ -> ())
else
let oc = open_out file in
(Format.formatter_of_out_channel oc, fun () -> close_out oc)
in
dumpf oc p; close ();
additional_command file ()
end
let process_output_to_list2 = fun command ->
let chan = Unix.open_process_in command in
let res = ref ([] : string list) in
let rec process_otl_aux () =
let e = input_line chan in
res := e::!res;
process_otl_aux() in
try process_otl_aux ()
with End_of_file ->
let stat = Unix.close_process_in chan in (List.rev !res,stat)
let cmd_to_list command =
let (l,_) = process_output_to_list2 command in l
let file_contents file =
match
let ic = open_in file in
let rec aux s () =
try
let line = input_line ic in (* read line from in_channel and discard \n *)
aux (s ^ line ^ "\n") () (* close the input channel *)
with e -> (* some unexpected exception occurs *)
close_in_noerr ic; (* emergency closing *)
s in
aux "" ()
with
| exception Sys_error _ -> failwith (Printf.sprintf "Could not open file %s\n" file)
| x -> x
let set_default r v suff =
match !r with
None -> r := Some (v ^ suff)
......@@ -198,13 +105,28 @@ let set_default r v suff =
let compile_rv basename asmfile () =
if not !Options.nostart then begin
let out, _ =
Format.sprintf
"%s -nostdlib -nostartfiles -T %s/link.ld -o \"%s.exe\" \"%s\" %s/lib%d.s %s/mul%d.S 2>&1"
!Archi.assembler Config.runtime_dir basename asmfile
Config.runtime_dir !Archi.nbits
Config.runtime_dir !Archi.nbits
|> process_output_to_list2 in
let obj_file_prog = Filename.temp_file ~temp_dir:"/tmp" "" ".o" in
let cmdas_prog = Format.sprintf "%s -I%s -o %s %s"
(Archi.assembler ())
(Archi.runtime_lib_include_path ())
obj_file_prog asmfile in
let obj_file_lib = Filename.temp_file ~temp_dir:"/tmp" "" ".o" in
let cmdas_lib = Format.sprintf "%s -I%s -o %s %s"
(Archi.assembler ())
(Archi.runtime_lib_include_path ())
obj_file_lib (Archi.runtime_lib_path ()) in
let cmdld = Format.sprintf "%s -T %s/link.ld %s %s -o %s.exe"
(Archi.linker ())
Config.runtime_dir
obj_file_prog obj_file_lib
basename in
Printf.printf "AS: %s\n" cmdas_prog;
Printf.printf "AS: %s\n" cmdas_lib;
Printf.printf "LD: %s\n" cmdld;
let out_as_prog = cmd_to_list cmdas_prog in
let out_as_lib = cmd_to_list cmdas_lib in
let out_ld = cmd_to_list cmdld in
let out = out_as_prog @ out_as_lib @ out_ld in
match out with
[] -> None
| _ -> Some (String.concat "\n" out)
......@@ -220,11 +142,12 @@ let exec_rv_prog ltl basename oc rvp heapsize params =
f
in
let error = ref None in
dump (Some rvp) dump_riscv_prog ltl (fun file () -> error := compile_rv basename file ());
dump (Some rvp) (dump_riscv_prog !Archi.target) ltl (fun file () ->
error := compile_rv basename file ());
match !error with
| Some e -> Error ("RiscV generation error:\n" ^e)
| None ->
let l = cmd_to_list (Format.sprintf "%s%d-static \"%s.exe\" %s" Config.qemu_path !Archi.nbits basename
let l = cmd_to_list (Format.sprintf "%s \"%s.exe\" %s" (Archi.qemu ()) basename
(params |> List.map string_of_int |> String.concat " " )) in
try
let all_but_last = l |> List.rev |> List.tl |> List.rev in
......@@ -233,137 +156,73 @@ let exec_rv_prog ltl basename oc rvp heapsize params =
OK (Some ret)
with _ -> OK None
let call_dot report_sectid report_secttitle file () : unit =
if not !Options.no_dot
then begin
let r = Sys.command (Format.sprintf "dot -Tsvg %s -o %s.svg" file file) in
add_to_report report_sectid report_secttitle (Img (Filename.basename file^".svg"));
ignore r
end
let _ =
Arg.parse speclist (fun s -> ()) "Usage";
init_archi !archi ();
Archi.archi := !archi;
match !input_file with
| None -> failwith "No input file specified.\n"
| Some input ->
add_to_report "Source" "Source" (Code (file_contents input));
match Filename.chop_suffix_opt ".e" input with
match Filename.chop_suffix_opt ~suffix:".e" input with
None -> failwith
(Format.sprintf "File (%s) should end in .e" input)
| Some basename ->
begin
params := List.rev !params;
set_default riscv_dump basename ".s";
if not !no_dump then begin
set_default show_tokens basename ".lex";
set_default ast_tree basename ".ast";
set_default e_dump basename ".e.dump";
set_default cfg_dump basename ".cfg";
set_default rtl_dump basename ".rtl";
set_default linear_dump basename ".linear";
set_default ltl_dump basename ".ltl";
end;
tokenize input >>* (fun msg ->
record_compile_result ~error:(Some msg) "Lexing";
) $ fun tokens ->
record_compile_result "Lexing";
dump !show_tokens (fun oc tokens ->
List.iter (fun (tok,_) ->
Format.fprintf oc "%s\n" (string_of_symbol tok)
) tokens) tokens (fun f () -> add_to_report "lexer" "Lexer" (Code (file_contents f)));
parse tokens () >>* (fun msg ->
record_compile_result ~error:(Some msg) "Parsing";
) $ fun (ast, tokens) ->
record_compile_result "Parsing";
dump !ast_tree draw_ast_tree ast (call_dot "ast" "AST");
if !ast_dump then Format.printf "%s\n" (string_of_ast ast) else ();
match make_eprog_of_ast ast with
| Error msg -> record_compile_result ~error:(Some msg) "Elang"
| OK ep ->
dump !e_dump dump_e ep (fun file () ->
add_to_report "e" "E" (Code (file_contents file)));
run "Elang" !e_run eval_eprog ep;
cfg_prog_of_eprog ep >>! fun cfg ->
dump !cfg_dump dump_cfg_prog cfg (call_dot "cfg" "CFG");
run "CFG" !cfg_run eval_cfgprog cfg;
let cfg = constant_propagation cfg in
record_compile_result "ConstProp";
dump (!cfg_dump >*> fun s -> s ^ "0") dump_cfg_prog cfg
(call_dot "cfg-after-cstprop" "CFG after Constant Propagation");
run "CFG after constant_propagation" !cfg_run_after_cp eval_cfgprog cfg;
let cfg = dead_assign_elimination cfg in
record_compile_result "DeadAssign";
dump (!cfg_dump >*> fun s -> s ^ "1") dump_cfg_prog cfg
(call_dot "cfg-after-dae" "CFG after DAE");
run "CFG after dead_assign_elimination" !cfg_run_after_dae eval_cfgprog cfg;
let cfg = nop_elimination cfg in
record_compile_result "NopElim";
dump (!cfg_dump >*> fun s -> s ^ "2") dump_cfg_prog cfg
(call_dot "cfg-after-nop" "CFG after NOP elim");
run "CFG after nop_elimination" !cfg_run_after_ne eval_cfgprog cfg;
let rtl = rtl_of_cfg cfg in
dump !rtl_dump dump_rtl_prog rtl
(fun file () -> add_to_report "rtl" "RTL" (Code (file_contents file)));
run "RTL" !rtl_run exec_rtl_prog rtl;
let linear = linear_of_rtl rtl in
let lives = liveness_linear_prog linear in
dump !linear_dump (fun oc -> dump_linear_prog oc (Some lives)) linear
(fun file () -> add_to_report "linear" "Linear" (Code (file_contents file)));
run "Linear" !linear_run exec_linear_prog linear;
let linear = dse_prog linear lives in
record_compile_result "DSE";
dump (!linear_dump >*> fun s -> s ^ "1")
(fun oc -> dump_linear_prog oc (Some lives)) linear
(fun file () -> add_to_report "linear-after-dse" "Linear after DSE"
(Code (file_contents file)));
run "Linear after DSE" !linear_run_after_dse exec_linear_prog linear;
let ltl = ltl_prog_of_linear linear () in
dump !ltl_dump dump_ltl_prog ltl
(fun file () -> add_to_report "ltl" "LTL" (Code (file_contents file)));
run "LTL" !ltl_run (exec_ltl_prog) ltl;
(if !ltl_debug then debug_ltl_prog input ltl !heapsize !params);
dump !riscv_dump dump_riscv_prog ltl (fun file () -> ignore (compile_rv basename file ()));
if not !Options.nostart then begin
run "Risc-V" !riscv_run (exec_rv_prog ltl basename) !riscv_dump
end;
params := List.rev !params;
set_default riscv_dump basename ".s";
if not !no_dump then begin
set_default show_tokens basename ".lex";
set_default ast_tree basename ".ast";
set_default e_dump basename ".e.dump";
set_default cfg_dump basename ".cfg";
set_default rtl_dump basename ".rtl";
set_default linear_dump basename ".linear";
set_default rig_dump basename ".rig";
set_default ltl_dump basename ".ltl";
end;
if !output_json
then begin
let open Yojson in
let jstring_of_ostring o =
match o with
| None -> `Null
| Some s -> `String s
in
let j = `List (List.map (function
| RunRes { step; retval; output; error; } ->
`Assoc [("runstep",`String step);
("retval", match retval with Some r -> `Int r | None -> `Null);
("output", `String output);
("error", jstring_of_ostring error);
]
| CompRes { step; error; data } ->
`Assoc [("compstep",`String step);
("error", jstring_of_ostring error);
("data", data)
]
) !results) in
Format.printf "%s\n" (Yojson.pretty_to_string j);
Printexc.record_backtrace true;
let compiler_res =
try
pass_tokenize input >>= fun tokens ->
pass_parse tokens >>= fun (ast, _) ->
pass_elang ast >>= fun ep ->
run "Elang" !e_run eval_eprog ep;
pass_cfg_gen ep >>= fun cfg ->
run "CFG" !cfg_run eval_cfgprog cfg;
pass_constant_propagation cfg >>= fun cfg ->
run "CFG after constant_propagation" !cfg_run_after_cp eval_cfgprog cfg;
pass_dead_assign_elimination cfg >>= fun cfg ->
run "CFG after dead_assign_elimination" !cfg_run_after_dae eval_cfgprog cfg;
pass_nop_elimination cfg >>= fun cfg ->
run "CFG after nop_elimination" !cfg_run_after_ne eval_cfgprog cfg;
pass_rtl_gen cfg >>= fun rtl ->
run "RTL" !rtl_run exec_rtl_prog rtl;
pass_linearize rtl >>= fun (linear, lives) ->
run "Linear" !linear_run exec_linear_prog linear;
pass_linear_dse linear lives >>= fun linear ->
run "Linear after DSE" !linear_run_after_dse exec_linear_prog linear;
pass_ltl_gen linear
with e ->
let emsg = Printexc.to_string e ^ "\n" ^ Printexc.get_backtrace () in
record_compile_result ~error:(Some emsg) "global";
Error emsg
in
begin
match compiler_res with
| Error msg -> ()
| OK ltl ->
run "LTL" !ltl_run (exec_ltl_prog) ltl;
(if !ltl_debug then debug_ltl_prog input ltl !heapsize !params);
dump !riscv_dump (dump_riscv_prog !Archi.target) ltl (fun file () ->
add_to_report "riscv" "RISC-V" (Code (file_contents file));
ignore (compile_rv basename file ()));
if not !Options.nostart then begin
run "Risc-V" !riscv_run (exec_rv_prog ltl basename) !riscv_dump
end
end;
dump (Some !output_json) (fun oc p ->
Format.fprintf oc "%s\n" p
) (json_output_string ()) (fun _ () -> ());
make_report input report ()
......@@ -23,11 +23,20 @@ let riscv_run = ref false
let show = ref false
let params : int list ref = ref []
let input_file : string option ref = ref None
let output_json = ref false
let output_json : string ref = ref "-"
let nostart = ref false
let nostats = ref false
let has_mul = ref true
let no_dump = ref false
let no_dot = ref false
let no_cfg_constprop = ref false
let no_cfg_dae = ref false
let no_cfg_ne = ref false
let no_linear_dse = ref false
let alloc_order_st = ref true
let naive_regalloc = ref true
let rig_dump : string option ref = ref None
let handwritten_lexer = ref Config.lex_hand
let alpaga_parser = ref Config.alpaga_parser
let timeout = ref 3.0
open Yaccparser
open Generated_parser
open Report
open Utils
open Options
open Ast
open Symbols
let parse = parse_S
let to_yacc_token = function
| SYM_EOF -> Yaccparser.SYM_EOF
| SYM_IDENTIFIER(s) -> Yaccparser.SYM_IDENTIFIER s
| SYM_INTEGER(i) -> Yaccparser.SYM_INTEGER i
| SYM_VOID -> Yaccparser.SYM_VOID
| SYM_CHAR -> Yaccparser.SYM_CHAR
| SYM_INT -> Yaccparser.SYM_INT
| SYM_STRUCT -> Yaccparser.SYM_STRUCT
| SYM_SEMICOLON -> Yaccparser.SYM_SEMICOLON
| SYM_POINT -> Yaccparser.SYM_POINT
| SYM_IF -> Yaccparser.SYM_IF
| SYM_ELSE -> Yaccparser.SYM_ELSE
| SYM_PLUS -> Yaccparser.SYM_PLUS
| SYM_MINUS -> Yaccparser.SYM_MINUS
| SYM_ASTERISK -> Yaccparser.SYM_ASTERISK
| SYM_DIV -> Yaccparser.SYM_DIV
| SYM_EQUALITY -> Yaccparser.SYM_EQUALITY
| SYM_ASSIGN -> Yaccparser.SYM_ASSIGN
| SYM_LPARENTHESIS -> Yaccparser.SYM_LPARENTHESIS
| SYM_RPARENTHESIS -> Yaccparser.SYM_RPARENTHESIS
| SYM_LBRACE -> Yaccparser.SYM_LBRACE
| SYM_RBRACE -> Yaccparser.SYM_RBRACE
| SYM_WHILE -> Yaccparser.SYM_WHILE
| SYM_RETURN -> Yaccparser.SYM_RETURN
| SYM_COMMA -> Yaccparser.SYM_COMMA
| SYM_LT -> Yaccparser.SYM_LT
| SYM_LEQ -> Yaccparser.SYM_LEQ
| SYM_GT -> Yaccparser.SYM_GT
| SYM_GEQ -> Yaccparser.SYM_GEQ
| SYM_NOTEQ -> Yaccparser.SYM_NOTEQ
| SYM_MOD -> Yaccparser.SYM_MOD
| SYM_BOOL_NOT -> Yaccparser.SYM_BOOL_NOT
| SYM_BOOL_AND -> Yaccparser.SYM_BOOL_AND
| SYM_BOOL_OR -> Yaccparser.SYM_BOOL_OR
| SYM_ARROW -> Yaccparser.SYM_ARROW
| SYM_BITWISE_OR -> Yaccparser.SYM_BITWISE_OR
| SYM_BITWISE_AND -> Yaccparser.SYM_BITWISE_AND
| SYM_BIT_NOT -> Yaccparser.SYM_BIT_NOT
| SYM_XOR -> Yaccparser.SYM_XOR
| SYM_CHARACTER(c) -> Yaccparser.SYM_CHARACTER c
| SYM_STRING(s) -> Yaccparser.SYM_STRING s
| SYM_LBRACKET -> Yaccparser.SYM_LBRACKET
| SYM_RBRACKET -> Yaccparser.SYM_RBRACKET
| SYM_ALLOC -> Yaccparser.SYM_ALLOC
| SYM_PRINT -> Yaccparser.SYM_PRINT
| SYM_EXTERN -> Yaccparser.SYM_EXTERN
| SYM_INCLUDE(s) -> Yaccparser.SYM_INCLUDE s
| SYM_AMPERSAND -> Yaccparser.SYM_AMPERSAND
let advance (l: ('a list * 'a list)) : ('a * ('a list * 'a list)) option =
let lbef, laft = l in
match laft with
[] -> None
| a::r -> Some (a, (a::lbef, r))
let back (l: ('a list * 'a list)) : ('a list * 'a list) option =
let lbef, laft = l in
match lbef with
[] -> None
| a::r -> Some ((r, a::laft))
let menhir_parser (toks: (Symbols.token * Lexing.position option) list) () =
let mtoks = ref ([], toks) in
let get_tok () =
match advance !mtoks with
| None -> (Yaccparser.SYM_EOF, Lexing.dummy_pos, Lexing.dummy_pos)
| Some ((t, p), l) ->
mtoks := l;
(to_yacc_token t, Lexing.dummy_pos, Lexing.dummy_pos)
in
let mparser = MenhirLib.Convert.Simplified.traditional2revised Yaccparser.main in
match mparser get_tok with
| ast -> OK (ast, [])
| exception Yaccparser.Error ->
match back !mtoks with
| None -> Error (Printf.sprintf "Parser error while reading '???'\n")
| Some (lbef, laft) ->
Error (Printf.sprintf "Parser error while reading '%s'\n"
(String.concat " " (List.map (fun (t, _) -> string_of_symbol t) (take 20 laft)))
)
let parser toks () =
if !Options.alpaga_parser
then parse toks ()
else menhir_parser toks ()
let pass_parse tokens =
match parser tokens () with
| Error msg -> record_compile_result ~error:(Some msg) "Parsing"; Error msg
| OK (ast, tokens) ->
record_compile_result "Parsing";
dump !ast_tree draw_ast_tree ast (call_dot "ast" "AST");
if !ast_dump then Format.printf "%s\n" (string_of_ast ast) else ();
OK (ast, tokens)
open Batteries
open Utils
type mem_access_size =
| MAS1
| MAS4
| MAS8
let string_of_mem_access_size mas =
match mas with
| MAS1 -> "{1}"
| MAS4 -> "{4}"
| MAS8 -> "{8}"
let mas_of_size n =
match n with
| 1 -> OK MAS1
| 4 -> OK MAS4
| 8 -> OK MAS8
| _ -> Error (Printf.sprintf "Unknown memory access size for size = %d" n)
let size_of_mas mas =
match mas with
| MAS1 -> 1
| MAS4 -> 4
| MAS8 -> 8
let archi_mas () =
match !Archi.archi with
| A64 -> MAS8
| A32 -> MAS4
type 'a gdef = Gfun of 'a
type 'a prog = (string * 'a gdef) list
......@@ -29,7 +61,7 @@ let set_val env v i =
Hashtbl.replace env v i
let get_val env v =
Hashtbl.find_opt env v
Hashtbl.find_option env v
let find_function (ep: 'a prog) fname : 'a res =
match List.assoc_opt fname ep with
......