Skip to content
Snippets Groups Projects
Commit 7169d5c1 authored by Wilke Pierre's avatar Wilke Pierre
Browse files

Possibilité de choisir différents analyseurs lexicaux et syntaxiques.

Par défaut, le compilateur utilise le lexer généré d'après vos expressions
régulières, NFA et DFA. En éditant le fichier opts.mk à la racine du projet et
en décommentant la ligne "CONF_OPTS+=-L", le compilateur utilisera un lexer
généré automatiquement via ocamllex.

De la même manière, par défaut, l'analyseur syntaxique utilisé sera celui généré
par ALPAGA d'après la grammaire expr_grammar_action.g. En décommentant dans le
fichier opts.mk la ligne "CONF_OPTS+=-m", le compilateur utilisera plutôt un
parser généré automatiquement par Menhir.

Cela pour vous permettre d'avancer dans votre TP même sans avoir fini le lexer
et l'analyseur syntaxique.
parent 64c25294
No related branches found
No related tags found
No related merge requests found
all: main.native all: main.native
include opts.mk
.PHONY: main.native .PHONY: main.native
src/config.ml: configure src/config.ml: configure opts.mk
./configure ./configure ${CONF_OPTS}
main.native: src/config.ml main.native: src/config.ml
make -C alpaga make -C alpaga
......
#!/bin/bash #!/bin/bash
LEX_HAND="true"
ALPAGA_PARSER="true"
while [[ $# -gt 0 ]]
do
key="$1"
case $key in
-l|--lex-hand)
LEX_HAND="true"
shift
;;
-L|--lex-ocamllex)
LEX_HAND="false"
shift
;;
-a|--alpaga-parser)
ALPAGA_PARSER="true"
shift
;;
-m|--menhir-parser)
ALPAGA_PARSER="false"
shift
;;
*) # unknown option
shift # past argument
;;
esac
done
RUNTIME=$(pwd)/runtime RUNTIME=$(pwd)/runtime
RVAS=$(which -a riscv64-unknown-linux-gnu-as riscv64-unknown-elf-as riscv64-linux-gnu-as 2>/dev/null | grep -v "not found" | head -n1) RVAS=$(which -a riscv64-unknown-linux-gnu-as riscv64-unknown-elf-as riscv64-linux-gnu-as 2>/dev/null | grep -v "not found" | head -n1)
...@@ -14,4 +44,5 @@ echo "let qemu64 = \"${QEMU64}\"" >> src/config.ml ...@@ -14,4 +44,5 @@ echo "let qemu64 = \"${QEMU64}\"" >> src/config.ml
echo "let rv_as = \"${RVAS}\"" >> src/config.ml echo "let rv_as = \"${RVAS}\"" >> src/config.ml
echo "let rv_ld = \"${RVLD}\"" >> src/config.ml echo "let rv_ld = \"${RVLD}\"" >> src/config.ml
echo "let os_target = \"linux\"" >> src/config.ml echo "let os_target = \"linux\"" >> src/config.ml
echo "let lex_hand = $LEX_HAND" >> src/config.ml
echo "let alpaga_parser = $ALPAGA_PARSER" >> src/config.ml
opts.mk 0 → 100644
CONF_OPTS=
# Use handwritten lexer
CONF_OPTS+=-l
# Use ocamllex lexer
#CONF_OPTS+=-L
# Use alpaga parser
CONF_OPTS+=-a
# Use menhir parser
#CONF_OPTS+=-m
...@@ -7,4 +7,5 @@ PKG logs.lwt ...@@ -7,4 +7,5 @@ PKG logs.lwt
PKG batteries PKG batteries
PKG yojson PKG yojson
PKG websocket PKG websocket
PKG websocket-lwt-unix PKG websocket-lwt-unix
\ No newline at end of file PKG menhirLib
\ No newline at end of file
include ../opts.mk
SRC=archi.ml ast.ml builtins.ml config.ml cfg_constprop.ml cfg_dead_assign.ml \ SRC=archi.ml ast.ml builtins.ml config.ml cfg_constprop.ml cfg_dead_assign.ml \
cfg.ml cfg_print.ml cfg_gen.ml cfg_liveness.ml cfg_nop_elim.ml cfg_run.ml \ cfg.ml cfg_print.ml cfg_gen.ml cfg_liveness.ml cfg_nop_elim.ml cfg_run.ml \
elang.ml elang_print.ml elang_gen.ml elang_run.ml e_regexp.ml \ elang.ml elang_print.ml elang_gen.ml elang_run.ml e_regexp.ml \
...@@ -22,8 +24,8 @@ test_lexer: archi.ml config.ml e_regexp.ml lexer_generator.ml symbols.ml test_le ...@@ -22,8 +24,8 @@ test_lexer: archi.ml config.ml e_regexp.ml lexer_generator.ml symbols.ml test_le
dot -Tsvg /tmp/dfa.dot -o /tmp/dfa.svg dot -Tsvg /tmp/dfa.dot -o /tmp/dfa.svg
dot -Tsvg /tmp/nfa.dot -o /tmp/nfa.svg dot -Tsvg /tmp/nfa.dot -o /tmp/nfa.svg
config.ml: ../configure config.ml: ../configure ../opts.mk
cd .. && ./configure cd .. && ./configure ${CONF_OPTS}
clean: clean:
rm -rf _build rm -rf _build
......
true: debug true: debug
true: bin_annot true: bin_annot
<runtime>: -traverse <runtime>: -traverse
<*>: package(lwt lwt.unix logs logs.lwt batteries yojson websocket websocket-lwt-unix) <*>: package(lwt lwt.unix logs logs.lwt batteries yojson websocket websocket-lwt-unix menhirLib)
true: thread true: thread
<tykernel>: -traverse true: use_menhir, explain, table
\ No newline at end of file <tykernel>: -traverse
{
open Symbols
exception SyntaxError of string
exception Eof
}
let digit = ['0'-'9']
let letter = ['a'-'z' 'A'-'Z']
let id = letter (digit|letter|'_')*
rule token = parse
| [' ' '\t' '\r'] { token lexbuf }
| '\n' { Lexing.new_line lexbuf; token lexbuf }
| ("0x" ['0'-'9''a'-'f''A'-'F']+) as i { SYM_INTEGER (int_of_string i) }
| ['0'-'9']+ as i { SYM_INTEGER (int_of_string i) }
| '+' { SYM_PLUS }
| "->" { SYM_ARROW }
| "&&" { SYM_BOOL_AND }
| "||" { SYM_BOOL_OR }
| "!" { SYM_BOOL_NOT }
| '&' { SYM_BITWISE_AND }
| '~' { SYM_BIT_NOT }
| '-' { SYM_MINUS }
| '*' { SYM_ASTERISK }
| "//" { single_line_comment lexbuf }
| "/*" { multi_line_comment lexbuf }
| '/' { SYM_DIV }
| '.' { SYM_POINT }
| "void" { SYM_VOID }
| "char" { SYM_CHAR }
| "int" { SYM_INT }
| "print" { SYM_PRINT }
| "struct" { SYM_STRUCT }
| "if" { SYM_IF }
| "else" { SYM_ELSE }
| "alloc" { SYM_ALLOC }
| "==" { SYM_EQUALITY }
| "=" { SYM_ASSIGN }
| "while" { SYM_WHILE }
| "return" { SYM_RETURN }
| id as s { SYM_IDENTIFIER s}
| '{' { SYM_LBRACE }
| '}' { SYM_RBRACE }
| '(' { SYM_LPARENTHESIS }
| ')' { SYM_RPARENTHESIS }
| '[' { SYM_LBRACKET }
| ']' { SYM_RBRACKET }
| ';' { SYM_SEMICOLON }
| ',' { SYM_COMMA }
| ">=" { SYM_GEQ }
| "<=" { SYM_LEQ }
| '>' { SYM_GT }
| '<' { SYM_LT }
| "!=" { SYM_NOTEQ }
| '^' { SYM_XOR }
| '%' { SYM_MOD }
| '\'' { parse_char lexbuf }
| '"' { read_string (Buffer.create 17) lexbuf }
| eof { SYM_EOF }
| _ as x { failwith (Printf.sprintf "unexpected char '%c' at %s \n" x (string_of_position (Lexing.lexeme_start_p lexbuf)))}
and parse_char = parse
| "\\n'" { SYM_CHARACTER '\n' }
| "\\0'" { SYM_CHARACTER (char_of_int 0) }
| "\\r'" { SYM_CHARACTER '\r' }
| "\\t'" { SYM_CHARACTER '\t' }
| _ as c '\'' { SYM_CHARACTER c}
| _ as x { failwith (Printf.sprintf "unexpected char literal '%c'\n" x) }
and read_string buf =
parse
| '"' { SYM_STRING (Buffer.contents buf) }
| '\\' '/' { Buffer.add_char buf '/'; read_string buf lexbuf }
| '\\' '\\' { Buffer.add_char buf '\\'; read_string buf lexbuf }
| '\\' 'b' { Buffer.add_char buf '\b'; read_string buf lexbuf }
| '\\' 'f' { Buffer.add_char buf '\012'; read_string buf lexbuf }
| '\\' 'n' { Buffer.add_char buf '\n'; read_string buf lexbuf }
| '\\' 'r' { Buffer.add_char buf '\r'; read_string buf lexbuf }
| '\\' 't' { Buffer.add_char buf '\t'; read_string buf lexbuf }
| [^ '"' '\\']+
{ Buffer.add_string buf (Lexing.lexeme lexbuf);
read_string buf lexbuf
}
| _ { raise (SyntaxError ("Illegal string character: " ^ Lexing.lexeme lexbuf)) }
| eof { raise (SyntaxError ("String is not terminated")) }
and single_line_comment = parse
| ['\n' '\r'] { Lexing.new_line lexbuf; token lexbuf }
| _ { single_line_comment lexbuf }
and multi_line_comment = parse
| "*/" { token lexbuf }
| '\n' { Lexing.new_line lexbuf; multi_line_comment lexbuf }
| _ {multi_line_comment lexbuf}
...@@ -91,6 +91,8 @@ let speclist = ...@@ -91,6 +91,8 @@ let speclist =
("-nostart", Arg.Set nostart, "Don't output _start code."); ("-nostart", Arg.Set nostart, "Don't output _start code.");
("-nostats", Arg.Set nostats, "Don't output stats."); ("-nostats", Arg.Set nostats, "Don't output stats.");
("-nomul", Arg.Unit (fun _ -> has_mul := false), "Target architecture without mul instruction."); ("-nomul", Arg.Unit (fun _ -> has_mul := false), "Target architecture without mul instruction.");
("-lex-hand", Arg.Unit (fun _ -> Options.handwritten_lexer := true), "Use handwritten lexer generator");
("-lex-auto", Arg.Unit (fun _ -> Options.handwritten_lexer := false), "Use OCamlLex lexer");
("-linux", Arg.Unit (fun _ -> target := Linux), "emit linux syscalls"); ("-linux", Arg.Unit (fun _ -> target := Linux), "emit linux syscalls");
("-xv6", Arg.Unit (fun _ -> target := Xv6), "emit xv6 syscalls"); ("-xv6", Arg.Unit (fun _ -> target := Xv6), "emit xv6 syscalls");
("--", Arg.Rest (fun p -> params := int_of_string p::!params), "Run parameters.") ("--", Arg.Rest (fun p -> params := int_of_string p::!params), "Run parameters.")
......
...@@ -37,3 +37,5 @@ let no_linear_dse = ref false ...@@ -37,3 +37,5 @@ let no_linear_dse = ref false
let alloc_order_st = ref true let alloc_order_st = ref true
let naive_regalloc = ref true let naive_regalloc = ref true
let rig_dump : string option ref = ref None let rig_dump : string option ref = ref None
let handwritten_lexer = ref Config.lex_hand
let alpaga_parser = ref Config.alpaga_parser
open Yaccparser
open Generated_parser open Generated_parser
open Report open Report
open Utils open Utils
open Options open Options
open Ast open Ast
open Symbols
let parse = parse_S let parse = parse_S
let to_yacc_token = function
| SYM_EOF -> Yaccparser.SYM_EOF
| SYM_IDENTIFIER(s) -> Yaccparser.SYM_IDENTIFIER s
| SYM_INTEGER(i) -> Yaccparser.SYM_INTEGER i
| SYM_VOID -> Yaccparser.SYM_VOID
| SYM_CHAR -> Yaccparser.SYM_CHAR
| SYM_INT -> Yaccparser.SYM_INT
| SYM_STRUCT -> Yaccparser.SYM_STRUCT
| SYM_SEMICOLON -> Yaccparser.SYM_SEMICOLON
| SYM_POINT -> Yaccparser.SYM_POINT
| SYM_IF -> Yaccparser.SYM_IF
| SYM_ELSE -> Yaccparser.SYM_ELSE
| SYM_PLUS -> Yaccparser.SYM_PLUS
| SYM_MINUS -> Yaccparser.SYM_MINUS
| SYM_ASTERISK -> Yaccparser.SYM_ASTERISK
| SYM_DIV -> Yaccparser.SYM_DIV
| SYM_EQUALITY -> Yaccparser.SYM_EQUALITY
| SYM_ASSIGN -> Yaccparser.SYM_ASSIGN
| SYM_LPARENTHESIS -> Yaccparser.SYM_LPARENTHESIS
| SYM_RPARENTHESIS -> Yaccparser.SYM_RPARENTHESIS
| SYM_LBRACE -> Yaccparser.SYM_LBRACE
| SYM_RBRACE -> Yaccparser.SYM_RBRACE
| SYM_WHILE -> Yaccparser.SYM_WHILE
| SYM_RETURN -> Yaccparser.SYM_RETURN
| SYM_COMMA -> Yaccparser.SYM_COMMA
| SYM_LT -> Yaccparser.SYM_LT
| SYM_LEQ -> Yaccparser.SYM_LEQ
| SYM_GT -> Yaccparser.SYM_GT
| SYM_GEQ -> Yaccparser.SYM_GEQ
| SYM_NOTEQ -> Yaccparser.SYM_NOTEQ
| SYM_MOD -> Yaccparser.SYM_MOD
| SYM_BOOL_NOT -> Yaccparser.SYM_BOOL_NOT
| SYM_BOOL_AND -> Yaccparser.SYM_BOOL_AND
| SYM_BOOL_OR -> Yaccparser.SYM_BOOL_OR
| SYM_ARROW -> Yaccparser.SYM_ARROW
| SYM_BITWISE_OR -> Yaccparser.SYM_BITWISE_OR
| SYM_BITWISE_AND -> Yaccparser.SYM_BITWISE_AND
| SYM_BIT_NOT -> Yaccparser.SYM_BIT_NOT
| SYM_XOR -> Yaccparser.SYM_XOR
| SYM_CHARACTER(c) -> Yaccparser.SYM_CHARACTER c
| SYM_STRING(s) -> Yaccparser.SYM_STRING s
| SYM_LBRACKET -> Yaccparser.SYM_LBRACKET
| SYM_RBRACKET -> Yaccparser.SYM_RBRACKET
| SYM_ALLOC -> Yaccparser.SYM_ALLOC
| SYM_PRINT -> Yaccparser.SYM_PRINT
| SYM_EXTERN -> Yaccparser.SYM_EXTERN
| SYM_INCLUDE(s) -> Yaccparser.SYM_INCLUDE s
| SYM_AMPERSAND -> Yaccparser.SYM_AMPERSAND
let advance (l: ('a list * 'a list)) : ('a * ('a list * 'a list)) option =
let lbef, laft = l in
match laft with
[] -> None
| a::r -> Some (a, (a::lbef, r))
let back (l: ('a list * 'a list)) : ('a list * 'a list) option =
let lbef, laft = l in
match lbef with
[] -> None
| a::r -> Some ((r, a::laft))
let menhir_parser (toks: (Symbols.token * Lexing.position option) list) () =
let mtoks = ref ([], toks) in
let get_tok () =
match advance !mtoks with
| None -> (Yaccparser.SYM_EOF, Lexing.dummy_pos, Lexing.dummy_pos)
| Some ((t, p), l) ->
mtoks := l;
(to_yacc_token t, Lexing.dummy_pos, Lexing.dummy_pos)
in
let mparser = MenhirLib.Convert.Simplified.traditional2revised Yaccparser.main in
match mparser get_tok with
| ast -> OK (ast, [])
| exception Yaccparser.Error ->
match back !mtoks with
| None -> Error (Printf.sprintf "Parser error while reading '???'\n")
| Some (lbef, laft) ->
Error (Printf.sprintf "Parser error while reading '%s'\n"
(String.concat " " (List.map (fun (t, _) -> string_of_symbol t) (take 20 laft)))
)
let parser toks () =
if !Options.alpaga_parser
then parse toks ()
else menhir_parser toks ()
let pass_parse tokens = let pass_parse tokens =
match parse tokens () with match parser tokens () with
| Error msg -> record_compile_result ~error:(Some msg) "Parsing"; Error msg | Error msg -> record_compile_result ~error:(Some msg) "Parsing"; Error msg
| OK (ast, tokens) -> | OK (ast, tokens) ->
record_compile_result "Parsing"; record_compile_result "Parsing";
......
...@@ -5,10 +5,31 @@ open Utils ...@@ -5,10 +5,31 @@ open Utils
open Options open Options
open Symbols open Symbols
let tokenize file = let tokenize_handwritten file =
Printf.printf "Handwritten lexer\n";
Lexer_generator.tokenize_file file >>= fun tokens -> Lexer_generator.tokenize_file file >>= fun tokens ->
OK (List.map (fun tok -> (tok, None)) tokens) OK (List.map (fun tok -> (tok, None)) tokens)
let tokenize_ocamllex file =
Printf.printf "OCamlLex lexer\n";
let ic = open_in file in
let lexbuf = Lexing.from_channel ic in
lexbuf.Lexing.lex_curr_p <- { lexbuf.Lexing.lex_curr_p with pos_fname = file };
let rec get_symbols () =
let s = Lexer.token lexbuf in
let ss = (s, Lexing.lexeme_start_p lexbuf) in
if s = SYM_EOF
then [ss]
else ss :: get_symbols ()
in
let l = get_symbols () in
close_in ic;
OK (List.map (fun (tok, pos) -> (tok, Some pos)) l)
let tokenize file =
if !Options.handwritten_lexer
then tokenize_handwritten file
else tokenize_ocamllex file
let pass_tokenize file = let pass_tokenize file =
tokenize file >>* (fun msg -> tokenize file >>* (fun msg ->
......
%{
(* open Symbols *)
open Ast
%}
%token SYM_EOF
%token SYM_VOID SYM_CHAR SYM_INT SYM_STRUCT SYM_POINT SYM_BOOL_NOT SYM_BOOL_AND SYM_BOOL_OR
%token SYM_ARROW SYM_BITWISE_OR SYM_BITWISE_AND SYM_BIT_NOT SYM_XOR SYM_LBRACKET SYM_RBRACKET
%token SYM_ALLOC SYM_EXTERN SYM_AMPERSAND
%token<char> SYM_CHARACTER
%token<string> SYM_STRING
%token<string> SYM_INCLUDE
%token<string> SYM_IDENTIFIER
%token<int> SYM_INTEGER
%token SYM_PLUS SYM_MINUS SYM_ASTERISK SYM_DIV SYM_MOD
%token SYM_LPARENTHESIS SYM_RPARENTHESIS SYM_LBRACE SYM_RBRACE
%token SYM_ASSIGN SYM_SEMICOLON SYM_RETURN SYM_IF SYM_WHILE SYM_ELSE SYM_COMMA SYM_PRINT
%token SYM_EQUALITY SYM_NOTEQ SYM_LT SYM_LEQ SYM_GT SYM_GEQ
%left SYM_EQUALITY SYM_NOTEQ
%left SYM_GEQ SYM_LEQ SYM_LT SYM_GT
%left SYM_PLUS SYM_MINUS
%left SYM_ASTERISK SYM_DIV SYM_MOD
%nonassoc UMINUS
%start main
%type <Ast.tree> main
%%
main:
| fundefs SYM_EOF { Node(Tlistglobdef, $1) }
;
fundefs:
| fundef fundefs { $1 :: $2 }
| { [] }
;
fundef:
identifier SYM_LPARENTHESIS lparams SYM_RPARENTHESIS instr {
let fargs = $3 in
let instr = $5 in
Node (Tfundef, [$1; Node (Tfunargs, fargs) ; instr ])
}
;
identifier:
SYM_IDENTIFIER { StringLeaf ($1) }
;
integer : SYM_INTEGER { IntLeaf ($1) };
lparams :
identifier rest_params { Node (Targ, [$1]) :: $2 }
| { [] };
rest_params :
SYM_COMMA identifier rest_params {
Node (Targ, [$2]) :: $3
}
| { [] };
instrs :
| instr instrs { $1 :: $2 }
| { [] };
linstrs :
SYM_LBRACE instrs SYM_RBRACE { Node (Tblock, $2) };
instr :
identifier SYM_ASSIGN expr SYM_SEMICOLON {
Node (Tassign, [Node (Tassignvar,[$1; $3])])
}
| SYM_IF SYM_LPARENTHESIS expr SYM_RPARENTHESIS linstrs ntelse { Node (Tif, [$3; $5; $6]) }
| SYM_WHILE SYM_LPARENTHESIS expr SYM_RPARENTHESIS instr { Node( Twhile, [$3; $5]) }
| SYM_RETURN expr SYM_SEMICOLON { Node(Treturn, [$2]) }
| SYM_PRINT expr SYM_SEMICOLON { Node(Tprint, [$2]) }
| linstrs { $1 };
ntelse :
SYM_ELSE linstrs { $2 }
| { Node(Tblock, []) };
expr :
| expr SYM_EQUALITY expr { Node (Tceq, [$1; $3]) }
| expr SYM_NOTEQ expr { Node (Tne, [$1; $3]) }
| expr SYM_PLUS expr { Node (Tadd, [$1; $3]) }
| expr SYM_MINUS expr { Node (Tsub, [$1; $3]) }
| expr SYM_ASTERISK expr { Node (Tmul, [$1; $3]) }
| expr SYM_DIV expr { Node (Tdiv, [$1; $3]) }
| expr SYM_MOD expr { Node (Tmod, [$1; $3]) }
| expr SYM_LT expr { Node (Tclt, [$1; $3]) }
| expr SYM_GT expr { Node (Tcgt, [$1; $3]) }
| expr SYM_LEQ expr { Node (Tcle, [$1; $3]) }
| expr SYM_GEQ expr { Node (Tcge, [$1; $3]) }
| SYM_MINUS expr %prec UMINUS { Node (Tneg, [$2])}
| integer { Node(Tint, [$1])}
| identifier { $1 }
| SYM_LPARENTHESIS expr SYM_RPARENTHESIS { $2 }
;
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment