comparison lab4/lexer.mll @ 0:bfdcc3820b32

Basis
author Mike Spivey <mike@cs.ox.ac.uk>
date Thu, 05 Oct 2017 08:04:15 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:bfdcc3820b32
1 (* lab4/lexer.mll *)
2 (* Copyright (c) 2017 J. M. Spivey *)
3
4 {
5 open Print
6 open Optree
7 open Dict
8 open Parser
9 open Lexing
10 open Source
11
12 let lineno = ref 1 (* Current line in input file *)
13
14 let symtable =
15 Util.make_hash 100
16 [ ("array", ARRAY); ("begin", BEGIN);
17 ("const", CONST); ("do", DO); ("if", IF ); ("else", ELSE);
18 ("end", END); ("of", OF); ("proc", PROC); ("record", RECORD);
19 ("return", RETURN); ("then", THEN); ("to", TO);
20 ("type", TYPE); ("var", VAR); ("while", WHILE);
21 ("pointer", POINTER); ("nil", NIL);
22 ("repeat", REPEAT); ("until", UNTIL); ("for", FOR);
23 ("elsif", ELSIF); ("case", CASE);
24 ("and", MULOP And); ("div", MULOP Div); ("or", ADDOP Or);
25 ("not", NOT); ("mod", MULOP Mod) ]
26
27 let lookup s =
28 try Hashtbl.find symtable s with
29 Not_found ->
30 let t = IDENT (intern s) in
31 Hashtbl.add symtable s t; t
32
33 (* |strtbl| -- table of string constants from source program *)
34 let strtbl = ref []
35
36 (* |get_string| -- convert a string constant *)
37 let get_string s =
38 let lab = gensym () in
39 let n = String.length s in
40 let s' = Bytes.create n
41 and i = ref 0 and j = ref 0 in
42 while !i <> n do
43 let c = s.[!i] in
44 Bytes.set s' !j c;
45 if c = '"' then incr i;
46 incr i; incr j
47 done;
48 strtbl := (lab, Bytes.sub_string s' 0 !j)::!strtbl;
49 STRING (lab, !j)
50
51 (* |string_table| -- return contents of string table *)
52 let string_table () = List.rev !strtbl
53
54 let next_line lexbuf =
55 incr lineno; Source.note_line !lineno lexbuf
56 }
57
58 (* This lexer uses two conventions that are supported by recent versions
59 of ocamllex: named regular expressions, and variables that are bound to
60 substrings of the token. *)
61
62 let letter = ['A'-'Z''a'-'z']
63
64 let digit = ['0'-'9']
65
66 let q = '\''
67 let qq = '"'
68 let notq = [^'\'']
69 let notqq = [^'"']
70
71 rule token = parse
72 letter (letter | digit | '_')* as s
73 { lookup s }
74 | digit+ as s { NUMBER (int_of_string s) }
75 | q (notq as c) q { CHAR c }
76 | q q q q { CHAR '\'' }
77 | qq ((notqq | qq qq)* as s) qq { get_string s }
78 | ";" { SEMI }
79 | "." { DOT }
80 | "|" { VBAR }
81 | ":" { COLON }
82 | "^" { ARROW }
83 | "(" { LPAR }
84 | ")" { RPAR }
85 | "," { COMMA }
86 | "[" { SUB }
87 | "]" { BUS }
88 | "=" { EQUAL }
89 | "+" { ADDOP Plus }
90 | "-" { MINUS }
91 | "*" { MULOP Times }
92 | "<" { RELOP Lt }
93 | ">" { RELOP Gt }
94 | "<>" { RELOP Neq }
95 | "<=" { RELOP Leq }
96 | ">=" { RELOP Geq }
97 | ":=" { ASSIGN }
98 | [' ''\t']+ { token lexbuf }
99 | "(*" { comment lexbuf; token lexbuf }
100 | "\n" { next_line lexbuf; token lexbuf }
101 | _ { BADTOK }
102 | eof { err_message "unexpected end of file"
103 [] !lineno;
104 exit 1 }
105
106 and comment = parse
107 "*)" { () }
108 | "\n" { next_line lexbuf; comment lexbuf }
109 | _ { comment lexbuf }
110 | eof { err_message "end of file in comment"
111 [] !lineno;
112 exit 1 }
113