annotate ppc/lexer.mll @ 0:bfdcc3820b32

Basis
author Mike Spivey <mike@cs.ox.ac.uk>
date Thu, 05 Oct 2017 08:04:15 +0100
parents
children
rev   line source
0
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
1 (* ppc/lexer.mll *)
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
2 (* Copyright (c) 2017 J. M. Spivey *)
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
3
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
4 {
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
5 open Print
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
6 open Keiko
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
7 open Dict
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
8 open Parser
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
9 open Lexing
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
10 open Source
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
11
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
12 let lineno = ref 1 (* Current line in input file *)
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
13
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
14 let symtable =
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
15 Util.make_hash 100
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
16 [ ("array", ARRAY); ("begin", BEGIN);
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
17 ("const", CONST); ("do", DO); ("if", IF ); ("else", ELSE);
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
18 ("end", END); ("of", OF); ("proc", PROC); ("record", RECORD);
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
19 ("return", RETURN); ("then", THEN); ("to", TO);
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
20 ("type", TYPE); ("var", VAR); ("while", WHILE);
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
21 ("pointer", POINTER); ("nil", NIL);
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
22 ("repeat", REPEAT); ("until", UNTIL); ("for", FOR);
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
23 ("elsif", ELSIF); ("case", CASE);
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
24 ("and", MULOP And); ("div", MULOP Div); ("or", ADDOP Or);
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
25 ("not", NOT); ("mod", MULOP Mod) ]
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
26
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
27 let lookup s =
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
28 try Hashtbl.find symtable s with
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
29 Not_found ->
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
30 let t = IDENT (intern s) in
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
31 Hashtbl.add symtable s t; t
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
32
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
33 (* |strtbl| -- table of string constants from source program *)
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
34 let strtbl = ref []
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
35
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
36 (* |get_string| -- convert a string constant *)
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
37 let get_string s =
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
38 let lab = gensym () in
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
39 let n = String.length s in
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
40 let s' = Bytes.create n
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
41 and i = ref 0 and j = ref 0 in
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
42 while !i <> n do
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
43 let c = s.[!i] in
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
44 Bytes.set s' !j c;
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
45 if c = '"' then incr i;
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
46 incr i; incr j
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
47 done;
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
48 strtbl := (lab, Bytes.sub_string s' 0 !j)::!strtbl;
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
49 STRING (lab, !j)
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
50
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
51 (* |string_table| -- return contents of string table *)
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
52 let string_table () = List.rev !strtbl
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
53
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
54 let next_line lexbuf =
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
55 incr lineno; Source.note_line !lineno lexbuf
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
56 }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
57
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
58 (* This lexer uses two conventions that are supported by recent versions
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
59 of ocamllex: named regular expressions, and variables that are bound to
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
60 substrings of the token. *)
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
61
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
62 let letter = ['A'-'Z''a'-'z']
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
63
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
64 let digit = ['0'-'9']
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
65
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
66 let q = '\''
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
67 let qq = '"'
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
68 let notq = [^'\'']
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
69 let notqq = [^'"']
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
70
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
71 rule token = parse
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
72 letter (letter | digit | '_')* as s
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
73 { lookup s }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
74 | digit+ as s { NUMBER (int_of_string s) }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
75 | q (notq as c) q { CHAR c }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
76 | q q q q { CHAR '\'' }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
77 | qq ((notqq | qq qq)* as s) qq { get_string s }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
78 | ";" { SEMI }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
79 | "." { DOT }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
80 | "|" { VBAR }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
81 | ":" { COLON }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
82 | "^" { ARROW }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
83 | "(" { LPAR }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
84 | ")" { RPAR }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
85 | "," { COMMA }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
86 | "[" { SUB }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
87 | "]" { BUS }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
88 | "=" { EQUAL }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
89 | "+" { ADDOP Plus }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
90 | "-" { MINUS }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
91 | "*" { MULOP Times }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
92 | "<" { RELOP Lt }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
93 | ">" { RELOP Gt }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
94 | "<>" { RELOP Neq }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
95 | "<=" { RELOP Leq }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
96 | ">=" { RELOP Geq }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
97 | ":=" { ASSIGN }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
98 | [' ''\t']+ { token lexbuf }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
99 | "(*" { comment lexbuf; token lexbuf }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
100 | "\n" { next_line lexbuf; token lexbuf }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
101 | _ { BADTOK }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
102 | eof { err_message "unexpected end of file"
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
103 [] !lineno;
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
104 exit 1 }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
105
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
106 and comment = parse
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
107 "*)" { () }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
108 | "\n" { next_line lexbuf; comment lexbuf }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
109 | _ { comment lexbuf }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
110 | eof { err_message "end of file in comment"
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
111 [] !lineno;
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
112 exit 1 }
Mike Spivey <mike@cs.ox.ac.uk>
parents:
diff changeset
113