annotate lab4/lexer.mll @ 1:b5139af1a420 tip basis

Fixed permissions on compile scripts
author Mike Spivey <mike@cs.ox.ac.uk>
date Fri, 13 Oct 2017 17:27:58 +0100
parents bfdcc3820b32
children
rev   line source
mike@0 1 (* lab4/lexer.mll *)
mike@0 2 (* Copyright (c) 2017 J. M. Spivey *)
mike@0 3
mike@0 4 {
mike@0 5 open Print
mike@0 6 open Optree
mike@0 7 open Dict
mike@0 8 open Parser
mike@0 9 open Lexing
mike@0 10 open Source
mike@0 11
mike@0 12 let lineno = ref 1 (* Current line in input file *)
mike@0 13
mike@0 14 let symtable =
mike@0 15 Util.make_hash 100
mike@0 16 [ ("array", ARRAY); ("begin", BEGIN);
mike@0 17 ("const", CONST); ("do", DO); ("if", IF ); ("else", ELSE);
mike@0 18 ("end", END); ("of", OF); ("proc", PROC); ("record", RECORD);
mike@0 19 ("return", RETURN); ("then", THEN); ("to", TO);
mike@0 20 ("type", TYPE); ("var", VAR); ("while", WHILE);
mike@0 21 ("pointer", POINTER); ("nil", NIL);
mike@0 22 ("repeat", REPEAT); ("until", UNTIL); ("for", FOR);
mike@0 23 ("elsif", ELSIF); ("case", CASE);
mike@0 24 ("and", MULOP And); ("div", MULOP Div); ("or", ADDOP Or);
mike@0 25 ("not", NOT); ("mod", MULOP Mod) ]
mike@0 26
mike@0 27 let lookup s =
mike@0 28 try Hashtbl.find symtable s with
mike@0 29 Not_found ->
mike@0 30 let t = IDENT (intern s) in
mike@0 31 Hashtbl.add symtable s t; t
mike@0 32
mike@0 33 (* |strtbl| -- table of string constants from source program *)
mike@0 34 let strtbl = ref []
mike@0 35
mike@0 36 (* |get_string| -- convert a string constant *)
mike@0 37 let get_string s =
mike@0 38 let lab = gensym () in
mike@0 39 let n = String.length s in
mike@0 40 let s' = Bytes.create n
mike@0 41 and i = ref 0 and j = ref 0 in
mike@0 42 while !i <> n do
mike@0 43 let c = s.[!i] in
mike@0 44 Bytes.set s' !j c;
mike@0 45 if c = '"' then incr i;
mike@0 46 incr i; incr j
mike@0 47 done;
mike@0 48 strtbl := (lab, Bytes.sub_string s' 0 !j)::!strtbl;
mike@0 49 STRING (lab, !j)
mike@0 50
mike@0 51 (* |string_table| -- return contents of string table *)
mike@0 52 let string_table () = List.rev !strtbl
mike@0 53
mike@0 54 let next_line lexbuf =
mike@0 55 incr lineno; Source.note_line !lineno lexbuf
mike@0 56 }
mike@0 57
mike@0 58 (* This lexer uses two conventions that are supported by recent versions
mike@0 59 of ocamllex: named regular expressions, and variables that are bound to
mike@0 60 substrings of the token. *)
mike@0 61
mike@0 62 let letter = ['A'-'Z''a'-'z']
mike@0 63
mike@0 64 let digit = ['0'-'9']
mike@0 65
mike@0 66 let q = '\''
mike@0 67 let qq = '"'
mike@0 68 let notq = [^'\'']
mike@0 69 let notqq = [^'"']
mike@0 70
mike@0 71 rule token = parse
mike@0 72 letter (letter | digit | '_')* as s
mike@0 73 { lookup s }
mike@0 74 | digit+ as s { NUMBER (int_of_string s) }
mike@0 75 | q (notq as c) q { CHAR c }
mike@0 76 | q q q q { CHAR '\'' }
mike@0 77 | qq ((notqq | qq qq)* as s) qq { get_string s }
mike@0 78 | ";" { SEMI }
mike@0 79 | "." { DOT }
mike@0 80 | "|" { VBAR }
mike@0 81 | ":" { COLON }
mike@0 82 | "^" { ARROW }
mike@0 83 | "(" { LPAR }
mike@0 84 | ")" { RPAR }
mike@0 85 | "," { COMMA }
mike@0 86 | "[" { SUB }
mike@0 87 | "]" { BUS }
mike@0 88 | "=" { EQUAL }
mike@0 89 | "+" { ADDOP Plus }
mike@0 90 | "-" { MINUS }
mike@0 91 | "*" { MULOP Times }
mike@0 92 | "<" { RELOP Lt }
mike@0 93 | ">" { RELOP Gt }
mike@0 94 | "<>" { RELOP Neq }
mike@0 95 | "<=" { RELOP Leq }
mike@0 96 | ">=" { RELOP Geq }
mike@0 97 | ":=" { ASSIGN }
mike@0 98 | [' ''\t']+ { token lexbuf }
mike@0 99 | "(*" { comment lexbuf; token lexbuf }
mike@0 100 | "\n" { next_line lexbuf; token lexbuf }
mike@0 101 | _ { BADTOK }
mike@0 102 | eof { err_message "unexpected end of file"
mike@0 103 [] !lineno;
mike@0 104 exit 1 }
mike@0 105
mike@0 106 and comment = parse
mike@0 107 "*)" { () }
mike@0 108 | "\n" { next_line lexbuf; comment lexbuf }
mike@0 109 | _ { comment lexbuf }
mike@0 110 | eof { err_message "end of file in comment"
mike@0 111 [] !lineno;
mike@0 112 exit 1 }
mike@0 113