annotate lab1/lexer.mll @ 0:bfdcc3820b32

Basis
author Mike Spivey <mike@cs.ox.ac.uk>
date Thu, 05 Oct 2017 08:04:15 +0100
parents
children
rev   line source
mike@0 1 (* lab1/lexer.mll *)
mike@0 2 (* Copyright (c) 2017 J. M. Spivey *)
mike@0 3
mike@0 4 {
mike@0 5 open Lexing
mike@0 6 open Tree
mike@0 7 open Keiko
mike@0 8 open Parser
mike@0 9
(* |lineno| -- number of the source line currently being scanned;
   starts at 1 and is incremented by the scanner rules on each newline *)
let lineno : int ref = ref 1
mike@0 12
(* |make_hash| -- build a hash table of initial size |n| holding every
   (key, value) pair of |ps|, inserted in list order *)
let make_hash n ps =
  let table = Hashtbl.create n in
  (* |Hashtbl.add| is used, so a later duplicate key hides an earlier one *)
  let insert (key, value) = Hashtbl.add table key value in
  List.iter insert ps;
  table
mike@0 18
(* |kwtable| -- maps each reserved word to the token it is scanned as.
   Word operators map to operator tokens, and |true| / |false| map to
   the numeric constants 1 and 0. *)
let kwtable =
  let keywords =
    [ ("begin", BEGIN);
      ("do", DO);
      ("if", IF);
      ("else", ELSE);
      ("end", END);
      ("then", THEN);
      ("while", WHILE);
      ("print", PRINT);
      ("newline", NEWLINE);
      ("and", MULOP And);
      ("div", MULOP Div);
      ("or", ADDOP Or);
      ("not", MONOP Not);
      ("mod", MULOP Mod);
      ("true", NUMBER 1);
      ("false", NUMBER 0) ] in
  make_hash 64 keywords
mike@0 27
(* |idtable| -- set of every identifier scanned so far, represented as a
   hash table from the identifier's spelling to unit *)
let idtable : (string, unit) Hashtbl.t = Hashtbl.create 64
mike@0 30
(* |lookup| -- classify the word |s|: return its keyword token if it is
   in |kwtable|; otherwise record it in |idtable| and return |IDENT s| *)
let lookup s =
  match Hashtbl.find kwtable s with
  | keyword -> keyword
  | exception Not_found ->
      (* |replace| keeps a single entry however often |s| is seen *)
      Hashtbl.replace idtable s ();
      IDENT s
mike@0 37
(* |get_vars| -- return the identifiers recorded in |idtable| as a list,
   in the order produced by folding over the table *)
let get_vars () =
  let collect name () names = name :: names in
  Hashtbl.fold collect idtable []
mike@0 41 }
mike@0 42
(* |token| -- main scanner rule: return the next token from |lexbuf|.
   Blanks, tabs, newlines and comments are skipped by re-entering the
   rule; each newline also advances |lineno| and is reported to
   |Source.note_line|.  Any character that starts no token yields
   |BADTOK|, and end of input yields |EOF|. *)
rule token =
  parse
      ['A'-'Z''a'-'z']['A'-'Z''a'-'z''0'-'9''_']* as s
                        { lookup s }
    | ['0'-'9']+ as s   { (* |int_of_string| raises |Failure| when the
                             literal does not fit in an |int|; treat
                             such a literal as a bad token instead of
                             letting the exception abort the scanner *)
                          try NUMBER (int_of_string s)
                          with Failure _ -> BADTOK }
    | ";"               { SEMI }
    | "."               { DOT }
    | ":"               { COLON }
    | "("               { LPAR }
    | ")"               { RPAR }
    | ","               { COMMA }
    | "="               { RELOP Eq }
    | "+"               { ADDOP Plus }
    | "-"               { MINUS }
    | "*"               { MULOP Times }
    | "<"               { RELOP Lt }
    | ">"               { RELOP Gt }
    | "<>"              { RELOP Neq }
    | "<="              { RELOP Leq }
    | ">="              { RELOP Geq }
    | ":="              { ASSIGN }
    | [' ''\t']+        { token lexbuf }
    | "(*"              { (* skip the comment body, then carry on *)
                          comment lexbuf; token lexbuf }
    | "\n"              { incr lineno; Source.note_line !lineno lexbuf;
                          token lexbuf }
    | _                 { BADTOK }
    | eof               { EOF }
mike@0 70
(* |comment| -- skip the body of a comment whose opening bracket has
   already been consumed.  Returns unit at the first closing bracket, so
   comments do not nest; it also returns unit at end of input, so an
   unterminated comment is not reported here.  Newlines inside the
   comment still advance |lineno| and are passed to |Source.note_line|. *)
and comment =
  parse
      eof               { () }
    | "*)"              { () }
    | '\n'              { incr lineno;
                          Source.note_line !lineno lexbuf;
                          comment lexbuf }
    | _                 { comment lexbuf }