annotate keiko/symtab.c @ 0:bfdcc3820b32

Basis
author Mike Spivey <mike@cs.ox.ac.uk>
date Thu, 05 Oct 2017 08:04:15 +0100
parents
children
rev   line source
mike@0 1 /*
mike@0 2 * symtab.c
mike@0 3 *
mike@0 4 * This file is part of the Oxford Oberon-2 compiler
mike@0 5 * Copyright (c) 2006--2016 J. M. Spivey
mike@0 6 * All rights reserved
mike@0 7 *
mike@0 8 * Redistribution and use in source and binary forms, with or without
mike@0 9 * modification, are permitted provided that the following conditions are met:
mike@0 10 *
mike@0 11 * 1. Redistributions of source code must retain the above copyright notice,
mike@0 12 * this list of conditions and the following disclaimer.
mike@0 13 * 2. Redistributions in binary form must reproduce the above copyright notice,
mike@0 14 * this list of conditions and the following disclaimer in the documentation
mike@0 15 * and/or other materials provided with the distribution.
mike@0 16 * 3. The name of the author may not be used to endorse or promote products
mike@0 17 * derived from this software without specific prior written permission.
mike@0 18 *
mike@0 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
mike@0 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
mike@0 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
mike@0 22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
mike@0 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
mike@0 24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
mike@0 25 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
mike@0 26 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
mike@0 27 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
mike@0 28 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
mike@0 29 */
mike@0 30
mike@0 31 #include "oblink.h"
mike@0 32
mike@0 33 /* This module implements two completely independent symbol tables:
mike@0 34 one for global symbols, and another for labels used in branches.
mike@0 35 Global symbols have symbolic names, and are used only in the data
mike@0 36 segment. Labels have names that are integers. */
mike@0 37
mike@0 38
mike@0 39 /* GLOBAL SYMBOLS */
mike@0 40
mike@0 41 struct _symbol {
mike@0 42 const char *s_name; /* Name of the symbol */
mike@0 43 segment s_seg; /* Segment, or UNDEFINED */
mike@0 44 int s_kind; /* Kind of symbol -- X_PROC, etc. */
mike@0 45 int s_value; /* Numeric value */
mike@0 46 unsigned s_index; /* Index as a local label */
mike@0 47 unsigned s_check; /* Checksum for module */
mike@0 48 int s_nlines; /* Line count for module */
mike@0 49 symbol s_next; /* Next in hash chain */
mike@0 50 int s_uchain; /* Start of use chain in data segment */
mike@0 51 char *s_file; /* Source file that uses the symbol */
mike@0 52 };
mike@0 53
mike@0 54 #define HSIZE 1024
mike@0 55
mike@0 56 static symbol stable[HSIZE];
mike@0 57
mike@0 58 static growdecl(dict);
mike@0 59 #define dict growbuf(dict, symbol)
mike@0 60 #define ndict growsize(dict)
mike@0 61
mike@0 62 /* make_symbol -- create a symbol, but don't put it in the hash table */
mike@0 63 symbol make_symbol(const char *name) {
mike@0 64 symbol s =
mike@0 65 (symbol) must_alloc(sizeof(struct _symbol), "symbol table entry");
mike@0 66 s->s_name = must_strdup(name);
mike@0 67 s->s_seg = UNDEFINED;
mike@0 68 s->s_kind = X_NONE;
mike@0 69 s->s_value = -1;
mike@0 70 s->s_next = NULL;
mike@0 71 s->s_index = 0;
mike@0 72 s->s_uchain = -1;
mike@0 73 s->s_check = s->s_nlines = 0;
mike@0 74 s->s_file = NULL;
mike@0 75
mike@0 76 buf_grow(dict);
mike@0 77 dict[ndict++] = s;
mike@0 78 return s;
mike@0 79 }
mike@0 80
mike@0 81 static symbol lookup(const char *name, mybool create) {
mike@0 82 if (dict == NULL)
mike@0 83 buf_init(dict, INIT_SMEM, 1, symbol, "symbol table");
mike@0 84
mike@0 85 unsigned h = 0;
mike@0 86 for (const char *p = name; *p != '\0'; p++) h = 5 * h + *p;
mike@0 87 h %= HSIZE;
mike@0 88
mike@0 89 symbol s;
mike@0 90 for (s = stable[h]; s != NULL; s = s->s_next)
mike@0 91 if (strcmp(name, s->s_name) == 0)
mike@0 92 return s;
mike@0 93
mike@0 94 if (create) {
mike@0 95 s = make_symbol(name);
mike@0 96 s->s_next = stable[h];
mike@0 97 stable[h] = s;
mike@0 98 }
mike@0 99
mike@0 100 return s;
mike@0 101 }
mike@0 102
mike@0 103 /* find_symbol -- find a global symbol, or create one if necessary */
mike@0 104 symbol find_symbol(const char *name) {
mike@0 105 return lookup(name, TRUE);
mike@0 106 }
mike@0 107
mike@0 108 /* known -- test if a symbol has been entered */
mike@0 109 mybool known(const char *name) {
mike@0 110 symbol s = lookup(name, FALSE);
mike@0 111 return (s != NULL);
mike@0 112 }
mike@0 113
mike@0 114 const char *sym_name(symbol s) {
mike@0 115 return s->s_name;
mike@0 116 }
mike@0 117
mike@0 118 /* sym_value -- compute value of global symbol */
mike@0 119 int sym_value(symbol s) {
mike@0 120 if (s->s_file == NULL) s->s_file = err_file;
mike@0 121
mike@0 122 if (s->s_seg == UNDEFINED) {
mike@0 123 err_file = s->s_file;
mike@0 124 error("undefined symbol %s", s->s_name);
mike@0 125 s->s_seg = ABS;
mike@0 126 }
mike@0 127
mike@0 128 return s->s_value;
mike@0 129 }
mike@0 130
mike@0 131 #ifdef DEBUG
mike@0 132 static const char *seg_name[] = {
mike@0 133 "abs", "data", "bss", "code", "undefined"
mike@0 134 };
mike@0 135 #endif
mike@0 136
mike@0 137 /* def_global -- set value of a global symbol */
mike@0 138 void def_global(symbol s, segment seg, int off, int kind) {
mike@0 139 if (s->s_seg != UNDEFINED)
mike@0 140 error("multiply defined symbol %s", s->s_name);
mike@0 141
mike@0 142 s->s_seg = seg;
mike@0 143 s->s_value = off;
mike@0 144 s->s_kind = kind;
mike@0 145
mike@0 146 #ifdef DEBUG
mike@0 147 if (dflag)
mike@0 148 fprintf(stderr, "Symbol %s = %d(%s)\n",
mike@0 149 s->s_name, s->s_value, seg_name[s->s_seg]);
mike@0 150 #endif
mike@0 151 }
mike@0 152
mike@0 153 /* Uses of globals are linked in chains, so the values can be patched at
mike@0 154 the last minute before the data segment is output. Because the buffer for
mike@0 155 the data segment may grow and be relocated, we must store the links as
mike@0 156 offsets from the start of the buffer. */
mike@0 157
mike@0 158 /* use_global -- add location to use chain for a global symbol */
mike@0 159 void use_global(symbol s, uchar *base, int offset) {
mike@0 160 if (s->s_file == NULL) s->s_file = err_file;
mike@0 161 *((int *) &base[offset]) = s->s_uchain;
mike@0 162 s->s_uchain = offset;
mike@0 163 }
mike@0 164
mike@0 165 /* fix_data -- fix up global refs in the data segment */
mike@0 166 void fix_data(uchar *base, int bss) {
mike@0 167 /* Shift BSS symbols by offset bss */
mike@0 168 for (int i = 0; i < ndict; i++) {
mike@0 169 symbol s = dict[i];
mike@0 170 if (s->s_seg == BSS) s->s_value += bss;
mike@0 171 }
mike@0 172
mike@0 173 /* Fix up each symbol */
mike@0 174 for (int i = 0; i < ndict; i++) {
mike@0 175 symbol s = dict[i];
mike@0 176 int val;
mike@0 177
mike@0 178 if (s->s_uchain == -1) continue;
mike@0 179
mike@0 180 if (dflag > 0) printf("Fixing %s\n", s->s_name);
mike@0 181
mike@0 182 val = sym_value(s);
mike@0 183
mike@0 184 /* Run along the use chain, inserting the value */
mike@0 185 for (int u = s->s_uchain, v; u != -1; u = v) {
mike@0 186 v = *((int *) &base[u]);
mike@0 187 put4(&base[u], val);
mike@0 188 relocate(u, (s->s_seg == ABS ? R_WORD : R_DATA));
mike@0 189 }
mike@0 190 }
mike@0 191 }
mike@0 192
mike@0 193 /* module_data -- add data for module */
mike@0 194 void module_data(symbol s, unsigned checksum, int nlines) {
mike@0 195 s->s_check = checksum;
mike@0 196 s->s_nlines = nlines;
mike@0 197 }
mike@0 198
mike@0 199 static int cf_syms(symbol *a, symbol *b) {
mike@0 200 int z = (*a)->s_kind - (*b)->s_kind;
mike@0 201 if (z == 0) z = (*a)->s_value - (*b)->s_value;
mike@0 202 return z;
mike@0 203 }
mike@0 204
mike@0 205 /* write_symtab -- write the symbol table */
mike@0 206 int write_symtab(void) {
mike@0 207 int nwritten = 0;
mike@0 208
mike@0 209 qsort(dict, ndict, sizeof(symbol),
mike@0 210 (int (*)(const void *, const void *)) cf_syms);
mike@0 211
mike@0 212 for (int i = 0; i < ndict; i++) {
mike@0 213 symbol s = dict[i];
mike@0 214
mike@0 215 if (s->s_kind == X_SYM || s->s_kind == X_NONE) continue;
mike@0 216
mike@0 217 write_int(4, s->s_kind);
mike@0 218 write_string(s->s_name);
mike@0 219 write_int(4, s->s_value);
mike@0 220
mike@0 221 if (s->s_kind == X_MODULE) {
mike@0 222 write_int(4, s->s_check);
mike@0 223 write_int(4, s->s_nlines);
mike@0 224 }
mike@0 225
mike@0 226 nwritten++;
mike@0 227 }
mike@0 228
mike@0 229 return nwritten;
mike@0 230 }
mike@0 231
mike@0 232
mike@0 233 /* LOCAL LABELS */
mike@0 234
mike@0 235 /* The table contains local symbols for the current procedure only.
mike@0 236 As the procedure is initially assembled into the linker's buffer,
mike@0 237 the value of each label is defined as its location in the buffer.
mike@0 238 When the procedure is complete, we replace each use of a
mike@0 239 label by the corresponding value. The values are turned into
mike@0 240 offsets as the code is output. */
mike@0 241
mike@0 242 struct _locdef {
mike@0 243 symbol l_lab;
mike@0 244 phrase l_val;
mike@0 245 };
mike@0 246
mike@0 247 static growdecl(locdefs);
mike@0 248 #define locdefs growbuf(locdefs, struct _locdef)
mike@0 249 #define n_locs growsize(locdefs)
mike@0 250
mike@0 251 void init_labels(void) {
mike@0 252 if (locdefs == NULL)
mike@0 253 buf_init(locdefs, INIT_LMEM, 1, struct _locdef, "labels");
mike@0 254 n_locs = 0;
mike@0 255 }
mike@0 256
mike@0 257 int make_label(symbol s) {
mike@0 258 int n = s->s_index;
mike@0 259
mike@0 260 if (s->s_index >= n_locs || locdefs[s->s_index].l_lab != s) {
mike@0 261 buf_grow(locdefs);
mike@0 262 n = n_locs++;
mike@0 263 locdefs[n].l_lab = s;
mike@0 264 locdefs[n].l_val = NULL;
mike@0 265 s->s_index = n;
mike@0 266 }
mike@0 267
mike@0 268 return n;
mike@0 269 }
mike@0 270
mike@0 271 const char *label_name(int n) {
mike@0 272 return sym_name(locdefs[n].l_lab);
mike@0 273 }
mike@0 274
mike@0 275 void def_label(symbol s, phrase val) {
mike@0 276 int n = make_label(s);
mike@0 277 if (locdefs[n].l_val != NULL)
mike@0 278 error("multiply defined label %s", s->s_name);
mike@0 279 locdefs[n].l_val = val;
mike@0 280 }
mike@0 281
mike@0 282 phrase find_label(int n) {
mike@0 283 phrase val = locdefs[n].l_val;
mike@0 284 if (val == NULL)
mike@0 285 error("undefined label %s", locdefs[n].l_lab->s_name);
mike@0 286 return val;
mike@0 287 }