rpm  4.5
llex.c
Go to the documentation of this file.
1 /*
2 ** $Id: llex.c,v 1.1 2004/03/16 21:58:30 niemeyer Exp $
3 ** Lexical Analyzer
4 ** See Copyright Notice in lua.h
5 */
6 
7 
8 #include <ctype.h>
9 #include <string.h>
10 
11 #define llex_c
12 
13 #include "lua.h"
14 
15 #include "ldo.h"
16 #include "llex.h"
17 #include "lobject.h"
18 #include "lparser.h"
19 #include "lstate.h"
20 #include "lstring.h"
21 #include "lzio.h"
22 
23 
24 
/*
** Advance the lexer by one character: read the next character from the
** input stream `LS->z' with zgetc, store it in `LS->current' and yield it
** as the value of the expression.  The argument is parenthesized so that
** any pointer-valued expression may be passed; note that it is evaluated
** more than once, so it must not have side effects.
*/
#define next(LS)	((LS)->current = zgetc((LS)->z))
26 
27 
28 
/*
** Printable spelling of every token numbered FIRST_RESERVED or above,
** indexed by (token - FIRST_RESERVED).  The first NUM_RESERVED entries are
** the ones registered as reserved words by luaX_init.
** ORDER RESERVED
*/
/*@observer@*/ /*@unchecked@*/
static const char *const token2string [] = {
  "and",
  "break",
  "do",
  "else",
  "elseif",
  "end",
  "false",
  "for",
  "function",
  "if",
  "in",
  "local",
  "nil",
  "not",
  "or",
  "repeat",
  "return",
  "then",
  "true",
  "until",
  "while",
  "*name",
  "..",
  "...",
  "==",
  ">=",
  "<=",
  "~=",
  "*number",
  "*string",
  "<eof>"
};
39 
40 
41 void luaX_init (lua_State *L) {
42  int i;
43  for (i=0; i<NUM_RESERVED; i++) {
44  TString *ts = luaS_new(L, token2string[i]);
45  luaS_fix(ts); /* reserved words are never collected */
46  lua_assert(strlen(token2string[i])+1 <= TOKEN_LEN);
47  ts->tsv.reserved = cast(lu_byte, i+1); /* reserved word */
48  }
49 }
50 
51 
/* size of the buffer that receives the chunk id shown in error messages */
#define MAXSRC 80
53 
54 
55 void luaX_checklimit (LexState *ls, int val, int limit, const char *msg) {
56  if (val > limit) {
57  msg = luaO_pushfstring(ls->L, "too many %s (limit=%d)", msg, limit);
58  luaX_syntaxerror(ls, msg);
59  }
60 }
61 
62 
63 void luaX_errorline (LexState *ls, const char *s, const char *token, int line) {
64  lua_State *L = ls->L;
65  char buff[MAXSRC];
66  luaO_chunkid(buff, getstr(ls->source), MAXSRC);
67  luaO_pushfstring(L, "%s:%d: %s near `%s'", buff, line, s, token);
68  luaD_throw(L, LUA_ERRSYNTAX);
69 }
70 
71 
72 static void luaX_error (LexState *ls, const char *s, const char *token)
73  /*@modifies ls @*/
74 {
75  luaX_errorline(ls, s, token, ls->linenumber);
76 }
77 
78 
79 void luaX_syntaxerror (LexState *ls, const char *msg) {
80  const char *lasttoken;
81  switch (ls->t.token) {
82  case TK_NAME:
83  lasttoken = getstr(ls->t.seminfo.ts);
84  break;
85  case TK_STRING:
86  case TK_NUMBER:
87  lasttoken = luaZ_buffer(ls->buff);
88  break;
89  default:
90  lasttoken = luaX_token2str(ls, ls->t.token);
91  break;
92  }
93  luaX_error(ls, msg, lasttoken);
94 }
95 
96 
97 const char *luaX_token2str (LexState *ls, int token) {
98  if (token < FIRST_RESERVED) {
99  lua_assert(token == (unsigned char)token);
100  return luaO_pushfstring(ls->L, "%c", token);
101  }
102  else
103  return token2string[token-FIRST_RESERVED];
104 }
105 
106 
107 static void luaX_lexerror (LexState *ls, const char *s, int token)
108  /*@modifies ls @*/
109 {
110  if (token == TK_EOS)
111  luaX_error(ls, s, luaX_token2str(ls, token));
112  else
113  luaX_error(ls, s, luaZ_buffer(ls->buff));
114 }
115 
116 
117 static void inclinenumber (LexState *LS)
118  /*@modifies LS @*/
119 {
120  next(LS); /* skip `\n' */
121  ++LS->linenumber;
122  luaX_checklimit(LS, LS->linenumber, MAX_INT, "lines in a chunk");
123 }
124 
125 
126 void luaX_setinput (lua_State *L, LexState *LS, ZIO *z, TString *source) {
127  LS->L = L;
128  LS->lookahead.token = TK_EOS; /* no look-ahead token */
129  LS->z = z;
130  LS->fs = NULL;
131  LS->linenumber = 1;
132  LS->lastline = 1;
133  LS->source = source;
134  next(LS); /* read first char */
135  if (LS->current == '#') {
136  do { /* skip first line */
137  next(LS);
138  } while (LS->current != '\n' && LS->current != EOZ);
139  }
140 }
141 
142 
143 
144 /*
145 ** =======================================================
146 ** LEXICAL ANALYZER
147 ** =======================================================
148 */
149 
150 
/* use buffer to store names, literal strings and numbers */

/* extra space to allocate when growing buffer */
#define EXTRABUFF	32

/* maximum number of chars that can be read without checking buffer size */
#define MAXNOCHECK	5

/*
** Make sure the lexer buffer can hold `len' characters plus MAXNOCHECK
** more; when it cannot, ask luaZ_openspace for len+EXTRABUFF.  Wrapped in
** do/while(0) so the macro behaves as one statement and cannot capture a
** following `else'.
*/
#define checkbuffer(LS, len)	\
  do { \
    if (((len)+MAXNOCHECK)*sizeof(char) > luaZ_sizebuffer((LS)->buff)) \
      luaZ_openspace((LS)->L, (LS)->buff, (len)+EXTRABUFF); \
  } while (0)

/*
** Store character `c' at position `l' of the lexer buffer and increment
** `l' (which must be a modifiable lvalue).  No bounds check is done here;
** callers rely on checkbuffer and the MAXNOCHECK slack.
*/
#define save(LS, c, l)	\
  (luaZ_buffer((LS)->buff)[(l)++] = cast(char, c))

/* store the current character in the buffer, then advance to the next one */
#define save_and_next(LS, l)	(save(LS, (LS)->current, l), next(LS))
166 
167 
168 static size_t readname (LexState *LS)
169  /*@modifies LS @*/
170 {
171  size_t l = 0;
172  checkbuffer(LS, l);
173  do {
174  checkbuffer(LS, l);
175  save_and_next(LS, l);
176  } while (isalnum(LS->current) || LS->current == '_');
177  save(LS, '\0', l);
178  return l-1;
179 }
180 
181 
182 /* LUA_NUMBER */
183 static void read_numeral (LexState *LS, int comma, SemInfo *seminfo)
184  /*@modifies LS, seminfo @*/
185 {
186  size_t l = 0;
187  checkbuffer(LS, l);
188  if (comma) save(LS, '.', l);
189  while (isdigit(LS->current)) {
190  checkbuffer(LS, l);
191  save_and_next(LS, l);
192  }
193  if (LS->current == '.') {
194  save_and_next(LS, l);
195  if (LS->current == '.') {
196  save_and_next(LS, l);
197  save(LS, '\0', l);
198  luaX_lexerror(LS,
199  "ambiguous syntax (decimal point x string concatenation)",
200  TK_NUMBER);
201  }
202  }
203  while (isdigit(LS->current)) {
204  checkbuffer(LS, l);
205  save_and_next(LS, l);
206  }
207  if (LS->current == 'e' || LS->current == 'E') {
208  save_and_next(LS, l); /* read `E' */
209  if (LS->current == '+' || LS->current == '-')
210  save_and_next(LS, l); /* optional exponent sign */
211  while (isdigit(LS->current)) {
212  checkbuffer(LS, l);
213  save_and_next(LS, l);
214  }
215  }
216  save(LS, '\0', l);
217  if (!luaO_str2d(luaZ_buffer(LS->buff), &seminfo->r))
218  luaX_lexerror(LS, "malformed number", TK_NUMBER);
219 }
220 
221 
222 static void read_long_string (LexState *LS, /*@null@*/ SemInfo *seminfo)
223  /*@modifies LS, seminfo @*/
224 {
225  int cont = 0;
226  size_t l = 0;
227  checkbuffer(LS, l);
228  save(LS, '[', l); /* save first `[' */
229  save_and_next(LS, l); /* pass the second `[' */
230  if (LS->current == '\n') /* string starts with a newline? */
231  inclinenumber(LS); /* skip it */
232  for (;;) {
233  checkbuffer(LS, l);
234  switch (LS->current) {
235  case EOZ:
236  save(LS, '\0', l);
237  luaX_lexerror(LS, (seminfo) ? "unfinished long string" :
238  "unfinished long comment", TK_EOS);
239  break; /* to avoid warnings */
240  case '[':
241  save_and_next(LS, l);
242  if (LS->current == '[') {
243  cont++;
244  save_and_next(LS, l);
245  }
246  continue;
247  case ']':
248  save_and_next(LS, l);
249  if (LS->current == ']') {
250  if (cont == 0) goto endloop;
251  cont--;
252  save_and_next(LS, l);
253  }
254  continue;
255  case '\n':
256  save(LS, '\n', l);
257  inclinenumber(LS);
258  if (!seminfo) l = 0; /* reset buffer to avoid wasting space */
259  continue;
260  default:
261  save_and_next(LS, l);
262  }
263  } endloop:
264  save_and_next(LS, l); /* skip the second `]' */
265  save(LS, '\0', l);
266  if (seminfo)
267  seminfo->ts = luaS_newlstr(LS->L, luaZ_buffer(LS->buff) + 2, l - 5);
268 }
269 
270 
271 static void read_string (LexState *LS, int del, SemInfo *seminfo)
272  /*@modifies LS, seminfo @*/
273 {
274  size_t l = 0;
275  checkbuffer(LS, l);
276  save_and_next(LS, l);
277  while (LS->current != del) {
278  checkbuffer(LS, l);
279  switch (LS->current) {
280  case EOZ:
281  save(LS, '\0', l);
282  luaX_lexerror(LS, "unfinished string", TK_EOS);
283  break; /* to avoid warnings */
284  case '\n':
285  save(LS, '\0', l);
286  luaX_lexerror(LS, "unfinished string", TK_STRING);
287  break; /* to avoid warnings */
288  case '\\':
289  next(LS); /* do not save the `\' */
290  switch (LS->current) {
291  case 'a': save(LS, '\a', l); next(LS); break;
292  case 'b': save(LS, '\b', l); next(LS); break;
293  case 'f': save(LS, '\f', l); next(LS); break;
294  case 'n': save(LS, '\n', l); next(LS); break;
295  case 'r': save(LS, '\r', l); next(LS); break;
296  case 't': save(LS, '\t', l); next(LS); break;
297  case 'v': save(LS, '\v', l); next(LS); break;
298  case '\n': save(LS, '\n', l); inclinenumber(LS); break;
299  case EOZ: break; /* will raise an error next loop */
300  default: {
301  if (!isdigit(LS->current))
302  save_and_next(LS, l); /* handles \\, \", \', and \? */
303  else { /* \xxx */
304  int c = 0;
305  int i = 0;
306  do {
307  c = 10*c + (LS->current-'0');
308  next(LS);
309  } while (++i<3 && isdigit(LS->current));
310  if (c > UCHAR_MAX) {
311  save(LS, '\0', l);
312  luaX_lexerror(LS, "escape sequence too large", TK_STRING);
313  }
314  save(LS, c, l);
315  }
316  }
317  }
318  break;
319  default:
320  save_and_next(LS, l);
321  }
322  }
323  save_and_next(LS, l); /* skip delimiter */
324  save(LS, '\0', l);
325  seminfo->ts = luaS_newlstr(LS->L, luaZ_buffer(LS->buff) + 1, l - 3);
326 }
327 
328 
329 int luaX_lex (LexState *LS, SemInfo *seminfo) {
330  for (;;) {
331  switch (LS->current) {
332 
333  case '\n': {
334  inclinenumber(LS);
335  continue;
336  }
337  case '-': {
338  next(LS);
339  if (LS->current != '-') return '-';
340  /* else is a comment */
341  next(LS);
342  if (LS->current == '[' && (next(LS), LS->current == '['))
343  read_long_string(LS, NULL); /* long comment */
344  else /* short comment */
345  while (LS->current != '\n' && LS->current != EOZ)
346  next(LS);
347  continue;
348  }
349  case '[': {
350  next(LS);
351  if (LS->current != '[') return '[';
352  else {
353  read_long_string(LS, seminfo);
354  return TK_STRING;
355  }
356  }
357  case '=': {
358  next(LS);
359  if (LS->current != '=') return '=';
360  else { next(LS); return TK_EQ; }
361  }
362  case '<': {
363  next(LS);
364  if (LS->current != '=') return '<';
365  else { next(LS); return TK_LE; }
366  }
367  case '>': {
368  next(LS);
369  if (LS->current != '=') return '>';
370  else { next(LS); return TK_GE; }
371  }
372  case '~': {
373  next(LS);
374  if (LS->current != '=') return '~';
375  else { next(LS); return TK_NE; }
376  }
377  case '"':
378  case '\'': {
379  read_string(LS, LS->current, seminfo);
380  return TK_STRING;
381  }
382  case '.': {
383  next(LS);
384  if (LS->current == '.') {
385  next(LS);
386  if (LS->current == '.') {
387  next(LS);
388  return TK_DOTS; /* ... */
389  }
390  else return TK_CONCAT; /* .. */
391  }
392  else if (!isdigit(LS->current)) return '.';
393  else {
394  read_numeral(LS, 1, seminfo);
395  return TK_NUMBER;
396  }
397  }
398  case EOZ: {
399  return TK_EOS;
400  }
401  default: {
402  if (isspace(LS->current)) {
403  next(LS);
404  continue;
405  }
406  else if (isdigit(LS->current)) {
407  read_numeral(LS, 0, seminfo);
408  return TK_NUMBER;
409  }
410  else if (isalpha(LS->current) || LS->current == '_') {
411  /* identifier or reserved word */
412  size_t l = readname(LS);
413  TString *ts = luaS_newlstr(LS->L, luaZ_buffer(LS->buff), l);
414  if (ts->tsv.reserved > 0) /* reserved word? */
415  return ts->tsv.reserved - 1 + FIRST_RESERVED;
416  seminfo->ts = ts;
417  return TK_NAME;
418  }
419  else {
420  int c = LS->current;
421  if (iscntrl(c))
422  luaX_error(LS, "invalid control char",
423  luaO_pushfstring(LS->L, "char(%d)", c));
424  next(LS);
425  return c; /* single-char tokens (+ - / ...) */
426  }
427  }
428  }
429  }
430 }
431 
432 #undef next