]>
Commit | Line | Data |
---|---|---|
5fee5ec3 IB |
1 | /** |
2 | * Implements the lexical analyzer, which converts source code into lexical tokens. | |
3 | * | |
4 | * Specification: $(LINK2 https://dlang.org/spec/lex.html, Lexical) | |
5 | * | |
c43b5909 IB |
6 | * Copyright: Copyright (C) 1999-2022 by The D Language Foundation, All Rights Reserved |
7 | * Authors: $(LINK2 https://www.digitalmars.com, Walter Bright) | |
8 | * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) | |
5fee5ec3 IB |
9 | * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/lexer.d, _lexer.d) |
10 | * Documentation: https://dlang.org/phobos/dmd_lexer.html | |
11 | * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/lexer.d | |
12 | */ | |
13 | ||
14 | module dmd.lexer; | |
15 | ||
16 | import core.stdc.ctype; | |
17 | import core.stdc.errno; | |
18 | import core.stdc.stdarg; | |
19 | import core.stdc.stdio; | |
20 | import core.stdc.stdlib : getenv; | |
21 | import core.stdc.string; | |
22 | import core.stdc.time; | |
23 | ||
24 | import dmd.entity; | |
25 | import dmd.errors; | |
26 | import dmd.globals; | |
27 | import dmd.id; | |
28 | import dmd.identifier; | |
0fb57034 | 29 | import dmd.root.array; |
5fee5ec3 | 30 | import dmd.root.ctfloat; |
0fb57034 | 31 | import dmd.common.outbuffer; |
5fee5ec3 IB |
32 | import dmd.root.port; |
33 | import dmd.root.rmem; | |
34 | import dmd.root.string; | |
c43b5909 | 35 | import dmd.root.utf; |
5fee5ec3 | 36 | import dmd.tokens; |
5fee5ec3 IB |
37 | import dmd.utils; |
38 | ||
39 | nothrow: | |
40 | ||
5fee5ec3 IB |
// When the compiler front end is built with the DMDLIB version identifier,
// also define the LocOffset version identifier for the rest of this module.
41 | version (DMDLIB) |
42 | { | |
43 | version = LocOffset; | |
44 | } | |
45 | ||
46 | /*********************************************************** | |
47 | */ | |
48 | class Lexer | |
49 | { | |
// NOTE(review): stringbuffer is not referenced in the visible part of this file;
// presumably scratch storage shared by the string-literal lexing routines - confirm.
50 | private __gshared OutBuffer stringbuffer; | |
51 | ||
52 | Loc scanloc; // location of the scanner, used for error messages | |
53 | Loc prevloc; // location of token before current (saved by nextToken()) | |
54 | ||
55 | const(char)* p; // current character (scan position) | |
56 | ||
// The current token. nextToken() overwrites it; peek() chains look-ahead
// tokens onto it through Token.next.
57 | Token token; | |
58 | ||
59 | // For ImportC | |
60 | bool Ccompile; /// true if compiling ImportC | |
61 | ||
62 | // The following are valid only if (Ccompile == true) | |
1027dc45 IB |
63 | ubyte boolsize; /// size of a C _Bool, default 1 |
64 | ubyte shortsize; /// size of a C short, default 2 | |
65 | ubyte intsize; /// size of a C int, default 4 | |
5fee5ec3 | 66 | ubyte longsize; /// size of C long, 4 or 8 |
1027dc45 | 67 | ubyte long_longsize; /// size of a C long long, default 8 |
5fee5ec3 IB |
68 | ubyte long_doublesize; /// size of C long double, 8 or D real.sizeof |
69 | ubyte wchar_tsize; /// size of C wchar_t, 2 or 4 | |
70 | ||
71 | private | |
72 | { | |
73 | const(char)* base; // pointer to start of buffer | |
74 | const(char)* end; // pointer to last element of buffer, i.e. the 0 / 0x1A terminator | |
75 | const(char)* line; // start of current line | |
76 | ||
77 | bool doDocComment; // collect doc comment information | |
78 | bool anyToken; // seen at least one token | |
79 | bool commentToken; // comments are TOK.comment's | |
235d5a96 | 80 | bool tokenizeNewlines; // next newline becomes a TOK.endOfLine; scan() clears the flag after emitting one |
8977f4be IB |
81 | 
82 | version (DMDLIB) | |
83 | { | |
84 | bool whitespaceToken; // tokenize whitespaces | |
85 | } | |
86 | ||
5fee5ec3 IB |
87 | int inTokenStringConstant; // can be larger than 1 when in nested q{} strings |
88 | int lastDocLine; // last line of previous doc comment | |
89 | ||
// Singly linked list (through Token.next) of tokens handed back by
// releaseToken(); allocateToken() reuses them before allocating new ones.
90 | Token* tokenFreelist; | |
91 | } | |
92 | ||
93 | nothrow: | |
94 | ||
/*********************
 * Constructs a Lexer over the source text `base[begoffset .. endoffset + 1]`.
 * The final character, `base[endoffset]`, has to be a null (0) or an
 * EOF (0x1A) character.
 *
 * If the text begins with `#!`, that whole first line is skipped.
 *
 * Params:
 *  filename = file name reported in error messages
 *  base = source text, terminated by a null (0) or EOF (0x1A) character
 *  begoffset = offset into `base[]` at which lexing starts
 *  endoffset = last offset of `base[]` that may be read
 *  doDocComment = collect documentation comments
 *  commentToken = deliver comments as `TOK.comment` tokens
 */
this(const(char)* filename, const(char)* base, size_t begoffset,
    size_t endoffset, bool doDocComment, bool commentToken) pure
{
    scanloc = Loc(filename, 1, 1);
    token = Token.init;

    // Buffer bounds and the initial scan position.
    this.base = base;
    this.end = base + endoffset;
    this.p = base + begoffset;
    this.line = this.p;

    // Lexing options and bookkeeping.
    this.doDocComment = doDocComment;
    this.commentToken = commentToken;
    this.tokenizeNewlines = false;
    this.inTokenStringConstant = 0;
    this.lastDocLine = 0;

    // Nothing more to do unless the text starts with a '#!' line.
    if (p is null || p[0] != '#' || p[1] != '!')
        return;

    p += 2;
    for (;;)
    {
        const char c = *p;
        if (c == 0 || c == 0x1A)
            break;      // stay on the terminator so scan() reports end of file
        ++p;
        if (c == '\n')
            break;      // stop just past the newline
    }
    endOfLine();
}
148 | ||
610d7898 IB |
/******************
 * Parameterless constructor; exists so that unittests can create a mock Lexer.
 * It leaves every field at its default initializer.
 */
this()
{
}
153 | ||
/**************************************
 * Re-targets the lexer at a buffer of `#define` lines.
 *
 * Params:
 *  slice = the text to lex; the character just past the slice
 *          (`slice.ptr[slice.length]`) must be 0
 */
final void resetDefineLines(const(char)[] slice)
{
    const(char)* start = slice.ptr;

    base = start;
    end = start + slice.length;
    assert(*end == 0);

    // Begin scanning at the first character of the new buffer.
    p = start;
    line = start;

    tokenizeNewlines = true;
    inTokenStringConstant = 0;
    lastDocLine = 0;
    scanloc = Loc("#defines", 1, 1);
}
169 | ||
/**********************************
 * Prepares the lexer for the next `#define` line by re-arming newline
 * tokenization. The caller is expected to have `p` at the start of that line.
 */
final void nextDefineLine()
{
    this.tokenizeNewlines = true;
}
178 | ||
8977f4be IB |
version (DMDLIB)
{
    /**
     * Library-only constructor that additionally selects whether whitespace
     * is delivered as tokens. The remaining arguments are forwarded unchanged
     * to the six-argument constructor.
     */
    this(const(char)* filename, const(char)* base, size_t begoffset, size_t endoffset,
        bool doDocComment, bool commentToken, bool whitespaceToken)
    {
        this(filename, base, begoffset, endoffset, doDocComment, commentToken);
        this.whitespaceToken = whitespaceToken;
    }

    /// Range primitive: true once the current token is `TOK.endOfFile`.
    bool empty() const pure @property @nogc @safe
    {
        const TOK current = front();
        return current == TOK.endOfFile;
    }

    /// Range primitive: the kind of the current token.
    TOK front() const pure @property @nogc @safe
    {
        return this.token.value;
    }

    /// Range primitive: advances to the next token.
    void popFront()
    {
        nextToken();
    }
}
203 | ||
5fee5ec3 IB |
/// Returns: a `Token` taken from the freelist when one is available,
/// otherwise a newly allocated one. Its `next` link is null when it comes from the freelist.
Token* allocateToken() pure nothrow @safe
{
    if (tokenFreelist is null)
        return new Token();

    // Pop the head of the freelist and detach it from the rest of the list.
    Token* recycled = tokenFreelist;
    tokenFreelist = recycled.next;
    recycled.next = null;
    return recycled;
}
216 | ||
/// Hands `token` back to the lexer by pushing it onto the front of the freelist.
private void releaseToken(Token* token) pure nothrow @nogc @safe
{
    // With the GC enabled, reset the token to its initial state first.
    if (mem.isGCEnabled)
    {
        *token = Token.init;
    }

    token.next = tokenFreelist;
    tokenFreelist = token;
}
225 | ||
/// Advances to the next token and returns its kind.
/// A token already produced by a previous peek is consumed before any new text is scanned.
final TOK nextToken()
{
    prevloc = token.loc;

    Token* pending = token.next;
    if (pending is null)
    {
        scan(&token);
    }
    else
    {
        // Copy the look-ahead token over the current one, then recycle its storage.
        memcpy(&token, pending, Token.sizeof);
        releaseToken(pending);
    }
    return token.value;
}
242 | ||
/***********************
 * Returns: the kind of the token that follows the current one,
 * without consuming it.
 */
final TOK peekNext()
{
    Token* following = peek(&token);
    return following.value;
}
250 | ||
/***********************
 * Returns: the kind of the token two positions after the current one,
 * without consuming anything.
 */
final TOK peekNext2()
{
    return peek(peek(&token)).value;
}
259 | ||
260 | /**************************** | |
261 | * Turn next token in buffer into a token. | |
235d5a96 IB |
262 | * Params: |
263 | * t = the token to set the resulting Token to | |
5fee5ec3 IB |
264 | */ |
// Scans the text at `p` and stores the next token in `*t`.
// The outer `while (1)` restarts whenever something is skipped (whitespace,
// comments, invalid characters, handled special token sequences); every path
// that produces a token sets `t.value` and returns.
// On 0 / 0x1A the result is TOK.endOfFile and `p` is not advanced.
265 | final void scan(Token* t) | |
266 | { | |
// Line number on entry; compared with a doc comment's starting line when
// calling getDocComment().
267 | const lastLine = scanloc.linnum; | |
268 | Loc startLoc; | |
269 | t.blockComment = null; | |
270 | t.lineComment = null; | |
271 | ||
272 | while (1) | |
273 | { | |
274 | t.ptr = p; | |
275 | //printf("p = %p, *p = '%c'\n",p,*p); | |
276 | t.loc = loc(); | |
277 | switch (*p) | |
278 | { | |
279 | case 0: | |
280 | case 0x1A: | |
281 | t.value = TOK.endOfFile; // end of file | |
282 | // Intentionally not advancing `p`, such that subsequent calls keep returning TOK.endOfFile. | |
283 | return; | |
284 | case ' ': | |
9c7d5e88 IB |
285 | // Skip 4 spaces at a time after aligning 'p' to a 4-byte boundary. |
286 | while ((cast(size_t)p) % uint.sizeof) | |
287 | { | |
288 | if (*p != ' ') | |
289 | goto LendSkipFourSpaces; | |
290 | p++; | |
291 | } | |
// Compares four bytes at once through a uint pointer; p was aligned by the loop above.
292 | while (*(cast(uint*)p) == 0x20202020) // ' ' == 0x20 | |
293 | p += 4; | |
294 | // Skip over any remaining space on the line. | |
295 | while (*p == ' ') | |
296 | p++; | |
297 | LendSkipFourSpaces: | |
8977f4be IB |
298 | version (DMDLIB) |
299 | { | |
300 | if (whitespaceToken) | |
301 | { | |
302 | t.value = TOK.whitespace; | |
303 | return; | |
304 | } | |
305 | } | |
9c7d5e88 | 306 | continue; // skip white space |
5fee5ec3 IB |
307 | case '\t': |
308 | case '\v': | |
309 | case '\f': | |
310 | p++; | |
8977f4be IB |
311 | version (DMDLIB) |
312 | { | |
313 | if (whitespaceToken) | |
314 | { | |
315 | t.value = TOK.whitespace; | |
316 | return; | |
317 | } | |
318 | } | |
5fee5ec3 IB |
319 | continue; // skip white space |
320 | case '\r': | |
321 | p++; | |
322 | if (*p != '\n') // if CR stands by itself | |
235d5a96 | 323 | { |
5fee5ec3 | 324 | endOfLine(); |
235d5a96 IB |
325 | if (tokenizeNewlines) |
326 | { | |
// One-shot: the flag is cleared as soon as one endOfLine token has been produced.
327 | t.value = TOK.endOfLine; | |
328 | tokenizeNewlines = false; | |
329 | return; | |
330 | } | |
331 | } | |
8977f4be IB |
332 | version (DMDLIB) |
333 | { | |
334 | if (whitespaceToken) | |
335 | { | |
336 | t.value = TOK.whitespace; | |
337 | return; | |
338 | } | |
339 | } | |
5fee5ec3 IB |
340 | continue; // skip white space |
341 | case '\n': | |
342 | p++; | |
343 | endOfLine(); | |
235d5a96 IB |
344 | if (tokenizeNewlines) |
345 | { | |
346 | t.value = TOK.endOfLine; | |
347 | tokenizeNewlines = false; | |
348 | return; | |
349 | } | |
8977f4be IB |
350 | version (DMDLIB) |
351 | { | |
352 | if (whitespaceToken) | |
353 | { | |
354 | t.value = TOK.whitespace; | |
355 | return; | |
356 | } | |
357 | } | |
5fee5ec3 IB |
358 | continue; // skip white space |
359 | case '0': | |
360 | if (!isZeroSecond(p[1])) // if numeric literal does not continue | |
361 | { | |
362 | ++p; | |
363 | t.unsvalue = 0; | |
364 | t.value = TOK.int32Literal; | |
365 | return; | |
366 | } | |
367 | goto Lnumber; | |
368 | ||
369 | case '1': .. case '9': | |
370 | if (!isDigitSecond(p[1])) // if numeric literal does not continue | |
371 | { | |
372 | t.unsvalue = *p - '0'; | |
373 | ++p; | |
374 | t.value = TOK.int32Literal; | |
375 | return; | |
376 | } | |
377 | Lnumber: | |
378 | t.value = number(t); | |
379 | return; | |
380 | ||
381 | case '\'': | |
382 | if (issinglechar(p[1]) && p[2] == '\'') | |
383 | { | |
384 | t.unsvalue = p[1]; // simple one character literal | |
6384eff5 | 385 | t.value = TOK.charLiteral; |
5fee5ec3 IB |
386 | p += 3; |
387 | } | |
388 | else if (Ccompile) | |
389 | { | |
390 | clexerCharConstant(*t, 0); | |
391 | } | |
392 | else | |
393 | { | |
394 | t.value = charConstant(t); | |
395 | } | |
396 | return; | |
397 | ||
398 | case 'u': | |
399 | case 'U': | |
400 | case 'L': | |
// Outside of ImportC these letters are ordinary identifier starts.
401 | if (!Ccompile) | |
402 | goto case_ident; | |
403 | if (p[1] == '\'') // C wide character constant | |
404 | { | |
405 | char c = *p; | |
// NOTE(review): a 4-byte wchar_t is mapped to 'u' here, whereas the string-literal
// branch below maps a 2-byte wchar_t to 'w' and everything else to 'd'. The two
// look inverted relative to each other - confirm against clexerCharConstant().
406 | if (c == 'L') // convert L to u or U | |
407 | c = (wchar_tsize == 4) ? 'u' : 'U'; | |
408 | ++p; | |
409 | clexerCharConstant(*t, c); | |
410 | return; | |
411 | } | |
412 | else if (p[1] == '\"') // C wide string literal | |
413 | { | |
414 | const c = *p; | |
415 | ++p; | |
416 | escapeStringConstant(t); | |
417 | t.postfix = c == 'L' ? (wchar_tsize == 2 ? 'w' : 'd') : | |
418 | c == 'u' ? 'w' : | |
419 | 'd'; | |
420 | return; | |
421 | } | |
fbdaa581 IB |
// NOTE(review): the case labels are shared, so this branch is also taken for
// `U8"` and `L8"`, not only `u8"` - confirm that is intended.
422 | else if (p[1] == '8' && p[2] == '\"') // C UTF-8 string literal |
423 | { | |
424 | p += 2; | |
425 | escapeStringConstant(t); | |
426 | return; | |
427 | } | |
5fee5ec3 IB |
428 | goto case_ident; |
429 | ||
430 | case 'r': | |
7e287503 | 431 | if (Ccompile || p[1] != '"') |
5fee5ec3 IB |
432 | goto case_ident; |
// Step over the 'r'; r"..." is then lexed by the same routine as `...`.
433 | p++; | |
434 | goto case '`'; | |
435 | case '`': | |
7e287503 IB |
436 | if (Ccompile) |
437 | goto default; | |
5fee5ec3 IB |
438 | wysiwygStringConstant(t); |
439 | return; | |
5fee5ec3 | 440 | case 'q': |
7e287503 IB |
441 | if (Ccompile) |
442 | goto case_ident; | |
5fee5ec3 IB |
443 | if (p[1] == '"') |
444 | { | |
445 | p++; | |
446 | delimitedStringConstant(t); | |
447 | return; | |
448 | } | |
449 | else if (p[1] == '{') | |
450 | { | |
451 | p++; | |
452 | tokenStringConstant(t); | |
453 | return; | |
454 | } | |
455 | else | |
456 | goto case_ident; | |
457 | case '"': | |
458 | escapeStringConstant(t); | |
459 | return; | |
460 | case 'a': | |
461 | case 'b': | |
462 | case 'c': | |
463 | case 'd': | |
464 | case 'e': | |
465 | case 'f': | |
466 | case 'g': | |
467 | case 'h': | |
468 | case 'i': | |
469 | case 'j': | |
470 | case 'k': | |
471 | case 'l': | |
472 | case 'm': | |
473 | case 'n': | |
474 | case 'o': | |
475 | case 'p': | |
476 | /*case 'q': case 'r':*/ | |
477 | case 's': | |
478 | case 't': | |
479 | //case 'u': | |
480 | case 'v': | |
481 | case 'w': | |
7e287503 | 482 | case 'x': |
5fee5ec3 IB |
483 | case 'y': |
484 | case 'z': | |
485 | case 'A': | |
486 | case 'B': | |
487 | case 'C': | |
488 | case 'D': | |
489 | case 'E': | |
490 | case 'F': | |
491 | case 'G': | |
492 | case 'H': | |
493 | case 'I': | |
494 | case 'J': | |
495 | case 'K': | |
496 | //case 'L': | |
497 | case 'M': | |
498 | case 'N': | |
499 | case 'O': | |
500 | case 'P': | |
501 | case 'Q': | |
502 | case 'R': | |
503 | case 'S': | |
504 | case 'T': | |
505 | //case 'U': | |
506 | case 'V': | |
507 | case 'W': | |
508 | case 'X': | |
509 | case 'Y': | |
510 | case 'Z': | |
511 | case '_': | |
512 | case_ident: | |
513 | { | |
// Consume the rest of the identifier: ASCII identifier characters, or
// non-ASCII characters that decode to a isUniAlpha() code point.
514 | while (1) | |
515 | { | |
516 | const c = *++p; | |
517 | if (isidchar(c)) | |
518 | continue; | |
519 | else if (c & 0x80) | |
520 | { | |
521 | const s = p; | |
522 | const u = decodeUTF(); | |
523 | if (isUniAlpha(u)) | |
524 | continue; | |
525 | error("char 0x%04x not allowed in identifier", u); | |
526 | p = s; | |
527 | } | |
528 | break; | |
529 | } | |
530 | Identifier id = Identifier.idPool(cast(char*)t.ptr, cast(uint)(p - t.ptr)); | |
531 | t.ident = id; | |
532 | t.value = cast(TOK)id.getValue(); | |
533 | ||
534 | anyToken = 1; | |
535 | ||
536 | /* Different keywords for C and D | |
537 | */ | |
538 | if (Ccompile) | |
539 | { | |
540 | if (t.value != TOK.identifier) | |
541 | { | |
542 | t.value = Ckeywords[t.value]; // filter out D keywords | |
543 | } | |
544 | } | |
545 | else if (t.value >= FirstCKeyword) | |
546 | t.value = TOK.identifier; // filter out C keywords | |
547 | ||
548 | else if (*t.ptr == '_') // if special identifier token | |
549 | { | |
550 | // Lazy initialization | |
551 | TimeStampInfo.initialize(t.loc); | |
552 | ||
553 | if (id == Id.DATE) | |
554 | { | |
555 | t.ustring = TimeStampInfo.date.ptr; | |
556 | goto Lstr; | |
557 | } | |
558 | else if (id == Id.TIME) | |
559 | { | |
560 | t.ustring = TimeStampInfo.time.ptr; | |
561 | goto Lstr; | |
562 | } | |
563 | else if (id == Id.VENDOR) | |
564 | { | |
565 | t.ustring = global.vendor.xarraydup.ptr; | |
566 | goto Lstr; | |
567 | } | |
568 | else if (id == Id.TIMESTAMP) | |
569 | { | |
570 | t.ustring = TimeStampInfo.timestamp.ptr; | |
// Shared tail for the identifiers above that expand to a string literal.
571 | Lstr: | |
572 | t.value = TOK.string_; | |
573 | t.postfix = 0; | |
574 | t.len = cast(uint)strlen(t.ustring); | |
575 | } | |
576 | else if (id == Id.VERSIONX) | |
577 | { | |
578 | t.value = TOK.int64Literal; | |
579 | t.unsvalue = global.versionNumber(); | |
580 | } | |
581 | else if (id == Id.EOFX) | |
582 | { | |
583 | t.value = TOK.endOfFile; | |
584 | // Advance scanner to end of file | |
585 | while (!(*p == 0 || *p == 0x1A)) | |
586 | p++; | |
587 | } | |
588 | } | |
589 | //printf("t.value = %d\n",t.value); | |
590 | return; | |
591 | } | |
592 | case '/': | |
593 | p++; | |
594 | switch (*p) | |
595 | { | |
596 | case '=': | |
597 | p++; | |
598 | t.value = TOK.divAssign; | |
599 | return; | |
600 | case '*': | |
601 | p++; | |
602 | startLoc = loc(); | |
// Outer loop: repeat until a '/' that really closes the comment is found.
// Inner loop: advance to the next '/' while tracking line ends.
603 | while (1) | |
604 | { | |
605 | while (1) | |
606 | { | |
607 | const c = *p; | |
608 | switch (c) | |
609 | { | |
610 | case '/': | |
611 | break; | |
612 | case '\n': | |
// NOTE(review): endOfLine() runs before p is advanced here, while the top-level
// '\n' case advances p first - confirm this ordering is intended.
613 | endOfLine(); | |
614 | p++; | |
615 | continue; | |
616 | case '\r': | |
617 | p++; | |
618 | if (*p != '\n') | |
619 | endOfLine(); | |
620 | continue; | |
621 | case 0: | |
622 | case 0x1A: | |
623 | error("unterminated /* */ comment"); | |
624 | p = end; | |
625 | t.loc = loc(); | |
626 | t.value = TOK.endOfFile; | |
627 | return; | |
628 | default: | |
629 | if (c & 0x80) | |
630 | { | |
631 | const u = decodeUTF(); | |
632 | if (u == PS || u == LS) | |
633 | endOfLine(); | |
634 | } | |
635 | p++; | |
636 | continue; | |
637 | } | |
638 | break; | |
639 | } | |
640 | p++; | |
// The '/' closes the comment only if preceded by '*', and that '*' must not be
// the one from the opening "/*" (so "/*/" does not terminate the comment).
641 | if (p[-2] == '*' && p - 3 != t.ptr) | |
642 | break; | |
643 | } | |
644 | if (commentToken) | |
645 | { | |
646 | t.loc = startLoc; | |
647 | t.value = TOK.comment; | |
648 | return; | |
649 | } | |
650 | else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr) | |
651 | { | |
652 | // if /** but not /**/ | |
653 | getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1); | |
654 | lastDocLine = scanloc.linnum; | |
655 | } | |
656 | continue; | |
657 | case '/': // do // style comments | |
658 | startLoc = loc(); | |
659 | while (1) | |
660 | { | |
661 | const c = *++p; | |
662 | switch (c) | |
663 | { | |
664 | case '\n': | |
665 | break; | |
666 | case '\r': | |
667 | if (p[1] == '\n') | |
668 | p++; | |
669 | break; | |
670 | case 0: | |
671 | case 0x1A: | |
672 | if (commentToken) | |
673 | { | |
674 | p = end; | |
675 | t.loc = startLoc; | |
676 | t.value = TOK.comment; | |
677 | return; | |
678 | } | |
679 | if (doDocComment && t.ptr[2] == '/') | |
680 | { | |
681 | getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1); | |
682 | lastDocLine = scanloc.linnum; | |
683 | } | |
684 | p = end; | |
685 | t.loc = loc(); | |
686 | t.value = TOK.endOfFile; | |
687 | return; | |
688 | default: | |
689 | if (c & 0x80) | |
690 | { | |
691 | const u = decodeUTF(); | |
692 | if (u == PS || u == LS) | |
693 | break; | |
694 | } | |
695 | continue; | |
696 | } | |
697 | break; | |
698 | } | |
699 | if (commentToken) | |
700 | { | |
8977f4be IB |
// In DMDLIB builds the line terminator is left unconsumed when a comment token is returned.
701 | version (DMDLIB) {} |
702 | else | |
703 | { | |
704 | p++; | |
705 | endOfLine(); | |
706 | } | |
5fee5ec3 IB |
707 | t.loc = startLoc; |
708 | t.value = TOK.comment; | |
709 | return; | |
710 | } | |
711 | if (doDocComment && t.ptr[2] == '/') | |
712 | { | |
713 | getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1); | |
714 | lastDocLine = scanloc.linnum; | |
715 | } | |
716 | p++; | |
717 | endOfLine(); | |
718 | continue; | |
719 | case '+': | |
// Nesting /+ +/ comments are recognized only when not compiling C; for C the
// switch is left and the '/' already consumed becomes TOK.div.
7e287503 | 720 | if (!Ccompile) |
5fee5ec3 IB |
721 | { |
722 | int nest; | |
723 | startLoc = loc(); | |
724 | p++; | |
725 | nest = 1; | |
726 | while (1) | |
727 | { | |
728 | char c = *p; | |
729 | switch (c) | |
730 | { | |
731 | case '/': | |
732 | p++; | |
733 | if (*p == '+') | |
734 | { | |
735 | p++; | |
736 | nest++; | |
737 | } | |
738 | continue; | |
739 | case '+': | |
740 | p++; | |
741 | if (*p == '/') | |
742 | { | |
743 | p++; | |
744 | if (--nest == 0) | |
745 | break; | |
746 | } | |
747 | continue; | |
748 | case '\r': | |
749 | p++; | |
750 | if (*p != '\n') | |
751 | endOfLine(); | |
752 | continue; | |
753 | case '\n': | |
754 | endOfLine(); | |
755 | p++; | |
756 | continue; | |
757 | case 0: | |
758 | case 0x1A: | |
759 | error("unterminated /+ +/ comment"); | |
760 | p = end; | |
761 | t.loc = loc(); | |
762 | t.value = TOK.endOfFile; | |
763 | return; | |
764 | default: | |
765 | if (c & 0x80) | |
766 | { | |
767 | uint u = decodeUTF(); | |
768 | if (u == PS || u == LS) | |
769 | endOfLine(); | |
770 | } | |
771 | p++; | |
772 | continue; | |
773 | } | |
774 | break; | |
775 | } | |
776 | if (commentToken) | |
777 | { | |
778 | t.loc = startLoc; | |
779 | t.value = TOK.comment; | |
780 | return; | |
781 | } | |
782 | if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr) | |
783 | { | |
784 | // if /++ but not /++/ | |
785 | getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1); | |
786 | lastDocLine = scanloc.linnum; | |
787 | } | |
788 | continue; | |
789 | } | |
7e287503 | 790 | break; |
5fee5ec3 IB |
791 | default: |
792 | break; | |
793 | } | |
794 | t.value = TOK.div; | |
795 | return; | |
796 | case '.': | |
797 | p++; | |
798 | if (isdigit(*p)) | |
799 | { | |
800 | /* Note that we don't allow ._1 and ._ as being | |
801 | * valid floating point numbers. | |
802 | */ | |
803 | p--; | |
804 | t.value = inreal(t); | |
805 | } | |
806 | else if (p[0] == '.') | |
807 | { | |
808 | if (p[1] == '.') | |
809 | { | |
810 | p += 2; | |
811 | t.value = TOK.dotDotDot; | |
812 | } | |
813 | else | |
814 | { | |
815 | p++; | |
816 | t.value = TOK.slice; | |
817 | } | |
818 | } | |
819 | else | |
820 | t.value = TOK.dot; | |
821 | return; | |
822 | case '&': | |
823 | p++; | |
824 | if (*p == '=') | |
825 | { | |
826 | p++; | |
827 | t.value = TOK.andAssign; | |
828 | } | |
829 | else if (*p == '&') | |
830 | { | |
831 | p++; | |
832 | t.value = TOK.andAnd; | |
833 | } | |
834 | else | |
835 | t.value = TOK.and; | |
836 | return; | |
837 | case '|': | |
838 | p++; | |
839 | if (*p == '=') | |
840 | { | |
841 | p++; | |
842 | t.value = TOK.orAssign; | |
843 | } | |
844 | else if (*p == '|') | |
845 | { | |
846 | p++; | |
847 | t.value = TOK.orOr; | |
848 | } | |
849 | else | |
850 | t.value = TOK.or; | |
851 | return; | |
852 | case '-': | |
853 | p++; | |
854 | if (*p == '=') | |
855 | { | |
856 | p++; | |
857 | t.value = TOK.minAssign; | |
858 | } | |
859 | else if (*p == '-') | |
860 | { | |
861 | p++; | |
862 | t.value = TOK.minusMinus; | |
863 | } | |
864 | else if (*p == '>') | |
865 | { | |
866 | ++p; | |
867 | t.value = TOK.arrow; | |
868 | } | |
869 | else | |
870 | t.value = TOK.min; | |
871 | return; | |
872 | case '+': | |
873 | p++; | |
874 | if (*p == '=') | |
875 | { | |
876 | p++; | |
877 | t.value = TOK.addAssign; | |
878 | } | |
879 | else if (*p == '+') | |
880 | { | |
881 | p++; | |
882 | t.value = TOK.plusPlus; | |
883 | } | |
884 | else | |
885 | t.value = TOK.add; | |
886 | return; | |
887 | case '<': | |
888 | p++; | |
889 | if (*p == '=') | |
890 | { | |
891 | p++; | |
892 | t.value = TOK.lessOrEqual; // <= | |
893 | } | |
894 | else if (*p == '<') | |
895 | { | |
896 | p++; | |
897 | if (*p == '=') | |
898 | { | |
899 | p++; | |
900 | t.value = TOK.leftShiftAssign; // <<= | |
901 | } | |
902 | else | |
903 | t.value = TOK.leftShift; // << | |
904 | } | |
905 | else if (*p == ':' && Ccompile) | |
906 | { | |
907 | ++p; | |
908 | t.value = TOK.leftBracket; // <: | |
909 | } | |
910 | else if (*p == '%' && Ccompile) | |
911 | { | |
912 | ++p; | |
913 | t.value = TOK.leftCurly; // <% | |
914 | } | |
915 | else | |
916 | t.value = TOK.lessThan; // < | |
917 | return; | |
918 | case '>': | |
919 | p++; | |
920 | if (*p == '=') | |
921 | { | |
922 | p++; | |
923 | t.value = TOK.greaterOrEqual; // >= | |
924 | } | |
925 | else if (*p == '>') | |
926 | { | |
927 | p++; | |
928 | if (*p == '=') | |
929 | { | |
930 | p++; | |
931 | t.value = TOK.rightShiftAssign; // >>= | |
932 | } | |
933 | else if (*p == '>') | |
934 | { | |
935 | p++; | |
936 | if (*p == '=') | |
937 | { | |
938 | p++; | |
939 | t.value = TOK.unsignedRightShiftAssign; // >>>= | |
940 | } | |
941 | else | |
942 | t.value = TOK.unsignedRightShift; // >>> | |
943 | } | |
944 | else | |
945 | t.value = TOK.rightShift; // >> | |
946 | } | |
947 | else | |
948 | t.value = TOK.greaterThan; // > | |
949 | return; | |
950 | case '!': | |
951 | p++; | |
952 | if (*p == '=') | |
953 | { | |
954 | p++; | |
955 | t.value = TOK.notEqual; // != | |
956 | } | |
957 | else | |
958 | t.value = TOK.not; // ! | |
959 | return; | |
960 | case '=': | |
961 | p++; | |
962 | if (*p == '=') | |
963 | { | |
964 | p++; | |
965 | t.value = TOK.equal; // == | |
966 | } | |
967 | else if (*p == '>') | |
968 | { | |
969 | p++; | |
970 | t.value = TOK.goesTo; // => | |
971 | } | |
972 | else | |
973 | t.value = TOK.assign; // = | |
974 | return; | |
975 | case '~': | |
976 | p++; | |
977 | if (*p == '=') | |
978 | { | |
979 | p++; | |
980 | t.value = TOK.concatenateAssign; // ~= | |
981 | } | |
982 | else | |
983 | t.value = TOK.tilde; // ~ | |
984 | return; | |
985 | case '^': | |
986 | p++; | |
987 | if (*p == '^') | |
988 | { | |
989 | p++; | |
990 | if (*p == '=') | |
991 | { | |
992 | p++; | |
993 | t.value = TOK.powAssign; // ^^= | |
994 | } | |
995 | else | |
996 | t.value = TOK.pow; // ^^ | |
997 | } | |
998 | else if (*p == '=') | |
999 | { | |
1000 | p++; | |
1001 | t.value = TOK.xorAssign; // ^= | |
1002 | } | |
1003 | else | |
1004 | t.value = TOK.xor; // ^ | |
1005 | return; | |
1006 | case '(': | |
1007 | p++; | |
1008 | t.value = TOK.leftParenthesis; | |
1009 | return; | |
1010 | case ')': | |
1011 | p++; | |
1012 | t.value = TOK.rightParenthesis; | |
1013 | return; | |
1014 | case '[': | |
1015 | p++; | |
1016 | t.value = TOK.leftBracket; | |
1017 | return; | |
1018 | case ']': | |
1019 | p++; | |
1020 | t.value = TOK.rightBracket; | |
1021 | return; | |
1022 | case '{': | |
1023 | p++; | |
1024 | t.value = TOK.leftCurly; | |
1025 | return; | |
1026 | case '}': | |
1027 | p++; | |
1028 | t.value = TOK.rightCurly; | |
1029 | return; | |
1030 | case '?': | |
1031 | p++; | |
1032 | t.value = TOK.question; | |
1033 | return; | |
1034 | case ',': | |
1035 | p++; | |
1036 | t.value = TOK.comma; | |
1037 | return; | |
1038 | case ';': | |
1039 | p++; | |
1040 | t.value = TOK.semicolon; | |
1041 | return; | |
1042 | case ':': | |
1043 | p++; | |
1044 | if (*p == ':') | |
1045 | { | |
1046 | ++p; | |
1047 | t.value = TOK.colonColon; | |
1048 | } | |
1049 | else if (*p == '>' && Ccompile) | |
1050 | { | |
1051 | ++p; | |
1052 | t.value = TOK.rightBracket; // :> | |
1053 | } | |
1054 | else | |
1055 | t.value = TOK.colon; | |
1056 | return; | |
1057 | case '$': | |
1058 | p++; | |
1059 | t.value = TOK.dollar; | |
1060 | return; | |
1061 | case '@': | |
1062 | p++; | |
1063 | t.value = TOK.at; | |
1064 | return; | |
1065 | case '*': | |
1066 | p++; | |
1067 | if (*p == '=') | |
1068 | { | |
1069 | p++; | |
1070 | t.value = TOK.mulAssign; | |
1071 | } | |
1072 | else | |
1073 | t.value = TOK.mul; | |
1074 | return; | |
1075 | case '%': | |
1076 | p++; | |
1077 | if (*p == '=') | |
1078 | { | |
1079 | p++; | |
1080 | t.value = TOK.modAssign; | |
1081 | } | |
1082 | else if (*p == '>' && Ccompile) | |
1083 | { | |
1084 | ++p; | |
1085 | t.value = TOK.rightCurly; // %> | |
1086 | } | |
1087 | else if (*p == ':' && Ccompile) | |
1088 | { | |
1089 | goto case '#'; // %: means # | |
1090 | } | |
1091 | else | |
1092 | t.value = TOK.mod; | |
1093 | return; | |
1094 | case '#': | |
1095 | { | |
235d5a96 IB |
1096 | // https://issues.dlang.org/show_bug.cgi?id=22825 |
1097 | // Special token sequences are terminated by newlines, | |
1098 | // and should not be skipped over. | |
1099 | this.tokenizeNewlines = true; | |
5fee5ec3 | 1100 | p++; |
// A true result means the sequence was handled, so scanning simply resumes;
// otherwise the '#' is returned as TOK.pound.
7e287503 | 1101 | if (parseSpecialTokenSequence()) |
5fee5ec3 | 1102 | continue; |
5fee5ec3 IB |
1103 | t.value = TOK.pound; |
1104 | return; | |
1105 | } | |
1106 | default: | |
1107 | { | |
1108 | dchar c = *p; | |
1109 | if (c & 0x80) | |
1110 | { | |
1111 | c = decodeUTF(); | |
1112 | // Check for start of unicode identifier | |
1113 | if (isUniAlpha(c)) | |
1114 | goto case_ident; | |
1115 | if (c == PS || c == LS) | |
1116 | { | |
1117 | endOfLine(); | |
1118 | p++; | |
235d5a96 IB |
1119 | if (tokenizeNewlines) |
1120 | { | |
1121 | t.value = TOK.endOfLine; | |
1122 | tokenizeNewlines = false; | |
1123 | return; | |
1124 | } | |
5fee5ec3 IB |
1125 | continue; |
1126 | } | |
1127 | } | |
// Anything else is reported as an invalid token and skipped.
1128 | if (c < 0x80 && isprint(c)) | |
1129 | error("character '%c' is not a valid token", c); | |
1130 | else | |
1131 | error("character 0x%02x is not a valid token", c); | |
1132 | p++; | |
1133 | continue; | |
1134 | } | |
1135 | } | |
1136 | } | |
1137 | } | |
1138 | ||
/// Returns the token that follows `ct` without consuming it.
/// If `ct` has no successor yet, one is scanned and linked in as `ct.next`.
final Token* peek(Token* ct)
{
    if (ct.next is null)
    {
        Token* fresh = allocateToken();
        scan(fresh);
        ct.next = fresh;
    }
    return ct.next;
}
1152 | ||
/*********************************
 * Looks ahead from an opening `(` to the token after its matching `)`.
 *
 * The search also stops early, returning the offending token itself, at
 * end of file, at a `}` with no matching `{` inside the parentheses, or at
 * a `;` that is not inside curly braces.
 *
 * Params:
 *  tk = the token holding the opening parenthesis
 * Returns:
 *  the token following the matching `)`, or the token that ended the search
 */
final Token* peekPastParen(Token* tk)
{
    int openParens = 1;
    int openCurlies = 0;
    for (;;)
    {
        tk = peek(tk);
        const kind = tk.value;
        if (kind == TOK.leftParenthesis)
        {
            ++openParens;
        }
        else if (kind == TOK.rightParenthesis)
        {
            if (--openParens == 0)
                return peek(tk);    // step past the matching ')'
        }
        else if (kind == TOK.leftCurly)
        {
            ++openCurlies;
        }
        else if (kind == TOK.rightCurly)
        {
            if (--openCurlies < 0)
                return tk;
        }
        else if (kind == TOK.semicolon)
        {
            if (openCurlies == 0)
                return tk;
        }
        else if (kind == TOK.endOfFile)
        {
            return tk;
        }
    }
}
1196 | ||
/*******************************************
 * Parses the escape sequence at the current scan position `p`.
 * Forwards to the `escapeSequence(loc, sequence, Ccompile)` overload,
 * passing the current token's location and `p` by reference.
 */
private uint escapeSequence()
{
    const dchar decoded = Lexer.escapeSequence(token.loc, p, Ccompile);
    return decoded;
}
1204 | ||
/********
 * Decode one string-literal escape sequence into a single character value.
 * D https://dlang.org/spec/lex.html#escape_sequences
 * C11 6.4.4.4
 * Params:
 *      loc = location used for any error message
 *      sequence = points at the first character of the escape sequence to parse;
 *                 on return it has been moved past the characters consumed
 *      Ccompile = true to apply the C11 rules for escape sequences
 * Returns:
 *      the escape sequence as a single character
 */
private dchar escapeSequence(const ref Loc loc, ref const(char)* sequence, bool Ccompile)
{
    const(char)* p = sequence;      // operate on a stack copy of the pointer
    scope(exit) sequence = p;       // write the final position back on every exit

    // Numeric value of one hex digit; only used on characters accepted by ishex().
    static uint hexValue(uint ch) nothrow
    {
        if (isdigit(cast(char)ch))
            return ch - '0';
        if (islower(ch))
            return ch - ('a' - 10);
        return ch - ('A' - 10);
    }

    uint c = *p;
    int ndigits;
    switch (c)
    {
    case '\'':
    case '"':
    case '?':
    case '\\':
        // these stand for themselves
        p++;
        break;

    case 'a':
        c = 7;
        p++;
        break;
    case 'b':
        c = 8;
        p++;
        break;
    case 'f':
        c = 12;
        p++;
        break;
    case 'n':
        c = 10;
        p++;
        break;
    case 'r':
        c = 13;
        p++;
        break;
    case 't':
        c = 9;
        p++;
        break;
    case 'v':
        c = 11;
        p++;
        break;

    case 'u':
        ndigits = 4;
        goto Lhex;
    case 'U':
        ndigits = 8;
        goto Lhex;
    case 'x':
        ndigits = 2;
    Lhex:
        c = *++p;
        if (!ishex(cast(char)c))
        {
            // `sequence` is only updated on exit, so sequence[0] is still u, U or x
            error(loc, "undefined escape hex sequence \\%c%c", sequence[0], c);
            p++;
        }
        else
        {
            uint v = 0;
            if (Ccompile && ndigits == 2)
            {
                /* C11 6.4.4.4-7 one to infinity hex digits
                 */
                do
                {
                    v = v * 16 + hexValue(c);
                    c = *++p;
                } while (ishex(cast(char)c));
            }
            else
            {
                // `n` counts the digits consumed so far
                for (int n = 1; ; ++n)
                {
                    v = v * 16 + hexValue(c);
                    c = *++p;
                    if (n == ndigits)
                        break;
                    if (!ishex(cast(char)c))
                    {
                        error(loc, "escape hex sequence has %d hex digits instead of %d", n, ndigits);
                        break;
                    }
                }
                if (ndigits != 2 && !utf_isValidDchar(v))
                {
                    error(loc, "invalid UTF character \\U%08x", v);
                    v = '?'; // recover with valid UTF character
                }
            }
            c = v;
        }
        break;

    case '&':
        if (Ccompile)
            goto default;
        {
            // named character entity: letters, then letters or digits, then ';'
            const idstart = ++p;
            while (isalpha(*p) || (p != idstart && isdigit(*p)))
                p++;
            if (*p == ';')
            {
                c = HtmlNamedEntity(idstart[0 .. p - idstart]);
                if (c == ~0)
                {
                    error(loc, "unnamed character entity &%.*s;", cast(int)(p - idstart), idstart);
                    c = '?';
                }
                p++;
            }
            else
            {
                // the reported text includes the character that ended the name
                error(loc, "unterminated named entity &%.*s;", cast(int)(p - idstart + 1), idstart);
                c = '?';
            }
        }
        break;

    case 0:
    case 0x1A:
        // end of file
        c = '\\';
        break;

    default:
        if (!isoctal(cast(char)c))
        {
            error(loc, "undefined escape sequence \\%c", c);
            p++;
        }
        else
        {
            // one to three octal digits
            uint v = 0;
            for (int n = 0; n < 3 && isoctal(cast(char)c); ++n)
            {
                v = v * 8 + (c - '0');
                c = *++p;
            }
            c = v;
            if (c > 0xFF)
                error(loc, "escape octal sequence \\%03o is larger than \\377", c);
        }
        break;
    }
    return c;
}
1376 | ||
/**
Lex a wysiwyg string. On entry `p` points at the character just before the
string contents; that character is also taken as the terminator (i.e. backtick
or double-quote).
Params:
    result = pointer to the token that accepts the result
*/
private void wysiwygStringConstant(Token* result)
{
    result.value = TOK.string_;
    Loc start = loc();
    auto terminator = p[0];
    p++;
    stringbuffer.setsize(0);
    for (;;)
    {
        dchar ch = *p++;
        if (ch == '\n')
        {
            endOfLine();
        }
        else if (ch == '\r')
        {
            if (*p == '\n')
                continue;   // the following '\n' is handled on the next iteration
            ch = '\n';      // treat EndOfLine as \n character
            endOfLine();
        }
        else if (ch == 0 || ch == 0x1A)
        {
            error("unterminated string constant starting at %s", start.toChars());
            result.setString();
            // rewind `p` so it points to the EOF character
            p--;
            return;
        }
        else if (ch == terminator)
        {
            result.setString(stringbuffer);
            stringPostfix(result);
            return;
        }
        else if (ch & 0x80)
        {
            // multi-byte sequence: back up to its first byte and decode it
            p--;
            const u = decodeUTF();
            p++;
            if (u == PS || u == LS)
                endOfLine();
            stringbuffer.writeUTF8(u);
            continue;
        }
        stringbuffer.writeByte(ch);
    }
}
1435 | ||
5fee5ec3 IB |
/**
Lex a delimited string. Some examples of delimited strings are:
---
q"(foo(xxx))"      // "foo(xxx)"
q"[foo$(LPAREN)]"  // "foo$(LPAREN)"
q"/foo]/"          // "foo]"
q"HERE
foo
HERE"              // "foo\n"
---
It is assumed that `p` points to the opening double-quote '"'.

The first character after the quote selects the mode: one of `( { [ <` gives a
nesting delimiter pair, an identifier start gives a heredoc, anything else is
used as both the opening and the closing delimiter.
Params:
    result = pointer to the token that accepts the result
*/
private void delimitedStringConstant(Token* result)
{
    /* True if `ch` can start an identifier. The <ctype.h> classifiers are only
     * called for ASCII values: `ch` is a decoded code point and may be far
     * larger than the range those functions are defined for.
     */
    static bool startsIdentifier(dchar ch) nothrow
    {
        return ch < 0x80 ? (isalpha(ch) || ch == '_') : !!isUniAlpha(ch);
    }

    result.value = TOK.string_;
    Loc start = loc();
    dchar delimleft = 0;
    dchar delimright = 0;
    uint nest = 1;
    uint nestcount = ~0; // dead assignment, needed to suppress warning
    Identifier hereid = null;
    uint blankrol = 0;      // set while the rest of the heredoc opening line must be blank
    uint startline = 0;     // set while at the start of a line
    p++;
    stringbuffer.setsize(0);
    while (1)
    {
        dchar c = *p++;
        //printf("c = '%c'\n", c);
        switch (c)
        {
        case '\n':
        Lnextline:
            endOfLine();
            startline = 1;
            if (blankrol)
            {
                blankrol = 0;
                continue;
            }
            if (hereid)
            {
                stringbuffer.writeUTF8(c);
                continue;
            }
            break;
        case '\r':
            if (*p == '\n')
                continue; // ignore
            c = '\n'; // treat EndOfLine as \n character
            goto Lnextline;
        case 0:
        case 0x1A:
            error("unterminated delimited string constant starting at %s", start.toChars());
            result.setString();
            // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
            p--;
            return;
        default:
            if (c & 0x80)
            {
                p--;
                c = decodeUTF();
                p++;
                if (c == PS || c == LS)
                    goto Lnextline;
            }
            break;
        }
        if (delimleft == 0)
        {
            // first character after the quote: work out the delimiters
            delimleft = c;
            nest = 1;
            nestcount = 1;
            if (c == '(')
                delimright = ')';
            else if (c == '{')
                delimright = '}';
            else if (c == '[')
                delimright = ']';
            else if (c == '<')
                delimright = '>';
            else if (startsIdentifier(c))
            {
                // Start of identifier; must be a heredoc
                Token tok;
                p--;
                scan(&tok); // read in heredoc identifier
                if (tok.value != TOK.identifier)
                {
                    error("identifier expected for heredoc, not %s", tok.toChars());
                    delimright = c;
                }
                else
                {
                    hereid = tok.ident;
                    //printf("hereid = '%s'\n", hereid.toChars());
                    blankrol = 1;
                }
                nest = 0;
            }
            else
            {
                delimright = c;
                nest = 0;
                // only ASCII values are passed to isspace()
                if (c < 0x80 && isspace(c))
                    error("delimiter cannot be whitespace");
            }
        }
        else
        {
            if (blankrol)
            {
                error("heredoc rest of line should be blank");
                blankrol = 0;
                continue;
            }
            if (nest == 1)
            {
                if (c == delimleft)
                    nestcount++;
                else if (c == delimright)
                {
                    nestcount--;
                    if (nestcount == 0)
                        goto Ldone;
                }
            }
            else if (c == delimright)
                goto Ldone;
            if (startline && hereid && startsIdentifier(c))
            {
                // an identifier at the start of a line may be the closing heredoc identifier
                Token tok;
                auto psave = p;
                p--;
                scan(&tok); // read in possible heredoc identifier
                //printf("endid = '%s'\n", tok.ident.toChars());
                if (tok.value == TOK.identifier && tok.ident is hereid)
                {
                    /* should check that rest of line is blank
                     */
                    goto Ldone;
                }
                p = psave;  // not the terminator: resume right after `c`
            }
            stringbuffer.writeUTF8(c);
            startline = 0;
        }
    }
Ldone:
    if (*p == '"')
        p++;
    else if (hereid)
        error("delimited string must end in `%s\"`", hereid.toChars());
    else if (delimright < 0x80 && isspace(delimright))
        error("delimited string must end in `\"`");
    else
        error("delimited string must end in `%c\"`", delimright);
    result.setString(stringbuffer);
    stringPostfix(result);
}
1599 | ||
/**
Lex a token string. Some examples of token strings are:
---
q{ foo(xxx) }    // " foo(xxx) "
q{foo$(LPAREN)}  // "foo$(LPAREN)"
q{{foo}"}"}      // "{foo}"}""
---
It is assumed that `p` points to the opening curly-brace.

Tokens are scanned until the curly brace that balances the opening one; the
string is the source text in between.
Params:
    result = pointer to the token that accepts the result
*/
private void tokenStringConstant(Token* result)
{
    result.value = TOK.string_;

    uint depth = 1;             // the opening brace is already counted
    const start = loc();
    const pstart = ++p;         // first character after the opening brace
    inTokenStringConstant++;
    scope(exit) inTokenStringConstant--;
    for (;;)
    {
        Token tok;
        scan(&tok);
        if (tok.value == TOK.leftCurly)
        {
            ++depth;
        }
        else if (tok.value == TOK.rightCurly)
        {
            if (--depth != 0)
                continue;
            // `p` is just past the closing brace; exclude the brace itself
            result.setString(pstart, p - 1 - pstart);
            stringPostfix(result);
            return;
        }
        else if (tok.value == TOK.endOfFile)
        {
            error("unterminated token string constant starting at %s", start.toChars());
            result.setString();
            return;
        }
    }
}
1646 | ||
/**
Scan a quoted string, resolving escape sequences while the processed string
value is accumulated. The result is stored in the given `t` token.
On entry `p` must point to the opening quote of the string.
Params:
    t = the token to set the resulting string to
* References:
*   D https://dlang.org/spec/lex.html#double_quoted_strings
*   ImportC C11 6.4.5
*/
private void escapeStringConstant(Token* t)
{
    t.value = TOK.string_;

    const start = loc();
    const quote = *p++;     // opening quote
    stringbuffer.setsize(0);
    for (;;)
    {
        dchar ch = *p++;
        if (ch == '\\')
        {
            // \u, \U and (outside C mode) \& results are written as UTF-8,
            // every other escape result is written as a single byte
            const selector = *p;
            const asUTF8 = selector == 'u' || selector == 'U' || (selector == '&' && !Ccompile);
            ch = escapeSequence();
            if (asUTF8)
            {
                stringbuffer.writeUTF8(ch);
                continue;
            }
        }
        else if (ch == '\n')
        {
            endOfLine();
            if (Ccompile)
                goto Lunterminated;
        }
        else if (ch == '\r')
        {
            if (*p == '\n')
                continue;   // ignore
            ch = '\n';      // treat EndOfLine as \n character
            endOfLine();
            if (Ccompile)
                goto Lunterminated;
        }
        else if ((ch == '\'' || ch == '"') && ch == quote)
        {
            t.setString(stringbuffer);
            if (!Ccompile)
                stringPostfix(t);
            return;
        }
        else if (ch == 0 || ch == 0x1A)
        {
            // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
            p--;
            goto Lunterminated;
        }
        else if (ch & 0x80)
        {
            p--;
            ch = decodeUTF();
            if (ch == LS || ch == PS)
            {
                ch = '\n';
                endOfLine();
                if (Ccompile)
                    goto Lunterminated;
            }
            p++;
            stringbuffer.writeUTF8(ch);
            continue;
        }
        stringbuffer.writeByte(ch);
    }

Lunterminated:
    error("unterminated string constant starting at %s", start.toChars());
    t.setString();
}
1738 | ||
/**************************************
 * Lex a character literal; on entry `p` points at the opening quote.
 * The character value is stored in `t.unsvalue` ('?' after an error).
 * Reference:
 *      https://dlang.org/spec/lex.html#characterliteral
 * Params:
 *      t = token receiving the character value
 * Returns:
 *      TOK.charLiteral, TOK.wcharLiteral or TOK.dcharLiteral
 */
private TOK charConstant(Token* t)
{
    TOK kind = TOK.charLiteral;
    //printf("Lexer::charConstant\n");
    p++;
    dchar ch = *p++;
    bool unterminated = false;

    if (ch == '\\')
    {
        // the character after the backslash decides the literal's kind
        const selector = *p;
        t.unsvalue = escapeSequence();
        if (selector == 'u')
            kind = TOK.wcharLiteral;
        else if (selector == 'U' || selector == '&')
            kind = TOK.dcharLiteral;
    }
    else if (ch == '\n')
    {
        endOfLine();
        unterminated = true;
    }
    else if (ch == '\r' || ch == '\'')
    {
        unterminated = true;
    }
    else if (ch == 0 || ch == 0x1A)
    {
        // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
        p--;
        unterminated = true;
    }
    else
    {
        if (ch & 0x80)
        {
            p--;
            ch = decodeUTF();
            p++;
            if (ch == LS || ch == PS)
            {
                endOfLine();
                unterminated = true;
            }
            else if (ch < 0xD800 || (ch >= 0xE000 && ch < 0xFFFE))
                kind = TOK.wcharLiteral;
            else
                kind = TOK.dcharLiteral;
        }
        if (!unterminated)
            t.unsvalue = ch;
    }

    if (unterminated)
    {
        error("unterminated character constant");
        t.unsvalue = '?';
        return kind;
    }

    if (*p == '\'')
    {
        // well-formed literal: step over the closing quote
        p++;
        return kind;
    }

    // Error recovery: skip ahead to a closing quote or to a character that
    // is taken to end the literal.
    for (;;)
    {
        const cur = *p;
        if (cur == '\'' || cur == 0x1A || cur == 0 || cur == '\n' ||
            cur == '\r' || cur == ';' || cur == ')' || cur == ']' || cur == '}')
            break;
        if (cur & 0x80)
        {
            const s = p;
            const u = decodeUTF();
            if (u == LS || u == PS)
            {
                p = s;  // stay on the line separator
                break;
            }
        }
        p++;
    }

    if (*p == '\'')
    {
        error("character constant has multiple characters");
        p++;
    }
    else
        error("unterminated character constant");
    t.unsvalue = '?';
    return kind;
}
1830 | ||
/***************************************
 * Lex C character constant.
 * Parser is on the opening quote.
 *
 * The constant is first lexed as a string with escapeStringConstant(); the
 * resulting bytes are then folded into one integer stored in `t.unsvalue`.
 * `t.value` becomes TOK.charLiteral when the string is one byte long and
 * TOK.int32Literal otherwise. An empty constant is reported as an error and
 * `t.value` is set to TOK.semicolon.
 * Params:
 *      t = token to fill in
 *      prefix = one of `u`, `U` or 0.
 * Reference:
 *      C11 6.4.4.4
 */
private void clexerCharConstant(ref Token t, char prefix)
{
    escapeStringConstant(&t);
    const(char)[] str = t.ustring[0 .. t.len];
    const n = str.length;
    const loc = t.loc;
    if (n == 0)
    {
        error(loc, "empty character constant");
        t.value = TOK.semicolon;
        return;
    }

    uint u;
    switch (prefix)
    {
        case 0:
            if (n == 1) // fast case
            {
                u = str[0];
            }
            else if (n > 4)
                error(loc, "max number of chars in character literal is 4, had %d",
                    cast(int)n);
            else
            {
                // First character ends up in the most significant position.
                // Shifting instead of writing through a byte pointer keeps the
                // result independent of the host byte order.
                foreach (c; str)
                    u = (u << 8) | c;
            }
            break;

        case 'u':
            dchar d1;
            size_t idx;
            auto msg = utf_decodeChar(str, idx, d1);
            dchar d2 = 0;
            if (idx < n && !msg)
                msg = utf_decodeChar(str, idx, d2);
            if (msg)
                error(loc, "%s", msg);
            else if (idx < n)
                // `n` is a size_t; `%d` expects an int
                error(loc, "max number of chars in 16 bit character literal is 2, had %d",
                    cast(int)((n + 1) >> 1));
            else if (d1 >= 0x1_0000)    // largest 16 bit value is 0xFFFF
                error(loc, "%d does not fit in 16 bits", d1);
            else if (d2 >= 0x1_0000)
                error(loc, "%d does not fit in 16 bits", d2);
            u = d1;
            if (d2)
                u = (d1 << 16) | d2;
            break;

        case 'U':
            dchar d;
            size_t idx;
            auto msg = utf_decodeChar(str, idx, d);
            if (msg)
                error(loc, "%s", msg);
            else if (idx < n)
                // `n` is a size_t; `%d` expects an int
                error(loc, "max number of chars in 32 bit character literal is 1, had %d",
                    cast(int)((n + 3) >> 2));
            u = d;
            break;

        default:
            assert(0);
    }
    t.value = n == 1 ? TOK.charLiteral : TOK.int32Literal;
    t.unsvalue = u;
}
1910 | ||
/***************************************
 * Get postfix of string literal.
 *
 * If `p` points at `c`, `w` or `d`, that character is stored in `t.postfix`
 * and `p` is advanced past it; otherwise `t.postfix` is set to 0 and `p` is
 * left alone.
 * Params:
 *      t = token whose `postfix` field is set
 */
private void stringPostfix(Token* t) pure @nogc
{
    const suffix = *p;
    if (suffix == 'c' || suffix == 'w' || suffix == 'd')
    {
        t.postfix = suffix;
        p++;
    }
    else
    {
        t.postfix = 0;
    }
}
1929 | ||
/**************************************
 * Read in a number.
 *
 * Integers can be decimal, octal, hexadecimal or binary; the value is stored
 * in `t.unsvalue`. When the text turns out to be a floating point literal,
 * `p` is reset to the start of the number and lexing is handed to inreal().
 * When `Ccompile` is set, suffix handling is handed to cnumber(); otherwise
 * the suffixes `u`, `U`, `l` and `L` are handled here.
 * Params:
 *      t = token receiving the value
 * Returns:
 *      the token kind of the literal
 */
private TOK number(Token* t)
{
    int base = 10;
    const start = p;
    uinteger_t n = 0; // unsigned >=64 bit integer type
    int d;
    bool err = false;
    bool overflow = false;
    bool sawDigit = false;      // set once any digit has been consumed by the main loop
    char errorDigit = 0;        // first digit that is not valid for `base`, if any
    dchar c = *p;
    if (c == '0')
    {
        // a leading 0 may introduce a base prefix
        ++p;
        c = *p;
        switch (c)
        {
        case '0': .. case '7':
            base = 8;
            break;

        case '8', '9':
            errorDigit = cast(char) c;
            base = 8;
            break;
        case 'x', 'X':
            ++p;
            base = 16;
            break;
        case 'b', 'B':
            if (Ccompile)
                error("binary constants not allowed");
            ++p;
            base = 2;
            break;
        case '.':
            if (p[1] == '.')
                goto Ldone; // if ".."
            if (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80)
            {
                if (Ccompile && (p[1] == 'f' || p[1] == 'F' || p[1] == 'l' || p[1] == 'L'))
                    goto Lreal;  // if `0.f` or `0.L`
                goto Ldone; // if ".identifier" or ".unicode"
            }
            goto Lreal; // '.' is part of current token
        case 'i', 'f', 'F':
            goto Lreal;
        case '_':
            if (Ccompile)
                error("embedded `_` not allowed");
            ++p;
            base = 8;
            break;
        case 'L':
            if (p[1] == 'i')
                goto Lreal;
            break;
        default:
            break;
        }
    }
    while (1)
    {
        c = *p;
        switch (c)
        {
        case '0': .. case '9':
            ++p;
            d = c - '0';
            break;
        case 'a': .. case 'f':
        case 'A': .. case 'F':
            ++p;
            // outside hex, e/E/f/F means this is a floating point literal
            if (base != 16 && (c == 'e' || c == 'E' || c == 'f' || c == 'F'))
                goto Lreal;
            if (c >= 'a')
                d = c + 10 - 'a';
            else
                d = c + 10 - 'A';
            break;
        case 'L':
            if (p[1] == 'i')
                goto Lreal;
            goto Ldone;
        case '.':
            if (p[1] == '.')
                goto Ldone; // if ".."
            if (base <= 10 && n > 0 && (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80))
            {
                if (Ccompile && base == 10 &&
                    (p[1] == 'e' || p[1] == 'E' || p[1] == 'f' || p[1] == 'F' || p[1] == 'l' || p[1] == 'L'))
                    goto Lreal;  // if `1.e6` or `1.f` or `1.L`
                goto Ldone; // if ".identifier" or ".unicode"
            }
            if (base == 16 && (!ishex(p[1]) || p[1] == '_' || p[1] & 0x80))
                goto Ldone; // if ".identifier" or ".unicode"
            if (base == 2)
                goto Ldone; // if ".identifier" or ".unicode"
            goto Lreal; // otherwise as part of a floating point literal
        case 'p', 'P', 'i':
        Lreal:
            // floating point: rescan from the beginning of the literal
            p = start;
            return inreal(t);
        case '_':
            if (Ccompile)
                goto default;
            ++p;
            continue;
        default:
            goto Ldone;
        }
        // got a digit here, set any necessary flags, check for errors
        sawDigit = true;
        if (!errorDigit && d >= base)
            errorDigit = cast(char) c;
        // Avoid expensive overflow check if we aren't at risk of overflow
        if (n <= 0x0FFF_FFFF_FFFF_FFFFUL)
            n = n * base + d;
        else
        {
            import core.checkedint : mulu, addu;

            n = mulu(n, base, overflow);
            n = addu(n, d, overflow);
        }
    }
Ldone:
    if (errorDigit)
    {
        error("%s digit expected, not `%c`", base == 2 ? "binary".ptr :
                                             base == 8 ? "octal".ptr :
                                             "decimal".ptr, errorDigit);
        err = true;
    }
    if (overflow && !err)
    {
        error("integer overflow");
        err = true;
    }
    // a bare `0x` or `0b` prefix with no digits after it
    if ((base == 2 || base == 16) && !sawDigit)
        error("`%.*s` isn't a valid integer literal, use `%.*s0` instead", cast(int)(p - start), start, 2, start);

    t.unsvalue = n;

    if (Ccompile)
        return cnumber(base, n);

    enum FLAGS : int
    {
        none = 0,
        decimal = 1, // decimal
        unsigned = 2, // u or U suffix
        long_ = 4, // L suffix
    }

    FLAGS flags = (base == 10) ? FLAGS.decimal : FLAGS.none;
    // Parse trailing 'u', 'U', 'l' or 'L' in any combination
    const psuffix = p;
    for (;;)
    {
        FLAGS f;
        const sc = *p;
        if (sc == 'U' || sc == 'u')
            f = FLAGS.unsigned;
        else if (sc == 'l')
        {
            f = FLAGS.long_;
            error("lower case integer suffix 'l' is not allowed. Please use 'L' instead");
        }
        else if (sc == 'L')
            f = FLAGS.long_;
        else
            break;

        p++;
        if ((flags & f) && !err)
        {
            // the same suffix was given twice
            error("unrecognized token");
            err = true;
        }
        flags = cast(FLAGS)(flags | f);
    }
    if (base == 8 && n >= 8)
    {
        if (err)
            // can't translate invalid octal value, just show a generic message
            error("octal literals larger than 7 are no longer supported");
        else
            error("octal literals `0%llo%.*s` are no longer supported, use `std.conv.octal!\"%llo%.*s\"` instead",
                n, cast(int)(p - psuffix), psuffix, n, cast(int)(p - psuffix), psuffix);
    }
    TOK result;
    switch (flags)
    {
    case FLAGS.none:
        /* Octal or Hexadecimal constant.
         * First that fits: int, uint, long, ulong
         */
        result = (n & 0x8000000000000000L) ? TOK.uns64Literal
               : (n & 0xFFFFFFFF00000000L) ? TOK.int64Literal
               : (n & 0x80000000) ? TOK.uns32Literal
               : TOK.int32Literal;
        break;
    case FLAGS.decimal:
        /* First that fits: int, long, long long
         */
        result = (n & 0x8000000000000000L) ? TOK.uns64Literal
               : (n & 0xFFFFFFFF80000000L) ? TOK.int64Literal
               : TOK.int32Literal;
        break;
    case FLAGS.unsigned:
    case FLAGS.decimal | FLAGS.unsigned:
        /* First that fits: uint, ulong
         */
        result = (n & 0xFFFFFFFF00000000L) ? TOK.uns64Literal : TOK.uns32Literal;
        break;
    case FLAGS.decimal | FLAGS.long_:
        if (n & 0x8000000000000000L)
        {
            if (!err)
            {
                error("signed integer overflow");
                err = true;
            }
            result = TOK.uns64Literal;
        }
        else
            result = TOK.int64Literal;
        break;
    case FLAGS.long_:
        result = (n & 0x8000000000000000L) ? TOK.uns64Literal : TOK.int64Literal;
        break;
    case FLAGS.unsigned | FLAGS.long_:
    case FLAGS.decimal | FLAGS.unsigned | FLAGS.long_:
        result = TOK.uns64Literal;
        break;
    default:
        debug
        {
            printf("%x\n", flags);
        }
        assert(0);
    }
    return result;
}
2246 | ||
2247 | /************************************** | |
2248 | * Lex C integer-suffix | |
2249 | * Params: | |
2250 | * base = number base | |
2251 | * n = raw integer value | |
2252 | * Returns: | |
2253 | * token value | |
2254 | */ | |
2255 | private TOK cnumber(int base, uinteger_t n) | |
2256 | { | |
2257 | /* C11 6.4.4.1 | |
2258 | * Parse trailing suffixes: | |
2259 | * u or U | |
2260 | * l or L | |
2261 | * ll or LL | |
2262 | */ | |
2263 | enum FLAGS : uint | |
2264 | { | |
2265 | octalhex = 1, // octal or hexadecimal | |
2266 | decimal = 2, // decimal | |
2267 | unsigned = 4, // u or U suffix | |
2268 | long_ = 8, // l or L suffix | |
2269 | llong = 0x10 // ll or LL | |
2270 | } | |
2271 | FLAGS flags = (base == 10) ? FLAGS.decimal : FLAGS.octalhex; | |
2272 | bool err; | |
2273 | Lsuffixes: | |
2274 | while (1) | |
2275 | { | |
2276 | FLAGS f; | |
2277 | const cs = *p; | |
2278 | switch (cs) | |
2279 | { | |
2280 | case 'U': | |
2281 | case 'u': | |
2282 | f = FLAGS.unsigned; | |
2283 | break; | |
2284 | ||
2285 | case 'l': | |
2286 | case 'L': | |
2287 | f = FLAGS.long_; | |
2288 | if (cs == p[1]) | |
2289 | { | |
2290 | f = FLAGS.long_ | FLAGS.llong; | |
2291 | ++p; | |
2292 | } | |
2293 | break; | |
2294 | ||
2295 | default: | |
2296 | break Lsuffixes; | |
2297 | } | |
2298 | ++p; | |
2299 | if ((flags & f) && !err) | |
2300 | { | |
2301 | error("duplicate integer suffixes"); | |
2302 | err = true; | |
2303 | } | |
2304 | flags = cast(FLAGS)(flags | f); | |
2305 | } | |
2306 | ||
5fee5ec3 IB |
2307 | TOK result = TOK.int32Literal; // default |
2308 | switch (flags) | |
2309 | { | |
2310 | /* Since D doesn't have a variable sized `long` or `unsigned long` type, | |
2311 | * this code deviates from C by picking D int, uint, long, or ulong instead | |
2312 | */ | |
2313 | ||
2314 | case FLAGS.octalhex: | |
2315 | /* Octal or Hexadecimal constant. | |
2316 | * First that fits: int, unsigned, long, unsigned long, | |
2317 | * long long, unsigned long long | |
2318 | */ | |
6384eff5 IB |
2319 | if (n & 0x8000000000000000L) |
2320 | result = TOK.uns64Literal; // unsigned long | |
2321 | else if (n & 0xFFFFFFFF00000000L) | |
2322 | result = TOK.int64Literal; // long | |
2323 | else if (n & 0x80000000) | |
2324 | result = TOK.uns32Literal; | |
5fee5ec3 | 2325 | else |
6384eff5 | 2326 | result = TOK.int32Literal; |
5fee5ec3 IB |
2327 | break; |
2328 | ||
2329 | case FLAGS.decimal: | |
2330 | /* First that fits: int, long, long long | |
2331 | */ | |
6384eff5 IB |
2332 | if (n & 0x8000000000000000L) |
2333 | result = TOK.uns64Literal; // unsigned long | |
2334 | else if (n & 0xFFFFFFFF80000000L) | |
2335 | result = TOK.int64Literal; // long | |
5fee5ec3 | 2336 | else |
6384eff5 | 2337 | result = TOK.int32Literal; |
5fee5ec3 IB |
2338 | break; |
2339 | ||
2340 | case FLAGS.octalhex | FLAGS.unsigned: | |
2341 | case FLAGS.decimal | FLAGS.unsigned: | |
2342 | /* First that fits: unsigned, unsigned long, unsigned long long | |
2343 | */ | |
6384eff5 IB |
2344 | if (n & 0xFFFFFFFF00000000L) |
2345 | result = TOK.uns64Literal; // unsigned long | |
5fee5ec3 | 2346 | else |
6384eff5 | 2347 | result = TOK.uns32Literal; |
5fee5ec3 IB |
2348 | break; |
2349 | ||
2350 | case FLAGS.decimal | FLAGS.long_: | |
2351 | /* First that fits: long, long long | |
2352 | */ | |
1027dc45 | 2353 | if (longsize == 4 || long_longsize == 4) |
5fee5ec3 | 2354 | { |
6384eff5 | 2355 | if (n & 0xFFFFFFFF_80000000L) |
5fee5ec3 IB |
2356 | result = TOK.int64Literal; |
2357 | else | |
6384eff5 | 2358 | result = TOK.int32Literal; // long |
5fee5ec3 IB |
2359 | } |
2360 | else | |
2361 | { | |
6384eff5 | 2362 | result = TOK.int64Literal; // long |
5fee5ec3 IB |
2363 | } |
2364 | break; | |
2365 | ||
2366 | case FLAGS.octalhex | FLAGS.long_: | |
2367 | /* First that fits: long, unsigned long, long long, | |
2368 | * unsigned long long | |
2369 | */ | |
1027dc45 | 2370 | if (longsize == 4 || long_longsize == 4) |
5fee5ec3 IB |
2371 | { |
2372 | if (n & 0x8000000000000000L) | |
2373 | result = TOK.uns64Literal; | |
2374 | else if (n & 0xFFFFFFFF00000000L) | |
2375 | result = TOK.int64Literal; | |
2376 | else if (n & 0x80000000) | |
2377 | result = TOK.uns32Literal; // unsigned long | |
2378 | else | |
2379 | result = TOK.int32Literal; // long | |
2380 | } | |
2381 | else | |
2382 | { | |
2383 | if (n & 0x80000000_00000000L) | |
2384 | result = TOK.uns64Literal; // unsigned long | |
2385 | else | |
2386 | result = TOK.int64Literal; // long | |
2387 | } | |
2388 | break; | |
2389 | ||
2390 | case FLAGS.octalhex | FLAGS.unsigned | FLAGS.long_: | |
2391 | case FLAGS.decimal | FLAGS.unsigned | FLAGS.long_: | |
2392 | /* First that fits: unsigned long, unsigned long long | |
2393 | */ | |
1027dc45 | 2394 | if (longsize == 4 || long_longsize == 4) |
5fee5ec3 IB |
2395 | { |
2396 | if (n & 0xFFFFFFFF00000000L) | |
2397 | result = TOK.uns64Literal; | |
2398 | else | |
2399 | result = TOK.uns32Literal; // unsigned long | |
2400 | } | |
2401 | else | |
2402 | { | |
2403 | result = TOK.uns64Literal; // unsigned long | |
2404 | } | |
2405 | break; | |
2406 | ||
2407 | case FLAGS.octalhex | FLAGS.long_ | FLAGS.llong: | |
2408 | /* First that fits: long long, unsigned long long | |
2409 | */ | |
2410 | if (n & 0x8000000000000000L) | |
2411 | result = TOK.uns64Literal; | |
2412 | else | |
2413 | result = TOK.int64Literal; | |
2414 | break; | |
2415 | ||
2416 | case FLAGS.decimal | FLAGS.long_ | FLAGS.llong: | |
2417 | /* long long | |
2418 | */ | |
2419 | result = TOK.int64Literal; | |
2420 | break; | |
2421 | ||
2422 | case FLAGS.octalhex | FLAGS.long_ | FLAGS.unsigned | FLAGS.llong: | |
2423 | case FLAGS.decimal | FLAGS.long_ | FLAGS.unsigned | FLAGS.llong: | |
2424 | result = TOK.uns64Literal; | |
2425 | break; | |
2426 | ||
2427 | default: | |
2428 | debug printf("%x\n",flags); | |
2429 | assert(0); | |
2430 | } | |
2431 | return result; | |
2432 | } | |
2433 | ||
/**************************************
 * Scan a floating point literal starting at `p` and convert it to a real.
 *
 * The literal text (with every `_` removed) is copied into `stringbuffer`,
 * parsed with `CTFloat.parse`, and stored in `t.floatvalue`. On return `p`
 * points just past the literal and any `f`/`F`/`l`/`L`/`i`/`I` suffix.
 *
 * Params:
 *      t = token that receives the parsed value in `floatvalue`
 * Returns:
 *      one of the float32/64/80 or imaginary32/64/80 literal token kinds
 * Bugs:
 *      Exponent overflow not detected.
 *      Too much requested precision is not detected.
 */
private TOK inreal(Token* t)
{
    //printf("Lexer::inreal()\n");
    debug
    {
        assert(*p == '.' || isdigit(*p));
    }
    bool isWellformedString = true;
    stringbuffer.setsize(0);
    auto pstart = p;
    bool hex = false;
    dchar c = *p++;
    // Leading '0x'
    if (c == '0')
    {
        c = *p++;
        if (c == 'x' || c == 'X')
        {
            hex = true;
            c = *p++;
        }
    }
    // Digits to left of '.'
    while (1)
    {
        if (c == '.')
        {
            c = *p++;
            break;
        }
        if (isdigit(c) || (hex && isxdigit(c)) || c == '_')
        {
            c = *p++;
            continue;
        }
        break;
    }
    // Digits to right of '.'
    while (1)
    {
        if (isdigit(c) || (hex && isxdigit(c)) || c == '_')
        {
            c = *p++;
            continue;
        }
        break;
    }
    // Exponent: 'e'/'E' always, 'p'/'P' additionally for hex floats
    if (c == 'e' || c == 'E' || (hex && (c == 'p' || c == 'P')))
    {
        c = *p++;
        if (c == '-' || c == '+')
        {
            c = *p++;
        }
        bool anyexp = false;
        while (1)
        {
            if (isdigit(c))
            {
                anyexp = true;
                c = *p++;
                continue;
            }
            if (c == '_')
            {
                if (Ccompile)
                    error("embedded `_` in numeric literals not allowed");
                c = *p++;
                continue;
            }
            if (!anyexp)
            {
                error("missing exponent");
                isWellformedString = false;
            }
            break;
        }
    }
    else if (hex)
    {
        error("exponent required for hex float");
        isWellformedString = false;
    }
    // `c` holds the first character that is not part of the number; back up onto it
    --p;
    // Copy the literal, minus separators, into a 0-terminated buffer for parsing
    while (pstart < p)
    {
        if (*pstart != '_')
            stringbuffer.writeByte(*pstart);
        ++pstart;
    }
    stringbuffer.writeByte(0);
    auto sbufptr = cast(const(char)*)stringbuffer[].ptr;
    TOK result;
    bool isOutOfRange = false;
    // A malformed literal has already been diagnosed; use zero rather than parsing it
    t.floatvalue = (isWellformedString ? CTFloat.parse(sbufptr, isOutOfRange) : CTFloat.zero);
    switch (*p)
    {
    case 'F':
    case 'f':
        if (isWellformedString && !isOutOfRange)
            isOutOfRange = Port.isFloat32LiteralOutOfRange(sbufptr);
        result = TOK.float32Literal;
        p++;
        break;
    default:
        if (isWellformedString && !isOutOfRange)
            isOutOfRange = Port.isFloat64LiteralOutOfRange(sbufptr);
        result = TOK.float64Literal;
        break;
    case 'l':
        if (!Ccompile)
            error("use 'L' suffix instead of 'l'");
        goto case 'L';
    case 'L':
        ++p;
        // When compiling C with an 8 byte `long double`, treat the literal as a double
        if (Ccompile && long_doublesize == 8)
            goto default;
        result = TOK.float80Literal;
        break;
    }
    if ((*p == 'i' || *p == 'I') && !Ccompile)
    {
        if (*p == 'I')
            error("use 'i' suffix instead of 'I'");
        p++;
        switch (result)
        {
        case TOK.float32Literal:
            result = TOK.imaginary32Literal;
            break;
        case TOK.float64Literal:
            result = TOK.imaginary64Literal;
            break;
        case TOK.float80Literal:
            result = TOK.imaginary80Literal;
            break;
        default:
            break;
        }
    }
    const isLong = (result == TOK.float80Literal || result == TOK.imaginary80Literal);
    if (isOutOfRange && !isLong && (!Ccompile || hex))
    {
        /* C11 6.4.4.2 doesn't actually care if it is not representable if it is not hex
         */
        /* `result` may already be an imaginary kind here, and the 80 bit kinds
         * are excluded by `!isLong`, so pick the names from the operand width
         * rather than looking `result` up in a table keyed by real kinds only.
         */
        const isSingle = (result == TOK.float32Literal || result == TOK.imaginary32Literal);
        const char* suffix = isSingle ? "f".ptr : "".ptr;
        const char* type = isSingle ? "`float`".ptr : "`double`".ptr;
        error(scanloc, "number `%s%s` is not representable as a %s", sbufptr, suffix, type);
        const char* extra = result == TOK.float64Literal ? "`real` literals can be written using the `L` suffix. " : "";
        errorSupplemental(scanloc, "%shttps://dlang.org/spec/lex.html#floatliteral", extra);
    }
    debug
    {
        switch (result)
        {
        case TOK.float32Literal:
        case TOK.float64Literal:
        case TOK.float80Literal:
        case TOK.imaginary32Literal:
        case TOK.imaginary64Literal:
        case TOK.imaginary80Literal:
            break;
        default:
            assert(0);
        }
    }
    return result;
}
2610 | ||
/// Updates `scanloc` so its column (and, with `version (LocOffset)`, its
/// file offset) reflects the current position `p`, then returns it.
final Loc loc() pure @nogc
{
    // Columns are 1-based, counted from the start of the current line
    const column = 1 + p - line;
    scanloc.charnum = cast(uint) column;
    version (LocOffset)
    {
        scanloc.fileOffset = cast(uint)(p - base);
    }
    return scanloc;
}
2618 | ||
/// Reports an error at the location of the current token (`token.loc`).
/// `format` and the variadic arguments are forwarded to `verror`.
final void error(const(char)* format, ...)
{
    va_list ap;
    va_start(ap, format);
    scope (exit) va_end(ap);
    .verror(token.loc, format, ap);
}
2626 | ||
/// Reports an error at the explicitly supplied location `loc`.
/// `format` and the variadic arguments are forwarded to `verror`.
final void error(const ref Loc loc, const(char)* format, ...)
{
    va_list ap;
    va_start(ap, format);
    scope (exit) va_end(ap);
    .verror(loc, format, ap);
}
2634 | ||
/// Reports a deprecation at the location of the current token (`token.loc`).
/// `format` and the variadic arguments are forwarded to `vdeprecation`.
final void deprecation(const(char)* format, ...)
{
    va_list ap;
    va_start(ap, format);
    scope (exit) va_end(ap);
    .vdeprecation(token.loc, format, ap);
}
2642 | ||
7e287503 IB |
/***************************************
 * Parse a special token sequence.
 *
 * Scans the next token and dispatches on it: `line` is handed to
 * `poundLine`, any other identifier produces a warning, and `if`
 * produces an error.
 *
 * Returns:
 *      true if the special token sequence was handled
 * References:
 *      https://dlang.org/spec/lex.html#special-token-sequence
 */
bool parseSpecialTokenSequence()
{
    Token n;
    scan(&n);
    switch (n.value)
    {
    case TOK.identifier:
        if (n.ident == Id.line)
        {
            poundLine(n, false);
            return true;
        }
        const locx = loc();
        warning(locx, "C preprocessor directive `#%s` is not supported", n.ident.toChars());
        return false;

    case TOK.if_:
        error("C preprocessor directive `#if` is not supported, use `version` or `static if`");
        return false;

    default:
        return false;
    }
}
2673 | ||
5fee5ec3 IB |
/*********************************************
 * Parse line/file preprocessor directive:
 *    #line linnum [filespec]
 * Allow __LINE__ for linnum, and __FILE__ for filespec.
 * Accept linemarker format:
 *    # linnum [filespec] {flags}
 * There can be zero or more flags, which are one of the digits 1..4, and
 * must be in ascending order. The flags are ignored.
 * Params:
 *      tok = token we're on, which is linnum of linemarker
 *      linemarker = true if line marker format and lexer is on linnum
 * References:
 *      linemarker https://gcc.gnu.org/onlinedocs/gcc-11.1.0/cpp/Preprocessor-Output.html
 */
final void poundLine(ref Token tok, bool linemarker)
{
    auto linnum = this.scanloc.linnum;
    const(char)* filespec = null;
    bool flags;

    // For `#line` the caller is on `line`; advance to the line number
    if (!linemarker)
        scan(&tok);

    switch (tok.value)
    {
    case TOK.int32Literal:
    case TOK.int64Literal:
        {
            const lin = cast(int)(tok.unsvalue);
            if (lin != tok.unsvalue)
            {
                error(tok.loc, "line number `%lld` out of range", cast(ulong)tok.unsvalue);
                skipToNextLine();
                return;
            }
            linnum = lin;
        }
        break;

    case TOK.line: // #line __LINE__
        break;

    default:
        error(tok.loc, "positive integer argument expected following `#line`");
        if (tok.value != TOK.endOfLine)
            skipToNextLine();
        return;
    }

    // Consume the optional filespec and flags up to the end of the line
    while (1)
    {
        scan(&tok);
        switch (tok.value)
        {
        case TOK.endOfFile:
        case TOK.endOfLine:
            // Commit the new position unless lexing inside a token string
            if (!inTokenStringConstant)
            {
                this.scanloc.linnum = linnum;
                if (filespec)
                    this.scanloc.filename = filespec;
            }
            return;

        case TOK.file:
            if (filespec || flags)
                goto Lerr;
            filespec = mem.xstrdup(scanloc.filename);
            continue;

        case TOK.string_:
            // Only a plain double-quoted string without postfix is accepted
            if (filespec || flags || tok.ptr[0] != '"' || tok.postfix != 0)
                goto Lerr;
            filespec = tok.ustring;
            continue;

        case TOK.int32Literal:
            if (filespec && linemarker && tok.unsvalue >= 1 && tok.unsvalue <= 4)
            {
                flags = true; // linemarker flags seen
                continue;
            }
            goto Lerr;

        default:
            goto Lerr;
        }
    }

Lerr:
    if (filespec is null)
        error(tok.loc, "invalid filename for `#line` directive");
    else if (linemarker)
        error(tok.loc, "invalid flag for line marker directive");
    else if (!Ccompile)
        error(tok.loc, "found `%s` when expecting new line following `#line` directive", tok.toChars());
    if (tok.value != TOK.endOfLine)
        skipToNextLine();
}
2767 | ||
0fb57034 IB |
/***************************************
 * Scan forward to start of next line.
 *
 * Stops without advancing when `*p` is 0 or 0x1A. Otherwise consumes
 * through the line terminator, calls `endOfLine()` and clears
 * `tokenizeNewlines`.
 * Params:
 *      defines = send characters to `defines`
 */
final void skipToNextLine(OutBuffer* defines = null)
{
    for (;;)
    {
        const ch = *p;
        if (ch == 0 || ch == 0x1A)
            return; // do not advance p

        if (ch == '\n')
        {
            ++p;
            break;
        }
        if (ch == '\r')
        {
            ++p;
            if (p[0] == '\n')
                ++p;
            break;
        }

        if (defines)
        {
            defines.writeByte(ch); // don't care about Unicode line endings for C
        }
        else if (ch & 0x80)
        {
            // decodeUTF() leaves p on the last byte of the sequence
            const u = decodeUTF();
            if (u == PS || u == LS)
            {
                ++p;
                break;
            }
        }
        ++p;
    }
    endOfLine();
    tokenizeNewlines = false;
}
2813 | ||
5fee5ec3 IB |
/********************************************
 * Decode the UTF-8 sequence that starts at `p`.
 * Issues an error message for an invalid sequence.
 * Returns: the decoded character; `p` is advanced to the last
 *      character of the sequence.
 */
private uint decodeUTF()
{
    const start = p;
    assert(*start & 0x80);

    // Look at no more than 4 code units, stopping early at a 0 terminator
    size_t avail = 1;
    while (avail < 4 && start[avail])
        ++avail;

    size_t consumed = 0;
    dchar decoded;
    const msg = utf_decodeChar(start[0 .. avail], consumed, decoded);
    p += consumed - 1;
    if (msg)
    {
        error("%.*s", cast(int)msg.length, msg.ptr);
    }
    return decoded;
}
2838 | ||
/***************************************************
 * Parse doc comment embedded between t.ptr and p.
 * Remove trailing blanks and tabs from lines.
 * Replace all newlines with \n.
 * Remove leading comment character from each line.
 * Decide if it's a lineComment or a blockComment.
 * Append to previous one for this token.
 *
 * If newParagraph is true, an extra newline will be
 * added between adjoining doc comments.
 */
private void getDocComment(Token* t, uint lineComment, bool newParagraph) pure
{
    /* ct tells us which kind of comment it is: '/', '*', or '+'
     */
    const ct = t.ptr[2];
    /* Comment text starts after the three character opener and, for
     * block comments, ends before the two character closer.
     */
    const(char)* cur = t.ptr + 3;
    const(char)* stop = p;
    if (ct == '*' || ct == '+')
        stop -= 2;

    /* Skip the initial row of ****'s or ++++'s or ////'s
     */
    while (cur < stop && *cur == ct)
        ++cur;

    /* Remove leading spaces until start of the comment
     */
    bool atLineStart = false;
    if (ct == '/')
    {
        while (cur < stop && (*cur == ' ' || *cur == '\t'))
            ++cur;
    }
    else if (cur < stop)
    {
        if (*cur == '\r')
        {
            ++cur;
            if (cur < stop && *cur == '\n')
                ++cur;
            atLineStart = true;
        }
        else if (*cur == '\n')
        {
            ++cur;
            atLineStart = true;
        }
    }

    /* Remove trailing row of ****'s or ++++'s
     */
    if (ct != '/')
    {
        while (cur < stop && stop[-1] == ct)
            --stop;
    }

    /* Comment is now [cur .. stop].
     * Canonicalize it into buf[].
     */
    OutBuffer buf;

    void trimTrailingWhitespace()
    {
        const text = buf[];
        auto keep = text.length;
        while (keep && (text[keep - 1] == ' ' || text[keep - 1] == '\t'))
            --keep;
        buf.setsize(keep);
    }

    for (; cur < stop; cur++)
    {
        char c = *cur;
        bool isNewline = false;

        if (c == '*' || c == '+')
        {
            if (atLineStart && c == ct)
            {
                /* Leading comment character of a line: drop it and
                 * trim preceding whitespace up to preceding \n
                 */
                atLineStart = false;
                trimTrailingWhitespace();
                continue;
            }
        }
        else if (c == ' ' || c == '\t')
        {
            // blanks are copied and do not end the "start of line" state
        }
        else if (c == '\r')
        {
            if (cur[1] == '\n')
                continue; // skip the \r
            isNewline = true;
        }
        else if (c == '\n')
        {
            isNewline = true;
        }
        else if (c == 226 && cur[1] == 128 && (cur[2] == 168 || cur[2] == 169))
        {
            // LS or PS
            cur += 2;
            isNewline = true;
        }
        else
        {
            atLineStart = false;
        }

        if (isNewline)
        {
            c = '\n'; // replace all newlines with \n
            atLineStart = true;
            /* Trim trailing whitespace
             */
            trimTrailingWhitespace();
        }
        buf.writeByte(c);
    }
    /* Trim trailing whitespace (if the last line does not have newline)
     */
    trimTrailingWhitespace();

    // Always end with a newline
    const text = buf[];
    if (text.length == 0 || text[$ - 1] != '\n')
        buf.writeByte('\n');

    // It's a line comment if the start of the doc comment comes
    // after other non-whitespace on the same line.
    auto dc = (lineComment && anyToken) ? &t.lineComment : &t.blockComment;
    // Combine with previous doc comment, if any
    if (*dc)
        *dc = combineComments(*dc, buf[], newParagraph).toDString();
    else
        *dc = buf.extractSlice(true);
}
2980 | ||
/********************************************
 * Combine two document comments into one,
 * separated by an extra newline if newParagraph is true.
 *
 * If either comment is null the other one's pointer is returned as is.
 * Otherwise the result is a newly allocated, 0-terminated string; a `\n`
 * is inserted after `c1` when `c1` is non-empty and does not already end
 * with one.
 */
static const(char)* combineComments(const(char)[] c1, const(char)[] c2, bool newParagraph) pure
{
    //debug printf("Lexer::combineComments('%*.s', '%*.s', '%i')\n", cast(int) c1.length, c1.ptr, cast(int) c2.length, c2.ptr, newParagraph);
    if (!c1)
        return c2.ptr;
    if (!c2)
        return c1.ptr;

    const size_t paragraphGap = newParagraph ? 1 : 0; // Size of the combining '\n'
    const size_t lineGap = (c1.length && c1[$ - 1] != '\n') ? 1 : 0;
    const total = c1.length + lineGap + paragraphGap + c2.length;

    auto merged = cast(char*)mem.xmalloc_noscan(total + 1);
    merged[0 .. c1.length] = c1[];
    if (lineGap)
        merged[c1.length] = '\n';
    if (newParagraph)
        merged[c1.length + lineGap] = '\n';
    merged[total - c2.length .. total] = c2[];
    merged[total] = 0;
    return merged;
}
3008 | ||
0fb57034 IB |
/**************************
 * Record the start of a new line: remember `p` as the line start and
 * bump the line number in `scanloc`.
 * `p` should be at start of next line
 */
private void endOfLine() pure @nogc @safe
{
    line = p;
    ++scanloc.linnum;
}
3017 | } | |
3018 | ||
6384eff5 IB |
3019 | |
3020 | /******************************* Private *****************************************/ | |
3021 | ||
3022 | private: | |
3023 | ||
5fee5ec3 IB |
/// Support for `__DATE__`, `__TIME__`, and `__TIMESTAMP__`
private struct TimeStampInfo
{
    private __gshared bool initdone = false;

    // Note: Those properties need to be guarded by a call to `initialize`
    // The API isn't safe, and quite brittle, but it was left this way
    // over performance concerns.
    // This is currently only called once, from the lexer.
    __gshared char[11 + 1] date;
    __gshared char[8 + 1] time;
    __gshared char[24 + 1] timestamp;

    /**
     * Fill `date`, `time` and `timestamp` on the first call; later calls
     * return immediately.
     *
     * The time is taken from the environment variable `SOURCE_DATE_EPOCH`
     * when it is set, otherwise from the system clock.
     *
     * Params:
     *      loc = location used for the error reported when
     *            `SOURCE_DATE_EPOCH` cannot be parsed
     */
    public static void initialize(const ref Loc loc) nothrow
    {
        if (initdone)
            return;

        initdone = true;
        time_t ct;
        // https://issues.dlang.org/show_bug.cgi?id=20444
        if (auto p = getenv("SOURCE_DATE_EPOCH"))
        {
            if (!ct.parseDigits(p.toDString()))
                error(loc, "value of environment variable `SOURCE_DATE_EPOCH` should be a valid UNIX timestamp, not: `%s`", p);
        }
        else
            .time(&ct);
        // ctime() yields "Www Mmm dd hh:mm:ss yyyy\n"; the offsets below
        // pick "Mmm dd" + "yyyy", "hh:mm:ss", and the text without the '\n'.
        const p = ctime(&ct);
        assert(p);
        sprintf(&date[0], "%.6s %.4s", p + 4, p + 20);
        sprintf(&time[0], "%.8s", p + 11);
        sprintf(&timestamp[0], "%.24s", p);
    }
}
3059 | ||
6384eff5 IB |
/// Unicode line terminators recognised by the lexer in addition to `\n` and `\r`.
private enum
{
    LS = 0x2028, // UTF line separator
    PS = 0x2029, // UTF paragraph separator
}
3062 | ||
/********************************************
 * Do our own char maps
 *
 * One entry per 8 bit character value; each entry is the set of `CM*`
 * flags that apply to that character.
 */
private static immutable cmtable = ()
{
    ubyte[256] table;
    foreach (const c; 0 .. table.length)
    {
        ubyte bits = 0;

        if ('0' <= c && c <= '7')
            bits |= CMoctal;
        if (c_isxdigit(c))
            bits |= CMhex;
        if (c_isalnum(c) || c == '_')
            bits |= CMidchar;

        // Characters flagged as possible second character after a leading
        // `0` (CMzerosecond) or after another leading digit (CMdigitsecond)
        switch (c)
        {
            case 'x': case 'X':
            case 'b': case 'B':
                bits |= CMzerosecond;
                break;

            case '0': .. case '9':
            case 'e': case 'E':
            case 'f': case 'F':
            case 'l': case 'L':
            case 'p': case 'P':
            case 'u': case 'U':
            case 'i':
            case '.':
            case '_':
                bits |= CMzerosecond | CMdigitsecond;
                break;

            default:
                break;
        }

        // Every 7 bit character except these six is flagged CMsinglechar
        const excluded = c == '\\' || c == '\n' || c == '\r'
                      || c == 0 || c == 0x1A || c == '\'';
        if (!excluded && !(c & 0x80))
            bits |= CMsinglechar;

        table[c] = bits;
    }
    return table;
}();
3118 | ||
// Bit flags stored in `cmtable`, one bit per character class
private
{
    enum CMoctal       = 1 << 0;
    enum CMhex         = 1 << 1;
    enum CMidchar      = 1 << 2;
    enum CMzerosecond  = 1 << 3;
    enum CMdigitsecond = 1 << 4;
    enum CMsinglechar  = 1 << 5;
}
3128 | ||
/// Returns: true if `c` has the `CMoctal` flag set in `cmtable`
private bool isoctal(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return cast(bool)(flags & CMoctal);
}
3133 | ||
/// Returns: true if `c` has the `CMhex` flag set in `cmtable`
private bool ishex(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return cast(bool)(flags & CMhex);
}
3138 | ||
/// Returns: true if `c` has the `CMidchar` flag set in `cmtable`
private bool isidchar(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return cast(bool)(flags & CMidchar);
}
3143 | ||
/// Returns: true if `c` has the `CMzerosecond` flag set in `cmtable`
private bool isZeroSecond(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return cast(bool)(flags & CMzerosecond);
}
3148 | ||
/// Returns: true if `c` has the `CMdigitsecond` flag set in `cmtable`
private bool isDigitSecond(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return cast(bool)(flags & CMdigitsecond);
}
3153 | ||
/// Returns: true if `c` has the `CMsinglechar` flag set in `cmtable`
private bool issinglechar(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return cast(bool)(flags & CMsinglechar);
}
3158 | ||
/// Returns: true if `c` is an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`)
private bool c_isxdigit(const int c) pure @nogc @safe
{
    switch (c)
    {
        case '0': .. case '9':
        case 'a': .. case 'f':
        case 'A': .. case 'F':
            return true;

        default:
            return false;
    }
}
3165 | ||
/// Returns: true if `c` is an ASCII letter or decimal digit
private bool c_isalnum(const int c) pure @nogc @safe
{
    switch (c)
    {
        case '0': .. case '9':
        case 'a': .. case 'z':
        case 'A': .. case 'Z':
            return true;

        default:
            return false;
    }
}
3172 | ||
3173 | /******************************* Unittest *****************************************/ | |
3174 | ||
5fee5ec3 IB |
// Valid escape sequences: each must decode to the expected character,
// consume the whole input, and report no diagnostic.
unittest
{
    import dmd.console;
    // Any diagnostic is a test failure
    nothrow bool assertDiagnosticHandler(const ref Loc loc, Color headerColor, const(char)* header,
                                   const(char)* format, va_list ap, const(char)* p1, const(char)* p2)
    {
        assert(0);
    }
    diagnosticHandler = &assertDiagnosticHandler;

    static void test(T)(string sequence, T expected, bool Ccompile = false)
    {
        auto p = cast(const(char)*)sequence.ptr;
        Lexer lexer = new Lexer();
        assert(expected == lexer.escapeSequence(Loc.initial, p, Ccompile));
        assert(p == sequence.ptr + sequence.length);
    }

    test(`'`, '\'');
    test(`"`, '"');
    test(`?`, '?');
    test(`\`, '\\');
    test(`0`, '\0');
    test(`a`, '\a');
    test(`b`, '\b');
    test(`f`, '\f');
    test(`n`, '\n');
    test(`r`, '\r');
    test(`t`, '\t');
    test(`v`, '\v');

    test(`x00`, 0x00);
    test(`xff`, 0xff);
    test(`xFF`, 0xff);
    test(`xa7`, 0xa7);
    test(`x3c`, 0x3c);
    test(`xe2`, 0xe2);

    test(`1`, '\1');
    test(`42`, '\42');
    test(`357`, '\357');

    test(`u1234`, '\u1234');
    test(`uf0e4`, '\uf0e4');

    test(`U0001f603`, '\U0001f603');

    // Named character entities
    test(`&quot;`, '"');
    test(`&lt;`, '<');
    test(`&gt;`, '>');

    diagnosticHandler = null;
}
6384eff5 | 3228 | |
5fee5ec3 IB |
// Invalid escape sequences: each must report exactly the expected error
// message, return the expected character and consume the expected length.
unittest
{
    import dmd.console;
    string expected;
    bool gotError;

    // Formats the diagnostic and compares it with the message the current case expects
    nothrow bool expectDiagnosticHandler(const ref Loc loc, Color headerColor, const(char)* header,
                                         const(char)* format, va_list ap, const(char)* p1, const(char)* p2)
    {
        assert(cast(Classification)headerColor == Classification.error);

        gotError = true;
        char[100] buffer = void;
        auto actual = buffer[0 .. vsprintf(buffer.ptr, format, ap)];
        assert(expected == actual);
        return true;
    }

    diagnosticHandler = &expectDiagnosticHandler;

    void test(string sequence, string expectedError, dchar expectedReturnValue, uint expectedScanLength, bool Ccompile = false)
    {
        uint errors = global.errors;
        gotError = false;
        expected = expectedError;
        auto p = cast(const(char)*)sequence.ptr;
        Lexer lexer = new Lexer();
        auto actualReturnValue = lexer.escapeSequence(Loc.initial, p, Ccompile);
        assert(gotError);
        assert(expectedReturnValue == actualReturnValue);

        auto actualScanLength = p - sequence.ptr;
        assert(expectedScanLength == actualScanLength);
        // Restore the error count so these expected errors do not leak out of the test
        global.errors = errors;
    }

    test("c", `undefined escape sequence \c`, 'c', 1);
    test("!", `undefined escape sequence \!`, '!', 1);
    // With Ccompile set, `&` is reported as an undefined escape and only it is consumed
    test("&quot;", `undefined escape sequence \&`, '&', 1, true);

    test("x1", `escape hex sequence has 1 hex digits instead of 2`, '\x01', 2);

    test("u1"  , `escape hex sequence has 1 hex digits instead of 4`,   0x1, 2);
    test("u12" , `escape hex sequence has 2 hex digits instead of 4`,  0x12, 3);
    test("u123", `escape hex sequence has 3 hex digits instead of 4`, 0x123, 4);

    test("U0"      , `escape hex sequence has 1 hex digits instead of 8`,       0x0, 2);
    test("U00"     , `escape hex sequence has 2 hex digits instead of 8`,      0x00, 3);
    test("U000"    , `escape hex sequence has 3 hex digits instead of 8`,     0x000, 4);
    test("U0000"   , `escape hex sequence has 4 hex digits instead of 8`,    0x0000, 5);
    test("U0001f"  , `escape hex sequence has 5 hex digits instead of 8`,   0x0001f, 6);
    test("U0001f6" , `escape hex sequence has 6 hex digits instead of 8`,  0x0001f6, 7);
    test("U0001f60", `escape hex sequence has 7 hex digits instead of 8`, 0x0001f60, 8);

    test("ud800"    , `invalid UTF character \U0000d800`, '?', 5);
    test("udfff"    , `invalid UTF character \U0000dfff`, '?', 5);
    test("U00110000", `invalid UTF character \U00110000`, '?', 9);

    test("xg0"      , `undefined escape hex sequence \xg`, 'g', 2);
    test("ug000"    , `undefined escape hex sequence \ug`, 'g', 2);
    test("Ug0000000", `undefined escape hex sequence \Ug`, 'g', 2);

    test("&BAD;", `unnamed character entity &BAD;`  , '?', 5);
    // Entity name without the closing `;`
    test("&quot", `unterminated named entity &quot;`, '?', 5);
    test("&quot", `unterminated named entity &quot;`, '?', 5);

    test("400", `escape octal sequence \400 is larger than \377`, 0x100, 3);

    diagnosticHandler = null;
}
6384eff5 IB |
3299 | |
// Smoke test: lexing "int" yields TOK.int32 and then TOK.endOfFile on every further call.
unittest
{
    //printf("lexer.unittest\n");
    /* Not much here, just trying things out.
     */
    string text = "int"; // We rely on the implicit null-terminator
    scope Lexer lex1 = new Lexer(null, text.ptr, 0, text.length, 0, 0);

    TOK tok = lex1.nextToken();
    //printf("tok == %s, %d, %d\n", Token::toChars(tok), tok, TOK.int32);
    assert(tok == TOK.int32);

    // Once the end is reached, the lexer must keep returning endOfFile
    foreach (_; 0 .. 3)
    {
        tok = lex1.nextToken();
        assert(tok == TOK.endOfFile);
    }
}
3318 | ||
// Malformed input must still terminate in TOK.endOfFile within a bounded number of tokens.
unittest
{
    // We don't want to see Lexer error output during these tests.
    uint errors = global.startGagging();
    scope(exit) global.endGagging(errors);

    // Test malformed input: even malformed input should end in a TOK.endOfFile.
    static immutable char[][] testcases =
    [   // Testcase must end with 0 or 0x1A.
        [0], // not malformed, but pathological
        ['\'', 0],
        ['\'', 0x1A],
        ['{', '{', 'q', '{', 0],
        [0xFF, 0],
        [0xFF, 0x80, 0],
        [0xFF, 0xFF, 0],
        [0xFF, 0xFF, 0],
        ['x', '"', 0x1A],
    ];

    foreach (testcase; testcases)
    {
        // The terminator itself is not counted in the length handed to the lexer
        scope Lexer lex2 = new Lexer(null, testcase.ptr, 0, testcase.length-1, 0, 0);
        TOK tok = lex2.nextToken();
        // Allow at most one token per input character before giving up
        for (size_t calls = 1; tok != TOK.endOfFile && calls < testcase.length; ++calls)
        {
            tok = lex2.nextToken();
        }
        assert(tok == TOK.endOfFile);
        tok = lex2.nextToken();
        assert(tok == TOK.endOfFile);
    }
}