]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
Test -fbranch-probabilities with computed gotos.
[gcc.git] / gcc / cpplex.c
CommitLineData
45b966db
ZW
1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
93c80368
NB
23/* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
27
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
041c3194 36
45b966db
ZW
37#include "config.h"
38#include "system.h"
45b966db
ZW
39#include "cpplib.h"
40#include "cpphash.h"
41
c8a96070
NB
42/* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
45#ifdef CROSS_COMPILE
46#undef MULTIBYTE_CHARS
47#endif
48
49#ifdef MULTIBYTE_CHARS
50#include "mbchar.h"
51#include <locale.h>
52#endif
53
93c80368
NB
/* Spelling categories for token types.  Tokens in the SPELL_STRING
   category store their spelling in the token list, and its length
   in token->val.name.len.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_CHAR,
  SPELL_IDENT,
  SPELL_STRING,
  SPELL_NONE
};
64
93c80368 65struct token_spelling
f9a0e96c 66{
93c80368
NB
67 enum spell_type category;
68 const unsigned char *name;
f9a0e96c
ZW
69};
70
93c80368
NB
/* Table of digraph spellings.  */
const unsigned char *digraph_spellings [] = {
  U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>"
};
73
74#define OP(e, s) { SPELL_OPERATOR, U s },
75#define TK(e, s) { s, U STRINGX (e) },
76const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
77#undef OP
78#undef TK
79
80#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
f2d5f0cc 82
1444f2ed 83static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
29401c30
NB
84static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t));
85static cppchar_t get_effective_char PARAMS ((cpp_reader *));
0d9f234d 86
041c3194 87static int skip_block_comment PARAMS ((cpp_reader *));
cbcff6df 88static int skip_line_comment PARAMS ((cpp_reader *));
0d9f234d
NB
89static void adjust_column PARAMS ((cpp_reader *));
90static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
91static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
93c80368
NB
92static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
93static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
0d9f234d 94static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
93c80368 95static void unterminated PARAMS ((cpp_reader *, int));
0d9f234d
NB
96static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
97static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
29401c30 98static void lex_percent PARAMS ((cpp_reader *, cpp_token *));
cbcff6df 99static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
93c80368 100static int name_p PARAMS ((cpp_reader *, const cpp_string *));
62729350
NB
101static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
102 const unsigned char *, unsigned int *));
f617b8e2 103
93c80368
NB
104static cpp_chunk *new_chunk PARAMS ((unsigned int));
105static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
c8a96070 106static unsigned int hex_digit_value PARAMS ((unsigned int));
15dad1d9 107
041c3194 108/* Utility routine:
9e62c811 109
bfb9dc7f
ZW
110 Compares, the token TOKEN to the NUL-terminated string STRING.
111 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
15dad1d9 112
041c3194 113int
bfb9dc7f
ZW
114cpp_ideq (token, string)
115 const cpp_token *token;
041c3194
ZW
116 const char *string;
117{
bfb9dc7f 118 if (token->type != CPP_NAME)
041c3194 119 return 0;
bfb9dc7f 120
a28c5035 121 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
15dad1d9 122}
1368ee70 123
0d9f234d
NB
124/* Call when meeting a newline. Returns the character after the newline
125 (or carriage-return newline combination), or EOF. */
126static cppchar_t
1444f2ed
NB
127handle_newline (pfile, newline_char)
128 cpp_reader *pfile;
0d9f234d
NB
129 cppchar_t newline_char;
130{
1444f2ed 131 cpp_buffer *buffer;
0d9f234d
NB
132 cppchar_t next = EOF;
133
1444f2ed 134 pfile->line++;
1444f2ed 135 buffer = pfile->buffer;
0d9f234d 136 buffer->col_adjust = 0;
0d9f234d
NB
137 buffer->line_base = buffer->cur;
138
139 /* Handle CR-LF and LF-CR combinations, get the next character. */
140 if (buffer->cur < buffer->rlimit)
141 {
142 next = *buffer->cur++;
143 if (next + newline_char == '\r' + '\n')
144 {
145 buffer->line_base = buffer->cur;
146 if (buffer->cur < buffer->rlimit)
147 next = *buffer->cur++;
148 else
149 next = EOF;
150 }
151 }
152
153 buffer->read_ahead = next;
154 return next;
155}
156
157/* Subroutine of skip_escaped_newlines; called when a trigraph is
158 encountered. It warns if necessary, and returns true if the
159 trigraph should be honoured. FROM_CHAR is the third character of a
160 trigraph, and presumed to be the previous character for position
161 reporting. */
45b966db 162static int
0d9f234d 163trigraph_ok (pfile, from_char)
45b966db 164 cpp_reader *pfile;
0d9f234d 165 cppchar_t from_char;
45b966db 166{
041c3194
ZW
167 int accept = CPP_OPTION (pfile, trigraphs);
168
cbcff6df
NB
169 /* Don't warn about trigraphs in comments. */
170 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
45b966db 171 {
0d9f234d 172 cpp_buffer *buffer = pfile->buffer;
67821e3a 173
041c3194 174 if (accept)
67821e3a 175 cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
041c3194 176 "trigraph ??%c converted to %c",
0d9f234d
NB
177 (int) from_char,
178 (int) _cpp_trigraph_map[from_char]);
4a5b68a2
NB
179 else if (buffer->cur != buffer->last_Wtrigraphs)
180 {
181 buffer->last_Wtrigraphs = buffer->cur;
67821e3a 182 cpp_warning_with_line (pfile, pfile->line,
4a5b68a2
NB
183 CPP_BUF_COL (buffer) - 2,
184 "trigraph ??%c ignored", (int) from_char);
185 }
45b966db 186 }
0d9f234d 187
041c3194 188 return accept;
45b966db
ZW
189}
190
0d9f234d
NB
/* Set the type of the token being built to T and discard the
   read-ahead character.  Relies on local variables named buffer and
   result being in scope.  */
#define ACCEPT_CHAR(t) \
  do { result->type = t; buffer->read_ahead = EOF; } while (0)

/* Remember and restore the read position.  These rely on local
   variables named buffer and saved_cur.  When we move to multibyte
   character sets, they should also save and restore the state of the
   multibyte conversion library, which probably means saving and
   restoring a "cookie".  In the case of glibc it is an 8-byte
   structure, so is not a high overhead operation.  In any case, it's
   out of the fast path.  */
#define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
#define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
202
203/* Skips any escaped newlines introduced by NEXT, which is either a
204 '?' or a '\\'. Returns the next character, which will also have
a5c3cccd
NB
205 been placed in buffer->read_ahead. This routine performs
206 preprocessing stages 1 and 2 of the ISO C standard. */
0d9f234d 207static cppchar_t
29401c30
NB
208skip_escaped_newlines (pfile, next)
209 cpp_reader *pfile;
0d9f234d 210 cppchar_t next;
45b966db 211{
29401c30
NB
212 cpp_buffer *buffer = pfile->buffer;
213
a5c3cccd
NB
214 /* Only do this if we apply stages 1 and 2. */
215 if (!buffer->from_stage3)
041c3194 216 {
a5c3cccd
NB
217 cppchar_t next1;
218 const unsigned char *saved_cur;
219 int space;
220
221 do
0d9f234d 222 {
a5c3cccd
NB
223 if (buffer->cur == buffer->rlimit)
224 break;
225
226 SAVE_STATE ();
227 if (next == '?')
0d9f234d 228 {
a5c3cccd
NB
229 next1 = *buffer->cur++;
230 if (next1 != '?' || buffer->cur == buffer->rlimit)
231 {
232 RESTORE_STATE ();
233 break;
234 }
235
236 next1 = *buffer->cur++;
237 if (!_cpp_trigraph_map[next1]
29401c30 238 || !trigraph_ok (pfile, next1))
a5c3cccd
NB
239 {
240 RESTORE_STATE ();
241 break;
242 }
243
244 /* We have a full trigraph here. */
245 next = _cpp_trigraph_map[next1];
246 if (next != '\\' || buffer->cur == buffer->rlimit)
247 break;
248 SAVE_STATE ();
249 }
250
251 /* We have a backslash, and room for at least one more character. */
252 space = 0;
253 do
254 {
255 next1 = *buffer->cur++;
256 if (!is_nvspace (next1))
257 break;
258 space = 1;
0d9f234d 259 }
a5c3cccd 260 while (buffer->cur < buffer->rlimit);
041c3194 261
a5c3cccd 262 if (!is_vspace (next1))
0d9f234d
NB
263 {
264 RESTORE_STATE ();
265 break;
266 }
45b966db 267
29401c30
NB
268 if (space && !pfile->state.lexing_comment)
269 cpp_warning (pfile, "backslash and newline separated by space");
0d9f234d 270
29401c30 271 next = handle_newline (pfile, next1);
a5c3cccd 272 if (next == EOF)
29401c30 273 cpp_pedwarn (pfile, "backslash-newline at end of file");
0d9f234d 274 }
a5c3cccd 275 while (next == '\\' || next == '?');
041c3194 276 }
45b966db 277
0d9f234d
NB
278 buffer->read_ahead = next;
279 return next;
45b966db
ZW
280}
281
0d9f234d
NB
282/* Obtain the next character, after trigraph conversion and skipping
283 an arbitrary string of escaped newlines. The common case of no
284 trigraphs or escaped newlines falls through quickly. */
285static cppchar_t
29401c30
NB
286get_effective_char (pfile)
287 cpp_reader *pfile;
64aaf407 288{
29401c30 289 cpp_buffer *buffer = pfile->buffer;
0d9f234d
NB
290 cppchar_t next = EOF;
291
292 if (buffer->cur < buffer->rlimit)
293 {
294 next = *buffer->cur++;
295
296 /* '?' can introduce trigraphs (and therefore backslash); '\\'
297 can introduce escaped newlines, which we want to skip, or
298 UCNs, which, depending upon lexer state, we will handle in
299 the future. */
300 if (next == '?' || next == '\\')
29401c30 301 next = skip_escaped_newlines (pfile, next);
0d9f234d
NB
302 }
303
304 buffer->read_ahead = next;
305 return next;
64aaf407
NB
306}
307
0d9f234d
NB
308/* Skip a C-style block comment. We find the end of the comment by
309 seeing if an asterisk is before every '/' we encounter. Returns
310 non-zero if comment terminated by EOF, zero otherwise. */
041c3194
ZW
311static int
312skip_block_comment (pfile)
45b966db
ZW
313 cpp_reader *pfile;
314{
041c3194 315 cpp_buffer *buffer = pfile->buffer;
d8090680 316 cppchar_t c = EOF, prevc = EOF;
0d9f234d 317
cbcff6df 318 pfile->state.lexing_comment = 1;
0d9f234d 319 while (buffer->cur != buffer->rlimit)
45b966db 320 {
0d9f234d
NB
321 prevc = c, c = *buffer->cur++;
322
323 next_char:
324 /* FIXME: For speed, create a new character class of characters
93c80368 325 of interest inside block comments. */
0d9f234d 326 if (c == '?' || c == '\\')
29401c30 327 c = skip_escaped_newlines (pfile, c);
041c3194 328
0d9f234d
NB
329 /* People like decorating comments with '*', so check for '/'
330 instead for efficiency. */
041c3194 331 if (c == '/')
45b966db 332 {
0d9f234d
NB
333 if (prevc == '*')
334 break;
041c3194 335
0d9f234d
NB
336 /* Warn about potential nested comments, but not if the '/'
337 comes immediately before the true comment delimeter.
041c3194 338 Don't bother to get it right across escaped newlines. */
0d9f234d
NB
339 if (CPP_OPTION (pfile, warn_comments)
340 && buffer->cur != buffer->rlimit)
45b966db 341 {
0d9f234d
NB
342 prevc = c, c = *buffer->cur++;
343 if (c == '*' && buffer->cur != buffer->rlimit)
344 {
345 prevc = c, c = *buffer->cur++;
346 if (c != '/')
67821e3a
NB
347 cpp_warning_with_line (pfile, pfile->line,
348 CPP_BUF_COL (buffer) - 2,
0d9f234d
NB
349 "\"/*\" within comment");
350 }
351 goto next_char;
45b966db 352 }
45b966db 353 }
91fcd158 354 else if (is_vspace (c))
45b966db 355 {
1444f2ed 356 prevc = c, c = handle_newline (pfile, c);
0d9f234d 357 goto next_char;
45b966db 358 }
52fadca8 359 else if (c == '\t')
0d9f234d 360 adjust_column (pfile);
45b966db 361 }
041c3194 362
cbcff6df 363 pfile->state.lexing_comment = 0;
0d9f234d
NB
364 buffer->read_ahead = EOF;
365 return c != '/' || prevc != '*';
45b966db
ZW
366}
367
f9a0e96c 368/* Skip a C++ line comment. Handles escaped newlines. Returns
0d9f234d
NB
369 non-zero if a multiline comment. The following new line, if any,
370 is left in buffer->read_ahead. */
041c3194 371static int
cbcff6df
NB
372skip_line_comment (pfile)
373 cpp_reader *pfile;
45b966db 374{
cbcff6df 375 cpp_buffer *buffer = pfile->buffer;
67821e3a 376 unsigned int orig_line = pfile->line;
0d9f234d 377 cppchar_t c;
041c3194 378
cbcff6df 379 pfile->state.lexing_comment = 1;
0d9f234d 380 do
041c3194 381 {
0d9f234d
NB
382 c = EOF;
383 if (buffer->cur == buffer->rlimit)
384 break;
041c3194 385
0d9f234d
NB
386 c = *buffer->cur++;
387 if (c == '?' || c == '\\')
29401c30 388 c = skip_escaped_newlines (pfile, c);
041c3194 389 }
0d9f234d 390 while (!is_vspace (c));
45b966db 391
cbcff6df 392 pfile->state.lexing_comment = 0;
0d9f234d 393 buffer->read_ahead = c; /* Leave any newline for caller. */
67821e3a 394 return orig_line != pfile->line;
041c3194 395}
45b966db 396
0d9f234d
NB
397/* pfile->buffer->cur is one beyond the \t character. Update
398 col_adjust so we track the column correctly. */
52fadca8 399static void
0d9f234d 400adjust_column (pfile)
52fadca8 401 cpp_reader *pfile;
52fadca8 402{
0d9f234d
NB
403 cpp_buffer *buffer = pfile->buffer;
404 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
52fadca8
NB
405
406 /* Round it up to multiple of the tabstop, but subtract 1 since the
407 tab itself occupies a character position. */
0d9f234d
NB
408 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
409 - col % CPP_OPTION (pfile, tabstop)) - 1;
52fadca8
NB
410}
411
0d9f234d
NB
412/* Skips whitespace, saving the next non-whitespace character.
413 Adjusts pfile->col_adjust to account for tabs. Without this,
414 tokens might be assigned an incorrect column. */
041c3194 415static void
0d9f234d 416skip_whitespace (pfile, c)
041c3194 417 cpp_reader *pfile;
0d9f234d 418 cppchar_t c;
041c3194
ZW
419{
420 cpp_buffer *buffer = pfile->buffer;
0d9f234d 421 unsigned int warned = 0;
45b966db 422
0d9f234d 423 do
041c3194 424 {
91fcd158
NB
425 /* Horizontal space always OK. */
426 if (c == ' ')
0d9f234d 427 ;
91fcd158 428 else if (c == '\t')
0d9f234d
NB
429 adjust_column (pfile);
430 /* Just \f \v or \0 left. */
91fcd158 431 else if (c == '\0')
041c3194 432 {
91fcd158 433 if (!warned)
0d9f234d
NB
434 {
435 cpp_warning (pfile, "null character(s) ignored");
436 warned = 1;
437 }
45b966db 438 }
93c80368 439 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
67821e3a 440 cpp_pedwarn_with_line (pfile, pfile->line,
91fcd158
NB
441 CPP_BUF_COL (buffer),
442 "%s in preprocessing directive",
443 c == '\f' ? "form feed" : "vertical tab");
0d9f234d
NB
444
445 c = EOF;
446 if (buffer->cur == buffer->rlimit)
447 break;
448 c = *buffer->cur++;
45b966db 449 }
ec5c56db 450 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
0d9f234d
NB
451 while (is_nvspace (c));
452
453 /* Remember the next character. */
454 buffer->read_ahead = c;
041c3194 455}
45b966db 456
93c80368
NB
457/* See if the characters of a number token are valid in a name (no
458 '.', '+' or '-'). */
459static int
460name_p (pfile, string)
461 cpp_reader *pfile;
462 const cpp_string *string;
463{
464 unsigned int i;
465
466 for (i = 0; i < string->len; i++)
467 if (!is_idchar (string->text[i]))
468 return 0;
469
470 return 1;
471}
472
0d9f234d
NB
473/* Parse an identifier, skipping embedded backslash-newlines.
474 Calculate the hash value of the token while parsing, for improved
475 performance. The hashing algorithm *must* match cpp_lookup(). */
476
477static cpp_hashnode *
478parse_identifier (pfile, c)
45b966db 479 cpp_reader *pfile;
0d9f234d 480 cppchar_t c;
45b966db 481{
93c80368 482 cpp_hashnode *result;
0d9f234d 483 cpp_buffer *buffer = pfile->buffer;
2a967f3d
NB
484 unsigned int saw_dollar = 0, len;
485 struct obstack *stack = &pfile->hash_table->stack;
041c3194 486
0d9f234d 487 do
041c3194 488 {
0d9f234d 489 do
041c3194 490 {
2a967f3d 491 obstack_1grow (stack, c);
45b966db 492
0d9f234d
NB
493 if (c == '$')
494 saw_dollar++;
ba89d661 495
0d9f234d
NB
496 c = EOF;
497 if (buffer->cur == buffer->rlimit)
498 break;
ba89d661 499
0d9f234d
NB
500 c = *buffer->cur++;
501 }
502 while (is_idchar (c));
ba89d661 503
0d9f234d
NB
504 /* Potential escaped newline? */
505 if (c != '?' && c != '\\')
506 break;
29401c30 507 c = skip_escaped_newlines (pfile, c);
041c3194 508 }
0d9f234d
NB
509 while (is_idchar (c));
510
93c80368
NB
511 /* Remember the next character. */
512 buffer->read_ahead = c;
513
0d9f234d
NB
514 /* $ is not a identifier character in the standard, but is commonly
515 accepted as an extension. Don't warn about it in skipped
516 conditional blocks. */
cef0d199 517 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
0d9f234d
NB
518 cpp_pedwarn (pfile, "'$' character(s) in identifier");
519
93c80368 520 /* Identifiers are null-terminated. */
2a967f3d
NB
521 len = obstack_object_size (stack);
522 obstack_1grow (stack, '\0');
93c80368
NB
523
524 /* This routine commits the memory if necessary. */
2a967f3d
NB
525 result = (cpp_hashnode *)
526 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
93c80368
NB
527
528 /* Some identifiers require diagnostics when lexed. */
cef0d199 529 if (result->flags & NODE_DIAGNOSTIC && !pfile->state.skipping)
93c80368
NB
530 {
531 /* It is allowed to poison the same identifier twice. */
532 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
a28c5035
NB
533 cpp_error (pfile, "attempt to use poisoned \"%s\"",
534 NODE_NAME (result));
93c80368
NB
535
536 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
28e0f040 537 replacement list of a variadic macro. */
93c80368
NB
538 if (result == pfile->spec_nodes.n__VA_ARGS__
539 && !pfile->state.va_args_ok)
28e0f040 540 cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
93c80368
NB
541 }
542
543 return result;
45b966db
ZW
544}
545
0d9f234d 546/* Parse a number, skipping embedded backslash-newlines. */
45b966db 547static void
93c80368 548parse_number (pfile, number, c, leading_period)
45b966db 549 cpp_reader *pfile;
0d9f234d
NB
550 cpp_string *number;
551 cppchar_t c;
93c80368 552 int leading_period;
45b966db 553{
041c3194 554 cpp_buffer *buffer = pfile->buffer;
49fe13f6 555 cpp_pool *pool = &pfile->ident_pool;
93c80368 556 unsigned char *dest, *limit;
45b966db 557
93c80368
NB
558 dest = POOL_FRONT (pool);
559 limit = POOL_LIMIT (pool);
cbcff6df 560
93c80368
NB
561 /* Place a leading period. */
562 if (leading_period)
563 {
564 if (dest >= limit)
565 limit = _cpp_next_chunk (pool, 0, &dest);
566 *dest++ = '.';
567 }
568
0d9f234d 569 do
041c3194 570 {
0d9f234d
NB
571 do
572 {
93c80368
NB
573 /* Need room for terminating null. */
574 if (dest + 1 >= limit)
575 limit = _cpp_next_chunk (pool, 0, &dest);
576 *dest++ = c;
0d9f234d 577
0d9f234d
NB
578 c = EOF;
579 if (buffer->cur == buffer->rlimit)
580 break;
45b966db 581
0d9f234d
NB
582 c = *buffer->cur++;
583 }
93c80368 584 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
45b966db 585
0d9f234d
NB
586 /* Potential escaped newline? */
587 if (c != '?' && c != '\\')
588 break;
29401c30 589 c = skip_escaped_newlines (pfile, c);
45b966db 590 }
93c80368 591 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
cbcff6df 592
0d9f234d
NB
593 /* Remember the next character. */
594 buffer->read_ahead = c;
64aaf407 595
93c80368
NB
596 /* Null-terminate the number. */
597 *dest = '\0';
598
599 number->text = POOL_FRONT (pool);
600 number->len = dest - number->text;
601 POOL_COMMIT (pool, number->len + 1);
0d9f234d
NB
602}
603
604/* Subroutine of parse_string. Emits error for unterminated strings. */
605static void
93c80368 606unterminated (pfile, term)
0d9f234d 607 cpp_reader *pfile;
0d9f234d
NB
608 int term;
609{
610 cpp_error (pfile, "missing terminating %c character", term);
611
93c80368
NB
612 if (term == '\"' && pfile->mlstring_pos.line
613 && pfile->mlstring_pos.line != pfile->lexer_pos.line)
041c3194 614 {
93c80368
NB
615 cpp_error_with_line (pfile, pfile->mlstring_pos.line,
616 pfile->mlstring_pos.col,
0d9f234d 617 "possible start of unterminated string literal");
93c80368 618 pfile->mlstring_pos.line = 0;
041c3194 619 }
45b966db
ZW
620}
621
93c80368
NB
622/* Subroutine of parse_string. */
623static int
624unescaped_terminator_p (pfile, dest)
625 cpp_reader *pfile;
626 const unsigned char *dest;
627{
628 const unsigned char *start, *temp;
629
630 /* In #include-style directives, terminators are not escapeable. */
631 if (pfile->state.angled_headers)
632 return 1;
633
49fe13f6 634 start = POOL_FRONT (&pfile->ident_pool);
93c80368
NB
635
636 /* An odd number of consecutive backslashes represents an escaped
637 terminator. */
638 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
639 ;
640
641 return ((dest - temp) & 1) == 0;
642}
643
0d9f234d 644/* Parses a string, character constant, or angle-bracketed header file
7868b4a2
NB
645 name. Handles embedded trigraphs and escaped newlines. The stored
646 string is guaranteed NUL-terminated, but it is not guaranteed that
647 this is the first NUL since embedded NULs are preserved.
45b966db 648
7868b4a2 649 Multi-line strings are allowed, but they are deprecated. */
041c3194 650static void
0d9f234d 651parse_string (pfile, token, terminator)
45b966db 652 cpp_reader *pfile;
041c3194 653 cpp_token *token;
0d9f234d 654 cppchar_t terminator;
45b966db 655{
041c3194 656 cpp_buffer *buffer = pfile->buffer;
49fe13f6 657 cpp_pool *pool = &pfile->ident_pool;
93c80368 658 unsigned char *dest, *limit;
0d9f234d 659 cppchar_t c;
d82fc108 660 bool warned_nulls = false, warned_multi = false;
0d9f234d 661
93c80368
NB
662 dest = POOL_FRONT (pool);
663 limit = POOL_LIMIT (pool);
664
0d9f234d 665 for (;;)
45b966db 666 {
0d9f234d 667 if (buffer->cur == buffer->rlimit)
7868b4a2
NB
668 c = EOF;
669 else
670 c = *buffer->cur++;
671
672 have_char:
673 /* We need space for the terminating NUL. */
674 if (dest >= limit)
675 limit = _cpp_next_chunk (pool, 0, &dest);
676
677 if (c == EOF)
0d9f234d 678 {
93c80368 679 unterminated (pfile, terminator);
0d9f234d
NB
680 break;
681 }
0d9f234d 682
0d9f234d
NB
683 /* Handle trigraphs, escaped newlines etc. */
684 if (c == '?' || c == '\\')
29401c30 685 c = skip_escaped_newlines (pfile, c);
45b966db 686
93c80368 687 if (c == terminator && unescaped_terminator_p (pfile, dest))
45b966db 688 {
93c80368
NB
689 c = EOF;
690 break;
0d9f234d
NB
691 }
692 else if (is_vspace (c))
693 {
694 /* In assembly language, silently terminate string and
695 character literals at end of line. This is a kludge
696 around not knowing where comments are. */
bdb05a7b 697 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
0d9f234d 698 break;
45b966db 699
0d9f234d
NB
700 /* Character constants and header names may not extend over
701 multiple lines. In Standard C, neither may strings.
702 Unfortunately, we accept multiline strings as an
16eb2788
NB
703 extension, except in #include family directives. */
704 if (terminator != '"' || pfile->state.angled_headers)
45b966db 705 {
93c80368 706 unterminated (pfile, terminator);
0d9f234d 707 break;
45b966db 708 }
45b966db 709
d82fc108
NB
710 if (!warned_multi)
711 {
712 warned_multi = true;
713 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
714 }
715
93c80368 716 if (pfile->mlstring_pos.line == 0)
c691145a 717 pfile->mlstring_pos = pfile->lexer_pos;
0d9f234d 718
1444f2ed 719 c = handle_newline (pfile, c);
7868b4a2
NB
720 *dest++ = '\n';
721 goto have_char;
0d9f234d 722 }
d82fc108 723 else if (c == '\0' && !warned_nulls)
0d9f234d 724 {
d82fc108
NB
725 warned_nulls = true;
726 cpp_warning (pfile, "null character(s) preserved in literal");
45b966db 727 }
45b966db 728
93c80368 729 *dest++ = c;
45b966db
ZW
730 }
731
93c80368 732 /* Remember the next character. */
0d9f234d 733 buffer->read_ahead = c;
7868b4a2 734 *dest = '\0';
45b966db 735
93c80368
NB
736 token->val.str.text = POOL_FRONT (pool);
737 token->val.str.len = dest - token->val.str.text;
7868b4a2 738 POOL_COMMIT (pool, token->val.str.len + 1);
0d9f234d 739}
041c3194 740
93c80368 741/* The stored comment includes the comment start and any terminator. */
9e62c811 742static void
0d9f234d
NB
743save_comment (pfile, token, from)
744 cpp_reader *pfile;
041c3194
ZW
745 cpp_token *token;
746 const unsigned char *from;
9e62c811 747{
041c3194 748 unsigned char *buffer;
0d9f234d 749 unsigned int len;
0d9f234d 750
1c6d33ef 751 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
3542203b
NB
752 /* C++ comments probably (not definitely) have moved past a new
753 line, which we don't want to save in the comment. */
754 if (pfile->buffer->read_ahead != EOF)
755 len--;
49fe13f6 756 buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
041c3194 757
041c3194 758 token->type = CPP_COMMENT;
bfb9dc7f 759 token->val.str.len = len;
0d9f234d 760 token->val.str.text = buffer;
45b966db 761
1c6d33ef
NB
762 buffer[0] = '/';
763 memcpy (buffer + 1, from, len - 1);
0d9f234d 764}
45b966db 765
cbcff6df
NB
766/* Subroutine of lex_token to handle '%'. A little tricky, since we
767 want to avoid stepping back when lexing %:%X. */
0d9f234d 768static void
29401c30
NB
769lex_percent (pfile, result)
770 cpp_reader *pfile;
0d9f234d 771 cpp_token *result;
0d9f234d 772{
29401c30 773 cpp_buffer *buffer= pfile->buffer;
cbcff6df
NB
774 cppchar_t c;
775
776 result->type = CPP_MOD;
777 /* Parsing %:%X could leave an extra character. */
778 if (buffer->extra_char == EOF)
29401c30 779 c = get_effective_char (pfile);
cbcff6df
NB
780 else
781 {
782 c = buffer->read_ahead = buffer->extra_char;
783 buffer->extra_char = EOF;
784 }
785
786 if (c == '=')
787 ACCEPT_CHAR (CPP_MOD_EQ);
29401c30 788 else if (CPP_OPTION (pfile, digraphs))
cbcff6df
NB
789 {
790 if (c == ':')
791 {
792 result->flags |= DIGRAPH;
793 ACCEPT_CHAR (CPP_HASH);
29401c30 794 if (get_effective_char (pfile) == '%')
cbcff6df 795 {
29401c30 796 buffer->extra_char = get_effective_char (pfile);
cbcff6df
NB
797 if (buffer->extra_char == ':')
798 {
799 buffer->extra_char = EOF;
800 ACCEPT_CHAR (CPP_PASTE);
801 }
802 else
803 /* We'll catch the extra_char when we're called back. */
804 buffer->read_ahead = '%';
805 }
806 }
807 else if (c == '>')
808 {
809 result->flags |= DIGRAPH;
810 ACCEPT_CHAR (CPP_CLOSE_BRACE);
811 }
812 }
813}
814
815/* Subroutine of lex_token to handle '.'. This is tricky, since we
816 want to avoid stepping back when lexing '...' or '.123'. In the
817 latter case we should also set a flag for parse_number. */
818static void
819lex_dot (pfile, result)
820 cpp_reader *pfile;
821 cpp_token *result;
822{
823 cpp_buffer *buffer = pfile->buffer;
824 cppchar_t c;
825
826 /* Parsing ..X could leave an extra character. */
827 if (buffer->extra_char == EOF)
29401c30 828 c = get_effective_char (pfile);
cbcff6df
NB
829 else
830 {
831 c = buffer->read_ahead = buffer->extra_char;
832 buffer->extra_char = EOF;
833 }
0d9f234d 834
cbcff6df
NB
835 /* All known character sets have 0...9 contiguous. */
836 if (c >= '0' && c <= '9')
837 {
838 result->type = CPP_NUMBER;
93c80368 839 parse_number (pfile, &result->val.str, c, 1);
cbcff6df 840 }
041c3194 841 else
ea4a453b 842 {
cbcff6df
NB
843 result->type = CPP_DOT;
844 if (c == '.')
845 {
29401c30 846 buffer->extra_char = get_effective_char (pfile);
cbcff6df
NB
847 if (buffer->extra_char == '.')
848 {
849 buffer->extra_char = EOF;
850 ACCEPT_CHAR (CPP_ELLIPSIS);
851 }
852 else
853 /* We'll catch the extra_char when we're called back. */
854 buffer->read_ahead = '.';
855 }
856 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
857 ACCEPT_CHAR (CPP_DOT_STAR);
ea4a453b 858 }
45b966db
ZW
859}
860
93c80368
NB
861void
862_cpp_lex_token (pfile, result)
45b966db 863 cpp_reader *pfile;
0d9f234d 864 cpp_token *result;
45b966db 865{
0d9f234d 866 cppchar_t c;
adb84b42 867 cpp_buffer *buffer;
0d9f234d 868 const unsigned char *comment_start;
67821e3a 869 int bol;
9ec7291f 870
67821e3a 871 next_token:
adb84b42 872 buffer = pfile->buffer;
bd969772
NB
873 result->flags = buffer->saved_flags;
874 buffer->saved_flags = 0;
67821e3a
NB
875 bol = (buffer->cur <= buffer->line_base + 1
876 && pfile->lexer_pos.output_line == pfile->line);
0d9f234d 877 next_char:
67821e3a 878 pfile->lexer_pos.line = pfile->line;
1444f2ed 879 result->line = pfile->line;
0d9f234d 880 next_char2:
93c80368 881 pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
041c3194 882
0d9f234d
NB
883 c = buffer->read_ahead;
884 if (c == EOF && buffer->cur < buffer->rlimit)
885 {
886 c = *buffer->cur++;
93c80368 887 pfile->lexer_pos.col++;
0d9f234d 888 }
1444f2ed 889 result->col = pfile->lexer_pos.col;
45b966db 890
0d9f234d
NB
891 do_switch:
892 buffer->read_ahead = EOF;
893 switch (c)
45b966db 894 {
0d9f234d 895 case EOF:
d7bc7a98
NB
896 /* Non-empty files should end in a newline. Don't warn for
897 command line and _Pragma buffers. */
898 if (pfile->lexer_pos.col != 0)
899 {
900 /* Account for the missing \n, prevent multiple warnings. */
901 pfile->line++;
902 pfile->lexer_pos.col = 0;
903 if (!buffer->from_stage3)
904 cpp_pedwarn (pfile, "no newline at end of file");
905 }
906
67821e3a
NB
907 /* To prevent bogus diagnostics, only pop the buffer when
908 in-progress directives and arguments have been taken care of.
909 Decrement the line to terminate an in-progress directive. */
910 if (pfile->state.in_directive)
bb74c963 911 pfile->lexer_pos.output_line = pfile->line--;
67821e3a 912 else if (! pfile->state.parsing_args)
ef6e958a 913 {
d7bc7a98 914 /* Don't pop the last buffer. */
7364fdd8
NB
915 if (buffer->prev)
916 {
917 unsigned char stop = buffer->return_at_eof;
918
919 _cpp_pop_buffer (pfile);
920 if (!stop)
921 goto next_token;
922 }
ef6e958a 923 }
0d9f234d 924 result->type = CPP_EOF;
29b10746 925 return;
45b966db 926
0d9f234d
NB
927 case ' ': case '\t': case '\f': case '\v': case '\0':
928 skip_whitespace (pfile, c);
929 result->flags |= PREV_WHITE;
930 goto next_char2;
931
932 case '\n': case '\r':
67821e3a 933 if (pfile->state.in_directive)
45b966db 934 {
67821e3a
NB
935 result->type = CPP_EOF;
936 if (pfile->state.parsing_args)
937 buffer->read_ahead = c;
938 else
939 {
940 handle_newline (pfile, c);
941 /* Decrementing pfile->line allows directives to
942 recognise that the newline has been seen, and also
943 means that diagnostics don't point to the next line. */
944 pfile->lexer_pos.output_line = pfile->line--;
945 }
946 return;
45b966db 947 }
93c80368 948
67821e3a
NB
949 handle_newline (pfile, c);
950 /* This is a new line, so clear any white space flag. Newlines
951 in arguments are white space (6.10.3.10); parse_arg takes
952 care of that. */
953 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
954 bol = 1;
955 if (pfile->state.parsing_args != 2)
956 pfile->lexer_pos.output_line = pfile->line;
957 goto next_char;
46d07497 958
0d9f234d
NB
959 case '?':
960 case '\\':
961 /* These could start an escaped newline, or '?' a trigraph. Let
962 skip_escaped_newlines do all the work. */
963 {
67821e3a 964 unsigned int line = pfile->line;
0d9f234d 965
29401c30 966 c = skip_escaped_newlines (pfile, c);
67821e3a 967 if (line != pfile->line)
0d9f234d
NB
968 /* We had at least one escaped newline of some sort, and the
969 next character is in buffer->read_ahead. Update the
970 token's line and column. */
971 goto next_char;
972
973 /* We are either the original '?' or '\\', or a trigraph. */
974 result->type = CPP_QUERY;
975 buffer->read_ahead = EOF;
976 if (c == '\\')
12c4f523 977 goto random_char;
0d9f234d
NB
978 else if (c != '?')
979 goto do_switch;
980 }
981 break;
46d07497 982
0d9f234d
NB
983 case '0': case '1': case '2': case '3': case '4':
984 case '5': case '6': case '7': case '8': case '9':
985 result->type = CPP_NUMBER;
93c80368 986 parse_number (pfile, &result->val.str, c, 0);
0d9f234d 987 break;
46d07497 988
0d9f234d
NB
989 case '$':
990 if (!CPP_OPTION (pfile, dollars_in_ident))
991 goto random_char;
ec5c56db 992 /* Fall through... */
0d9f234d
NB
993
994 case '_':
995 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
996 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
997 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
998 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
999 case 'y': case 'z':
1000 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1001 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1002 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1003 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1004 case 'Y': case 'Z':
1005 result->type = CPP_NAME;
1006 result->val.node = parse_identifier (pfile, c);
1007
1008 /* 'L' may introduce wide characters or strings. */
93c80368 1009 if (result->val.node == pfile->spec_nodes.n_L)
0d9f234d
NB
1010 {
1011 c = buffer->read_ahead; /* For make_string. */
1012 if (c == '\'' || c == '"')
ba89d661 1013 {
0d9f234d
NB
1014 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1015 goto make_string;
ba89d661 1016 }
0d9f234d
NB
1017 }
1018 /* Convert named operators to their proper types. */
93c80368 1019 else if (result->val.node->flags & NODE_OPERATOR)
0d9f234d
NB
1020 {
1021 result->flags |= NAMED_OP;
93c80368 1022 result->type = result->val.node->value.operator;
0d9f234d
NB
1023 }
1024 break;
1025
1026 case '\'':
1027 case '"':
1028 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1029 make_string:
1030 parse_string (pfile, result, c);
1031 break;
041c3194 1032
0d9f234d 1033 case '/':
1c6d33ef
NB
1034 /* A potential block or line comment. */
1035 comment_start = buffer->cur;
0d9f234d 1036 result->type = CPP_DIV;
29401c30 1037 c = get_effective_char (pfile);
0d9f234d
NB
1038 if (c == '=')
1039 ACCEPT_CHAR (CPP_DIV_EQ);
1c6d33ef
NB
1040 if (c != '/' && c != '*')
1041 break;
e61fc951 1042
1c6d33ef
NB
1043 if (c == '*')
1044 {
0d9f234d 1045 if (skip_block_comment (pfile))
67821e3a 1046 cpp_error (pfile, "unterminated comment");
0d9f234d 1047 }
1c6d33ef 1048 else
0d9f234d 1049 {
1c6d33ef
NB
1050 if (!CPP_OPTION (pfile, cplusplus_comments)
1051 && !CPP_IN_SYSTEM_HEADER (pfile))
1052 break;
1053
bdb05a7b
NB
1054 /* Warn about comments only if pedantically GNUC89, and not
1055 in system headers. */
1056 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
a94c1199 1057 && ! buffer->warned_cplusplus_comments)
041c3194 1058 {
1c6d33ef
NB
1059 cpp_pedwarn (pfile,
1060 "C++ style comments are not allowed in ISO C89");
1061 cpp_pedwarn (pfile,
1062 "(this will be reported only once per input file)");
1063 buffer->warned_cplusplus_comments = 1;
1064 }
0d9f234d 1065
a94c1199 1066 /* Skip_line_comment updates buffer->read_ahead. */
01ef6563 1067 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
93c80368
NB
1068 cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1069 pfile->lexer_pos.col,
1c6d33ef
NB
1070 "multi-line comment");
1071 }
0d9f234d 1072
1c6d33ef
NB
1073 /* Skipping the comment has updated buffer->read_ahead. */
1074 if (!pfile->state.save_comments)
1075 {
1076 result->flags |= PREV_WHITE;
1077 goto next_char;
0d9f234d 1078 }
1c6d33ef
NB
1079
1080 /* Save the comment as a token in its own right. */
1081 save_comment (pfile, result, comment_start);
29b10746
NB
1082 /* Don't do MI optimisation. */
1083 return;
0d9f234d
NB
1084
1085 case '<':
1086 if (pfile->state.angled_headers)
1087 {
1088 result->type = CPP_HEADER_NAME;
1089 c = '>'; /* terminator. */
1090 goto make_string;
1091 }
45b966db 1092
0d9f234d 1093 result->type = CPP_LESS;
29401c30 1094 c = get_effective_char (pfile);
0d9f234d
NB
1095 if (c == '=')
1096 ACCEPT_CHAR (CPP_LESS_EQ);
1097 else if (c == '<')
1098 {
1099 ACCEPT_CHAR (CPP_LSHIFT);
29401c30 1100 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1101 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1102 }
1103 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1104 {
1105 ACCEPT_CHAR (CPP_MIN);
29401c30 1106 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1107 ACCEPT_CHAR (CPP_MIN_EQ);
1108 }
1109 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1110 {
1111 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1112 result->flags |= DIGRAPH;
1113 }
1114 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1115 {
1116 ACCEPT_CHAR (CPP_OPEN_BRACE);
1117 result->flags |= DIGRAPH;
1118 }
1119 break;
1120
1121 case '>':
1122 result->type = CPP_GREATER;
29401c30 1123 c = get_effective_char (pfile);
0d9f234d
NB
1124 if (c == '=')
1125 ACCEPT_CHAR (CPP_GREATER_EQ);
1126 else if (c == '>')
1127 {
1128 ACCEPT_CHAR (CPP_RSHIFT);
29401c30 1129 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1130 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1131 }
1132 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1133 {
1134 ACCEPT_CHAR (CPP_MAX);
29401c30 1135 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1136 ACCEPT_CHAR (CPP_MAX_EQ);
1137 }
1138 break;
1139
cbcff6df 1140 case '%':
29401c30 1141 lex_percent (pfile, result);
93c80368
NB
1142 if (result->type == CPP_HASH)
1143 goto do_hash;
0d9f234d
NB
1144 break;
1145
cbcff6df
NB
1146 case '.':
1147 lex_dot (pfile, result);
0d9f234d 1148 break;
45b966db 1149
0d9f234d
NB
1150 case '+':
1151 result->type = CPP_PLUS;
29401c30 1152 c = get_effective_char (pfile);
0d9f234d
NB
1153 if (c == '=')
1154 ACCEPT_CHAR (CPP_PLUS_EQ);
1155 else if (c == '+')
1156 ACCEPT_CHAR (CPP_PLUS_PLUS);
1157 break;
04e3ec78 1158
0d9f234d
NB
1159 case '-':
1160 result->type = CPP_MINUS;
29401c30 1161 c = get_effective_char (pfile);
0d9f234d
NB
1162 if (c == '>')
1163 {
1164 ACCEPT_CHAR (CPP_DEREF);
1165 if (CPP_OPTION (pfile, cplusplus)
29401c30 1166 && get_effective_char (pfile) == '*')
0d9f234d
NB
1167 ACCEPT_CHAR (CPP_DEREF_STAR);
1168 }
1169 else if (c == '=')
1170 ACCEPT_CHAR (CPP_MINUS_EQ);
1171 else if (c == '-')
1172 ACCEPT_CHAR (CPP_MINUS_MINUS);
1173 break;
45b966db 1174
0d9f234d
NB
1175 case '*':
1176 result->type = CPP_MULT;
29401c30 1177 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1178 ACCEPT_CHAR (CPP_MULT_EQ);
1179 break;
04e3ec78 1180
0d9f234d
NB
1181 case '=':
1182 result->type = CPP_EQ;
29401c30 1183 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1184 ACCEPT_CHAR (CPP_EQ_EQ);
1185 break;
f8f769ea 1186
0d9f234d
NB
1187 case '!':
1188 result->type = CPP_NOT;
29401c30 1189 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1190 ACCEPT_CHAR (CPP_NOT_EQ);
1191 break;
45b966db 1192
0d9f234d
NB
1193 case '&':
1194 result->type = CPP_AND;
29401c30 1195 c = get_effective_char (pfile);
0d9f234d
NB
1196 if (c == '=')
1197 ACCEPT_CHAR (CPP_AND_EQ);
1198 else if (c == '&')
1199 ACCEPT_CHAR (CPP_AND_AND);
1200 break;
1201
1202 case '#':
adb84b42
NB
1203 c = buffer->extra_char; /* Can be set by error condition below. */
1204 if (c != EOF)
1205 {
1206 buffer->read_ahead = c;
1207 buffer->extra_char = EOF;
1208 }
1209 else
29401c30 1210 c = get_effective_char (pfile);
adb84b42
NB
1211
1212 if (c == '#')
93c80368 1213 {
a949941c
NB
1214 ACCEPT_CHAR (CPP_PASTE);
1215 break;
1216 }
1217
1218 result->type = CPP_HASH;
1219 do_hash:
e8408f25
NB
1220 if (!bol)
1221 break;
1222 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1223 tokens within the list of arguments that would otherwise act
1224 as preprocessing directives, the behavior is undefined.
1225
1226 This implementation will report a hard error, terminate the
1227 macro invocation, and proceed to process the directive. */
1228 if (pfile->state.parsing_args)
a949941c 1229 {
67821e3a 1230 pfile->lexer_pos.output_line = pfile->line;
e8408f25 1231 if (pfile->state.parsing_args == 2)
67821e3a
NB
1232 {
1233 cpp_error (pfile,
1234 "directives may not be used inside a macro argument");
1235 result->type = CPP_EOF;
1236 }
e8408f25 1237 }
67821e3a
NB
1238 /* in_directive can be true inside a _Pragma. */
1239 else if (!pfile->state.in_directive)
e8408f25 1240 {
67821e3a
NB
1241 /* This is the hash introducing a directive. If the return
1242 value is false, it is an assembler #. */
e8408f25 1243 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
67821e3a 1244 goto next_token;
93c80368 1245 }
0d9f234d 1246 break;
45b966db 1247
0d9f234d
NB
1248 case '|':
1249 result->type = CPP_OR;
29401c30 1250 c = get_effective_char (pfile);
0d9f234d
NB
1251 if (c == '=')
1252 ACCEPT_CHAR (CPP_OR_EQ);
1253 else if (c == '|')
1254 ACCEPT_CHAR (CPP_OR_OR);
1255 break;
45b966db 1256
0d9f234d
NB
1257 case '^':
1258 result->type = CPP_XOR;
29401c30 1259 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1260 ACCEPT_CHAR (CPP_XOR_EQ);
1261 break;
45b966db 1262
0d9f234d
NB
1263 case ':':
1264 result->type = CPP_COLON;
29401c30 1265 c = get_effective_char (pfile);
0d9f234d
NB
1266 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1267 ACCEPT_CHAR (CPP_SCOPE);
1268 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1269 {
1270 result->flags |= DIGRAPH;
1271 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1272 }
1273 break;
45b966db 1274
0d9f234d
NB
1275 case '~': result->type = CPP_COMPL; break;
1276 case ',': result->type = CPP_COMMA; break;
1277 case '(': result->type = CPP_OPEN_PAREN; break;
1278 case ')': result->type = CPP_CLOSE_PAREN; break;
1279 case '[': result->type = CPP_OPEN_SQUARE; break;
1280 case ']': result->type = CPP_CLOSE_SQUARE; break;
1281 case '{': result->type = CPP_OPEN_BRACE; break;
1282 case '}': result->type = CPP_CLOSE_BRACE; break;
1283 case ';': result->type = CPP_SEMICOLON; break;
1284
cc937581
ZW
1285 /* @ is a punctuator in Objective C. */
1286 case '@': result->type = CPP_ATSIGN; break;
0d9f234d
NB
1287
1288 random_char:
1289 default:
1290 result->type = CPP_OTHER;
6c53ebff 1291 result->val.c = c;
0d9f234d
NB
1292 break;
1293 }
b528a07e 1294
cef0d199 1295 if (!pfile->state.in_directive && pfile->state.skipping)
67821e3a 1296 goto next_char;
8d9e9a08 1297
29b10746
NB
1298 /* If not in a directive, this token invalidates controlling macros. */
1299 if (!pfile->state.in_directive)
6d18adbc 1300 pfile->mi_valid = false;
0d9f234d
NB
1301}
1302
93c80368
NB
1303/* An upper bound on the number of bytes needed to spell a token,
1304 including preceding whitespace. */
1305unsigned int
1306cpp_token_len (token)
1307 const cpp_token *token;
0d9f234d 1308{
93c80368 1309 unsigned int len;
6d2c2047 1310
93c80368 1311 switch (TOKEN_SPELL (token))
041c3194 1312 {
a28c5035
NB
1313 default: len = 0; break;
1314 case SPELL_STRING: len = token->val.str.len; break;
1315 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
041c3194 1316 }
93c80368
NB
1317 /* 1 for whitespace, 4 for comment delimeters. */
1318 return len + 5;
6d2c2047
ZW
1319}
1320
041c3194 1321/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885
ZW
1322 already contain the enough space to hold the token's spelling.
1323 Returns a pointer to the character after the last character
1324 written. */
93c80368
NB
1325unsigned char *
1326cpp_spell_token (pfile, token, buffer)
041c3194
ZW
1327 cpp_reader *pfile; /* Would be nice to be rid of this... */
1328 const cpp_token *token;
1329 unsigned char *buffer;
1330{
96be6998 1331 switch (TOKEN_SPELL (token))
041c3194
ZW
1332 {
1333 case SPELL_OPERATOR:
1334 {
1335 const unsigned char *spelling;
1336 unsigned char c;
d6d5f795 1337
041c3194 1338 if (token->flags & DIGRAPH)
37b8524c
JDA
1339 spelling
1340 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
92936ecf
ZW
1341 else if (token->flags & NAMED_OP)
1342 goto spell_ident;
041c3194 1343 else
96be6998 1344 spelling = TOKEN_NAME (token);
041c3194
ZW
1345
1346 while ((c = *spelling++) != '\0')
1347 *buffer++ = c;
1348 }
1349 break;
d6d5f795 1350
041c3194 1351 case SPELL_IDENT:
92936ecf 1352 spell_ident:
a28c5035
NB
1353 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1354 buffer += NODE_LEN (token->val.node);
041c3194 1355 break;
d6d5f795 1356
041c3194
ZW
1357 case SPELL_STRING:
1358 {
ba89d661
ZW
1359 int left, right, tag;
1360 switch (token->type)
1361 {
1362 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1363 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
ba89d661
ZW
1364 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1365 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1366 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1367 default: left = '\0'; right = '\0'; tag = '\0'; break;
1368 }
1369 if (tag) *buffer++ = tag;
1370 if (left) *buffer++ = left;
bfb9dc7f
ZW
1371 memcpy (buffer, token->val.str.text, token->val.str.len);
1372 buffer += token->val.str.len;
ba89d661 1373 if (right) *buffer++ = right;
041c3194
ZW
1374 }
1375 break;
d6d5f795 1376
041c3194 1377 case SPELL_CHAR:
6c53ebff 1378 *buffer++ = token->val.c;
041c3194 1379 break;
d6d5f795 1380
041c3194 1381 case SPELL_NONE:
96be6998 1382 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
041c3194
ZW
1383 break;
1384 }
d6d5f795 1385
041c3194
ZW
1386 return buffer;
1387}
d6d5f795 1388
93c80368
NB
1389/* Returns a token as a null-terminated string. The string is
1390 temporary, and automatically freed later. Useful for diagnostics. */
1391unsigned char *
1392cpp_token_as_text (pfile, token)
c5a04734 1393 cpp_reader *pfile;
041c3194 1394 const cpp_token *token;
c5a04734 1395{
93c80368 1396 unsigned int len = cpp_token_len (token);
49fe13f6 1397 unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
c5a04734 1398
93c80368
NB
1399 end = cpp_spell_token (pfile, token, start);
1400 end[0] = '\0';
c5a04734 1401
93c80368
NB
1402 return start;
1403}
c5a04734 1404
93c80368
NB
1405/* Used by C front ends. Should really move to using cpp_token_as_text. */
1406const char *
1407cpp_type2name (type)
1408 enum cpp_ttype type;
1409{
1410 return (const char *) token_spellings[type].name;
1411}
c5a04734 1412
93c80368
NB
1413/* Writes the spelling of token to FP. Separate from cpp_spell_token
1414 for efficiency - to avoid double-buffering. Also, outputs a space
1415 if PREV_WHITE is flagged. */
1416void
1417cpp_output_token (token, fp)
1418 const cpp_token *token;
1419 FILE *fp;
1420{
1421 if (token->flags & PREV_WHITE)
1422 putc (' ', fp);
d8090680 1423
93c80368 1424 switch (TOKEN_SPELL (token))
c5a04734 1425 {
93c80368
NB
1426 case SPELL_OPERATOR:
1427 {
1428 const unsigned char *spelling;
c5a04734 1429
93c80368 1430 if (token->flags & DIGRAPH)
37b8524c
JDA
1431 spelling
1432 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
93c80368
NB
1433 else if (token->flags & NAMED_OP)
1434 goto spell_ident;
1435 else
1436 spelling = TOKEN_NAME (token);
041c3194 1437
93c80368
NB
1438 ufputs (spelling, fp);
1439 }
1440 break;
041c3194 1441
93c80368
NB
1442 spell_ident:
1443 case SPELL_IDENT:
a28c5035 1444 ufputs (NODE_NAME (token->val.node), fp);
93c80368 1445 break;
041c3194 1446
93c80368
NB
1447 case SPELL_STRING:
1448 {
1449 int left, right, tag;
1450 switch (token->type)
1451 {
1452 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1453 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
93c80368
NB
1454 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1455 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1456 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1457 default: left = '\0'; right = '\0'; tag = '\0'; break;
1458 }
1459 if (tag) putc (tag, fp);
1460 if (left) putc (left, fp);
1461 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1462 if (right) putc (right, fp);
1463 }
1464 break;
c5a04734 1465
93c80368 1466 case SPELL_CHAR:
6c53ebff 1467 putc (token->val.c, fp);
93c80368 1468 break;
c5a04734 1469
93c80368
NB
1470 case SPELL_NONE:
1471 /* An error, most probably. */
1472 break;
041c3194 1473 }
c5a04734
ZW
1474}
1475
93c80368
NB
1476/* Compare two tokens. */
1477int
1478_cpp_equiv_tokens (a, b)
1479 const cpp_token *a, *b;
c5a04734 1480{
93c80368
NB
1481 if (a->type == b->type && a->flags == b->flags)
1482 switch (TOKEN_SPELL (a))
1483 {
1484 default: /* Keep compiler happy. */
1485 case SPELL_OPERATOR:
1486 return 1;
1487 case SPELL_CHAR:
6c53ebff 1488 return a->val.c == b->val.c; /* Character. */
93c80368 1489 case SPELL_NONE:
56051c0a 1490 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
93c80368
NB
1491 case SPELL_IDENT:
1492 return a->val.node == b->val.node;
1493 case SPELL_STRING:
1494 return (a->val.str.len == b->val.str.len
1495 && !memcmp (a->val.str.text, b->val.str.text,
1496 a->val.str.len));
1497 }
c5a04734 1498
041c3194
ZW
1499 return 0;
1500}
1501
041c3194
ZW
1502/* Determine whether two tokens can be pasted together, and if so,
1503 what the resulting token is. Returns CPP_EOF if the tokens cannot
1504 be pasted, or the appropriate type for the merged token if they
1505 can. */
7de4d004 1506enum cpp_ttype
93c80368 1507cpp_can_paste (pfile, token1, token2, digraph)
041c3194
ZW
1508 cpp_reader * pfile;
1509 const cpp_token *token1, *token2;
1510 int* digraph;
c5a04734 1511{
041c3194
ZW
1512 enum cpp_ttype a = token1->type, b = token2->type;
1513 int cxx = CPP_OPTION (pfile, cplusplus);
c5a04734 1514
92936ecf
ZW
1515 /* Treat named operators as if they were ordinary NAMEs. */
1516 if (token1->flags & NAMED_OP)
1517 a = CPP_NAME;
1518 if (token2->flags & NAMED_OP)
1519 b = CPP_NAME;
1520
37b8524c
JDA
1521 if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1522 return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
c5a04734 1523
041c3194 1524 switch (a)
c5a04734 1525 {
041c3194
ZW
1526 case CPP_GREATER:
1527 if (b == a) return CPP_RSHIFT;
1528 if (b == CPP_QUERY && cxx) return CPP_MAX;
1529 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
1530 break;
1531 case CPP_LESS:
1532 if (b == a) return CPP_LSHIFT;
1533 if (b == CPP_QUERY && cxx) return CPP_MIN;
1534 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
9b55f29a
NB
1535 if (CPP_OPTION (pfile, digraphs))
1536 {
1537 if (b == CPP_COLON)
1538 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1539 if (b == CPP_MOD)
1540 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
1541 }
041c3194 1542 break;
c5a04734 1543
041c3194
ZW
1544 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1545 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1546 case CPP_OR: if (b == a) return CPP_OR_OR; break;
c5a04734 1547
041c3194
ZW
1548 case CPP_MINUS:
1549 if (b == a) return CPP_MINUS_MINUS;
1550 if (b == CPP_GREATER) return CPP_DEREF;
1551 break;
1552 case CPP_COLON:
1553 if (b == a && cxx) return CPP_SCOPE;
9b55f29a 1554 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
041c3194
ZW
1555 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1556 break;
1557
1558 case CPP_MOD:
9b55f29a
NB
1559 if (CPP_OPTION (pfile, digraphs))
1560 {
1561 if (b == CPP_GREATER)
1562 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1563 if (b == CPP_COLON)
1564 {*digraph = 1; return CPP_HASH;} /* %: digraph */
1565 }
041c3194
ZW
1566 break;
1567 case CPP_DEREF:
1568 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1569 break;
1570 case CPP_DOT:
1571 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1572 if (b == CPP_NUMBER) return CPP_NUMBER;
1573 break;
1574
1575 case CPP_HASH:
1576 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1577 /* %:%: digraph */
1578 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1579 break;
1580
1581 case CPP_NAME:
1582 if (b == CPP_NAME) return CPP_NAME;
1583 if (b == CPP_NUMBER
93c80368 1584 && name_p (pfile, &token2->val.str)) return CPP_NAME;
041c3194 1585 if (b == CPP_CHAR
93c80368 1586 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
041c3194 1587 if (b == CPP_STRING
93c80368 1588 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
041c3194
ZW
1589 break;
1590
1591 case CPP_NUMBER:
1592 if (b == CPP_NUMBER) return CPP_NUMBER;
1593 if (b == CPP_NAME) return CPP_NUMBER;
1594 if (b == CPP_DOT) return CPP_NUMBER;
1595 /* Numbers cannot have length zero, so this is safe. */
1596 if ((b == CPP_PLUS || b == CPP_MINUS)
bfb9dc7f 1597 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
041c3194
ZW
1598 return CPP_NUMBER;
1599 break;
1600
1601 default:
1602 break;
c5a04734
ZW
1603 }
1604
041c3194
ZW
1605 return CPP_EOF;
1606}
1607
93c80368
NB
1608/* Returns nonzero if a space should be inserted to avoid an
1609 accidental token paste for output. For simplicity, it is
1610 conservative, and occasionally advises a space where one is not
1611 needed, e.g. "." and ".2". */
041c3194 1612
93c80368
NB
1613int
1614cpp_avoid_paste (pfile, token1, token2)
c5a04734 1615 cpp_reader *pfile;
93c80368 1616 const cpp_token *token1, *token2;
c5a04734 1617{
93c80368
NB
1618 enum cpp_ttype a = token1->type, b = token2->type;
1619 cppchar_t c;
c5a04734 1620
93c80368
NB
1621 if (token1->flags & NAMED_OP)
1622 a = CPP_NAME;
1623 if (token2->flags & NAMED_OP)
1624 b = CPP_NAME;
c5a04734 1625
93c80368
NB
1626 c = EOF;
1627 if (token2->flags & DIGRAPH)
37b8524c 1628 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
93c80368
NB
1629 else if (token_spellings[b].category == SPELL_OPERATOR)
1630 c = token_spellings[b].name[0];
c5a04734 1631
93c80368 1632 /* Quickly get everything that can paste with an '='. */
37b8524c 1633 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
93c80368 1634 return 1;
c5a04734 1635
93c80368 1636 switch (a)
c5a04734 1637 {
93c80368
NB
1638 case CPP_GREATER: return c == '>' || c == '?';
1639 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1640 case CPP_PLUS: return c == '+';
1641 case CPP_MINUS: return c == '-' || c == '>';
1642 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1643 case CPP_MOD: return c == ':' || c == '>';
1644 case CPP_AND: return c == '&';
1645 case CPP_OR: return c == '|';
1646 case CPP_COLON: return c == ':' || c == '>';
1647 case CPP_DEREF: return c == '*';
26ec42ee 1648 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
93c80368
NB
1649 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1650 case CPP_NAME: return ((b == CPP_NUMBER
1651 && name_p (pfile, &token2->val.str))
1652 || b == CPP_NAME
1653 || b == CPP_CHAR || b == CPP_STRING); /* L */
1654 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1655 || c == '.' || c == '+' || c == '-');
1656 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
6c53ebff 1657 && token1->val.c == '@'
93c80368
NB
1658 && (b == CPP_NAME || b == CPP_STRING));
1659 default: break;
c5a04734 1660 }
c5a04734 1661
417f3e3a 1662 return 0;
c5a04734
ZW
1663}
1664
93c80368
NB
1665/* Output all the remaining tokens on the current line, and a newline
1666 character, to FP. Leading whitespace is removed. */
c5a04734 1667void
93c80368 1668cpp_output_line (pfile, fp)
c5a04734 1669 cpp_reader *pfile;
93c80368 1670 FILE *fp;
c5a04734 1671{
93c80368 1672 cpp_token token;
96be6998 1673
7f2f1a66 1674 cpp_get_token (pfile, &token);
93c80368
NB
1675 token.flags &= ~PREV_WHITE;
1676 while (token.type != CPP_EOF)
96be6998 1677 {
93c80368 1678 cpp_output_token (&token, fp);
7f2f1a66 1679 cpp_get_token (pfile, &token);
96be6998
ZW
1680 }
1681
93c80368 1682 putc ('\n', fp);
041c3194 1683}
c5a04734 1684
c8a96070
NB
/* Returns the numeric value (0-15) of the hexadecimal digit C.
   Aborts if C is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'A' && c <= 'F')
    return 10 + (c - 'A');
  if (c >= 'a' && c <= 'f')
    return 10 + (c - 'a');

  abort ();
}
1698
62729350
NB
1699/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1700 failure if cpplib is not parsing C++ or C99. Such failure is
1701 silent, and no variables are updated. Otherwise returns 0, and
1702 warns if -Wtraditional.
c8a96070
NB
1703
1704 [lex.charset]: The character designated by the universal character
1705 name \UNNNNNNNN is that character whose character short name in
1706 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1707 universal character name \uNNNN is that character whose character
1708 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1709 for a universal character name is less than 0x20 or in the range
1710 0x7F-0x9F (inclusive), or if the universal character name
1711 designates a character in the basic source character set, then the
1712 program is ill-formed.
1713
1714 We assume that wchar_t is Unicode, so we don't need to do any
62729350 1715 mapping. Is this ever wrong?
c8a96070 1716
62729350
NB
1717 PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1718 LIMIT is the end of the string or charconst. PSTR is updated to
1719 point after the UCS on return, and the UCS is written into PC. */
1720
1721static int
1722maybe_read_ucs (pfile, pstr, limit, pc)
c8a96070
NB
1723 cpp_reader *pfile;
1724 const unsigned char **pstr;
1725 const unsigned char *limit;
62729350 1726 unsigned int *pc;
c8a96070
NB
1727{
1728 const unsigned char *p = *pstr;
62729350
NB
1729 unsigned int code = 0;
1730 unsigned int c = *pc, length;
1731
1732 /* Only attempt to interpret a UCS for C++ and C99. */
1733 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1734 return 1;
c8a96070 1735
62729350
NB
1736 if (CPP_WTRADITIONAL (pfile))
1737 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
c8a96070 1738
f8710242
NB
1739 length = (c == 'u' ? 4: 8);
1740
1741 if ((size_t) (limit - p) < length)
1742 {
1743 cpp_error (pfile, "incomplete universal-character-name");
1744 /* Skip to the end to avoid more diagnostics. */
1745 p = limit;
1746 }
1747 else
1748 {
1749 for (; length; length--, p++)
c8a96070 1750 {
f8710242
NB
1751 c = *p;
1752 if (ISXDIGIT (c))
1753 code = (code << 4) + hex_digit_value (c);
1754 else
1755 {
1756 cpp_error (pfile,
1757 "non-hex digit '%c' in universal-character-name", c);
1758 /* We shouldn't skip in case there are multibyte chars. */
1759 break;
1760 }
c8a96070 1761 }
c8a96070
NB
1762 }
1763
1764#ifdef TARGET_EBCDIC
1765 cpp_error (pfile, "universal-character-name on EBCDIC target");
1766 code = 0x3f; /* EBCDIC invalid character */
1767#else
f8710242
NB
1768 /* True extended characters are OK. */
1769 if (code >= 0xa0
1770 && !(code & 0x80000000)
1771 && !(code >= 0xD800 && code <= 0xDFFF))
1772 ;
1773 /* The standard permits $, @ and ` to be specified as UCNs. We use
1774 hex escapes so that this also works with EBCDIC hosts. */
1775 else if (code == 0x24 || code == 0x40 || code == 0x60)
1776 ;
1777 /* Don't give another error if one occurred above. */
1778 else if (length == 0)
1779 cpp_error (pfile, "universal-character-name out of range");
c8a96070
NB
1780#endif
1781
1782 *pstr = p;
62729350
NB
1783 *pc = code;
1784 return 0;
c8a96070
NB
1785}
1786
1787/* Interpret an escape sequence, and return its value. PSTR points to
1788 the input pointer, which is just after the backslash. LIMIT is how
62729350
NB
1789 much text we have. MASK is a bitmask for the precision for the
1790 destination type (char or wchar_t). TRADITIONAL, if true, does not
1791 interpret escapes that did not exist in traditional C.
c8a96070 1792
62729350
NB
1793 Handles all relevant diagnostics. */
1794
1795unsigned int
1796cpp_parse_escape (pfile, pstr, limit, mask, traditional)
c8a96070
NB
1797 cpp_reader *pfile;
1798 const unsigned char **pstr;
1799 const unsigned char *limit;
62729350 1800 unsigned HOST_WIDE_INT mask;
c8a96070
NB
1801 int traditional;
1802{
1803 int unknown = 0;
1804 const unsigned char *str = *pstr;
1805 unsigned int c = *str++;
1806
1807 switch (c)
1808 {
1809 case '\\': case '\'': case '"': case '?': break;
1810 case 'b': c = TARGET_BS; break;
1811 case 'f': c = TARGET_FF; break;
1812 case 'n': c = TARGET_NEWLINE; break;
1813 case 'r': c = TARGET_CR; break;
1814 case 't': c = TARGET_TAB; break;
1815 case 'v': c = TARGET_VT; break;
1816
1817 case '(': case '{': case '[': case '%':
1818 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1819 '\%' is used to prevent SCCS from getting confused. */
1820 unknown = CPP_PEDANTIC (pfile);
1821 break;
1822
1823 case 'a':
1824 if (CPP_WTRADITIONAL (pfile))
1825 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1826 if (!traditional)
1827 c = TARGET_BELL;
1828 break;
1829
1830 case 'e': case 'E':
1831 if (CPP_PEDANTIC (pfile))
1832 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1833 c = TARGET_ESC;
1834 break;
1835
c8a96070 1836 case 'u': case 'U':
62729350 1837 unknown = maybe_read_ucs (pfile, &str, limit, &c);
c8a96070
NB
1838 break;
1839
1840 case 'x':
1841 if (CPP_WTRADITIONAL (pfile))
1842 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1843
1844 if (!traditional)
1845 {
1846 unsigned int i = 0, overflow = 0;
1847 int digits_found = 0;
1848
1849 while (str < limit)
1850 {
1851 c = *str;
1852 if (! ISXDIGIT (c))
1853 break;
1854 str++;
1855 overflow |= i ^ (i << 4 >> 4);
1856 i = (i << 4) + hex_digit_value (c);
1857 digits_found = 1;
1858 }
1859
1860 if (!digits_found)
1861 cpp_error (pfile, "\\x used with no following hex digits");
1862
1863 if (overflow | (i != (i & mask)))
1864 {
1865 cpp_pedwarn (pfile, "hex escape sequence out of range");
1866 i &= mask;
1867 }
1868 c = i;
1869 }
1870 break;
1871
1872 case '0': case '1': case '2': case '3':
1873 case '4': case '5': case '6': case '7':
1874 {
1875 unsigned int i = c - '0';
1876 int count = 0;
1877
1878 while (str < limit && ++count < 3)
1879 {
1880 c = *str;
1881 if (c < '0' || c > '7')
1882 break;
1883 str++;
1884 i = (i << 3) + c - '0';
1885 }
1886
1887 if (i != (i & mask))
1888 {
1889 cpp_pedwarn (pfile, "octal escape sequence out of range");
1890 i &= mask;
1891 }
1892 c = i;
1893 }
1894 break;
1895
1896 default:
1897 unknown = 1;
1898 break;
1899 }
1900
1901 if (unknown)
1902 {
1903 if (ISGRAPH (c))
1904 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1905 else
1906 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1907 }
1908
62729350
NB
1909 if (c > mask)
1910 cpp_pedwarn (pfile, "escape sequence out of range for character");
1911
c8a96070
NB
1912 *pstr = str;
1913 return c;
1914}
1915
/* Fall back to the plain character widths when the MAX_ variants
   have not been defined already.  */
#if !defined (MAX_CHAR_TYPE_SIZE)
# define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

#if !defined (MAX_WCHAR_TYPE_SIZE)
# define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
1923
1924/* Interpret a (possibly wide) character constant in TOKEN.
1925 WARN_MULTI warns about multi-character charconsts, if not
1926 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1927 that did not exist in traditional C. PCHARS_SEEN points to a
1928 variable that is filled in with the number of characters seen. */
1929HOST_WIDE_INT
1930cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1931 cpp_reader *pfile;
1932 const cpp_token *token;
1933 int warn_multi;
1934 int traditional;
1935 unsigned int *pchars_seen;
1936{
1937 const unsigned char *str = token->val.str.text;
1938 const unsigned char *limit = str + token->val.str.len;
1939 unsigned int chars_seen = 0;
1940 unsigned int width, max_chars, c;
2a967f3d
NB
1941 unsigned HOST_WIDE_INT mask;
1942 HOST_WIDE_INT result = 0;
c8a96070
NB
1943
1944#ifdef MULTIBYTE_CHARS
1945 (void) local_mbtowc (NULL, NULL, 0);
1946#endif
1947
1948 /* Width in bits. */
1949 if (token->type == CPP_CHAR)
1950 width = MAX_CHAR_TYPE_SIZE;
1951 else
1952 width = MAX_WCHAR_TYPE_SIZE;
1953
1954 if (width < HOST_BITS_PER_WIDE_INT)
1955 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1956 else
1957 mask = ~0;
1958 max_chars = HOST_BITS_PER_WIDE_INT / width;
1959
1960 while (str < limit)
1961 {
1962#ifdef MULTIBYTE_CHARS
1963 wchar_t wc;
1964 int char_len;
1965
1966 char_len = local_mbtowc (&wc, str, limit - str);
1967 if (char_len == -1)
1968 {
1969 cpp_warning (pfile, "ignoring invalid multibyte character");
1970 c = *str++;
1971 }
1972 else
1973 {
1974 str += char_len;
1975 c = wc;
1976 }
1977#else
1978 c = *str++;
1979#endif
1980
1981 if (c == '\\')
62729350 1982 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
c8a96070
NB
1983
1984#ifdef MAP_CHARACTER
1985 if (ISPRINT (c))
1986 c = MAP_CHARACTER (c);
1987#endif
1988
1989 /* Merge character into result; ignore excess chars. */
1990 if (++chars_seen <= max_chars)
1991 {
1992 if (width < HOST_BITS_PER_WIDE_INT)
1993 result = (result << width) | (c & mask);
1994 else
1995 result = c;
1996 }
1997 }
1998
1999 if (chars_seen == 0)
2000 cpp_error (pfile, "empty character constant");
2001 else if (chars_seen > max_chars)
2002 {
2003 chars_seen = max_chars;
f8710242 2004 cpp_warning (pfile, "character constant too long");
c8a96070
NB
2005 }
2006 else if (chars_seen > 1 && !traditional && warn_multi)
2007 cpp_warning (pfile, "multi-character character constant");
2008
2009 /* If char type is signed, sign-extend the constant. The
2010 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
2011 if (token->type == CPP_CHAR && chars_seen)
2012 {
2013 unsigned int nbits = chars_seen * width;
2014 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
2015
2016 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2017 || ((result >> (nbits - 1)) & 1) == 0)
2018 result &= mask;
2019 else
2020 result |= ~mask;
2021 }
2022
2023 *pchars_seen = chars_seen;
2024 return result;
2025}
2026
93c80368 2027/* Memory pools. */
417f3e3a 2028
/* Probe structure that exists only so that the offset of its member U
   can be measured.  U follows a single char, so its offset is
   whatever padding the compiler inserts to align a union of a double
   and a pointer.  */
struct dummy
{
  char c;
  union
  {
    int *p;
    double d;
  } u;
};

/* Alignment used when none is requested: the offset of U within
   struct dummy.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
417f3e3a 2040
93c80368
NB
2041static int
2042chunk_suitable (pool, chunk, size)
2043 cpp_pool *pool;
2044 cpp_chunk *chunk;
2045 unsigned int size;
2046{
2047 /* Being at least twice SIZE means we can use memcpy in
2048 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2049 anyway. */
2050 return (chunk && pool->locked != chunk
2051 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
041c3194 2052}
c5a04734 2053
93c80368
NB
2054/* Returns the end of the new pool. PTR points to a char in the old
2055 pool, and is updated to point to the same char in the new pool. */
2056unsigned char *
2057_cpp_next_chunk (pool, len, ptr)
2058 cpp_pool *pool;
2059 unsigned int len;
2060 unsigned char **ptr;
041c3194 2061{
93c80368 2062 cpp_chunk *chunk = pool->cur->next;
c5a04734 2063
93c80368
NB
2064 /* LEN is the minimum size we want in the new pool. */
2065 len += POOL_ROOM (pool);
2066 if (! chunk_suitable (pool, chunk, len))
041c3194 2067 {
93c80368 2068 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
c5a04734 2069
93c80368
NB
2070 chunk->next = pool->cur->next;
2071 pool->cur->next = chunk;
c5a04734
ZW
2072 }
2073
93c80368
NB
2074 /* Update the pointer before changing chunk's front. */
2075 if (ptr)
2076 *ptr += chunk->base - POOL_FRONT (pool);
041c3194 2077
93c80368
NB
2078 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2079 chunk->front = chunk->base;
041c3194 2080
93c80368
NB
2081 pool->cur = chunk;
2082 return POOL_LIMIT (pool);
c5a04734
ZW
2083}
2084
93c80368
NB
2085static cpp_chunk *
2086new_chunk (size)
2087 unsigned int size;
041c3194 2088{
93c80368
NB
2089 unsigned char *base;
2090 cpp_chunk *result;
3fef5b2b 2091
269592a8 2092 size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
93c80368
NB
2093 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2094 /* Put the chunk descriptor at the end. Then chunk overruns will
2095 cause obvious chaos. */
2096 result = (cpp_chunk *) (base + size);
2097 result->base = base;
2098 result->front = base;
2099 result->limit = base + size;
2100 result->next = 0;
417f3e3a 2101
93c80368 2102 return result;
041c3194
ZW
2103}
2104
93c80368
NB
2105void
2106_cpp_init_pool (pool, size, align, temp)
2107 cpp_pool *pool;
2108 unsigned int size, align, temp;
2109{
2110 if (align == 0)
2111 align = DEFAULT_ALIGNMENT;
2112 if (align & (align - 1))
2113 abort ();
2114 pool->align = align;
bef985f3
NB
2115 pool->first = new_chunk (size);
2116 pool->cur = pool->first;
93c80368
NB
2117 pool->locked = 0;
2118 pool->locks = 0;
2119 if (temp)
2120 pool->cur->next = pool->cur;
041c3194
ZW
2121}
2122
93c80368
NB
2123void
2124_cpp_lock_pool (pool)
2125 cpp_pool *pool;
041c3194 2126{
93c80368
NB
2127 if (pool->locks++ == 0)
2128 pool->locked = pool->cur;
041c3194
ZW
2129}
2130
93c80368
NB
2131void
2132_cpp_unlock_pool (pool)
2133 cpp_pool *pool;
041c3194 2134{
93c80368
NB
2135 if (--pool->locks == 0)
2136 pool->locked = 0;
041c3194
ZW
2137}
2138
93c80368
NB
2139void
2140_cpp_free_pool (pool)
2141 cpp_pool *pool;
3fef5b2b 2142{
bef985f3 2143 cpp_chunk *chunk = pool->first, *next;
3fef5b2b 2144
93c80368 2145 do
3fef5b2b 2146 {
93c80368
NB
2147 next = chunk->next;
2148 free (chunk->base);
2149 chunk = next;
3fef5b2b 2150 }
bef985f3 2151 while (chunk && chunk != pool->first);
041c3194 2152}
041c3194 2153
93c80368
NB
2154/* Reserve LEN bytes from a memory pool. */
2155unsigned char *
2156_cpp_pool_reserve (pool, len)
2157 cpp_pool *pool;
2158 unsigned int len;
041c3194 2159{
269592a8 2160 len = POOL_ALIGN (len, pool->align);
93c80368
NB
2161 if (len > (unsigned int) POOL_ROOM (pool))
2162 _cpp_next_chunk (pool, len, 0);
041c3194 2163
93c80368 2164 return POOL_FRONT (pool);
c5a04734
ZW
2165}
2166
93c80368
NB
2167/* Allocate LEN bytes from a memory pool. */
2168unsigned char *
2169_cpp_pool_alloc (pool, len)
2170 cpp_pool *pool;
2171 unsigned int len;
041c3194 2172{
93c80368 2173 unsigned char *result = _cpp_pool_reserve (pool, len);
417f3e3a 2174
93c80368
NB
2175 POOL_COMMIT (pool, len);
2176 return result;
041c3194 2177}
This page took 0.663327 seconds and 5 git commands to generate.