]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
* basic-block.h: Add prototype for last_loop_beg_note.
[gcc.git] / gcc / cpplex.c
CommitLineData
45b966db
ZW
1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
93c80368
NB
23/* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
27
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_dot and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
041c3194 36
45b966db
ZW
37#include "config.h"
38#include "system.h"
45b966db
ZW
39#include "cpplib.h"
40#include "cpphash.h"
41
c8a96070
NB
42/* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
45#ifdef CROSS_COMPILE
46#undef MULTIBYTE_CHARS
47#endif
48
49#ifdef MULTIBYTE_CHARS
50#include "mbchar.h"
51#include <locale.h>
52#endif
53
93c80368
NB
/* Spelling categories, one per token type (see token_spellings).
   Tokens with SPELL_STRING store their spelling in the token list,
   and its length in the token->val.name.len.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_CHAR,
  SPELL_IDENT,
  SPELL_STRING,
  SPELL_NONE
};
64
93c80368 65struct token_spelling
f9a0e96c 66{
93c80368
NB
67 enum spell_type category;
68 const unsigned char *name;
f9a0e96c
ZW
69};
70
93c80368
NB
71const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
72 U":>", U"<%", U"%>"};
73
74#define OP(e, s) { SPELL_OPERATOR, U s },
75#define TK(e, s) { s, U STRINGX (e) },
76const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
77#undef OP
78#undef TK
79
80#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
f2d5f0cc 82
0d9f234d
NB
83static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
84static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
85static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
86
041c3194 87static int skip_block_comment PARAMS ((cpp_reader *));
cbcff6df 88static int skip_line_comment PARAMS ((cpp_reader *));
0d9f234d
NB
89static void adjust_column PARAMS ((cpp_reader *));
90static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
91static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
93c80368
NB
92static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
93static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
0d9f234d 94static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
93c80368 95static void unterminated PARAMS ((cpp_reader *, int));
0d9f234d
NB
96static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
97static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
cbcff6df
NB
98static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
99static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
93c80368 100static int name_p PARAMS ((cpp_reader *, const cpp_string *));
62729350
NB
101static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
102 const unsigned char *, unsigned int *));
f617b8e2 103
93c80368
NB
104static cpp_chunk *new_chunk PARAMS ((unsigned int));
105static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
c8a96070 106static unsigned int hex_digit_value PARAMS ((unsigned int));
15dad1d9 107
041c3194 108/* Utility routine:
9e62c811 109
bfb9dc7f
ZW
110 Compares, the token TOKEN to the NUL-terminated string STRING.
111 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
15dad1d9 112
041c3194 113int
bfb9dc7f
ZW
114cpp_ideq (token, string)
115 const cpp_token *token;
041c3194
ZW
116 const char *string;
117{
bfb9dc7f 118 if (token->type != CPP_NAME)
041c3194 119 return 0;
bfb9dc7f 120
a28c5035 121 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
15dad1d9 122}
1368ee70 123
0d9f234d
NB
124/* Call when meeting a newline. Returns the character after the newline
125 (or carriage-return newline combination), or EOF. */
126static cppchar_t
127handle_newline (buffer, newline_char)
128 cpp_buffer *buffer;
129 cppchar_t newline_char;
130{
131 cppchar_t next = EOF;
132
133 buffer->col_adjust = 0;
134 buffer->lineno++;
135 buffer->line_base = buffer->cur;
136
137 /* Handle CR-LF and LF-CR combinations, get the next character. */
138 if (buffer->cur < buffer->rlimit)
139 {
140 next = *buffer->cur++;
141 if (next + newline_char == '\r' + '\n')
142 {
143 buffer->line_base = buffer->cur;
144 if (buffer->cur < buffer->rlimit)
145 next = *buffer->cur++;
146 else
147 next = EOF;
148 }
149 }
150
151 buffer->read_ahead = next;
152 return next;
153}
154
155/* Subroutine of skip_escaped_newlines; called when a trigraph is
156 encountered. It warns if necessary, and returns true if the
157 trigraph should be honoured. FROM_CHAR is the third character of a
158 trigraph, and presumed to be the previous character for position
159 reporting. */
45b966db 160static int
0d9f234d 161trigraph_ok (pfile, from_char)
45b966db 162 cpp_reader *pfile;
0d9f234d 163 cppchar_t from_char;
45b966db 164{
041c3194
ZW
165 int accept = CPP_OPTION (pfile, trigraphs);
166
cbcff6df
NB
167 /* Don't warn about trigraphs in comments. */
168 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
45b966db 169 {
0d9f234d 170 cpp_buffer *buffer = pfile->buffer;
041c3194 171 if (accept)
0d9f234d 172 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
041c3194 173 "trigraph ??%c converted to %c",
0d9f234d
NB
174 (int) from_char,
175 (int) _cpp_trigraph_map[from_char]);
4a5b68a2
NB
176 else if (buffer->cur != buffer->last_Wtrigraphs)
177 {
178 buffer->last_Wtrigraphs = buffer->cur;
179 cpp_warning_with_line (pfile, buffer->lineno,
180 CPP_BUF_COL (buffer) - 2,
181 "trigraph ??%c ignored", (int) from_char);
182 }
45b966db 183 }
0d9f234d 184
041c3194 185 return accept;
45b966db
ZW
186}
187
0d9f234d
NB
188/* Assumes local variables buffer and result. */
189#define ACCEPT_CHAR(t) \
190 do { result->type = t; buffer->read_ahead = EOF; } while (0)
191
192/* When we move to multibyte character sets, add to these something
193 that saves and restores the state of the multibyte conversion
194 library. This probably involves saving and restoring a "cookie".
195 In the case of glibc it is an 8-byte structure, so is not a high
196 overhead operation. In any case, it's out of the fast path. */
197#define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
198#define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
199
200/* Skips any escaped newlines introduced by NEXT, which is either a
201 '?' or a '\\'. Returns the next character, which will also have
a5c3cccd
NB
202 been placed in buffer->read_ahead. This routine performs
203 preprocessing stages 1 and 2 of the ISO C standard. */
0d9f234d
NB
204static cppchar_t
205skip_escaped_newlines (buffer, next)
206 cpp_buffer *buffer;
207 cppchar_t next;
45b966db 208{
a5c3cccd
NB
209 /* Only do this if we apply stages 1 and 2. */
210 if (!buffer->from_stage3)
041c3194 211 {
a5c3cccd
NB
212 cppchar_t next1;
213 const unsigned char *saved_cur;
214 int space;
215
216 do
0d9f234d 217 {
a5c3cccd
NB
218 if (buffer->cur == buffer->rlimit)
219 break;
220
221 SAVE_STATE ();
222 if (next == '?')
0d9f234d 223 {
a5c3cccd
NB
224 next1 = *buffer->cur++;
225 if (next1 != '?' || buffer->cur == buffer->rlimit)
226 {
227 RESTORE_STATE ();
228 break;
229 }
230
231 next1 = *buffer->cur++;
232 if (!_cpp_trigraph_map[next1]
233 || !trigraph_ok (buffer->pfile, next1))
234 {
235 RESTORE_STATE ();
236 break;
237 }
238
239 /* We have a full trigraph here. */
240 next = _cpp_trigraph_map[next1];
241 if (next != '\\' || buffer->cur == buffer->rlimit)
242 break;
243 SAVE_STATE ();
244 }
245
246 /* We have a backslash, and room for at least one more character. */
247 space = 0;
248 do
249 {
250 next1 = *buffer->cur++;
251 if (!is_nvspace (next1))
252 break;
253 space = 1;
0d9f234d 254 }
a5c3cccd 255 while (buffer->cur < buffer->rlimit);
041c3194 256
a5c3cccd 257 if (!is_vspace (next1))
0d9f234d
NB
258 {
259 RESTORE_STATE ();
260 break;
261 }
45b966db 262
c673abe4 263 if (space && !buffer->pfile->state.lexing_comment)
a5c3cccd
NB
264 cpp_warning (buffer->pfile,
265 "backslash and newline separated by space");
0d9f234d 266
a5c3cccd
NB
267 next = handle_newline (buffer, next1);
268 if (next == EOF)
269 cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
0d9f234d 270 }
a5c3cccd 271 while (next == '\\' || next == '?');
041c3194 272 }
45b966db 273
0d9f234d
NB
274 buffer->read_ahead = next;
275 return next;
45b966db
ZW
276}
277
0d9f234d
NB
278/* Obtain the next character, after trigraph conversion and skipping
279 an arbitrary string of escaped newlines. The common case of no
280 trigraphs or escaped newlines falls through quickly. */
281static cppchar_t
282get_effective_char (buffer)
283 cpp_buffer *buffer;
64aaf407 284{
0d9f234d
NB
285 cppchar_t next = EOF;
286
287 if (buffer->cur < buffer->rlimit)
288 {
289 next = *buffer->cur++;
290
291 /* '?' can introduce trigraphs (and therefore backslash); '\\'
292 can introduce escaped newlines, which we want to skip, or
293 UCNs, which, depending upon lexer state, we will handle in
294 the future. */
295 if (next == '?' || next == '\\')
296 next = skip_escaped_newlines (buffer, next);
297 }
298
299 buffer->read_ahead = next;
300 return next;
64aaf407
NB
301}
302
0d9f234d
NB
303/* Skip a C-style block comment. We find the end of the comment by
304 seeing if an asterisk is before every '/' we encounter. Returns
305 non-zero if comment terminated by EOF, zero otherwise. */
041c3194
ZW
306static int
307skip_block_comment (pfile)
45b966db
ZW
308 cpp_reader *pfile;
309{
041c3194 310 cpp_buffer *buffer = pfile->buffer;
d8090680 311 cppchar_t c = EOF, prevc = EOF;
0d9f234d 312
cbcff6df 313 pfile->state.lexing_comment = 1;
0d9f234d 314 while (buffer->cur != buffer->rlimit)
45b966db 315 {
0d9f234d
NB
316 prevc = c, c = *buffer->cur++;
317
318 next_char:
319 /* FIXME: For speed, create a new character class of characters
93c80368 320 of interest inside block comments. */
0d9f234d
NB
321 if (c == '?' || c == '\\')
322 c = skip_escaped_newlines (buffer, c);
041c3194 323
0d9f234d
NB
324 /* People like decorating comments with '*', so check for '/'
325 instead for efficiency. */
041c3194 326 if (c == '/')
45b966db 327 {
0d9f234d
NB
328 if (prevc == '*')
329 break;
041c3194 330
0d9f234d
NB
331 /* Warn about potential nested comments, but not if the '/'
332 comes immediately before the true comment delimeter.
041c3194 333 Don't bother to get it right across escaped newlines. */
0d9f234d
NB
334 if (CPP_OPTION (pfile, warn_comments)
335 && buffer->cur != buffer->rlimit)
45b966db 336 {
0d9f234d
NB
337 prevc = c, c = *buffer->cur++;
338 if (c == '*' && buffer->cur != buffer->rlimit)
339 {
340 prevc = c, c = *buffer->cur++;
341 if (c != '/')
342 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
343 CPP_BUF_COL (buffer),
344 "\"/*\" within comment");
345 }
346 goto next_char;
45b966db 347 }
45b966db 348 }
91fcd158 349 else if (is_vspace (c))
45b966db 350 {
0d9f234d
NB
351 prevc = c, c = handle_newline (buffer, c);
352 goto next_char;
45b966db 353 }
52fadca8 354 else if (c == '\t')
0d9f234d 355 adjust_column (pfile);
45b966db 356 }
041c3194 357
cbcff6df 358 pfile->state.lexing_comment = 0;
0d9f234d
NB
359 buffer->read_ahead = EOF;
360 return c != '/' || prevc != '*';
45b966db
ZW
361}
362
f9a0e96c 363/* Skip a C++ line comment. Handles escaped newlines. Returns
0d9f234d
NB
364 non-zero if a multiline comment. The following new line, if any,
365 is left in buffer->read_ahead. */
041c3194 366static int
cbcff6df
NB
367skip_line_comment (pfile)
368 cpp_reader *pfile;
45b966db 369{
cbcff6df 370 cpp_buffer *buffer = pfile->buffer;
0d9f234d
NB
371 unsigned int orig_lineno = buffer->lineno;
372 cppchar_t c;
041c3194 373
cbcff6df 374 pfile->state.lexing_comment = 1;
0d9f234d 375 do
041c3194 376 {
0d9f234d
NB
377 c = EOF;
378 if (buffer->cur == buffer->rlimit)
379 break;
041c3194 380
0d9f234d
NB
381 c = *buffer->cur++;
382 if (c == '?' || c == '\\')
383 c = skip_escaped_newlines (buffer, c);
041c3194 384 }
0d9f234d 385 while (!is_vspace (c));
45b966db 386
cbcff6df 387 pfile->state.lexing_comment = 0;
0d9f234d
NB
388 buffer->read_ahead = c; /* Leave any newline for caller. */
389 return orig_lineno != buffer->lineno;
041c3194 390}
45b966db 391
0d9f234d
NB
392/* pfile->buffer->cur is one beyond the \t character. Update
393 col_adjust so we track the column correctly. */
52fadca8 394static void
0d9f234d 395adjust_column (pfile)
52fadca8 396 cpp_reader *pfile;
52fadca8 397{
0d9f234d
NB
398 cpp_buffer *buffer = pfile->buffer;
399 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
52fadca8
NB
400
401 /* Round it up to multiple of the tabstop, but subtract 1 since the
402 tab itself occupies a character position. */
0d9f234d
NB
403 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
404 - col % CPP_OPTION (pfile, tabstop)) - 1;
52fadca8
NB
405}
406
0d9f234d
NB
407/* Skips whitespace, saving the next non-whitespace character.
408 Adjusts pfile->col_adjust to account for tabs. Without this,
409 tokens might be assigned an incorrect column. */
041c3194 410static void
0d9f234d 411skip_whitespace (pfile, c)
041c3194 412 cpp_reader *pfile;
0d9f234d 413 cppchar_t c;
041c3194
ZW
414{
415 cpp_buffer *buffer = pfile->buffer;
0d9f234d 416 unsigned int warned = 0;
45b966db 417
0d9f234d 418 do
041c3194 419 {
91fcd158
NB
420 /* Horizontal space always OK. */
421 if (c == ' ')
0d9f234d 422 ;
91fcd158 423 else if (c == '\t')
0d9f234d
NB
424 adjust_column (pfile);
425 /* Just \f \v or \0 left. */
91fcd158 426 else if (c == '\0')
041c3194 427 {
91fcd158 428 if (!warned)
0d9f234d
NB
429 {
430 cpp_warning (pfile, "null character(s) ignored");
431 warned = 1;
432 }
45b966db 433 }
93c80368 434 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
91fcd158
NB
435 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
436 CPP_BUF_COL (buffer),
437 "%s in preprocessing directive",
438 c == '\f' ? "form feed" : "vertical tab");
0d9f234d
NB
439
440 c = EOF;
441 if (buffer->cur == buffer->rlimit)
442 break;
443 c = *buffer->cur++;
45b966db 444 }
0d9f234d
NB
445 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
446 while (is_nvspace (c));
447
448 /* Remember the next character. */
449 buffer->read_ahead = c;
041c3194 450}
45b966db 451
93c80368
NB
452/* See if the characters of a number token are valid in a name (no
453 '.', '+' or '-'). */
454static int
455name_p (pfile, string)
456 cpp_reader *pfile;
457 const cpp_string *string;
458{
459 unsigned int i;
460
461 for (i = 0; i < string->len; i++)
462 if (!is_idchar (string->text[i]))
463 return 0;
464
465 return 1;
466}
467
0d9f234d
NB
468/* Parse an identifier, skipping embedded backslash-newlines.
469 Calculate the hash value of the token while parsing, for improved
470 performance. The hashing algorithm *must* match cpp_lookup(). */
471
472static cpp_hashnode *
473parse_identifier (pfile, c)
45b966db 474 cpp_reader *pfile;
0d9f234d 475 cppchar_t c;
45b966db 476{
93c80368 477 cpp_hashnode *result;
0d9f234d 478 cpp_buffer *buffer = pfile->buffer;
2a967f3d
NB
479 unsigned int saw_dollar = 0, len;
480 struct obstack *stack = &pfile->hash_table->stack;
041c3194 481
0d9f234d 482 do
041c3194 483 {
0d9f234d 484 do
041c3194 485 {
2a967f3d 486 obstack_1grow (stack, c);
45b966db 487
0d9f234d
NB
488 if (c == '$')
489 saw_dollar++;
ba89d661 490
0d9f234d
NB
491 c = EOF;
492 if (buffer->cur == buffer->rlimit)
493 break;
ba89d661 494
0d9f234d
NB
495 c = *buffer->cur++;
496 }
497 while (is_idchar (c));
ba89d661 498
0d9f234d
NB
499 /* Potential escaped newline? */
500 if (c != '?' && c != '\\')
501 break;
502 c = skip_escaped_newlines (buffer, c);
041c3194 503 }
0d9f234d
NB
504 while (is_idchar (c));
505
93c80368
NB
506 /* Remember the next character. */
507 buffer->read_ahead = c;
508
0d9f234d
NB
509 /* $ is not a identifier character in the standard, but is commonly
510 accepted as an extension. Don't warn about it in skipped
511 conditional blocks. */
cef0d199 512 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
0d9f234d
NB
513 cpp_pedwarn (pfile, "'$' character(s) in identifier");
514
93c80368 515 /* Identifiers are null-terminated. */
2a967f3d
NB
516 len = obstack_object_size (stack);
517 obstack_1grow (stack, '\0');
93c80368
NB
518
519 /* This routine commits the memory if necessary. */
2a967f3d
NB
520 result = (cpp_hashnode *)
521 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
93c80368
NB
522
523 /* Some identifiers require diagnostics when lexed. */
cef0d199 524 if (result->flags & NODE_DIAGNOSTIC && !pfile->state.skipping)
93c80368
NB
525 {
526 /* It is allowed to poison the same identifier twice. */
527 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
a28c5035
NB
528 cpp_error (pfile, "attempt to use poisoned \"%s\"",
529 NODE_NAME (result));
93c80368
NB
530
531 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
28e0f040 532 replacement list of a variadic macro. */
93c80368
NB
533 if (result == pfile->spec_nodes.n__VA_ARGS__
534 && !pfile->state.va_args_ok)
28e0f040 535 cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
93c80368
NB
536 }
537
538 return result;
45b966db
ZW
539}
540
0d9f234d 541/* Parse a number, skipping embedded backslash-newlines. */
45b966db 542static void
93c80368 543parse_number (pfile, number, c, leading_period)
45b966db 544 cpp_reader *pfile;
0d9f234d
NB
545 cpp_string *number;
546 cppchar_t c;
93c80368 547 int leading_period;
45b966db 548{
041c3194 549 cpp_buffer *buffer = pfile->buffer;
49fe13f6 550 cpp_pool *pool = &pfile->ident_pool;
93c80368 551 unsigned char *dest, *limit;
45b966db 552
93c80368
NB
553 dest = POOL_FRONT (pool);
554 limit = POOL_LIMIT (pool);
cbcff6df 555
93c80368
NB
556 /* Place a leading period. */
557 if (leading_period)
558 {
559 if (dest >= limit)
560 limit = _cpp_next_chunk (pool, 0, &dest);
561 *dest++ = '.';
562 }
563
0d9f234d 564 do
041c3194 565 {
0d9f234d
NB
566 do
567 {
93c80368
NB
568 /* Need room for terminating null. */
569 if (dest + 1 >= limit)
570 limit = _cpp_next_chunk (pool, 0, &dest);
571 *dest++ = c;
0d9f234d 572
0d9f234d
NB
573 c = EOF;
574 if (buffer->cur == buffer->rlimit)
575 break;
45b966db 576
0d9f234d
NB
577 c = *buffer->cur++;
578 }
93c80368 579 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
45b966db 580
0d9f234d
NB
581 /* Potential escaped newline? */
582 if (c != '?' && c != '\\')
583 break;
584 c = skip_escaped_newlines (buffer, c);
45b966db 585 }
93c80368 586 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
cbcff6df 587
0d9f234d
NB
588 /* Remember the next character. */
589 buffer->read_ahead = c;
64aaf407 590
93c80368
NB
591 /* Null-terminate the number. */
592 *dest = '\0';
593
594 number->text = POOL_FRONT (pool);
595 number->len = dest - number->text;
596 POOL_COMMIT (pool, number->len + 1);
0d9f234d
NB
597}
598
599/* Subroutine of parse_string. Emits error for unterminated strings. */
600static void
93c80368 601unterminated (pfile, term)
0d9f234d 602 cpp_reader *pfile;
0d9f234d
NB
603 int term;
604{
605 cpp_error (pfile, "missing terminating %c character", term);
606
93c80368
NB
607 if (term == '\"' && pfile->mlstring_pos.line
608 && pfile->mlstring_pos.line != pfile->lexer_pos.line)
041c3194 609 {
93c80368
NB
610 cpp_error_with_line (pfile, pfile->mlstring_pos.line,
611 pfile->mlstring_pos.col,
0d9f234d 612 "possible start of unterminated string literal");
93c80368 613 pfile->mlstring_pos.line = 0;
041c3194 614 }
45b966db
ZW
615}
616
93c80368
NB
617/* Subroutine of parse_string. */
618static int
619unescaped_terminator_p (pfile, dest)
620 cpp_reader *pfile;
621 const unsigned char *dest;
622{
623 const unsigned char *start, *temp;
624
625 /* In #include-style directives, terminators are not escapeable. */
626 if (pfile->state.angled_headers)
627 return 1;
628
49fe13f6 629 start = POOL_FRONT (&pfile->ident_pool);
93c80368
NB
630
631 /* An odd number of consecutive backslashes represents an escaped
632 terminator. */
633 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
634 ;
635
636 return ((dest - temp) & 1) == 0;
637}
638
0d9f234d 639/* Parses a string, character constant, or angle-bracketed header file
7868b4a2
NB
640 name. Handles embedded trigraphs and escaped newlines. The stored
641 string is guaranteed NUL-terminated, but it is not guaranteed that
642 this is the first NUL since embedded NULs are preserved.
45b966db 643
7868b4a2 644 Multi-line strings are allowed, but they are deprecated. */
041c3194 645static void
0d9f234d 646parse_string (pfile, token, terminator)
45b966db 647 cpp_reader *pfile;
041c3194 648 cpp_token *token;
0d9f234d 649 cppchar_t terminator;
45b966db 650{
041c3194 651 cpp_buffer *buffer = pfile->buffer;
49fe13f6 652 cpp_pool *pool = &pfile->ident_pool;
93c80368 653 unsigned char *dest, *limit;
0d9f234d
NB
654 cppchar_t c;
655 unsigned int nulls = 0;
656
93c80368
NB
657 dest = POOL_FRONT (pool);
658 limit = POOL_LIMIT (pool);
659
0d9f234d 660 for (;;)
45b966db 661 {
0d9f234d 662 if (buffer->cur == buffer->rlimit)
7868b4a2
NB
663 c = EOF;
664 else
665 c = *buffer->cur++;
666
667 have_char:
668 /* We need space for the terminating NUL. */
669 if (dest >= limit)
670 limit = _cpp_next_chunk (pool, 0, &dest);
671
672 if (c == EOF)
0d9f234d 673 {
93c80368 674 unterminated (pfile, terminator);
0d9f234d
NB
675 break;
676 }
0d9f234d 677
0d9f234d
NB
678 /* Handle trigraphs, escaped newlines etc. */
679 if (c == '?' || c == '\\')
680 c = skip_escaped_newlines (buffer, c);
45b966db 681
93c80368 682 if (c == terminator && unescaped_terminator_p (pfile, dest))
45b966db 683 {
93c80368
NB
684 c = EOF;
685 break;
0d9f234d
NB
686 }
687 else if (is_vspace (c))
688 {
689 /* In assembly language, silently terminate string and
690 character literals at end of line. This is a kludge
691 around not knowing where comments are. */
bdb05a7b 692 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
0d9f234d 693 break;
45b966db 694
0d9f234d
NB
695 /* Character constants and header names may not extend over
696 multiple lines. In Standard C, neither may strings.
697 Unfortunately, we accept multiline strings as an
16eb2788
NB
698 extension, except in #include family directives. */
699 if (terminator != '"' || pfile->state.angled_headers)
45b966db 700 {
93c80368 701 unterminated (pfile, terminator);
0d9f234d 702 break;
45b966db 703 }
45b966db 704
795a25f4 705 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
93c80368 706 if (pfile->mlstring_pos.line == 0)
c691145a 707 pfile->mlstring_pos = pfile->lexer_pos;
0d9f234d 708
7868b4a2
NB
709 c = handle_newline (buffer, c);
710 *dest++ = '\n';
711 goto have_char;
0d9f234d
NB
712 }
713 else if (c == '\0')
714 {
715 if (nulls++ == 0)
716 cpp_warning (pfile, "null character(s) preserved in literal");
45b966db 717 }
45b966db 718
93c80368 719 *dest++ = c;
45b966db
ZW
720 }
721
93c80368 722 /* Remember the next character. */
0d9f234d 723 buffer->read_ahead = c;
7868b4a2 724 *dest = '\0';
45b966db 725
93c80368
NB
726 token->val.str.text = POOL_FRONT (pool);
727 token->val.str.len = dest - token->val.str.text;
7868b4a2 728 POOL_COMMIT (pool, token->val.str.len + 1);
0d9f234d 729}
041c3194 730
93c80368 731/* The stored comment includes the comment start and any terminator. */
9e62c811 732static void
0d9f234d
NB
733save_comment (pfile, token, from)
734 cpp_reader *pfile;
041c3194
ZW
735 cpp_token *token;
736 const unsigned char *from;
9e62c811 737{
041c3194 738 unsigned char *buffer;
0d9f234d 739 unsigned int len;
0d9f234d 740
1c6d33ef 741 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
3542203b
NB
742 /* C++ comments probably (not definitely) have moved past a new
743 line, which we don't want to save in the comment. */
744 if (pfile->buffer->read_ahead != EOF)
745 len--;
49fe13f6 746 buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
041c3194 747
041c3194 748 token->type = CPP_COMMENT;
bfb9dc7f 749 token->val.str.len = len;
0d9f234d 750 token->val.str.text = buffer;
45b966db 751
1c6d33ef
NB
752 buffer[0] = '/';
753 memcpy (buffer + 1, from, len - 1);
0d9f234d 754}
45b966db 755
cbcff6df
NB
756/* Subroutine of lex_token to handle '%'. A little tricky, since we
757 want to avoid stepping back when lexing %:%X. */
0d9f234d 758static void
cbcff6df 759lex_percent (buffer, result)
0d9f234d
NB
760 cpp_buffer *buffer;
761 cpp_token *result;
0d9f234d 762{
cbcff6df
NB
763 cppchar_t c;
764
765 result->type = CPP_MOD;
766 /* Parsing %:%X could leave an extra character. */
767 if (buffer->extra_char == EOF)
768 c = get_effective_char (buffer);
769 else
770 {
771 c = buffer->read_ahead = buffer->extra_char;
772 buffer->extra_char = EOF;
773 }
774
775 if (c == '=')
776 ACCEPT_CHAR (CPP_MOD_EQ);
777 else if (CPP_OPTION (buffer->pfile, digraphs))
778 {
779 if (c == ':')
780 {
781 result->flags |= DIGRAPH;
782 ACCEPT_CHAR (CPP_HASH);
783 if (get_effective_char (buffer) == '%')
784 {
785 buffer->extra_char = get_effective_char (buffer);
786 if (buffer->extra_char == ':')
787 {
788 buffer->extra_char = EOF;
789 ACCEPT_CHAR (CPP_PASTE);
790 }
791 else
792 /* We'll catch the extra_char when we're called back. */
793 buffer->read_ahead = '%';
794 }
795 }
796 else if (c == '>')
797 {
798 result->flags |= DIGRAPH;
799 ACCEPT_CHAR (CPP_CLOSE_BRACE);
800 }
801 }
802}
803
804/* Subroutine of lex_token to handle '.'. This is tricky, since we
805 want to avoid stepping back when lexing '...' or '.123'. In the
806 latter case we should also set a flag for parse_number. */
807static void
808lex_dot (pfile, result)
809 cpp_reader *pfile;
810 cpp_token *result;
811{
812 cpp_buffer *buffer = pfile->buffer;
813 cppchar_t c;
814
815 /* Parsing ..X could leave an extra character. */
816 if (buffer->extra_char == EOF)
817 c = get_effective_char (buffer);
818 else
819 {
820 c = buffer->read_ahead = buffer->extra_char;
821 buffer->extra_char = EOF;
822 }
0d9f234d 823
cbcff6df
NB
824 /* All known character sets have 0...9 contiguous. */
825 if (c >= '0' && c <= '9')
826 {
827 result->type = CPP_NUMBER;
93c80368 828 parse_number (pfile, &result->val.str, c, 1);
cbcff6df 829 }
041c3194 830 else
ea4a453b 831 {
cbcff6df
NB
832 result->type = CPP_DOT;
833 if (c == '.')
834 {
835 buffer->extra_char = get_effective_char (buffer);
836 if (buffer->extra_char == '.')
837 {
838 buffer->extra_char = EOF;
839 ACCEPT_CHAR (CPP_ELLIPSIS);
840 }
841 else
842 /* We'll catch the extra_char when we're called back. */
843 buffer->read_ahead = '.';
844 }
845 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
846 ACCEPT_CHAR (CPP_DOT_STAR);
ea4a453b 847 }
45b966db
ZW
848}
849
93c80368
NB
850void
851_cpp_lex_token (pfile, result)
45b966db 852 cpp_reader *pfile;
0d9f234d 853 cpp_token *result;
45b966db 854{
0d9f234d 855 cppchar_t c;
adb84b42 856 cpp_buffer *buffer;
0d9f234d 857 const unsigned char *comment_start;
8d9e9a08 858 unsigned char bol;
9ec7291f 859
8d9e9a08
NB
860 skip:
861 bol = pfile->state.next_bol;
adb84b42
NB
862 done_directive:
863 buffer = pfile->buffer;
4c2b647d 864 pfile->state.next_bol = 0;
bd969772
NB
865 result->flags = buffer->saved_flags;
866 buffer->saved_flags = 0;
0d9f234d 867 next_char:
93c80368 868 pfile->lexer_pos.line = buffer->lineno;
0d9f234d 869 next_char2:
93c80368 870 pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
041c3194 871
0d9f234d
NB
872 c = buffer->read_ahead;
873 if (c == EOF && buffer->cur < buffer->rlimit)
874 {
875 c = *buffer->cur++;
93c80368 876 pfile->lexer_pos.col++;
0d9f234d 877 }
45b966db 878
0d9f234d
NB
879 do_switch:
880 buffer->read_ahead = EOF;
881 switch (c)
45b966db 882 {
0d9f234d 883 case EOF:
800914c3
NB
884 /* Non-empty files should end in a newline. Checking "bol" too
885 prevents multiple warnings when hitting the EOF more than
886 once, like in a directive. Don't warn for command line and
887 _Pragma buffers. */
888 if (pfile->lexer_pos.col != 0 && !bol && !buffer->from_stage3)
93c80368 889 cpp_pedwarn (pfile, "no newline at end of file");
4c2b647d 890 pfile->state.next_bol = 1;
cef0d199 891 pfile->state.skipping = 0; /* In case missing #endif. */
0d9f234d 892 result->type = CPP_EOF;
29b10746
NB
893 /* Don't do MI optimisation. */
894 return;
45b966db 895
0d9f234d
NB
896 case ' ': case '\t': case '\f': case '\v': case '\0':
897 skip_whitespace (pfile, c);
898 result->flags |= PREV_WHITE;
899 goto next_char2;
900
901 case '\n': case '\r':
a949941c 902 if (!pfile->state.in_directive)
45b966db 903 {
93c80368 904 handle_newline (buffer, c);
a949941c 905 bol = 1;
93c80368 906 pfile->lexer_pos.output_line = buffer->lineno;
8d9e9a08
NB
907 /* This is a new line, so clear any white space flag.
908 Newlines in arguments are white space (6.10.3.10);
909 parse_arg takes care of that. */
bd969772 910 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
a949941c 911 goto next_char;
45b966db 912 }
93c80368 913
a949941c
NB
914 /* Don't let directives spill over to the next line. */
915 buffer->read_ahead = c;
4c2b647d 916 pfile->state.next_bol = 1;
93c80368 917 result->type = CPP_EOF;
cef0d199 918 /* Don't break; pfile->state.skipping might be true. */
28e0f040 919 return;
46d07497 920
0d9f234d
NB
921 case '?':
922 case '\\':
923 /* These could start an escaped newline, or '?' a trigraph. Let
924 skip_escaped_newlines do all the work. */
925 {
926 unsigned int lineno = buffer->lineno;
927
928 c = skip_escaped_newlines (buffer, c);
929 if (lineno != buffer->lineno)
930 /* We had at least one escaped newline of some sort, and the
931 next character is in buffer->read_ahead. Update the
932 token's line and column. */
933 goto next_char;
934
935 /* We are either the original '?' or '\\', or a trigraph. */
936 result->type = CPP_QUERY;
937 buffer->read_ahead = EOF;
938 if (c == '\\')
12c4f523 939 goto random_char;
0d9f234d
NB
940 else if (c != '?')
941 goto do_switch;
942 }
943 break;
46d07497 944
0d9f234d
NB
945 case '0': case '1': case '2': case '3': case '4':
946 case '5': case '6': case '7': case '8': case '9':
947 result->type = CPP_NUMBER;
93c80368 948 parse_number (pfile, &result->val.str, c, 0);
0d9f234d 949 break;
46d07497 950
0d9f234d
NB
951 case '$':
952 if (!CPP_OPTION (pfile, dollars_in_ident))
953 goto random_char;
954 /* Fall through... */
955
956 case '_':
957 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
958 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
959 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
960 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
961 case 'y': case 'z':
962 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
963 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
964 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
965 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
966 case 'Y': case 'Z':
967 result->type = CPP_NAME;
968 result->val.node = parse_identifier (pfile, c);
969
970 /* 'L' may introduce wide characters or strings. */
93c80368 971 if (result->val.node == pfile->spec_nodes.n_L)
0d9f234d
NB
972 {
973 c = buffer->read_ahead; /* For make_string. */
974 if (c == '\'' || c == '"')
ba89d661 975 {
0d9f234d
NB
976 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
977 goto make_string;
ba89d661 978 }
0d9f234d
NB
979 }
980 /* Convert named operators to their proper types. */
93c80368 981 else if (result->val.node->flags & NODE_OPERATOR)
0d9f234d
NB
982 {
983 result->flags |= NAMED_OP;
93c80368 984 result->type = result->val.node->value.operator;
0d9f234d
NB
985 }
986 break;
987
988 case '\'':
989 case '"':
990 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
991 make_string:
992 parse_string (pfile, result, c);
993 break;
041c3194 994
0d9f234d 995 case '/':
1c6d33ef
NB
996 /* A potential block or line comment. */
997 comment_start = buffer->cur;
0d9f234d
NB
998 result->type = CPP_DIV;
999 c = get_effective_char (buffer);
1000 if (c == '=')
1001 ACCEPT_CHAR (CPP_DIV_EQ);
1c6d33ef
NB
1002 if (c != '/' && c != '*')
1003 break;
e61fc951 1004
1c6d33ef
NB
1005 if (c == '*')
1006 {
0d9f234d 1007 if (skip_block_comment (pfile))
93c80368
NB
1008 cpp_error_with_line (pfile, pfile->lexer_pos.line,
1009 pfile->lexer_pos.col,
0d9f234d 1010 "unterminated comment");
0d9f234d 1011 }
1c6d33ef 1012 else
0d9f234d 1013 {
1c6d33ef
NB
1014 if (!CPP_OPTION (pfile, cplusplus_comments)
1015 && !CPP_IN_SYSTEM_HEADER (pfile))
1016 break;
1017
bdb05a7b
NB
1018 /* Warn about comments only if pedantically GNUC89, and not
1019 in system headers. */
1020 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
a94c1199 1021 && ! buffer->warned_cplusplus_comments)
041c3194 1022 {
1c6d33ef
NB
1023 cpp_pedwarn (pfile,
1024 "C++ style comments are not allowed in ISO C89");
1025 cpp_pedwarn (pfile,
1026 "(this will be reported only once per input file)");
1027 buffer->warned_cplusplus_comments = 1;
1028 }
0d9f234d 1029
a94c1199 1030 /* Skip_line_comment updates buffer->read_ahead. */
01ef6563 1031 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
93c80368
NB
1032 cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1033 pfile->lexer_pos.col,
1c6d33ef
NB
1034 "multi-line comment");
1035 }
0d9f234d 1036
1c6d33ef
NB
1037 /* Skipping the comment has updated buffer->read_ahead. */
1038 if (!pfile->state.save_comments)
1039 {
1040 result->flags |= PREV_WHITE;
1041 goto next_char;
0d9f234d 1042 }
1c6d33ef
NB
1043
1044 /* Save the comment as a token in its own right. */
1045 save_comment (pfile, result, comment_start);
29b10746
NB
1046 /* Don't do MI optimisation. */
1047 return;
0d9f234d
NB
1048
1049 case '<':
1050 if (pfile->state.angled_headers)
1051 {
1052 result->type = CPP_HEADER_NAME;
1053 c = '>'; /* terminator. */
1054 goto make_string;
1055 }
45b966db 1056
0d9f234d
NB
1057 result->type = CPP_LESS;
1058 c = get_effective_char (buffer);
1059 if (c == '=')
1060 ACCEPT_CHAR (CPP_LESS_EQ);
1061 else if (c == '<')
1062 {
1063 ACCEPT_CHAR (CPP_LSHIFT);
1064 if (get_effective_char (buffer) == '=')
1065 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1066 }
1067 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1068 {
1069 ACCEPT_CHAR (CPP_MIN);
1070 if (get_effective_char (buffer) == '=')
1071 ACCEPT_CHAR (CPP_MIN_EQ);
1072 }
1073 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1074 {
1075 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1076 result->flags |= DIGRAPH;
1077 }
1078 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1079 {
1080 ACCEPT_CHAR (CPP_OPEN_BRACE);
1081 result->flags |= DIGRAPH;
1082 }
1083 break;
1084
1085 case '>':
1086 result->type = CPP_GREATER;
1087 c = get_effective_char (buffer);
1088 if (c == '=')
1089 ACCEPT_CHAR (CPP_GREATER_EQ);
1090 else if (c == '>')
1091 {
1092 ACCEPT_CHAR (CPP_RSHIFT);
1093 if (get_effective_char (buffer) == '=')
1094 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1095 }
1096 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1097 {
1098 ACCEPT_CHAR (CPP_MAX);
1099 if (get_effective_char (buffer) == '=')
1100 ACCEPT_CHAR (CPP_MAX_EQ);
1101 }
1102 break;
1103
cbcff6df
NB
1104 case '%':
1105 lex_percent (buffer, result);
93c80368
NB
1106 if (result->type == CPP_HASH)
1107 goto do_hash;
0d9f234d
NB
1108 break;
1109
cbcff6df
NB
1110 case '.':
1111 lex_dot (pfile, result);
0d9f234d 1112 break;
45b966db 1113
0d9f234d
NB
1114 case '+':
1115 result->type = CPP_PLUS;
1116 c = get_effective_char (buffer);
1117 if (c == '=')
1118 ACCEPT_CHAR (CPP_PLUS_EQ);
1119 else if (c == '+')
1120 ACCEPT_CHAR (CPP_PLUS_PLUS);
1121 break;
04e3ec78 1122
0d9f234d
NB
1123 case '-':
1124 result->type = CPP_MINUS;
1125 c = get_effective_char (buffer);
1126 if (c == '>')
1127 {
1128 ACCEPT_CHAR (CPP_DEREF);
1129 if (CPP_OPTION (pfile, cplusplus)
1130 && get_effective_char (buffer) == '*')
1131 ACCEPT_CHAR (CPP_DEREF_STAR);
1132 }
1133 else if (c == '=')
1134 ACCEPT_CHAR (CPP_MINUS_EQ);
1135 else if (c == '-')
1136 ACCEPT_CHAR (CPP_MINUS_MINUS);
1137 break;
45b966db 1138
0d9f234d
NB
1139 case '*':
1140 result->type = CPP_MULT;
1141 if (get_effective_char (buffer) == '=')
1142 ACCEPT_CHAR (CPP_MULT_EQ);
1143 break;
04e3ec78 1144
0d9f234d
NB
1145 case '=':
1146 result->type = CPP_EQ;
1147 if (get_effective_char (buffer) == '=')
1148 ACCEPT_CHAR (CPP_EQ_EQ);
1149 break;
f8f769ea 1150
0d9f234d
NB
1151 case '!':
1152 result->type = CPP_NOT;
1153 if (get_effective_char (buffer) == '=')
1154 ACCEPT_CHAR (CPP_NOT_EQ);
1155 break;
45b966db 1156
0d9f234d
NB
1157 case '&':
1158 result->type = CPP_AND;
1159 c = get_effective_char (buffer);
1160 if (c == '=')
1161 ACCEPT_CHAR (CPP_AND_EQ);
1162 else if (c == '&')
1163 ACCEPT_CHAR (CPP_AND_AND);
1164 break;
1165
1166 case '#':
adb84b42
NB
1167 c = buffer->extra_char; /* Can be set by error condition below. */
1168 if (c != EOF)
1169 {
1170 buffer->read_ahead = c;
1171 buffer->extra_char = EOF;
1172 }
1173 else
1174 c = get_effective_char (buffer);
1175
1176 if (c == '#')
93c80368 1177 {
a949941c
NB
1178 ACCEPT_CHAR (CPP_PASTE);
1179 break;
1180 }
1181
1182 result->type = CPP_HASH;
1183 do_hash:
e8408f25
NB
1184 if (!bol)
1185 break;
1186 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1187 tokens within the list of arguments that would otherwise act
1188 as preprocessing directives, the behavior is undefined.
1189
1190 This implementation will report a hard error, terminate the
1191 macro invocation, and proceed to process the directive. */
1192 if (pfile->state.parsing_args)
a949941c 1193 {
e8408f25
NB
1194 if (pfile->state.parsing_args == 2)
1195 cpp_error (pfile,
1196 "directives may not be used inside a macro argument");
1197
1198 /* Put a '#' in lookahead, return CPP_EOF for parse_arg. */
1199 buffer->extra_char = buffer->read_ahead;
1200 buffer->read_ahead = '#';
1201 pfile->state.next_bol = 1;
1202 result->type = CPP_EOF;
1203
1204 /* Get whitespace right - newline_in_args sets it. */
1205 if (pfile->lexer_pos.col == 1)
1206 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
1207 }
1208 else
1209 {
1210 /* This is the hash introducing a directive. */
1211 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1212 goto done_directive; /* bol still 1. */
1213 /* This is in fact an assembler #. */
93c80368 1214 }
0d9f234d 1215 break;
45b966db 1216
0d9f234d
NB
1217 case '|':
1218 result->type = CPP_OR;
1219 c = get_effective_char (buffer);
1220 if (c == '=')
1221 ACCEPT_CHAR (CPP_OR_EQ);
1222 else if (c == '|')
1223 ACCEPT_CHAR (CPP_OR_OR);
1224 break;
45b966db 1225
0d9f234d
NB
1226 case '^':
1227 result->type = CPP_XOR;
1228 if (get_effective_char (buffer) == '=')
1229 ACCEPT_CHAR (CPP_XOR_EQ);
1230 break;
45b966db 1231
0d9f234d
NB
1232 case ':':
1233 result->type = CPP_COLON;
1234 c = get_effective_char (buffer);
1235 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1236 ACCEPT_CHAR (CPP_SCOPE);
1237 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1238 {
1239 result->flags |= DIGRAPH;
1240 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1241 }
1242 break;
45b966db 1243
0d9f234d
NB
1244 case '~': result->type = CPP_COMPL; break;
1245 case ',': result->type = CPP_COMMA; break;
1246 case '(': result->type = CPP_OPEN_PAREN; break;
1247 case ')': result->type = CPP_CLOSE_PAREN; break;
1248 case '[': result->type = CPP_OPEN_SQUARE; break;
1249 case ']': result->type = CPP_CLOSE_SQUARE; break;
1250 case '{': result->type = CPP_OPEN_BRACE; break;
1251 case '}': result->type = CPP_CLOSE_BRACE; break;
1252 case ';': result->type = CPP_SEMICOLON; break;
1253
cc937581
ZW
1254 /* @ is a punctuator in Objective C. */
1255 case '@': result->type = CPP_ATSIGN; break;
0d9f234d
NB
1256
1257 random_char:
1258 default:
1259 result->type = CPP_OTHER;
6c53ebff 1260 result->val.c = c;
0d9f234d
NB
1261 break;
1262 }
b528a07e 1263
cef0d199 1264 if (!pfile->state.in_directive && pfile->state.skipping)
8d9e9a08
NB
1265 goto skip;
1266
29b10746
NB
1267 /* If not in a directive, this token invalidates controlling macros. */
1268 if (!pfile->state.in_directive)
6d18adbc 1269 pfile->mi_valid = false;
0d9f234d
NB
1270}
1271
93c80368
NB
1272/* An upper bound on the number of bytes needed to spell a token,
1273 including preceding whitespace. */
1274unsigned int
1275cpp_token_len (token)
1276 const cpp_token *token;
0d9f234d 1277{
93c80368 1278 unsigned int len;
6d2c2047 1279
93c80368 1280 switch (TOKEN_SPELL (token))
041c3194 1281 {
a28c5035
NB
1282 default: len = 0; break;
1283 case SPELL_STRING: len = token->val.str.len; break;
1284 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
041c3194 1285 }
93c80368
NB
1286 /* 1 for whitespace, 4 for comment delimeters. */
1287 return len + 5;
6d2c2047
ZW
1288}
1289
041c3194 1290/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885
ZW
1291 already contain the enough space to hold the token's spelling.
1292 Returns a pointer to the character after the last character
1293 written. */
93c80368
NB
1294unsigned char *
1295cpp_spell_token (pfile, token, buffer)
041c3194
ZW
1296 cpp_reader *pfile; /* Would be nice to be rid of this... */
1297 const cpp_token *token;
1298 unsigned char *buffer;
1299{
96be6998 1300 switch (TOKEN_SPELL (token))
041c3194
ZW
1301 {
1302 case SPELL_OPERATOR:
1303 {
1304 const unsigned char *spelling;
1305 unsigned char c;
d6d5f795 1306
041c3194 1307 if (token->flags & DIGRAPH)
37b8524c
JDA
1308 spelling
1309 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
92936ecf
ZW
1310 else if (token->flags & NAMED_OP)
1311 goto spell_ident;
041c3194 1312 else
96be6998 1313 spelling = TOKEN_NAME (token);
041c3194
ZW
1314
1315 while ((c = *spelling++) != '\0')
1316 *buffer++ = c;
1317 }
1318 break;
d6d5f795 1319
041c3194 1320 case SPELL_IDENT:
92936ecf 1321 spell_ident:
a28c5035
NB
1322 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1323 buffer += NODE_LEN (token->val.node);
041c3194 1324 break;
d6d5f795 1325
041c3194
ZW
1326 case SPELL_STRING:
1327 {
ba89d661
ZW
1328 int left, right, tag;
1329 switch (token->type)
1330 {
1331 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1332 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
ba89d661
ZW
1333 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1334 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1335 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1336 default: left = '\0'; right = '\0'; tag = '\0'; break;
1337 }
1338 if (tag) *buffer++ = tag;
1339 if (left) *buffer++ = left;
bfb9dc7f
ZW
1340 memcpy (buffer, token->val.str.text, token->val.str.len);
1341 buffer += token->val.str.len;
ba89d661 1342 if (right) *buffer++ = right;
041c3194
ZW
1343 }
1344 break;
d6d5f795 1345
041c3194 1346 case SPELL_CHAR:
6c53ebff 1347 *buffer++ = token->val.c;
041c3194 1348 break;
d6d5f795 1349
041c3194 1350 case SPELL_NONE:
96be6998 1351 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
041c3194
ZW
1352 break;
1353 }
d6d5f795 1354
041c3194
ZW
1355 return buffer;
1356}
d6d5f795 1357
93c80368
NB
1358/* Returns a token as a null-terminated string. The string is
1359 temporary, and automatically freed later. Useful for diagnostics. */
1360unsigned char *
1361cpp_token_as_text (pfile, token)
c5a04734 1362 cpp_reader *pfile;
041c3194 1363 const cpp_token *token;
c5a04734 1364{
93c80368 1365 unsigned int len = cpp_token_len (token);
49fe13f6 1366 unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
c5a04734 1367
93c80368
NB
1368 end = cpp_spell_token (pfile, token, start);
1369 end[0] = '\0';
c5a04734 1370
93c80368
NB
1371 return start;
1372}
c5a04734 1373
93c80368
NB
1374/* Used by C front ends. Should really move to using cpp_token_as_text. */
1375const char *
1376cpp_type2name (type)
1377 enum cpp_ttype type;
1378{
1379 return (const char *) token_spellings[type].name;
1380}
c5a04734 1381
93c80368
NB
1382/* Writes the spelling of token to FP. Separate from cpp_spell_token
1383 for efficiency - to avoid double-buffering. Also, outputs a space
1384 if PREV_WHITE is flagged. */
1385void
1386cpp_output_token (token, fp)
1387 const cpp_token *token;
1388 FILE *fp;
1389{
1390 if (token->flags & PREV_WHITE)
1391 putc (' ', fp);
d8090680 1392
93c80368 1393 switch (TOKEN_SPELL (token))
c5a04734 1394 {
93c80368
NB
1395 case SPELL_OPERATOR:
1396 {
1397 const unsigned char *spelling;
c5a04734 1398
93c80368 1399 if (token->flags & DIGRAPH)
37b8524c
JDA
1400 spelling
1401 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
93c80368
NB
1402 else if (token->flags & NAMED_OP)
1403 goto spell_ident;
1404 else
1405 spelling = TOKEN_NAME (token);
041c3194 1406
93c80368
NB
1407 ufputs (spelling, fp);
1408 }
1409 break;
041c3194 1410
93c80368
NB
1411 spell_ident:
1412 case SPELL_IDENT:
a28c5035 1413 ufputs (NODE_NAME (token->val.node), fp);
93c80368 1414 break;
041c3194 1415
93c80368
NB
1416 case SPELL_STRING:
1417 {
1418 int left, right, tag;
1419 switch (token->type)
1420 {
1421 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1422 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
93c80368
NB
1423 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1424 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1425 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1426 default: left = '\0'; right = '\0'; tag = '\0'; break;
1427 }
1428 if (tag) putc (tag, fp);
1429 if (left) putc (left, fp);
1430 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1431 if (right) putc (right, fp);
1432 }
1433 break;
c5a04734 1434
93c80368 1435 case SPELL_CHAR:
6c53ebff 1436 putc (token->val.c, fp);
93c80368 1437 break;
c5a04734 1438
93c80368
NB
1439 case SPELL_NONE:
1440 /* An error, most probably. */
1441 break;
041c3194 1442 }
c5a04734
ZW
1443}
1444
93c80368
NB
1445/* Compare two tokens. */
1446int
1447_cpp_equiv_tokens (a, b)
1448 const cpp_token *a, *b;
c5a04734 1449{
93c80368
NB
1450 if (a->type == b->type && a->flags == b->flags)
1451 switch (TOKEN_SPELL (a))
1452 {
1453 default: /* Keep compiler happy. */
1454 case SPELL_OPERATOR:
1455 return 1;
1456 case SPELL_CHAR:
6c53ebff 1457 return a->val.c == b->val.c; /* Character. */
93c80368 1458 case SPELL_NONE:
56051c0a 1459 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
93c80368
NB
1460 case SPELL_IDENT:
1461 return a->val.node == b->val.node;
1462 case SPELL_STRING:
1463 return (a->val.str.len == b->val.str.len
1464 && !memcmp (a->val.str.text, b->val.str.text,
1465 a->val.str.len));
1466 }
c5a04734 1467
041c3194
ZW
1468 return 0;
1469}
1470
041c3194
ZW
1471/* Determine whether two tokens can be pasted together, and if so,
1472 what the resulting token is. Returns CPP_EOF if the tokens cannot
1473 be pasted, or the appropriate type for the merged token if they
1474 can. */
7de4d004 1475enum cpp_ttype
93c80368 1476cpp_can_paste (pfile, token1, token2, digraph)
041c3194
ZW
1477 cpp_reader * pfile;
1478 const cpp_token *token1, *token2;
1479 int* digraph;
c5a04734 1480{
041c3194
ZW
1481 enum cpp_ttype a = token1->type, b = token2->type;
1482 int cxx = CPP_OPTION (pfile, cplusplus);
c5a04734 1483
92936ecf
ZW
1484 /* Treat named operators as if they were ordinary NAMEs. */
1485 if (token1->flags & NAMED_OP)
1486 a = CPP_NAME;
1487 if (token2->flags & NAMED_OP)
1488 b = CPP_NAME;
1489
37b8524c
JDA
1490 if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1491 return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
c5a04734 1492
041c3194 1493 switch (a)
c5a04734 1494 {
041c3194
ZW
1495 case CPP_GREATER:
1496 if (b == a) return CPP_RSHIFT;
1497 if (b == CPP_QUERY && cxx) return CPP_MAX;
1498 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
1499 break;
1500 case CPP_LESS:
1501 if (b == a) return CPP_LSHIFT;
1502 if (b == CPP_QUERY && cxx) return CPP_MIN;
1503 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
9b55f29a
NB
1504 if (CPP_OPTION (pfile, digraphs))
1505 {
1506 if (b == CPP_COLON)
1507 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1508 if (b == CPP_MOD)
1509 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
1510 }
041c3194 1511 break;
c5a04734 1512
041c3194
ZW
1513 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1514 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1515 case CPP_OR: if (b == a) return CPP_OR_OR; break;
c5a04734 1516
041c3194
ZW
1517 case CPP_MINUS:
1518 if (b == a) return CPP_MINUS_MINUS;
1519 if (b == CPP_GREATER) return CPP_DEREF;
1520 break;
1521 case CPP_COLON:
1522 if (b == a && cxx) return CPP_SCOPE;
9b55f29a 1523 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
041c3194
ZW
1524 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1525 break;
1526
1527 case CPP_MOD:
9b55f29a
NB
1528 if (CPP_OPTION (pfile, digraphs))
1529 {
1530 if (b == CPP_GREATER)
1531 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1532 if (b == CPP_COLON)
1533 {*digraph = 1; return CPP_HASH;} /* %: digraph */
1534 }
041c3194
ZW
1535 break;
1536 case CPP_DEREF:
1537 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1538 break;
1539 case CPP_DOT:
1540 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1541 if (b == CPP_NUMBER) return CPP_NUMBER;
1542 break;
1543
1544 case CPP_HASH:
1545 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1546 /* %:%: digraph */
1547 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1548 break;
1549
1550 case CPP_NAME:
1551 if (b == CPP_NAME) return CPP_NAME;
1552 if (b == CPP_NUMBER
93c80368 1553 && name_p (pfile, &token2->val.str)) return CPP_NAME;
041c3194 1554 if (b == CPP_CHAR
93c80368 1555 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
041c3194 1556 if (b == CPP_STRING
93c80368 1557 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
041c3194
ZW
1558 break;
1559
1560 case CPP_NUMBER:
1561 if (b == CPP_NUMBER) return CPP_NUMBER;
1562 if (b == CPP_NAME) return CPP_NUMBER;
1563 if (b == CPP_DOT) return CPP_NUMBER;
1564 /* Numbers cannot have length zero, so this is safe. */
1565 if ((b == CPP_PLUS || b == CPP_MINUS)
bfb9dc7f 1566 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
041c3194
ZW
1567 return CPP_NUMBER;
1568 break;
1569
1570 default:
1571 break;
c5a04734
ZW
1572 }
1573
041c3194
ZW
1574 return CPP_EOF;
1575}
1576
93c80368
NB
1577/* Returns nonzero if a space should be inserted to avoid an
1578 accidental token paste for output. For simplicity, it is
1579 conservative, and occasionally advises a space where one is not
1580 needed, e.g. "." and ".2". */
041c3194 1581
93c80368
NB
1582int
1583cpp_avoid_paste (pfile, token1, token2)
c5a04734 1584 cpp_reader *pfile;
93c80368 1585 const cpp_token *token1, *token2;
c5a04734 1586{
93c80368
NB
1587 enum cpp_ttype a = token1->type, b = token2->type;
1588 cppchar_t c;
c5a04734 1589
93c80368
NB
1590 if (token1->flags & NAMED_OP)
1591 a = CPP_NAME;
1592 if (token2->flags & NAMED_OP)
1593 b = CPP_NAME;
c5a04734 1594
93c80368
NB
1595 c = EOF;
1596 if (token2->flags & DIGRAPH)
37b8524c 1597 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
93c80368
NB
1598 else if (token_spellings[b].category == SPELL_OPERATOR)
1599 c = token_spellings[b].name[0];
c5a04734 1600
93c80368 1601 /* Quickly get everything that can paste with an '='. */
37b8524c 1602 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
93c80368 1603 return 1;
c5a04734 1604
93c80368 1605 switch (a)
c5a04734 1606 {
93c80368
NB
1607 case CPP_GREATER: return c == '>' || c == '?';
1608 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1609 case CPP_PLUS: return c == '+';
1610 case CPP_MINUS: return c == '-' || c == '>';
1611 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1612 case CPP_MOD: return c == ':' || c == '>';
1613 case CPP_AND: return c == '&';
1614 case CPP_OR: return c == '|';
1615 case CPP_COLON: return c == ':' || c == '>';
1616 case CPP_DEREF: return c == '*';
26ec42ee 1617 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
93c80368
NB
1618 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1619 case CPP_NAME: return ((b == CPP_NUMBER
1620 && name_p (pfile, &token2->val.str))
1621 || b == CPP_NAME
1622 || b == CPP_CHAR || b == CPP_STRING); /* L */
1623 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1624 || c == '.' || c == '+' || c == '-');
1625 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
6c53ebff 1626 && token1->val.c == '@'
93c80368
NB
1627 && (b == CPP_NAME || b == CPP_STRING));
1628 default: break;
c5a04734 1629 }
c5a04734 1630
417f3e3a 1631 return 0;
c5a04734
ZW
1632}
1633
93c80368
NB
1634/* Output all the remaining tokens on the current line, and a newline
1635 character, to FP. Leading whitespace is removed. */
c5a04734 1636void
93c80368 1637cpp_output_line (pfile, fp)
c5a04734 1638 cpp_reader *pfile;
93c80368 1639 FILE *fp;
c5a04734 1640{
93c80368 1641 cpp_token token;
96be6998 1642
7f2f1a66 1643 cpp_get_token (pfile, &token);
93c80368
NB
1644 token.flags &= ~PREV_WHITE;
1645 while (token.type != CPP_EOF)
96be6998 1646 {
93c80368 1647 cpp_output_token (&token, fp);
7f2f1a66 1648 cpp_get_token (pfile, &token);
96be6998
ZW
1649 }
1650
93c80368 1651 putc ('\n', fp);
041c3194 1652}
c5a04734 1653
c8a96070
NB
/* Returns the numeric value (0-15) of the hexadecimal digit C.
   Aborts if C is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  unsigned int first, bias;

  if (c >= '0' && c <= '9')
    {
      first = '0';
      bias = 0;
    }
  else if (c >= 'A' && c <= 'F')
    {
      first = 'A';
      bias = 10;
    }
  else if (c >= 'a' && c <= 'f')
    {
      first = 'a';
      bias = 10;
    }
  else
    abort ();

  return c - first + bias;
}
1667
62729350
NB
1668/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1669 failure if cpplib is not parsing C++ or C99. Such failure is
1670 silent, and no variables are updated. Otherwise returns 0, and
1671 warns if -Wtraditional.
c8a96070
NB
1672
1673 [lex.charset]: The character designated by the universal character
1674 name \UNNNNNNNN is that character whose character short name in
1675 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1676 universal character name \uNNNN is that character whose character
1677 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1678 for a universal character name is less than 0x20 or in the range
1679 0x7F-0x9F (inclusive), or if the universal character name
1680 designates a character in the basic source character set, then the
1681 program is ill-formed.
1682
1683 We assume that wchar_t is Unicode, so we don't need to do any
62729350 1684 mapping. Is this ever wrong?
c8a96070 1685
62729350
NB
1686 PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1687 LIMIT is the end of the string or charconst. PSTR is updated to
1688 point after the UCS on return, and the UCS is written into PC. */
1689
1690static int
1691maybe_read_ucs (pfile, pstr, limit, pc)
c8a96070
NB
1692 cpp_reader *pfile;
1693 const unsigned char **pstr;
1694 const unsigned char *limit;
62729350 1695 unsigned int *pc;
c8a96070
NB
1696{
1697 const unsigned char *p = *pstr;
62729350
NB
1698 unsigned int code = 0;
1699 unsigned int c = *pc, length;
1700
1701 /* Only attempt to interpret a UCS for C++ and C99. */
1702 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1703 return 1;
c8a96070 1704
62729350
NB
1705 if (CPP_WTRADITIONAL (pfile))
1706 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
c8a96070 1707
f8710242
NB
1708 length = (c == 'u' ? 4: 8);
1709
1710 if ((size_t) (limit - p) < length)
1711 {
1712 cpp_error (pfile, "incomplete universal-character-name");
1713 /* Skip to the end to avoid more diagnostics. */
1714 p = limit;
1715 }
1716 else
1717 {
1718 for (; length; length--, p++)
c8a96070 1719 {
f8710242
NB
1720 c = *p;
1721 if (ISXDIGIT (c))
1722 code = (code << 4) + hex_digit_value (c);
1723 else
1724 {
1725 cpp_error (pfile,
1726 "non-hex digit '%c' in universal-character-name", c);
1727 /* We shouldn't skip in case there are multibyte chars. */
1728 break;
1729 }
c8a96070 1730 }
c8a96070
NB
1731 }
1732
1733#ifdef TARGET_EBCDIC
1734 cpp_error (pfile, "universal-character-name on EBCDIC target");
1735 code = 0x3f; /* EBCDIC invalid character */
1736#else
f8710242
NB
1737 /* True extended characters are OK. */
1738 if (code >= 0xa0
1739 && !(code & 0x80000000)
1740 && !(code >= 0xD800 && code <= 0xDFFF))
1741 ;
1742 /* The standard permits $, @ and ` to be specified as UCNs. We use
1743 hex escapes so that this also works with EBCDIC hosts. */
1744 else if (code == 0x24 || code == 0x40 || code == 0x60)
1745 ;
1746 /* Don't give another error if one occurred above. */
1747 else if (length == 0)
1748 cpp_error (pfile, "universal-character-name out of range");
c8a96070
NB
1749#endif
1750
1751 *pstr = p;
62729350
NB
1752 *pc = code;
1753 return 0;
c8a96070
NB
1754}
1755
1756/* Interpret an escape sequence, and return its value. PSTR points to
1757 the input pointer, which is just after the backslash. LIMIT is how
62729350
NB
1758 much text we have. MASK is a bitmask for the precision for the
1759 destination type (char or wchar_t). TRADITIONAL, if true, does not
1760 interpret escapes that did not exist in traditional C.
c8a96070 1761
62729350
NB
1762 Handles all relevant diagnostics. */
1763
1764unsigned int
1765cpp_parse_escape (pfile, pstr, limit, mask, traditional)
c8a96070
NB
1766 cpp_reader *pfile;
1767 const unsigned char **pstr;
1768 const unsigned char *limit;
62729350 1769 unsigned HOST_WIDE_INT mask;
c8a96070
NB
1770 int traditional;
1771{
1772 int unknown = 0;
1773 const unsigned char *str = *pstr;
1774 unsigned int c = *str++;
1775
1776 switch (c)
1777 {
1778 case '\\': case '\'': case '"': case '?': break;
1779 case 'b': c = TARGET_BS; break;
1780 case 'f': c = TARGET_FF; break;
1781 case 'n': c = TARGET_NEWLINE; break;
1782 case 'r': c = TARGET_CR; break;
1783 case 't': c = TARGET_TAB; break;
1784 case 'v': c = TARGET_VT; break;
1785
1786 case '(': case '{': case '[': case '%':
1787 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1788 '\%' is used to prevent SCCS from getting confused. */
1789 unknown = CPP_PEDANTIC (pfile);
1790 break;
1791
1792 case 'a':
1793 if (CPP_WTRADITIONAL (pfile))
1794 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1795 if (!traditional)
1796 c = TARGET_BELL;
1797 break;
1798
1799 case 'e': case 'E':
1800 if (CPP_PEDANTIC (pfile))
1801 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1802 c = TARGET_ESC;
1803 break;
1804
c8a96070 1805 case 'u': case 'U':
62729350 1806 unknown = maybe_read_ucs (pfile, &str, limit, &c);
c8a96070
NB
1807 break;
1808
1809 case 'x':
1810 if (CPP_WTRADITIONAL (pfile))
1811 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1812
1813 if (!traditional)
1814 {
1815 unsigned int i = 0, overflow = 0;
1816 int digits_found = 0;
1817
1818 while (str < limit)
1819 {
1820 c = *str;
1821 if (! ISXDIGIT (c))
1822 break;
1823 str++;
1824 overflow |= i ^ (i << 4 >> 4);
1825 i = (i << 4) + hex_digit_value (c);
1826 digits_found = 1;
1827 }
1828
1829 if (!digits_found)
1830 cpp_error (pfile, "\\x used with no following hex digits");
1831
1832 if (overflow | (i != (i & mask)))
1833 {
1834 cpp_pedwarn (pfile, "hex escape sequence out of range");
1835 i &= mask;
1836 }
1837 c = i;
1838 }
1839 break;
1840
1841 case '0': case '1': case '2': case '3':
1842 case '4': case '5': case '6': case '7':
1843 {
1844 unsigned int i = c - '0';
1845 int count = 0;
1846
1847 while (str < limit && ++count < 3)
1848 {
1849 c = *str;
1850 if (c < '0' || c > '7')
1851 break;
1852 str++;
1853 i = (i << 3) + c - '0';
1854 }
1855
1856 if (i != (i & mask))
1857 {
1858 cpp_pedwarn (pfile, "octal escape sequence out of range");
1859 i &= mask;
1860 }
1861 c = i;
1862 }
1863 break;
1864
1865 default:
1866 unknown = 1;
1867 break;
1868 }
1869
1870 if (unknown)
1871 {
1872 if (ISGRAPH (c))
1873 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1874 else
1875 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1876 }
1877
62729350
NB
1878 if (c > mask)
1879 cpp_pedwarn (pfile, "escape sequence out of range for character");
1880
c8a96070
NB
1881 *pstr = str;
1882 return c;
1883}
1884
1885#ifndef MAX_CHAR_TYPE_SIZE
1886#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1887#endif
1888
1889#ifndef MAX_WCHAR_TYPE_SIZE
1890#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1891#endif
1892
1893/* Interpret a (possibly wide) character constant in TOKEN.
1894 WARN_MULTI warns about multi-character charconsts, if not
1895 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1896 that did not exist in traditional C. PCHARS_SEEN points to a
1897 variable that is filled in with the number of characters seen. */
1898HOST_WIDE_INT
1899cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1900 cpp_reader *pfile;
1901 const cpp_token *token;
1902 int warn_multi;
1903 int traditional;
1904 unsigned int *pchars_seen;
1905{
1906 const unsigned char *str = token->val.str.text;
1907 const unsigned char *limit = str + token->val.str.len;
1908 unsigned int chars_seen = 0;
1909 unsigned int width, max_chars, c;
2a967f3d
NB
1910 unsigned HOST_WIDE_INT mask;
1911 HOST_WIDE_INT result = 0;
c8a96070
NB
1912
1913#ifdef MULTIBYTE_CHARS
1914 (void) local_mbtowc (NULL, NULL, 0);
1915#endif
1916
1917 /* Width in bits. */
1918 if (token->type == CPP_CHAR)
1919 width = MAX_CHAR_TYPE_SIZE;
1920 else
1921 width = MAX_WCHAR_TYPE_SIZE;
1922
1923 if (width < HOST_BITS_PER_WIDE_INT)
1924 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1925 else
1926 mask = ~0;
1927 max_chars = HOST_BITS_PER_WIDE_INT / width;
1928
1929 while (str < limit)
1930 {
1931#ifdef MULTIBYTE_CHARS
1932 wchar_t wc;
1933 int char_len;
1934
1935 char_len = local_mbtowc (&wc, str, limit - str);
1936 if (char_len == -1)
1937 {
1938 cpp_warning (pfile, "ignoring invalid multibyte character");
1939 c = *str++;
1940 }
1941 else
1942 {
1943 str += char_len;
1944 c = wc;
1945 }
1946#else
1947 c = *str++;
1948#endif
1949
1950 if (c == '\\')
62729350 1951 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
c8a96070
NB
1952
1953#ifdef MAP_CHARACTER
1954 if (ISPRINT (c))
1955 c = MAP_CHARACTER (c);
1956#endif
1957
1958 /* Merge character into result; ignore excess chars. */
1959 if (++chars_seen <= max_chars)
1960 {
1961 if (width < HOST_BITS_PER_WIDE_INT)
1962 result = (result << width) | (c & mask);
1963 else
1964 result = c;
1965 }
1966 }
1967
1968 if (chars_seen == 0)
1969 cpp_error (pfile, "empty character constant");
1970 else if (chars_seen > max_chars)
1971 {
1972 chars_seen = max_chars;
f8710242 1973 cpp_warning (pfile, "character constant too long");
c8a96070
NB
1974 }
1975 else if (chars_seen > 1 && !traditional && warn_multi)
1976 cpp_warning (pfile, "multi-character character constant");
1977
1978 /* If char type is signed, sign-extend the constant. The
1979 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
1980 if (token->type == CPP_CHAR && chars_seen)
1981 {
1982 unsigned int nbits = chars_seen * width;
1983 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
1984
1985 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
1986 || ((result >> (nbits - 1)) & 1) == 0)
1987 result &= mask;
1988 else
1989 result |= ~mask;
1990 }
1991
1992 *pchars_seen = chars_seen;
1993 return result;
1994}
1995
93c80368 1996/* Memory pools. */
417f3e3a 1997
/* Layout probe: a single char followed by a union of a double and a
   pointer.  The offset of the union within the structure is the
   padding the compiler inserts to align it, which is used below as
   the default allocation alignment.  */
struct dummy
{
  char c;
  union
  {
    int *p;
    double d;
  } u;
};

/* Alignment used when a caller does not request a specific one.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
417f3e3a 2009
93c80368
NB
2010static int
2011chunk_suitable (pool, chunk, size)
2012 cpp_pool *pool;
2013 cpp_chunk *chunk;
2014 unsigned int size;
2015{
2016 /* Being at least twice SIZE means we can use memcpy in
2017 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2018 anyway. */
2019 return (chunk && pool->locked != chunk
2020 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
041c3194 2021}
c5a04734 2022
93c80368
NB
2023/* Returns the end of the new pool. PTR points to a char in the old
2024 pool, and is updated to point to the same char in the new pool. */
2025unsigned char *
2026_cpp_next_chunk (pool, len, ptr)
2027 cpp_pool *pool;
2028 unsigned int len;
2029 unsigned char **ptr;
041c3194 2030{
93c80368 2031 cpp_chunk *chunk = pool->cur->next;
c5a04734 2032
93c80368
NB
2033 /* LEN is the minimum size we want in the new pool. */
2034 len += POOL_ROOM (pool);
2035 if (! chunk_suitable (pool, chunk, len))
041c3194 2036 {
93c80368 2037 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
c5a04734 2038
93c80368
NB
2039 chunk->next = pool->cur->next;
2040 pool->cur->next = chunk;
c5a04734
ZW
2041 }
2042
93c80368
NB
2043 /* Update the pointer before changing chunk's front. */
2044 if (ptr)
2045 *ptr += chunk->base - POOL_FRONT (pool);
041c3194 2046
93c80368
NB
2047 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2048 chunk->front = chunk->base;
041c3194 2049
93c80368
NB
2050 pool->cur = chunk;
2051 return POOL_LIMIT (pool);
c5a04734
ZW
2052}
2053
93c80368
NB
2054static cpp_chunk *
2055new_chunk (size)
2056 unsigned int size;
041c3194 2057{
93c80368
NB
2058 unsigned char *base;
2059 cpp_chunk *result;
3fef5b2b 2060
269592a8 2061 size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
93c80368
NB
2062 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2063 /* Put the chunk descriptor at the end. Then chunk overruns will
2064 cause obvious chaos. */
2065 result = (cpp_chunk *) (base + size);
2066 result->base = base;
2067 result->front = base;
2068 result->limit = base + size;
2069 result->next = 0;
417f3e3a 2070
93c80368 2071 return result;
041c3194
ZW
2072}
2073
93c80368
NB
2074void
2075_cpp_init_pool (pool, size, align, temp)
2076 cpp_pool *pool;
2077 unsigned int size, align, temp;
2078{
2079 if (align == 0)
2080 align = DEFAULT_ALIGNMENT;
2081 if (align & (align - 1))
2082 abort ();
2083 pool->align = align;
2084 pool->cur = new_chunk (size);
2085 pool->locked = 0;
2086 pool->locks = 0;
2087 if (temp)
2088 pool->cur->next = pool->cur;
041c3194
ZW
2089}
2090
93c80368
NB
2091void
2092_cpp_lock_pool (pool)
2093 cpp_pool *pool;
041c3194 2094{
93c80368
NB
2095 if (pool->locks++ == 0)
2096 pool->locked = pool->cur;
041c3194
ZW
2097}
2098
93c80368
NB
2099void
2100_cpp_unlock_pool (pool)
2101 cpp_pool *pool;
041c3194 2102{
93c80368
NB
2103 if (--pool->locks == 0)
2104 pool->locked = 0;
041c3194
ZW
2105}
2106
93c80368
NB
2107void
2108_cpp_free_pool (pool)
2109 cpp_pool *pool;
3fef5b2b 2110{
93c80368 2111 cpp_chunk *chunk = pool->cur, *next;
3fef5b2b 2112
93c80368 2113 do
3fef5b2b 2114 {
93c80368
NB
2115 next = chunk->next;
2116 free (chunk->base);
2117 chunk = next;
3fef5b2b 2118 }
93c80368 2119 while (chunk && chunk != pool->cur);
041c3194 2120}
041c3194 2121
93c80368
NB
2122/* Reserve LEN bytes from a memory pool. */
2123unsigned char *
2124_cpp_pool_reserve (pool, len)
2125 cpp_pool *pool;
2126 unsigned int len;
041c3194 2127{
269592a8 2128 len = POOL_ALIGN (len, pool->align);
93c80368
NB
2129 if (len > (unsigned int) POOL_ROOM (pool))
2130 _cpp_next_chunk (pool, len, 0);
041c3194 2131
93c80368 2132 return POOL_FRONT (pool);
c5a04734
ZW
2133}
2134
93c80368
NB
2135/* Allocate LEN bytes from a memory pool. */
2136unsigned char *
2137_cpp_pool_alloc (pool, len)
2138 cpp_pool *pool;
2139 unsigned int len;
041c3194 2140{
93c80368 2141 unsigned char *result = _cpp_pool_reserve (pool, len);
417f3e3a 2142
93c80368
NB
2143 POOL_COMMIT (pool, len);
2144 return result;
041c3194 2145}
This page took 0.633014 seconds and 5 git commands to generate.