]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
gcse.c (replace_store_insn): Use delete_insn.
[gcc.git] / gcc / cpplex.c
CommitLineData
45b966db
ZW
1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
93c80368
NB
23/* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
27
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
041c3194 36
45b966db
ZW
37#include "config.h"
38#include "system.h"
45b966db
ZW
39#include "cpplib.h"
40#include "cpphash.h"
41
c8a96070
NB
42/* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
45#ifdef CROSS_COMPILE
46#undef MULTIBYTE_CHARS
47#endif
48
49#ifdef MULTIBYTE_CHARS
50#include "mbchar.h"
51#include <locale.h>
52#endif
53
93c80368
NB
54/* Tokens with SPELL_STRING store their spelling in the token list,
55 and it's length in the token->val.name.len. */
56enum spell_type
f9a0e96c 57{
93c80368
NB
58 SPELL_OPERATOR = 0,
59 SPELL_CHAR,
60 SPELL_IDENT,
61 SPELL_STRING,
62 SPELL_NONE
f9a0e96c
ZW
63};
64
93c80368 65struct token_spelling
f9a0e96c 66{
93c80368
NB
67 enum spell_type category;
68 const unsigned char *name;
f9a0e96c
ZW
69};
70
93c80368
NB
71const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
72 U":>", U"<%", U"%>"};
73
74#define OP(e, s) { SPELL_OPERATOR, U s },
75#define TK(e, s) { s, U STRINGX (e) },
76const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
77#undef OP
78#undef TK
79
80#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
f2d5f0cc 82
1444f2ed 83static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
29401c30
NB
84static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t));
85static cppchar_t get_effective_char PARAMS ((cpp_reader *));
0d9f234d 86
041c3194 87static int skip_block_comment PARAMS ((cpp_reader *));
cbcff6df 88static int skip_line_comment PARAMS ((cpp_reader *));
0d9f234d
NB
89static void adjust_column PARAMS ((cpp_reader *));
90static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
2c3fcba6
ZW
91static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
92static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
93 const U_CHAR *));
93c80368
NB
94static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
95static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
0d9f234d 96static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
93c80368 97static void unterminated PARAMS ((cpp_reader *, int));
0d9f234d
NB
98static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
99static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
29401c30 100static void lex_percent PARAMS ((cpp_reader *, cpp_token *));
cbcff6df 101static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
93c80368 102static int name_p PARAMS ((cpp_reader *, const cpp_string *));
62729350
NB
103static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
104 const unsigned char *, unsigned int *));
5fddcffc 105static tokenrun *next_tokenrun PARAMS ((tokenrun *));
f617b8e2 106
93c80368 107static cpp_chunk *new_chunk PARAMS ((unsigned int));
1e013d2e 108static int chunk_suitable PARAMS ((cpp_chunk *, unsigned int));
c8a96070 109static unsigned int hex_digit_value PARAMS ((unsigned int));
b8af0ca5 110static _cpp_buff *new_buff PARAMS ((unsigned int));
15dad1d9 111
041c3194 112/* Utility routine:
9e62c811 113
bfb9dc7f
ZW
114 Compares, the token TOKEN to the NUL-terminated string STRING.
115 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
15dad1d9 116
041c3194 117int
bfb9dc7f
ZW
118cpp_ideq (token, string)
119 const cpp_token *token;
041c3194
ZW
120 const char *string;
121{
bfb9dc7f 122 if (token->type != CPP_NAME)
041c3194 123 return 0;
bfb9dc7f 124
a28c5035 125 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
15dad1d9 126}
1368ee70 127
0d9f234d
NB
128/* Call when meeting a newline. Returns the character after the newline
129 (or carriage-return newline combination), or EOF. */
130static cppchar_t
1444f2ed
NB
131handle_newline (pfile, newline_char)
132 cpp_reader *pfile;
0d9f234d
NB
133 cppchar_t newline_char;
134{
1444f2ed 135 cpp_buffer *buffer;
0d9f234d
NB
136 cppchar_t next = EOF;
137
1444f2ed 138 pfile->line++;
1444f2ed 139 buffer = pfile->buffer;
0d9f234d 140 buffer->col_adjust = 0;
0d9f234d
NB
141 buffer->line_base = buffer->cur;
142
143 /* Handle CR-LF and LF-CR combinations, get the next character. */
144 if (buffer->cur < buffer->rlimit)
145 {
146 next = *buffer->cur++;
147 if (next + newline_char == '\r' + '\n')
148 {
149 buffer->line_base = buffer->cur;
150 if (buffer->cur < buffer->rlimit)
151 next = *buffer->cur++;
152 else
153 next = EOF;
154 }
155 }
156
157 buffer->read_ahead = next;
158 return next;
159}
160
161/* Subroutine of skip_escaped_newlines; called when a trigraph is
162 encountered. It warns if necessary, and returns true if the
163 trigraph should be honoured. FROM_CHAR is the third character of a
164 trigraph, and presumed to be the previous character for position
165 reporting. */
45b966db 166static int
0d9f234d 167trigraph_ok (pfile, from_char)
45b966db 168 cpp_reader *pfile;
0d9f234d 169 cppchar_t from_char;
45b966db 170{
041c3194
ZW
171 int accept = CPP_OPTION (pfile, trigraphs);
172
cbcff6df
NB
173 /* Don't warn about trigraphs in comments. */
174 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
45b966db 175 {
0d9f234d 176 cpp_buffer *buffer = pfile->buffer;
67821e3a 177
041c3194 178 if (accept)
67821e3a 179 cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
041c3194 180 "trigraph ??%c converted to %c",
0d9f234d
NB
181 (int) from_char,
182 (int) _cpp_trigraph_map[from_char]);
4a5b68a2
NB
183 else if (buffer->cur != buffer->last_Wtrigraphs)
184 {
185 buffer->last_Wtrigraphs = buffer->cur;
67821e3a 186 cpp_warning_with_line (pfile, pfile->line,
4a5b68a2
NB
187 CPP_BUF_COL (buffer) - 2,
188 "trigraph ??%c ignored", (int) from_char);
189 }
45b966db 190 }
0d9f234d 191
041c3194 192 return accept;
45b966db
ZW
193}
194
/* Set the type of the token being lexed to T and consume the
   read-ahead character.  Relies on the invoking function having
   local variables named `buffer' and `result'.  */
#define ACCEPT_CHAR(t) \
  do { result->type = (t); buffer->read_ahead = EOF; } while (0)

/* Remember / restore the read position of the buffer.  Both rely on
   local variables named `buffer' and `saved_cur'.

   When we move to multibyte character sets, add to these something
   that saves and restores the state of the multibyte conversion
   library.  This probably involves saving and restoring a "cookie".
   In the case of glibc it is an 8-byte structure, so is not a high
   overhead operation.  In any case, it's out of the fast path.  */
#define SAVE_STATE()    do { saved_cur = buffer->cur; } while (0)
#define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
206
207/* Skips any escaped newlines introduced by NEXT, which is either a
208 '?' or a '\\'. Returns the next character, which will also have
a5c3cccd
NB
209 been placed in buffer->read_ahead. This routine performs
210 preprocessing stages 1 and 2 of the ISO C standard. */
0d9f234d 211static cppchar_t
29401c30
NB
212skip_escaped_newlines (pfile, next)
213 cpp_reader *pfile;
0d9f234d 214 cppchar_t next;
45b966db 215{
29401c30
NB
216 cpp_buffer *buffer = pfile->buffer;
217
a5c3cccd
NB
218 /* Only do this if we apply stages 1 and 2. */
219 if (!buffer->from_stage3)
041c3194 220 {
a5c3cccd
NB
221 cppchar_t next1;
222 const unsigned char *saved_cur;
223 int space;
224
225 do
0d9f234d 226 {
a5c3cccd
NB
227 if (buffer->cur == buffer->rlimit)
228 break;
229
230 SAVE_STATE ();
231 if (next == '?')
0d9f234d 232 {
a5c3cccd
NB
233 next1 = *buffer->cur++;
234 if (next1 != '?' || buffer->cur == buffer->rlimit)
235 {
236 RESTORE_STATE ();
237 break;
238 }
239
240 next1 = *buffer->cur++;
241 if (!_cpp_trigraph_map[next1]
29401c30 242 || !trigraph_ok (pfile, next1))
a5c3cccd
NB
243 {
244 RESTORE_STATE ();
245 break;
246 }
247
248 /* We have a full trigraph here. */
249 next = _cpp_trigraph_map[next1];
250 if (next != '\\' || buffer->cur == buffer->rlimit)
251 break;
252 SAVE_STATE ();
253 }
254
255 /* We have a backslash, and room for at least one more character. */
256 space = 0;
257 do
258 {
259 next1 = *buffer->cur++;
260 if (!is_nvspace (next1))
261 break;
262 space = 1;
0d9f234d 263 }
a5c3cccd 264 while (buffer->cur < buffer->rlimit);
041c3194 265
a5c3cccd 266 if (!is_vspace (next1))
0d9f234d
NB
267 {
268 RESTORE_STATE ();
269 break;
270 }
45b966db 271
29401c30
NB
272 if (space && !pfile->state.lexing_comment)
273 cpp_warning (pfile, "backslash and newline separated by space");
0d9f234d 274
29401c30 275 next = handle_newline (pfile, next1);
a5c3cccd 276 if (next == EOF)
29401c30 277 cpp_pedwarn (pfile, "backslash-newline at end of file");
0d9f234d 278 }
a5c3cccd 279 while (next == '\\' || next == '?');
041c3194 280 }
45b966db 281
0d9f234d
NB
282 buffer->read_ahead = next;
283 return next;
45b966db
ZW
284}
285
0d9f234d
NB
286/* Obtain the next character, after trigraph conversion and skipping
287 an arbitrary string of escaped newlines. The common case of no
288 trigraphs or escaped newlines falls through quickly. */
289static cppchar_t
29401c30
NB
290get_effective_char (pfile)
291 cpp_reader *pfile;
64aaf407 292{
29401c30 293 cpp_buffer *buffer = pfile->buffer;
0d9f234d
NB
294 cppchar_t next = EOF;
295
296 if (buffer->cur < buffer->rlimit)
297 {
298 next = *buffer->cur++;
299
300 /* '?' can introduce trigraphs (and therefore backslash); '\\'
301 can introduce escaped newlines, which we want to skip, or
302 UCNs, which, depending upon lexer state, we will handle in
303 the future. */
304 if (next == '?' || next == '\\')
29401c30 305 next = skip_escaped_newlines (pfile, next);
0d9f234d
NB
306 }
307
308 buffer->read_ahead = next;
309 return next;
64aaf407
NB
310}
311
0d9f234d
NB
312/* Skip a C-style block comment. We find the end of the comment by
313 seeing if an asterisk is before every '/' we encounter. Returns
314 non-zero if comment terminated by EOF, zero otherwise. */
041c3194
ZW
315static int
316skip_block_comment (pfile)
45b966db
ZW
317 cpp_reader *pfile;
318{
041c3194 319 cpp_buffer *buffer = pfile->buffer;
d8090680 320 cppchar_t c = EOF, prevc = EOF;
0d9f234d 321
cbcff6df 322 pfile->state.lexing_comment = 1;
0d9f234d 323 while (buffer->cur != buffer->rlimit)
45b966db 324 {
0d9f234d
NB
325 prevc = c, c = *buffer->cur++;
326
327 next_char:
328 /* FIXME: For speed, create a new character class of characters
93c80368 329 of interest inside block comments. */
0d9f234d 330 if (c == '?' || c == '\\')
29401c30 331 c = skip_escaped_newlines (pfile, c);
041c3194 332
0d9f234d
NB
333 /* People like decorating comments with '*', so check for '/'
334 instead for efficiency. */
041c3194 335 if (c == '/')
45b966db 336 {
0d9f234d
NB
337 if (prevc == '*')
338 break;
041c3194 339
0d9f234d
NB
340 /* Warn about potential nested comments, but not if the '/'
341 comes immediately before the true comment delimeter.
041c3194 342 Don't bother to get it right across escaped newlines. */
0d9f234d
NB
343 if (CPP_OPTION (pfile, warn_comments)
344 && buffer->cur != buffer->rlimit)
45b966db 345 {
0d9f234d
NB
346 prevc = c, c = *buffer->cur++;
347 if (c == '*' && buffer->cur != buffer->rlimit)
348 {
349 prevc = c, c = *buffer->cur++;
350 if (c != '/')
67821e3a
NB
351 cpp_warning_with_line (pfile, pfile->line,
352 CPP_BUF_COL (buffer) - 2,
0d9f234d
NB
353 "\"/*\" within comment");
354 }
355 goto next_char;
45b966db 356 }
45b966db 357 }
91fcd158 358 else if (is_vspace (c))
45b966db 359 {
1444f2ed 360 prevc = c, c = handle_newline (pfile, c);
0d9f234d 361 goto next_char;
45b966db 362 }
52fadca8 363 else if (c == '\t')
0d9f234d 364 adjust_column (pfile);
45b966db 365 }
041c3194 366
cbcff6df 367 pfile->state.lexing_comment = 0;
0d9f234d
NB
368 buffer->read_ahead = EOF;
369 return c != '/' || prevc != '*';
45b966db
ZW
370}
371
f9a0e96c 372/* Skip a C++ line comment. Handles escaped newlines. Returns
0d9f234d
NB
373 non-zero if a multiline comment. The following new line, if any,
374 is left in buffer->read_ahead. */
041c3194 375static int
cbcff6df
NB
376skip_line_comment (pfile)
377 cpp_reader *pfile;
45b966db 378{
cbcff6df 379 cpp_buffer *buffer = pfile->buffer;
67821e3a 380 unsigned int orig_line = pfile->line;
0d9f234d 381 cppchar_t c;
041c3194 382
cbcff6df 383 pfile->state.lexing_comment = 1;
0d9f234d 384 do
041c3194 385 {
0d9f234d
NB
386 c = EOF;
387 if (buffer->cur == buffer->rlimit)
388 break;
041c3194 389
0d9f234d
NB
390 c = *buffer->cur++;
391 if (c == '?' || c == '\\')
29401c30 392 c = skip_escaped_newlines (pfile, c);
041c3194 393 }
0d9f234d 394 while (!is_vspace (c));
45b966db 395
cbcff6df 396 pfile->state.lexing_comment = 0;
0d9f234d 397 buffer->read_ahead = c; /* Leave any newline for caller. */
67821e3a 398 return orig_line != pfile->line;
041c3194 399}
45b966db 400
0d9f234d
NB
401/* pfile->buffer->cur is one beyond the \t character. Update
402 col_adjust so we track the column correctly. */
52fadca8 403static void
0d9f234d 404adjust_column (pfile)
52fadca8 405 cpp_reader *pfile;
52fadca8 406{
0d9f234d
NB
407 cpp_buffer *buffer = pfile->buffer;
408 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
52fadca8
NB
409
410 /* Round it up to multiple of the tabstop, but subtract 1 since the
411 tab itself occupies a character position. */
0d9f234d
NB
412 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
413 - col % CPP_OPTION (pfile, tabstop)) - 1;
52fadca8
NB
414}
415
0d9f234d
NB
416/* Skips whitespace, saving the next non-whitespace character.
417 Adjusts pfile->col_adjust to account for tabs. Without this,
418 tokens might be assigned an incorrect column. */
041c3194 419static void
0d9f234d 420skip_whitespace (pfile, c)
041c3194 421 cpp_reader *pfile;
0d9f234d 422 cppchar_t c;
041c3194
ZW
423{
424 cpp_buffer *buffer = pfile->buffer;
0d9f234d 425 unsigned int warned = 0;
45b966db 426
0d9f234d 427 do
041c3194 428 {
91fcd158
NB
429 /* Horizontal space always OK. */
430 if (c == ' ')
0d9f234d 431 ;
91fcd158 432 else if (c == '\t')
0d9f234d
NB
433 adjust_column (pfile);
434 /* Just \f \v or \0 left. */
91fcd158 435 else if (c == '\0')
041c3194 436 {
91fcd158 437 if (!warned)
0d9f234d
NB
438 {
439 cpp_warning (pfile, "null character(s) ignored");
440 warned = 1;
441 }
45b966db 442 }
93c80368 443 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
67821e3a 444 cpp_pedwarn_with_line (pfile, pfile->line,
91fcd158
NB
445 CPP_BUF_COL (buffer),
446 "%s in preprocessing directive",
447 c == '\f' ? "form feed" : "vertical tab");
0d9f234d
NB
448
449 c = EOF;
450 if (buffer->cur == buffer->rlimit)
451 break;
452 c = *buffer->cur++;
45b966db 453 }
ec5c56db 454 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
0d9f234d
NB
455 while (is_nvspace (c));
456
457 /* Remember the next character. */
458 buffer->read_ahead = c;
041c3194 459}
45b966db 460
93c80368
NB
461/* See if the characters of a number token are valid in a name (no
462 '.', '+' or '-'). */
463static int
464name_p (pfile, string)
465 cpp_reader *pfile;
466 const cpp_string *string;
467{
468 unsigned int i;
469
470 for (i = 0; i < string->len; i++)
471 if (!is_idchar (string->text[i]))
472 return 0;
473
474 return 1;
475}
476
2c3fcba6
ZW
477/* Parse an identifier, skipping embedded backslash-newlines. This is
478 a critical inner loop. The common case is an identifier which has
479 not been split by backslash-newline, does not contain a dollar
480 sign, and has already been scanned (roughly 10:1 ratio of
481 seen:unseen identifiers in normal code; the distribution is
482 Poisson-like). Second most common case is a new identifier, not
483 split and no dollar sign. The other possibilities are rare and
484 have been relegated to parse_identifier_slow. */
0d9f234d
NB
485
486static cpp_hashnode *
2c3fcba6 487parse_identifier (pfile)
45b966db 488 cpp_reader *pfile;
45b966db 489{
93c80368 490 cpp_hashnode *result;
2c3fcba6
ZW
491 const U_CHAR *cur, *rlimit;
492
493 /* Fast-path loop. Skim over a normal identifier.
494 N.B. ISIDNUM does not include $. */
495 cur = pfile->buffer->cur - 1;
496 rlimit = pfile->buffer->rlimit;
497 do
498 cur++;
499 while (cur < rlimit && ISIDNUM (*cur));
500
501 /* Check for slow-path cases. */
502 if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
503 result = parse_identifier_slow (pfile, cur);
504 else
505 {
506 const U_CHAR *base = pfile->buffer->cur - 1;
507 result = (cpp_hashnode *)
508 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
509 pfile->buffer->cur = cur;
510 }
511
512 /* Rarely, identifiers require diagnostics when lexed.
513 XXX Has to be forced out of the fast path. */
514 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
515 && !pfile->state.skipping, 0))
516 {
517 /* It is allowed to poison the same identifier twice. */
518 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
519 cpp_error (pfile, "attempt to use poisoned \"%s\"",
520 NODE_NAME (result));
521
522 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
523 replacement list of a variadic macro. */
524 if (result == pfile->spec_nodes.n__VA_ARGS__
525 && !pfile->state.va_args_ok)
526 cpp_pedwarn (pfile,
527 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
528 }
529
530 return result;
531}
532
533/* Slow path. This handles identifiers which have been split, and
534 identifiers which contain dollar signs. The part of the identifier
535 from PFILE->buffer->cur-1 to CUR has already been scanned. */
536static cpp_hashnode *
537parse_identifier_slow (pfile, cur)
538 cpp_reader *pfile;
539 const U_CHAR *cur;
540{
0d9f234d 541 cpp_buffer *buffer = pfile->buffer;
2c3fcba6 542 const U_CHAR *base = buffer->cur - 1;
2a967f3d 543 struct obstack *stack = &pfile->hash_table->stack;
2c3fcba6
ZW
544 unsigned int c, saw_dollar = 0, len;
545
546 /* Copy the part of the token which is known to be okay. */
547 obstack_grow (stack, base, cur - base);
041c3194 548
2c3fcba6
ZW
549 /* Now process the part which isn't. We are looking at one of
550 '$', '\\', or '?' on entry to this loop. */
551 c = *cur++;
552 buffer->cur = cur;
0d9f234d 553 do
041c3194 554 {
2c3fcba6
ZW
555 while (is_idchar (c))
556 {
557 obstack_1grow (stack, c);
45b966db 558
2c3fcba6
ZW
559 if (c == '$')
560 saw_dollar++;
ba89d661 561
2c3fcba6
ZW
562 c = EOF;
563 if (buffer->cur == buffer->rlimit)
564 break;
ba89d661 565
2c3fcba6
ZW
566 c = *buffer->cur++;
567 }
ba89d661 568
0d9f234d
NB
569 /* Potential escaped newline? */
570 if (c != '?' && c != '\\')
2c3fcba6 571 break;
29401c30 572 c = skip_escaped_newlines (pfile, c);
041c3194 573 }
0d9f234d
NB
574 while (is_idchar (c));
575
93c80368
NB
576 /* Remember the next character. */
577 buffer->read_ahead = c;
578
0d9f234d
NB
579 /* $ is not a identifier character in the standard, but is commonly
580 accepted as an extension. Don't warn about it in skipped
581 conditional blocks. */
cef0d199 582 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
0d9f234d
NB
583 cpp_pedwarn (pfile, "'$' character(s) in identifier");
584
93c80368 585 /* Identifiers are null-terminated. */
2a967f3d
NB
586 len = obstack_object_size (stack);
587 obstack_1grow (stack, '\0');
93c80368 588
2c3fcba6 589 return (cpp_hashnode *)
2a967f3d 590 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
45b966db
ZW
591}
592
0d9f234d 593/* Parse a number, skipping embedded backslash-newlines. */
45b966db 594static void
93c80368 595parse_number (pfile, number, c, leading_period)
45b966db 596 cpp_reader *pfile;
0d9f234d
NB
597 cpp_string *number;
598 cppchar_t c;
93c80368 599 int leading_period;
45b966db 600{
041c3194 601 cpp_buffer *buffer = pfile->buffer;
93c80368 602 unsigned char *dest, *limit;
45b966db 603
ece54d54
NB
604 dest = BUFF_FRONT (pfile->u_buff);
605 limit = BUFF_LIMIT (pfile->u_buff);
cbcff6df 606
93c80368
NB
607 /* Place a leading period. */
608 if (leading_period)
609 {
ece54d54
NB
610 if (dest == limit)
611 {
612 pfile->u_buff = _cpp_extend_buff (pfile, pfile->u_buff, 1);
613 dest = BUFF_FRONT (pfile->u_buff);
614 limit = BUFF_LIMIT (pfile->u_buff);
615 }
93c80368
NB
616 *dest++ = '.';
617 }
618
0d9f234d 619 do
041c3194 620 {
0d9f234d
NB
621 do
622 {
93c80368 623 /* Need room for terminating null. */
ece54d54
NB
624 if ((size_t) (limit - dest) < 2)
625 {
626 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
627 pfile->u_buff = _cpp_extend_buff (pfile, pfile->u_buff, 2);
628 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
629 limit = BUFF_LIMIT (pfile->u_buff);
630 }
93c80368 631 *dest++ = c;
0d9f234d 632
0d9f234d
NB
633 c = EOF;
634 if (buffer->cur == buffer->rlimit)
635 break;
45b966db 636
0d9f234d
NB
637 c = *buffer->cur++;
638 }
93c80368 639 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
45b966db 640
0d9f234d
NB
641 /* Potential escaped newline? */
642 if (c != '?' && c != '\\')
643 break;
29401c30 644 c = skip_escaped_newlines (pfile, c);
45b966db 645 }
93c80368 646 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
cbcff6df 647
0d9f234d
NB
648 /* Remember the next character. */
649 buffer->read_ahead = c;
64aaf407 650
93c80368
NB
651 /* Null-terminate the number. */
652 *dest = '\0';
653
ece54d54 654 number->text = BUFF_FRONT (pfile->u_buff);
93c80368 655 number->len = dest - number->text;
ece54d54 656 BUFF_FRONT (pfile->u_buff) = dest + 1;
0d9f234d
NB
657}
658
659/* Subroutine of parse_string. Emits error for unterminated strings. */
660static void
93c80368 661unterminated (pfile, term)
0d9f234d 662 cpp_reader *pfile;
0d9f234d
NB
663 int term;
664{
665 cpp_error (pfile, "missing terminating %c character", term);
666
50410426 667 if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
041c3194 668 {
50410426 669 cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
0d9f234d 670 "possible start of unterminated string literal");
50410426 671 pfile->mls_line = 0;
041c3194 672 }
45b966db
ZW
673}
674
93c80368
NB
675/* Subroutine of parse_string. */
676static int
677unescaped_terminator_p (pfile, dest)
678 cpp_reader *pfile;
679 const unsigned char *dest;
680{
681 const unsigned char *start, *temp;
682
683 /* In #include-style directives, terminators are not escapeable. */
684 if (pfile->state.angled_headers)
685 return 1;
686
ece54d54 687 start = BUFF_FRONT (pfile->u_buff);
93c80368
NB
688
689 /* An odd number of consecutive backslashes represents an escaped
690 terminator. */
691 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
692 ;
693
694 return ((dest - temp) & 1) == 0;
695}
696
0d9f234d 697/* Parses a string, character constant, or angle-bracketed header file
7868b4a2
NB
698 name. Handles embedded trigraphs and escaped newlines. The stored
699 string is guaranteed NUL-terminated, but it is not guaranteed that
700 this is the first NUL since embedded NULs are preserved.
45b966db 701
7868b4a2 702 Multi-line strings are allowed, but they are deprecated. */
041c3194 703static void
0d9f234d 704parse_string (pfile, token, terminator)
45b966db 705 cpp_reader *pfile;
041c3194 706 cpp_token *token;
0d9f234d 707 cppchar_t terminator;
45b966db 708{
041c3194 709 cpp_buffer *buffer = pfile->buffer;
93c80368 710 unsigned char *dest, *limit;
0d9f234d 711 cppchar_t c;
d82fc108 712 bool warned_nulls = false, warned_multi = false;
0d9f234d 713
ece54d54
NB
714 dest = BUFF_FRONT (pfile->u_buff);
715 limit = BUFF_LIMIT (pfile->u_buff);
93c80368 716
0d9f234d 717 for (;;)
45b966db 718 {
0d9f234d 719 if (buffer->cur == buffer->rlimit)
7868b4a2
NB
720 c = EOF;
721 else
722 c = *buffer->cur++;
723
724 have_char:
725 /* We need space for the terminating NUL. */
ece54d54
NB
726 if ((size_t) (limit - dest) < 1)
727 {
728 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
729 pfile->u_buff = _cpp_extend_buff (pfile, pfile->u_buff, 2);
730 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
731 limit = BUFF_LIMIT (pfile->u_buff);
732 }
7868b4a2
NB
733
734 if (c == EOF)
0d9f234d 735 {
93c80368 736 unterminated (pfile, terminator);
0d9f234d
NB
737 break;
738 }
0d9f234d 739
0d9f234d
NB
740 /* Handle trigraphs, escaped newlines etc. */
741 if (c == '?' || c == '\\')
29401c30 742 c = skip_escaped_newlines (pfile, c);
45b966db 743
93c80368 744 if (c == terminator && unescaped_terminator_p (pfile, dest))
45b966db 745 {
93c80368
NB
746 c = EOF;
747 break;
0d9f234d
NB
748 }
749 else if (is_vspace (c))
750 {
751 /* In assembly language, silently terminate string and
752 character literals at end of line. This is a kludge
753 around not knowing where comments are. */
bdb05a7b 754 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
0d9f234d 755 break;
45b966db 756
0d9f234d
NB
757 /* Character constants and header names may not extend over
758 multiple lines. In Standard C, neither may strings.
759 Unfortunately, we accept multiline strings as an
16eb2788
NB
760 extension, except in #include family directives. */
761 if (terminator != '"' || pfile->state.angled_headers)
45b966db 762 {
93c80368 763 unterminated (pfile, terminator);
0d9f234d 764 break;
45b966db 765 }
45b966db 766
d82fc108
NB
767 if (!warned_multi)
768 {
769 warned_multi = true;
770 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
771 }
772
50410426
NB
773 if (pfile->mls_line == 0)
774 {
775 pfile->mls_line = token->line;
776 pfile->mls_col = token->col;
777 }
0d9f234d 778
1444f2ed 779 c = handle_newline (pfile, c);
7868b4a2
NB
780 *dest++ = '\n';
781 goto have_char;
0d9f234d 782 }
d82fc108 783 else if (c == '\0' && !warned_nulls)
0d9f234d 784 {
d82fc108
NB
785 warned_nulls = true;
786 cpp_warning (pfile, "null character(s) preserved in literal");
45b966db 787 }
45b966db 788
93c80368 789 *dest++ = c;
45b966db
ZW
790 }
791
93c80368 792 /* Remember the next character. */
0d9f234d 793 buffer->read_ahead = c;
7868b4a2 794 *dest = '\0';
45b966db 795
ece54d54
NB
796 token->val.str.text = BUFF_FRONT (pfile->u_buff);
797 token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
798 BUFF_FRONT (pfile->u_buff) = dest + 1;
0d9f234d 799}
041c3194 800
93c80368 801/* The stored comment includes the comment start and any terminator. */
9e62c811 802static void
0d9f234d
NB
803save_comment (pfile, token, from)
804 cpp_reader *pfile;
041c3194
ZW
805 cpp_token *token;
806 const unsigned char *from;
9e62c811 807{
041c3194 808 unsigned char *buffer;
0d9f234d 809 unsigned int len;
0d9f234d 810
1c6d33ef 811 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
3542203b
NB
812 /* C++ comments probably (not definitely) have moved past a new
813 line, which we don't want to save in the comment. */
814 if (pfile->buffer->read_ahead != EOF)
815 len--;
ece54d54 816 buffer = _cpp_unaligned_alloc (pfile, len);
041c3194 817
041c3194 818 token->type = CPP_COMMENT;
bfb9dc7f 819 token->val.str.len = len;
0d9f234d 820 token->val.str.text = buffer;
45b966db 821
1c6d33ef
NB
822 buffer[0] = '/';
823 memcpy (buffer + 1, from, len - 1);
0d9f234d 824}
45b966db 825
14baae01 826/* Subroutine of _cpp_lex_direct to handle '%'. A little tricky, since we
cbcff6df 827 want to avoid stepping back when lexing %:%X. */
0d9f234d 828static void
29401c30
NB
829lex_percent (pfile, result)
830 cpp_reader *pfile;
0d9f234d 831 cpp_token *result;
0d9f234d 832{
29401c30 833 cpp_buffer *buffer= pfile->buffer;
cbcff6df
NB
834 cppchar_t c;
835
836 result->type = CPP_MOD;
837 /* Parsing %:%X could leave an extra character. */
838 if (buffer->extra_char == EOF)
29401c30 839 c = get_effective_char (pfile);
cbcff6df
NB
840 else
841 {
842 c = buffer->read_ahead = buffer->extra_char;
843 buffer->extra_char = EOF;
844 }
845
846 if (c == '=')
847 ACCEPT_CHAR (CPP_MOD_EQ);
29401c30 848 else if (CPP_OPTION (pfile, digraphs))
cbcff6df
NB
849 {
850 if (c == ':')
851 {
852 result->flags |= DIGRAPH;
853 ACCEPT_CHAR (CPP_HASH);
29401c30 854 if (get_effective_char (pfile) == '%')
cbcff6df 855 {
29401c30 856 buffer->extra_char = get_effective_char (pfile);
cbcff6df
NB
857 if (buffer->extra_char == ':')
858 {
859 buffer->extra_char = EOF;
860 ACCEPT_CHAR (CPP_PASTE);
861 }
862 else
863 /* We'll catch the extra_char when we're called back. */
864 buffer->read_ahead = '%';
865 }
866 }
867 else if (c == '>')
868 {
869 result->flags |= DIGRAPH;
870 ACCEPT_CHAR (CPP_CLOSE_BRACE);
871 }
872 }
873}
874
14baae01 875/* Subroutine of _cpp_lex_direct to handle '.'. This is tricky, since we
cbcff6df
NB
876 want to avoid stepping back when lexing '...' or '.123'. In the
877 latter case we should also set a flag for parse_number. */
878static void
879lex_dot (pfile, result)
880 cpp_reader *pfile;
881 cpp_token *result;
882{
883 cpp_buffer *buffer = pfile->buffer;
884 cppchar_t c;
885
886 /* Parsing ..X could leave an extra character. */
887 if (buffer->extra_char == EOF)
29401c30 888 c = get_effective_char (pfile);
cbcff6df
NB
889 else
890 {
891 c = buffer->read_ahead = buffer->extra_char;
892 buffer->extra_char = EOF;
893 }
0d9f234d 894
cbcff6df
NB
895 /* All known character sets have 0...9 contiguous. */
896 if (c >= '0' && c <= '9')
897 {
898 result->type = CPP_NUMBER;
93c80368 899 parse_number (pfile, &result->val.str, c, 1);
cbcff6df 900 }
041c3194 901 else
ea4a453b 902 {
cbcff6df
NB
903 result->type = CPP_DOT;
904 if (c == '.')
905 {
29401c30 906 buffer->extra_char = get_effective_char (pfile);
cbcff6df
NB
907 if (buffer->extra_char == '.')
908 {
909 buffer->extra_char = EOF;
910 ACCEPT_CHAR (CPP_ELLIPSIS);
911 }
912 else
913 /* We'll catch the extra_char when we're called back. */
914 buffer->read_ahead = '.';
915 }
916 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
917 ACCEPT_CHAR (CPP_DOT_STAR);
ea4a453b 918 }
45b966db
ZW
919}
920
5fddcffc
NB
921/* Allocate COUNT tokens for RUN. */
922void
923_cpp_init_tokenrun (run, count)
924 tokenrun *run;
925 unsigned int count;
926{
927 run->base = xnewvec (cpp_token, count);
928 run->limit = run->base + count;
929 run->next = NULL;
930}
931
932/* Returns the next tokenrun, or creates one if there is none. */
933static tokenrun *
934next_tokenrun (run)
935 tokenrun *run;
936{
937 if (run->next == NULL)
938 {
939 run->next = xnew (tokenrun);
bdcbe496 940 run->next->prev = run;
5fddcffc
NB
941 _cpp_init_tokenrun (run->next, 250);
942 }
943
944 return run->next;
945}
946
4ed5bcfb
NB
947/* Allocate a single token that is invalidated at the same time as the
948 rest of the tokens on the line. Has its line and col set to the
949 same as the last lexed token, so that diagnostics appear in the
950 right place. */
951cpp_token *
952_cpp_temp_token (pfile)
953 cpp_reader *pfile;
954{
955 cpp_token *old, *result;
956
957 old = pfile->cur_token - 1;
958 if (pfile->cur_token == pfile->cur_run->limit)
959 {
960 pfile->cur_run = next_tokenrun (pfile->cur_run);
961 pfile->cur_token = pfile->cur_run->base;
962 }
963
964 result = pfile->cur_token++;
965 result->line = old->line;
966 result->col = old->col;
967 return result;
968}
969
14baae01
NB
970/* Lex a token into RESULT (external interface). Takes care of issues
971 like directive handling, token lookahead, multiple include
972 opimisation and skipping. */
345894b4
NB
973const cpp_token *
974_cpp_lex_token (pfile)
45b966db 975 cpp_reader *pfile;
5fddcffc 976{
bdcbe496 977 cpp_token *result;
5fddcffc 978
bdcbe496 979 for (;;)
5fddcffc 980 {
bdcbe496 981 if (pfile->cur_token == pfile->cur_run->limit)
5fddcffc 982 {
bdcbe496
NB
983 pfile->cur_run = next_tokenrun (pfile->cur_run);
984 pfile->cur_token = pfile->cur_run->base;
5fddcffc
NB
985 }
986
bdcbe496 987 if (pfile->lookaheads)
14baae01
NB
988 {
989 pfile->lookaheads--;
990 result = pfile->cur_token++;
991 }
bdcbe496 992 else
14baae01 993 result = _cpp_lex_direct (pfile);
bdcbe496
NB
994
995 if (result->flags & BOL)
5fddcffc 996 {
bdcbe496
NB
997 /* Is this a directive. If _cpp_handle_directive returns
998 false, it is an assembler #. */
999 if (result->type == CPP_HASH
1000 && !pfile->state.parsing_args
1001 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1002 continue;
97293897
NB
1003 if (pfile->cb.line_change && !pfile->state.skipping)
1004 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
5fddcffc 1005 }
5fddcffc 1006
bdcbe496
NB
1007 /* We don't skip tokens in directives. */
1008 if (pfile->state.in_directive)
1009 break;
5fddcffc 1010
bdcbe496 1011 /* Outside a directive, invalidate controlling macros. At file
14baae01 1012 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
bdcbe496 1013 get here and MI optimisation works. */
5fddcffc 1014 pfile->mi_valid = false;
bdcbe496
NB
1015
1016 if (!pfile->state.skipping || result->type == CPP_EOF)
1017 break;
5fddcffc
NB
1018 }
1019
345894b4 1020 return result;
5fddcffc
NB
1021}
1022
14baae01
NB
1023/* Lex a token into pfile->cur_token, which is also incremented, to
1024 get diagnostics pointing to the correct location.
1025
1026 Does not handle issues such as token lookahead, multiple-include
1027 optimisation, directives, skipping etc. This function is only
1028 suitable for use by _cpp_lex_token, and in special cases like
1029 lex_expansion_token which doesn't care for any of these issues.
1030
1031 When meeting a newline, returns CPP_EOF if parsing a directive,
1032 otherwise returns to the start of the token buffer if permissible.
1033 Returns the location of the lexed token.

   Control flow: the body is a single switch on the next character,
   re-entered through four labels.  fresh_line restarts after a newline
   or a popped buffer, update_tokens_line restarts after a skipped
   comment or escaped newline, skipped_white restarts after horizontal
   whitespace, and trigraph re-dispatches on a character produced by
   skip_escaped_newlines.  Multi-character operators are recognised by
   reading one character ahead with get_effective_char and accepting
   it with ACCEPT_CHAR when it extends the token.  */
1034cpp_token *
1035_cpp_lex_direct (pfile)
5fddcffc 1036 cpp_reader *pfile;
45b966db 1037{
0d9f234d 1038 cppchar_t c;
adb84b42 1039 cpp_buffer *buffer;
0d9f234d 1040 const unsigned char *comment_start;
14baae01 1041 cpp_token *result = pfile->cur_token++;
9ec7291f 1042
/* Reload the current buffer (it may have changed) and start the token
   with the flags saved in buffer->saved_flags, which are then cleared.  */
5fddcffc 1043 fresh_line:
adb84b42 1044 buffer = pfile->buffer;
bd969772
NB
1045 result->flags = buffer->saved_flags;
1046 buffer->saved_flags = 0;
/* Stamp the token with the current line number.  */
5fddcffc 1047 update_tokens_line:
1444f2ed 1048 result->line = pfile->line;
041c3194 1049
/* Take the pending read_ahead character if there is one, otherwise the
   next byte of the buffer; C stays EOF when the buffer is exhausted.  */
5fddcffc 1050 skipped_white:
0d9f234d
NB
1051 c = buffer->read_ahead;
1052 if (c == EOF && buffer->cur < buffer->rlimit)
5fddcffc
NB
1053 c = *buffer->cur++;
1054 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
0d9f234d 1055 buffer->read_ahead = EOF;
5fddcffc
NB
1056
1057 trigraph:
0d9f234d 1058 switch (c)
45b966db 1059 {
0d9f234d 1060 case EOF:
/* End of buffer: the next token lexed will carry the BOL flag.  */
bdcbe496 1061 buffer->saved_flags = BOL;
5fddcffc 1062 if (!pfile->state.parsing_args && !pfile->state.in_directive)
ef6e958a 1063 {
bdcbe496 1064 if (buffer->cur != buffer->line_base)
5fddcffc
NB
1065 {
1066 /* Non-empty files should end in a newline. Don't warn
1067 for command line and _Pragma buffers. */
1068 if (!buffer->from_stage3)
1069 cpp_pedwarn (pfile, "no newline at end of file");
1070 handle_newline (pfile, '\n');
7364fdd8 1071 }
bdcbe496
NB
1072
1073 /* Don't pop the last buffer. */
1074 if (buffer->prev)
1075 {
1076 unsigned char stop = buffer->return_at_eof;
1077
1078 _cpp_pop_buffer (pfile);
1079 if (!stop)
1080 goto fresh_line;
1081 }
ef6e958a 1082 }
0d9f234d 1083 result->type = CPP_EOF;
5fddcffc 1084 break;
45b966db 1085
0d9f234d
NB
1086 case ' ': case '\t': case '\f': case '\v': case '\0':
1087 skip_whitespace (pfile, c);
1088 result->flags |= PREV_WHITE;
5fddcffc 1089 goto skipped_white;
0d9f234d
NB
1090
1091 case '\n': case '\r':
bdcbe496
NB
1092 handle_newline (pfile, c);
1093 buffer->saved_flags = BOL;
1094 if (! pfile->state.in_directive)
45b966db 1095 {
4ed5bcfb
NB
1096 if (pfile->state.parsing_args == 2)
1097 buffer->saved_flags |= PREV_WHITE;
bdcbe496
NB
/* Unless tokens must be kept, reuse the token storage from the start
   of the base run for the new line.  */
1098 if (!pfile->keep_tokens)
1099 {
1100 pfile->cur_run = &pfile->base_run;
1101 result = pfile->base_run.base;
1102 pfile->cur_token = result + 1;
1103 }
1104 goto fresh_line;
45b966db 1105 }
5fddcffc
NB
1106 result->type = CPP_EOF;
1107 break;
46d07497 1108
0d9f234d
NB
1109 case '?':
1110 case '\\':
1111 /* These could start an escaped newline, or '?' a trigraph. Let
1112 skip_escaped_newlines do all the work. */
1113 {
67821e3a 1114 unsigned int line = pfile->line;
0d9f234d 1115
29401c30 1116 c = skip_escaped_newlines (pfile, c);
67821e3a 1117 if (line != pfile->line)
0d9f234d
NB
1118 /* We had at least one escaped newline of some sort, and the
1119 next character is in buffer->read_ahead. Update the
1120 token's line and column. */
5fddcffc 1121 goto update_tokens_line;
0d9f234d
NB
1122
1123 /* We are either the original '?' or '\\', or a trigraph. */
1124 result->type = CPP_QUERY;
1125 buffer->read_ahead = EOF;
1126 if (c == '\\')
12c4f523 1127 goto random_char;
0d9f234d 1128 else if (c != '?')
5fddcffc 1129 goto trigraph;
0d9f234d
NB
1130 }
1131 break;
46d07497 1132
0d9f234d
NB
1133 case '0': case '1': case '2': case '3': case '4':
1134 case '5': case '6': case '7': case '8': case '9':
1135 result->type = CPP_NUMBER;
93c80368 1136 parse_number (pfile, &result->val.str, c, 0);
0d9f234d 1137 break;
46d07497 1138
0d9f234d
NB
1139 case '$':
1140 if (!CPP_OPTION (pfile, dollars_in_ident))
1141 goto random_char;
ec5c56db 1142 /* Fall through... */
0d9f234d
NB
1143
1144 case '_':
1145 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1146 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1147 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1148 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1149 case 'y': case 'z':
1150 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1151 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1152 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1153 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1154 case 'Y': case 'Z':
1155 result->type = CPP_NAME;
2c3fcba6 1156 result->val.node = parse_identifier (pfile);
0d9f234d
NB
1157
1158 /* 'L' may introduce wide characters or strings. */
93c80368 1159 if (result->val.node == pfile->spec_nodes.n_L)
0d9f234d 1160 {
2c3fcba6
ZW
1161 c = buffer->read_ahead;
1162 if (c == EOF && buffer->cur < buffer->rlimit)
1163 c = *buffer->cur;
0d9f234d 1164 if (c == '\'' || c == '"')
ba89d661 1165 {
2c3fcba6 1166 buffer->cur++;
0d9f234d
NB
1167 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1168 goto make_string;
ba89d661 1169 }
0d9f234d
NB
1170 }
1171 /* Convert named operators to their proper types. */
93c80368 1172 else if (result->val.node->flags & NODE_OPERATOR)
0d9f234d
NB
1173 {
1174 result->flags |= NAMED_OP;
93c80368 1175 result->type = result->val.node->value.operator;
0d9f234d
NB
1176 }
1177 break;
1178
1179 case '\'':
1180 case '"':
1181 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1182 make_string:
1183 parse_string (pfile, result, c);
1184 break;
041c3194 1185
0d9f234d 1186 case '/':
1c6d33ef
NB
1187 /* A potential block or line comment. */
1188 comment_start = buffer->cur;
0d9f234d 1189 result->type = CPP_DIV;
29401c30 1190 c = get_effective_char (pfile);
0d9f234d
NB
1191 if (c == '=')
1192 ACCEPT_CHAR (CPP_DIV_EQ);
1c6d33ef
NB
/* Anything other than a second slash or a star means this is not a
   comment; the token stays CPP_DIV or CPP_DIV_EQ.  */
1193 if (c != '/' && c != '*')
1194 break;
e61fc951 1195
1c6d33ef
NB
1196 if (c == '*')
1197 {
0d9f234d 1198 if (skip_block_comment (pfile))
67821e3a 1199 cpp_error (pfile, "unterminated comment");
0d9f234d 1200 }
1c6d33ef 1201 else
0d9f234d 1202 {
1c6d33ef
NB
1203 if (!CPP_OPTION (pfile, cplusplus_comments)
1204 && !CPP_IN_SYSTEM_HEADER (pfile))
1205 break;
1206
bdb05a7b
NB
1207 /* Warn about comments only if pedantically GNUC89, and not
1208 in system headers. */
1209 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
a94c1199 1210 && ! buffer->warned_cplusplus_comments)
041c3194 1211 {
1c6d33ef
NB
1212 cpp_pedwarn (pfile,
1213 "C++ style comments are not allowed in ISO C89");
1214 cpp_pedwarn (pfile,
1215 "(this will be reported only once per input file)");
1216 buffer->warned_cplusplus_comments = 1;
1217 }
0d9f234d 1218
a94c1199 1219 /* Skip_line_comment updates buffer->read_ahead. */
01ef6563 1220 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
50410426 1221 cpp_warning (pfile, "multi-line comment");
1c6d33ef 1222 }
0d9f234d 1223
1c6d33ef
NB
1224 /* Skipping the comment has updated buffer->read_ahead. */
1225 if (!pfile->state.save_comments)
1226 {
1227 result->flags |= PREV_WHITE;
5fddcffc 1228 goto update_tokens_line;
0d9f234d 1229 }
1c6d33ef
NB
1230
1231 /* Save the comment as a token in its own right. */
1232 save_comment (pfile, result, comment_start);
bdcbe496 1233 break;
0d9f234d
NB
1234
1235 case '<':
/* When angled_headers is set, '<' starts a header-name that is lexed
   like a string terminated by '>'.  */
1236 if (pfile->state.angled_headers)
1237 {
1238 result->type = CPP_HEADER_NAME;
1239 c = '>'; /* terminator. */
1240 goto make_string;
1241 }
45b966db 1242
0d9f234d 1243 result->type = CPP_LESS;
29401c30 1244 c = get_effective_char (pfile);
0d9f234d
NB
1245 if (c == '=')
1246 ACCEPT_CHAR (CPP_LESS_EQ);
1247 else if (c == '<')
1248 {
1249 ACCEPT_CHAR (CPP_LSHIFT);
29401c30 1250 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1251 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1252 }
1253 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1254 {
1255 ACCEPT_CHAR (CPP_MIN);
29401c30 1256 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1257 ACCEPT_CHAR (CPP_MIN_EQ);
1258 }
1259 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1260 {
1261 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1262 result->flags |= DIGRAPH;
1263 }
1264 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1265 {
1266 ACCEPT_CHAR (CPP_OPEN_BRACE);
1267 result->flags |= DIGRAPH;
1268 }
1269 break;
1270
1271 case '>':
1272 result->type = CPP_GREATER;
29401c30 1273 c = get_effective_char (pfile);
0d9f234d
NB
1274 if (c == '=')
1275 ACCEPT_CHAR (CPP_GREATER_EQ);
1276 else if (c == '>')
1277 {
1278 ACCEPT_CHAR (CPP_RSHIFT);
29401c30 1279 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1280 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1281 }
1282 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1283 {
1284 ACCEPT_CHAR (CPP_MAX);
29401c30 1285 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1286 ACCEPT_CHAR (CPP_MAX_EQ);
1287 }
1288 break;
1289
cbcff6df 1290 case '%':
29401c30 1291 lex_percent (pfile, result);
0d9f234d
NB
1292 break;
1293
cbcff6df
NB
1294 case '.':
1295 lex_dot (pfile, result);
0d9f234d 1296 break;
45b966db 1297
0d9f234d
NB
1298 case '+':
1299 result->type = CPP_PLUS;
29401c30 1300 c = get_effective_char (pfile);
0d9f234d
NB
1301 if (c == '=')
1302 ACCEPT_CHAR (CPP_PLUS_EQ);
1303 else if (c == '+')
1304 ACCEPT_CHAR (CPP_PLUS_PLUS);
1305 break;
04e3ec78 1306
0d9f234d
NB
1307 case '-':
1308 result->type = CPP_MINUS;
29401c30 1309 c = get_effective_char (pfile);
0d9f234d
NB
1310 if (c == '>')
1311 {
1312 ACCEPT_CHAR (CPP_DEREF);
1313 if (CPP_OPTION (pfile, cplusplus)
29401c30 1314 && get_effective_char (pfile) == '*')
0d9f234d
NB
1315 ACCEPT_CHAR (CPP_DEREF_STAR);
1316 }
1317 else if (c == '=')
1318 ACCEPT_CHAR (CPP_MINUS_EQ);
1319 else if (c == '-')
1320 ACCEPT_CHAR (CPP_MINUS_MINUS);
1321 break;
45b966db 1322
0d9f234d
NB
1323 case '*':
1324 result->type = CPP_MULT;
29401c30 1325 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1326 ACCEPT_CHAR (CPP_MULT_EQ);
1327 break;
04e3ec78 1328
0d9f234d
NB
1329 case '=':
1330 result->type = CPP_EQ;
29401c30 1331 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1332 ACCEPT_CHAR (CPP_EQ_EQ);
1333 break;
f8f769ea 1334
0d9f234d
NB
1335 case '!':
1336 result->type = CPP_NOT;
29401c30 1337 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1338 ACCEPT_CHAR (CPP_NOT_EQ);
1339 break;
45b966db 1340
0d9f234d
NB
1341 case '&':
1342 result->type = CPP_AND;
29401c30 1343 c = get_effective_char (pfile);
0d9f234d
NB
1344 if (c == '=')
1345 ACCEPT_CHAR (CPP_AND_EQ);
1346 else if (c == '&')
1347 ACCEPT_CHAR (CPP_AND_AND);
1348 break;
1349
1350 case '#':
a949941c 1351 result->type = CPP_HASH;
5fddcffc
NB
1352 if (get_effective_char (pfile) == '#')
1353 ACCEPT_CHAR (CPP_PASTE);
0d9f234d 1354 break;
45b966db 1355
0d9f234d
NB
1356 case '|':
1357 result->type = CPP_OR;
29401c30 1358 c = get_effective_char (pfile);
0d9f234d
NB
1359 if (c == '=')
1360 ACCEPT_CHAR (CPP_OR_EQ);
1361 else if (c == '|')
1362 ACCEPT_CHAR (CPP_OR_OR);
1363 break;
45b966db 1364
0d9f234d
NB
1365 case '^':
1366 result->type = CPP_XOR;
29401c30 1367 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1368 ACCEPT_CHAR (CPP_XOR_EQ);
1369 break;
45b966db 1370
0d9f234d
NB
1371 case ':':
1372 result->type = CPP_COLON;
29401c30 1373 c = get_effective_char (pfile);
0d9f234d
NB
1374 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1375 ACCEPT_CHAR (CPP_SCOPE);
1376 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1377 {
1378 result->flags |= DIGRAPH;
1379 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1380 }
1381 break;
45b966db 1382
0d9f234d
NB
1383 case '~': result->type = CPP_COMPL; break;
1384 case ',': result->type = CPP_COMMA; break;
1385 case '(': result->type = CPP_OPEN_PAREN; break;
1386 case ')': result->type = CPP_CLOSE_PAREN; break;
1387 case '[': result->type = CPP_OPEN_SQUARE; break;
1388 case ']': result->type = CPP_CLOSE_SQUARE; break;
1389 case '{': result->type = CPP_OPEN_BRACE; break;
1390 case '}': result->type = CPP_CLOSE_BRACE; break;
1391 case ';': result->type = CPP_SEMICOLON; break;
1392
cc937581
ZW
1393 /* @ is a punctuator in Objective C. */
1394 case '@': result->type = CPP_ATSIGN; break;
0d9f234d
NB
1395
1396 random_char:
1397 default:
1398 result->type = CPP_OTHER;
6c53ebff 1399 result->val.c = c;
0d9f234d
NB
1400 break;
1401 }
bdcbe496
NB
1402
1403 return result;
0d9f234d
NB
1404}
1405
93c80368
NB
1406/* An upper bound on the number of bytes needed to spell a token,
1407 including preceding whitespace. */
1408unsigned int
1409cpp_token_len (token)
1410 const cpp_token *token;
0d9f234d 1411{
93c80368 1412 unsigned int len;
6d2c2047 1413
93c80368 1414 switch (TOKEN_SPELL (token))
041c3194 1415 {
a28c5035
NB
1416 default: len = 0; break;
1417 case SPELL_STRING: len = token->val.str.len; break;
1418 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
041c3194 1419 }
93c80368
NB
1420 /* 1 for whitespace, 4 for comment delimeters. */
1421 return len + 5;
6d2c2047
ZW
1422}
1423
041c3194 1424/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885
ZW
1425 already contain the enough space to hold the token's spelling.
1426 Returns a pointer to the character after the last character
1427 written. */
93c80368
NB
1428unsigned char *
1429cpp_spell_token (pfile, token, buffer)
041c3194
ZW
1430 cpp_reader *pfile; /* Would be nice to be rid of this... */
1431 const cpp_token *token;
1432 unsigned char *buffer;
1433{
96be6998 1434 switch (TOKEN_SPELL (token))
041c3194
ZW
1435 {
1436 case SPELL_OPERATOR:
1437 {
1438 const unsigned char *spelling;
1439 unsigned char c;
d6d5f795 1440
041c3194 1441 if (token->flags & DIGRAPH)
37b8524c
JDA
1442 spelling
1443 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
92936ecf
ZW
1444 else if (token->flags & NAMED_OP)
1445 goto spell_ident;
041c3194 1446 else
96be6998 1447 spelling = TOKEN_NAME (token);
041c3194
ZW
1448
1449 while ((c = *spelling++) != '\0')
1450 *buffer++ = c;
1451 }
1452 break;
d6d5f795 1453
041c3194 1454 case SPELL_IDENT:
92936ecf 1455 spell_ident:
a28c5035
NB
1456 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1457 buffer += NODE_LEN (token->val.node);
041c3194 1458 break;
d6d5f795 1459
041c3194
ZW
1460 case SPELL_STRING:
1461 {
ba89d661
ZW
1462 int left, right, tag;
1463 switch (token->type)
1464 {
1465 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1466 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
ba89d661
ZW
1467 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1468 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1469 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1470 default: left = '\0'; right = '\0'; tag = '\0'; break;
1471 }
1472 if (tag) *buffer++ = tag;
1473 if (left) *buffer++ = left;
bfb9dc7f
ZW
1474 memcpy (buffer, token->val.str.text, token->val.str.len);
1475 buffer += token->val.str.len;
ba89d661 1476 if (right) *buffer++ = right;
041c3194
ZW
1477 }
1478 break;
d6d5f795 1479
041c3194 1480 case SPELL_CHAR:
6c53ebff 1481 *buffer++ = token->val.c;
041c3194 1482 break;
d6d5f795 1483
041c3194 1484 case SPELL_NONE:
96be6998 1485 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
041c3194
ZW
1486 break;
1487 }
d6d5f795 1488
041c3194
ZW
1489 return buffer;
1490}
d6d5f795 1491
93c80368
NB
1492/* Returns a token as a null-terminated string. The string is
1493 temporary, and automatically freed later. Useful for diagnostics. */
1494unsigned char *
1495cpp_token_as_text (pfile, token)
c5a04734 1496 cpp_reader *pfile;
041c3194 1497 const cpp_token *token;
c5a04734 1498{
93c80368 1499 unsigned int len = cpp_token_len (token);
ece54d54 1500 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
c5a04734 1501
93c80368
NB
1502 end = cpp_spell_token (pfile, token, start);
1503 end[0] = '\0';
c5a04734 1504
93c80368
NB
1505 return start;
1506}
c5a04734 1507
93c80368
NB
1508/* Used by C front ends. Should really move to using cpp_token_as_text. */
1509const char *
1510cpp_type2name (type)
1511 enum cpp_ttype type;
1512{
1513 return (const char *) token_spellings[type].name;
1514}
c5a04734 1515
4ed5bcfb
NB
1516/* Writes the spelling of token to FP, without any preceding space.
1517 Separated from cpp_spell_token for efficiency - to avoid stdio
1518 double-buffering. */
93c80368
NB
1519void
1520cpp_output_token (token, fp)
1521 const cpp_token *token;
1522 FILE *fp;
1523{
93c80368 1524 switch (TOKEN_SPELL (token))
c5a04734 1525 {
93c80368
NB
1526 case SPELL_OPERATOR:
1527 {
1528 const unsigned char *spelling;
3b681e9d 1529 int c;
c5a04734 1530
93c80368 1531 if (token->flags & DIGRAPH)
37b8524c
JDA
1532 spelling
1533 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
93c80368
NB
1534 else if (token->flags & NAMED_OP)
1535 goto spell_ident;
1536 else
1537 spelling = TOKEN_NAME (token);
041c3194 1538
3b681e9d
ZW
1539 c = *spelling;
1540 do
1541 putc (c, fp);
1542 while ((c = *++spelling) != '\0');
93c80368
NB
1543 }
1544 break;
041c3194 1545
93c80368
NB
1546 spell_ident:
1547 case SPELL_IDENT:
3b681e9d 1548 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
93c80368 1549 break;
041c3194 1550
93c80368
NB
1551 case SPELL_STRING:
1552 {
1553 int left, right, tag;
1554 switch (token->type)
1555 {
1556 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1557 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
93c80368
NB
1558 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1559 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1560 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1561 default: left = '\0'; right = '\0'; tag = '\0'; break;
1562 }
1563 if (tag) putc (tag, fp);
1564 if (left) putc (left, fp);
1565 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1566 if (right) putc (right, fp);
1567 }
1568 break;
c5a04734 1569
93c80368 1570 case SPELL_CHAR:
6c53ebff 1571 putc (token->val.c, fp);
93c80368 1572 break;
c5a04734 1573
93c80368
NB
1574 case SPELL_NONE:
1575 /* An error, most probably. */
1576 break;
041c3194 1577 }
c5a04734
ZW
1578}
1579
93c80368
NB
1580/* Compare two tokens. */
1581int
1582_cpp_equiv_tokens (a, b)
1583 const cpp_token *a, *b;
c5a04734 1584{
93c80368
NB
1585 if (a->type == b->type && a->flags == b->flags)
1586 switch (TOKEN_SPELL (a))
1587 {
1588 default: /* Keep compiler happy. */
1589 case SPELL_OPERATOR:
1590 return 1;
1591 case SPELL_CHAR:
6c53ebff 1592 return a->val.c == b->val.c; /* Character. */
93c80368 1593 case SPELL_NONE:
56051c0a 1594 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
93c80368
NB
1595 case SPELL_IDENT:
1596 return a->val.node == b->val.node;
1597 case SPELL_STRING:
1598 return (a->val.str.len == b->val.str.len
1599 && !memcmp (a->val.str.text, b->val.str.text,
1600 a->val.str.len));
1601 }
c5a04734 1602
041c3194
ZW
1603 return 0;
1604}
1605
93c80368
NB
1606/* Returns nonzero if a space should be inserted to avoid an
1607 accidental token paste for output. For simplicity, it is
1608 conservative, and occasionally advises a space where one is not
1609 needed, e.g. "." and ".2". */
041c3194 1610
93c80368
NB
1611int
1612cpp_avoid_paste (pfile, token1, token2)
c5a04734 1613 cpp_reader *pfile;
93c80368 1614 const cpp_token *token1, *token2;
c5a04734 1615{
93c80368
NB
1616 enum cpp_ttype a = token1->type, b = token2->type;
1617 cppchar_t c;
c5a04734 1618
93c80368
NB
1619 if (token1->flags & NAMED_OP)
1620 a = CPP_NAME;
1621 if (token2->flags & NAMED_OP)
1622 b = CPP_NAME;
c5a04734 1623
93c80368
NB
1624 c = EOF;
1625 if (token2->flags & DIGRAPH)
37b8524c 1626 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
93c80368
NB
1627 else if (token_spellings[b].category == SPELL_OPERATOR)
1628 c = token_spellings[b].name[0];
c5a04734 1629
93c80368 1630 /* Quickly get everything that can paste with an '='. */
37b8524c 1631 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
93c80368 1632 return 1;
c5a04734 1633
93c80368 1634 switch (a)
c5a04734 1635 {
93c80368
NB
1636 case CPP_GREATER: return c == '>' || c == '?';
1637 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1638 case CPP_PLUS: return c == '+';
1639 case CPP_MINUS: return c == '-' || c == '>';
1640 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1641 case CPP_MOD: return c == ':' || c == '>';
1642 case CPP_AND: return c == '&';
1643 case CPP_OR: return c == '|';
1644 case CPP_COLON: return c == ':' || c == '>';
1645 case CPP_DEREF: return c == '*';
26ec42ee 1646 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
93c80368
NB
1647 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1648 case CPP_NAME: return ((b == CPP_NUMBER
1649 && name_p (pfile, &token2->val.str))
1650 || b == CPP_NAME
1651 || b == CPP_CHAR || b == CPP_STRING); /* L */
1652 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1653 || c == '.' || c == '+' || c == '-');
1654 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
6c53ebff 1655 && token1->val.c == '@'
93c80368
NB
1656 && (b == CPP_NAME || b == CPP_STRING));
1657 default: break;
c5a04734 1658 }
c5a04734 1659
417f3e3a 1660 return 0;
c5a04734
ZW
1661}
1662
93c80368 1663/* Output all the remaining tokens on the current line, and a newline
4ed5bcfb
NB
1664 character, to FP. Leading whitespace is removed. If there are
1665 macros, special token padding is not performed. */
c5a04734 1666void
93c80368 1667cpp_output_line (pfile, fp)
c5a04734 1668 cpp_reader *pfile;
93c80368 1669 FILE *fp;
c5a04734 1670{
4ed5bcfb 1671 const cpp_token *token;
96be6998 1672
4ed5bcfb
NB
1673 token = cpp_get_token (pfile);
1674 while (token->type != CPP_EOF)
96be6998 1675 {
4ed5bcfb
NB
1676 cpp_output_token (token, fp);
1677 token = cpp_get_token (pfile);
1678 if (token->flags & PREV_WHITE)
1679 putc (' ', fp);
96be6998
ZW
1680 }
1681
93c80368 1682 putc ('\n', fp);
041c3194 1683}
c5a04734 1684
/* Return the numeric value (0-15) of the hexadecimal digit C.  Both
   upper and lower case letters are accepted.  Aborts if C is not a
   hexadecimal digit, so callers must check first.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'A' && c <= 'F')
    return 10 + (c - 'A');
  if (c >= 'a' && c <= 'f')
    return 10 + (c - 'a');

  abort ();
}
1698
62729350
NB
1699/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1700 failure if cpplib is not parsing C++ or C99. Such failure is
1701 silent, and no variables are updated. Otherwise returns 0, and
1702 warns if -Wtraditional.
c8a96070
NB
1703
1704 [lex.charset]: The character designated by the universal character
1705 name \UNNNNNNNN is that character whose character short name in
1706 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1707 universal character name \uNNNN is that character whose character
1708 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1709 for a universal character name is less than 0x20 or in the range
1710 0x7F-0x9F (inclusive), or if the universal character name
1711 designates a character in the basic source character set, then the
1712 program is ill-formed.
1713
1714 We assume that wchar_t is Unicode, so we don't need to do any
62729350 1715 mapping. Is this ever wrong?
c8a96070 1716
62729350
NB
1717 PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1718 LIMIT is the end of the string or charconst. PSTR is updated to
1719 point after the UCS on return, and the UCS is written into PC. */
1720
1721static int
1722maybe_read_ucs (pfile, pstr, limit, pc)
c8a96070
NB
1723 cpp_reader *pfile;
1724 const unsigned char **pstr;
1725 const unsigned char *limit;
62729350 1726 unsigned int *pc;
c8a96070
NB
1727{
1728 const unsigned char *p = *pstr;
62729350
NB
1729 unsigned int code = 0;
1730 unsigned int c = *pc, length;
1731
1732 /* Only attempt to interpret a UCS for C++ and C99. */
1733 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1734 return 1;
c8a96070 1735
62729350
NB
1736 if (CPP_WTRADITIONAL (pfile))
1737 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
c8a96070 1738
f8710242
NB
1739 length = (c == 'u' ? 4: 8);
1740
1741 if ((size_t) (limit - p) < length)
1742 {
1743 cpp_error (pfile, "incomplete universal-character-name");
1744 /* Skip to the end to avoid more diagnostics. */
1745 p = limit;
1746 }
1747 else
1748 {
1749 for (; length; length--, p++)
c8a96070 1750 {
f8710242
NB
1751 c = *p;
1752 if (ISXDIGIT (c))
1753 code = (code << 4) + hex_digit_value (c);
1754 else
1755 {
1756 cpp_error (pfile,
1757 "non-hex digit '%c' in universal-character-name", c);
1758 /* We shouldn't skip in case there are multibyte chars. */
1759 break;
1760 }
c8a96070 1761 }
c8a96070
NB
1762 }
1763
1764#ifdef TARGET_EBCDIC
1765 cpp_error (pfile, "universal-character-name on EBCDIC target");
1766 code = 0x3f; /* EBCDIC invalid character */
1767#else
f8710242
NB
1768 /* True extended characters are OK. */
1769 if (code >= 0xa0
1770 && !(code & 0x80000000)
1771 && !(code >= 0xD800 && code <= 0xDFFF))
1772 ;
1773 /* The standard permits $, @ and ` to be specified as UCNs. We use
1774 hex escapes so that this also works with EBCDIC hosts. */
1775 else if (code == 0x24 || code == 0x40 || code == 0x60)
1776 ;
1777 /* Don't give another error if one occurred above. */
1778 else if (length == 0)
1779 cpp_error (pfile, "universal-character-name out of range");
c8a96070
NB
1780#endif
1781
1782 *pstr = p;
62729350
NB
1783 *pc = code;
1784 return 0;
c8a96070
NB
1785}
1786
1787/* Interpret an escape sequence, and return its value. PSTR points to
1788 the input pointer, which is just after the backslash. LIMIT is how
62729350
NB
1789 much text we have. MASK is a bitmask for the precision for the
1790 destination type (char or wchar_t). TRADITIONAL, if true, does not
1791 interpret escapes that did not exist in traditional C.
c8a96070 1792
62729350
NB
1793 Handles all relevant diagnostics. */
1794
1795unsigned int
1796cpp_parse_escape (pfile, pstr, limit, mask, traditional)
c8a96070
NB
1797 cpp_reader *pfile;
1798 const unsigned char **pstr;
1799 const unsigned char *limit;
62729350 1800 unsigned HOST_WIDE_INT mask;
c8a96070
NB
1801 int traditional;
1802{
1803 int unknown = 0;
1804 const unsigned char *str = *pstr;
1805 unsigned int c = *str++;
1806
1807 switch (c)
1808 {
1809 case '\\': case '\'': case '"': case '?': break;
1810 case 'b': c = TARGET_BS; break;
1811 case 'f': c = TARGET_FF; break;
1812 case 'n': c = TARGET_NEWLINE; break;
1813 case 'r': c = TARGET_CR; break;
1814 case 't': c = TARGET_TAB; break;
1815 case 'v': c = TARGET_VT; break;
1816
1817 case '(': case '{': case '[': case '%':
1818 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1819 '\%' is used to prevent SCCS from getting confused. */
1820 unknown = CPP_PEDANTIC (pfile);
1821 break;
1822
1823 case 'a':
1824 if (CPP_WTRADITIONAL (pfile))
1825 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1826 if (!traditional)
1827 c = TARGET_BELL;
1828 break;
1829
1830 case 'e': case 'E':
1831 if (CPP_PEDANTIC (pfile))
1832 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1833 c = TARGET_ESC;
1834 break;
1835
c8a96070 1836 case 'u': case 'U':
62729350 1837 unknown = maybe_read_ucs (pfile, &str, limit, &c);
c8a96070
NB
1838 break;
1839
1840 case 'x':
1841 if (CPP_WTRADITIONAL (pfile))
1842 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1843
1844 if (!traditional)
1845 {
1846 unsigned int i = 0, overflow = 0;
1847 int digits_found = 0;
1848
1849 while (str < limit)
1850 {
1851 c = *str;
1852 if (! ISXDIGIT (c))
1853 break;
1854 str++;
1855 overflow |= i ^ (i << 4 >> 4);
1856 i = (i << 4) + hex_digit_value (c);
1857 digits_found = 1;
1858 }
1859
1860 if (!digits_found)
1861 cpp_error (pfile, "\\x used with no following hex digits");
1862
1863 if (overflow | (i != (i & mask)))
1864 {
1865 cpp_pedwarn (pfile, "hex escape sequence out of range");
1866 i &= mask;
1867 }
1868 c = i;
1869 }
1870 break;
1871
1872 case '0': case '1': case '2': case '3':
1873 case '4': case '5': case '6': case '7':
1874 {
1875 unsigned int i = c - '0';
1876 int count = 0;
1877
1878 while (str < limit && ++count < 3)
1879 {
1880 c = *str;
1881 if (c < '0' || c > '7')
1882 break;
1883 str++;
1884 i = (i << 3) + c - '0';
1885 }
1886
1887 if (i != (i & mask))
1888 {
1889 cpp_pedwarn (pfile, "octal escape sequence out of range");
1890 i &= mask;
1891 }
1892 c = i;
1893 }
1894 break;
1895
1896 default:
1897 unknown = 1;
1898 break;
1899 }
1900
1901 if (unknown)
1902 {
1903 if (ISGRAPH (c))
1904 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1905 else
1906 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1907 }
1908
62729350
NB
1909 if (c > mask)
1910 cpp_pedwarn (pfile, "escape sequence out of range for character");
1911
c8a96070
NB
1912 *pstr = str;
1913 return c;
1914}
1915
/* Bit widths used when interpreting narrow and wide character
   constants.  Each defaults to the corresponding *_TYPE_SIZE macro
   unless a MAX_* value has already been defined.  */
1916#ifndef MAX_CHAR_TYPE_SIZE
1917#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1918#endif
1919
1920#ifndef MAX_WCHAR_TYPE_SIZE
1921#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1922#endif
1923
1924/* Interpret a (possibly wide) character constant in TOKEN.
1925 WARN_MULTI warns about multi-character charconsts, if not
1926 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1927 that did not exist in traditional C. PCHARS_SEEN points to a
1928 variable that is filled in with the number of characters seen. */
1929HOST_WIDE_INT
1930cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1931 cpp_reader *pfile;
1932 const cpp_token *token;
1933 int warn_multi;
1934 int traditional;
1935 unsigned int *pchars_seen;
1936{
1937 const unsigned char *str = token->val.str.text;
1938 const unsigned char *limit = str + token->val.str.len;
1939 unsigned int chars_seen = 0;
1940 unsigned int width, max_chars, c;
2a967f3d
NB
1941 unsigned HOST_WIDE_INT mask;
1942 HOST_WIDE_INT result = 0;
c8a96070
NB
1943
1944#ifdef MULTIBYTE_CHARS
1945 (void) local_mbtowc (NULL, NULL, 0);
1946#endif
1947
1948 /* Width in bits. */
1949 if (token->type == CPP_CHAR)
1950 width = MAX_CHAR_TYPE_SIZE;
1951 else
1952 width = MAX_WCHAR_TYPE_SIZE;
1953
1954 if (width < HOST_BITS_PER_WIDE_INT)
1955 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1956 else
1957 mask = ~0;
1958 max_chars = HOST_BITS_PER_WIDE_INT / width;
1959
1960 while (str < limit)
1961 {
1962#ifdef MULTIBYTE_CHARS
1963 wchar_t wc;
1964 int char_len;
1965
1966 char_len = local_mbtowc (&wc, str, limit - str);
1967 if (char_len == -1)
1968 {
1969 cpp_warning (pfile, "ignoring invalid multibyte character");
1970 c = *str++;
1971 }
1972 else
1973 {
1974 str += char_len;
1975 c = wc;
1976 }
1977#else
1978 c = *str++;
1979#endif
1980
1981 if (c == '\\')
62729350 1982 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
c8a96070
NB
1983
1984#ifdef MAP_CHARACTER
1985 if (ISPRINT (c))
1986 c = MAP_CHARACTER (c);
1987#endif
1988
1989 /* Merge character into result; ignore excess chars. */
1990 if (++chars_seen <= max_chars)
1991 {
1992 if (width < HOST_BITS_PER_WIDE_INT)
1993 result = (result << width) | (c & mask);
1994 else
1995 result = c;
1996 }
1997 }
1998
1999 if (chars_seen == 0)
2000 cpp_error (pfile, "empty character constant");
2001 else if (chars_seen > max_chars)
2002 {
2003 chars_seen = max_chars;
f8710242 2004 cpp_warning (pfile, "character constant too long");
c8a96070
NB
2005 }
2006 else if (chars_seen > 1 && !traditional && warn_multi)
2007 cpp_warning (pfile, "multi-character character constant");
2008
2009 /* If char type is signed, sign-extend the constant. The
2010 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
2011 if (token->type == CPP_CHAR && chars_seen)
2012 {
2013 unsigned int nbits = chars_seen * width;
2014 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
2015
2016 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2017 || ((result >> (nbits - 1)) & 1) == 0)
2018 result &= mask;
2019 else
2020 result |= ~mask;
2021 }
2022
2023 *pchars_seen = chars_seen;
2024 return result;
2025}
2026
1e013d2e
NB
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest data area new_buff allocates.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the size at or above which
   _cpp_get_buff declines to reuse a free buffer for a request of
   MIN_SIZE bytes.  EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size
   _cpp_extend_buff requests: MIN_EXTRA plus twice the bytes between
   BUFF's cur and limit.  Every macro argument is parenthesized so
   that an expression argument expands as a unit.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (8000 + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
  ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)
417f3e3a 2037
/* Layout probe: the offset of U within this structure is used as the
   default alignment, since U must be placed where both a double and
   a pointer can live.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))

/* Round SIZE up to a multiple of ALIGN.  The bit trick relies on
   ALIGN being a power of two.  */
#define CPP_ALIGN(size, align) (((size) + (align) - 1) & ~((align) - 1))
2050
c9e7a609
NB
2051/* Create a new allocation buffer. Place the control block at the end
2052 of the buffer, so that buffer overflows will cause immediate chaos. */
b8af0ca5
NB
2053static _cpp_buff *
2054new_buff (len)
2055 unsigned int len;
2056{
2057 _cpp_buff *result;
ece54d54 2058 unsigned char *base;
b8af0ca5 2059
1e013d2e
NB
2060 if (len < MIN_BUFF_SIZE)
2061 len = MIN_BUFF_SIZE;
b8af0ca5
NB
2062 len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
2063
2064 base = xmalloc (len + sizeof (_cpp_buff));
2065 result = (_cpp_buff *) (base + len);
2066 result->base = base;
2067 result->cur = base;
2068 result->limit = base + len;
2069 result->next = NULL;
2070 return result;
2071}
2072
2073/* Place a chain of unwanted allocation buffers on the free list. */
2074void
2075_cpp_release_buff (pfile, buff)
2076 cpp_reader *pfile;
2077 _cpp_buff *buff;
2078{
2079 _cpp_buff *end = buff;
2080
2081 while (end->next)
2082 end = end->next;
2083 end->next = pfile->free_buffs;
2084 pfile->free_buffs = buff;
2085}
2086
2087/* Return a free buffer of size at least MIN_SIZE. */
2088_cpp_buff *
2089_cpp_get_buff (pfile, min_size)
2090 cpp_reader *pfile;
2091 unsigned int min_size;
2092{
2093 _cpp_buff *result, **p;
2094
2095 for (p = &pfile->free_buffs;; p = &(*p)->next)
2096 {
1e013d2e
NB
2097 unsigned int size;
2098
2099 if (*p == NULL)
b8af0ca5 2100 return new_buff (min_size);
1e013d2e
NB
2101 result = *p;
2102 size = result->limit - result->base;
2103 /* Return a buffer that's big enough, but don't waste one that's
2104 way too big. */
2105 if (size >= min_size && size < BUFF_SIZE_UPPER_BOUND (min_size))
b8af0ca5
NB
2106 break;
2107 }
2108
2109 *p = result->next;
2110 result->next = NULL;
2111 result->cur = result->base;
2112 return result;
2113}
2114
2115/* Return a buffer chained on the end of BUFF. Copy to it the
2116 uncommitted remaining bytes of BUFF, with at least MIN_EXTRA more
2117 bytes. */
2118_cpp_buff *
2119_cpp_extend_buff (pfile, buff, min_extra)
2120 cpp_reader *pfile;
2121 _cpp_buff *buff;
2122 unsigned int min_extra;
2123{
1e013d2e 2124 unsigned int size = EXTENDED_BUFF_SIZE (buff, min_extra);
b8af0ca5
NB
2125
2126 buff->next = _cpp_get_buff (pfile, size);
2127 memcpy (buff->next->base, buff->cur, buff->limit - buff->cur);
2128 return buff->next;
2129}
2130
2131/* Free a chain of buffers starting at BUFF. */
2132void
2133_cpp_free_buff (buff)
2134 _cpp_buff *buff;
2135{
2136 _cpp_buff *next;
2137
2138 for (; buff; buff = next)
2139 {
2140 next = buff->next;
2141 free (buff->base);
2142 }
2143}
417f3e3a 2144
ece54d54
NB
2145/* Allocate permanent, unaligned storage of length LEN. */
2146unsigned char *
2147_cpp_unaligned_alloc (pfile, len)
2148 cpp_reader *pfile;
2149 size_t len;
2150{
2151 _cpp_buff *buff = pfile->u_buff;
2152 unsigned char *result = buff->cur;
2153
2154 if (len > (size_t) (buff->limit - result))
2155 {
2156 buff = _cpp_get_buff (pfile, len);
2157 buff->next = pfile->u_buff;
2158 pfile->u_buff = buff;
2159 result = buff->cur;
2160 }
2161
2162 buff->cur = result + len;
2163 return result;
2164}
2165
93c80368 2166static int
1e013d2e 2167chunk_suitable (chunk, size)
93c80368
NB
2168 cpp_chunk *chunk;
2169 unsigned int size;
2170{
2171 /* Being at least twice SIZE means we can use memcpy in
2172 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2173 anyway. */
1e013d2e 2174 return (chunk && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
041c3194 2175}
c5a04734 2176
93c80368
NB
2177/* Returns the end of the new pool. PTR points to a char in the old
2178 pool, and is updated to point to the same char in the new pool. */
2179unsigned char *
2180_cpp_next_chunk (pool, len, ptr)
2181 cpp_pool *pool;
2182 unsigned int len;
2183 unsigned char **ptr;
041c3194 2184{
93c80368 2185 cpp_chunk *chunk = pool->cur->next;
c5a04734 2186
93c80368
NB
2187 /* LEN is the minimum size we want in the new pool. */
2188 len += POOL_ROOM (pool);
1e013d2e 2189 if (! chunk_suitable (chunk, len))
041c3194 2190 {
93c80368 2191 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
c5a04734 2192
93c80368
NB
2193 chunk->next = pool->cur->next;
2194 pool->cur->next = chunk;
c5a04734
ZW
2195 }
2196
93c80368
NB
2197 /* Update the pointer before changing chunk's front. */
2198 if (ptr)
2199 *ptr += chunk->base - POOL_FRONT (pool);
041c3194 2200
93c80368
NB
2201 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2202 chunk->front = chunk->base;
041c3194 2203
93c80368
NB
2204 pool->cur = chunk;
2205 return POOL_LIMIT (pool);
c5a04734
ZW
2206}
2207
93c80368
NB
2208static cpp_chunk *
2209new_chunk (size)
2210 unsigned int size;
041c3194 2211{
93c80368
NB
2212 unsigned char *base;
2213 cpp_chunk *result;
3fef5b2b 2214
269592a8 2215 size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
93c80368
NB
2216 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2217 /* Put the chunk descriptor at the end. Then chunk overruns will
2218 cause obvious chaos. */
2219 result = (cpp_chunk *) (base + size);
2220 result->base = base;
2221 result->front = base;
2222 result->limit = base + size;
2223 result->next = 0;
417f3e3a 2224
93c80368 2225 return result;
041c3194
ZW
2226}
2227
93c80368
NB
2228void
2229_cpp_init_pool (pool, size, align, temp)
2230 cpp_pool *pool;
2231 unsigned int size, align, temp;
2232{
2233 if (align == 0)
2234 align = DEFAULT_ALIGNMENT;
2235 if (align & (align - 1))
2236 abort ();
2237 pool->align = align;
bef985f3
NB
2238 pool->first = new_chunk (size);
2239 pool->cur = pool->first;
93c80368
NB
2240 if (temp)
2241 pool->cur->next = pool->cur;
041c3194
ZW
2242}
2243
93c80368
NB
2244void
2245_cpp_free_pool (pool)
2246 cpp_pool *pool;
3fef5b2b 2247{
bef985f3 2248 cpp_chunk *chunk = pool->first, *next;
3fef5b2b 2249
93c80368 2250 do
3fef5b2b 2251 {
93c80368
NB
2252 next = chunk->next;
2253 free (chunk->base);
2254 chunk = next;
3fef5b2b 2255 }
bef985f3 2256 while (chunk && chunk != pool->first);
041c3194 2257}
041c3194 2258
93c80368
NB
2259/* Reserve LEN bytes from a memory pool. */
2260unsigned char *
2261_cpp_pool_reserve (pool, len)
2262 cpp_pool *pool;
2263 unsigned int len;
041c3194 2264{
269592a8 2265 len = POOL_ALIGN (len, pool->align);
93c80368
NB
2266 if (len > (unsigned int) POOL_ROOM (pool))
2267 _cpp_next_chunk (pool, len, 0);
041c3194 2268
93c80368 2269 return POOL_FRONT (pool);
c5a04734
ZW
2270}
2271
93c80368
NB
2272/* Allocate LEN bytes from a memory pool. */
2273unsigned char *
2274_cpp_pool_alloc (pool, len)
2275 cpp_pool *pool;
2276 unsigned int len;
041c3194 2277{
93c80368 2278 unsigned char *result = _cpp_pool_reserve (pool, len);
417f3e3a 2279
93c80368
NB
2280 POOL_COMMIT (pool, len);
2281 return result;
041c3194 2282}
This page took 0.67694 seconds and 5 git commands to generate.