]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
* collect2.c (main): Use strcmp when testing for "-shared".
[gcc.git] / gcc / cpplex.c
CommitLineData
45b966db 1/* CPP Library - lexical analysis.
e1e7d56b 2 Copyright (C) 2000, 2001 Free Software Foundation, Inc.
45b966db
ZW
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23#include "config.h"
24#include "system.h"
45b966db
ZW
25#include "cpplib.h"
26#include "cpphash.h"
27
c8a96070
NB
28/* MULTIBYTE_CHARS support only works for native compilers.
29 ??? Ideally what we want is to model widechar support after
30 the current floating point support. */
31#ifdef CROSS_COMPILE
32#undef MULTIBYTE_CHARS
33#endif
34
35#ifdef MULTIBYTE_CHARS
36#include "mbchar.h"
37#include <locale.h>
38#endif
39
93c80368
NB
40/* How a token type is spelled.  Tokens with SPELL_STRING store their
41 spelling in the token list, and its length in the token->val.name.len. */
42enum spell_type
f9a0e96c 43{
93c80368
NB
44 SPELL_OPERATOR = 0,
45 SPELL_CHAR,
46 SPELL_IDENT,
47ad4138 47 SPELL_NUMBER,
93c80368
NB
48 SPELL_STRING,
49 SPELL_NONE
f9a0e96c
ZW
50};
51
/* One entry of the token_spellings table: CATEGORY is the spelling
   class of a token type and NAME is the static text recorded for it
   by the OP and TK table macros.  */
93c80368 52struct token_spelling
f9a0e96c 53{
93c80368
NB
54 enum spell_type category;
55 const unsigned char *name;
f9a0e96c
ZW
56};
57
8206c799
ZW
/* Spellings of the digraph tokens.  */
58static const unsigned char *const digraph_spellings[] =
59{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
93c80368
NB
60
/* Expanders for TTYPE_TABLE: OP entries are always SPELL_OPERATOR and
   record the string S; TK entries take their category from S and
   record the stringified enumerator name E.  Both are undefined again
   once the table has been built.  */
61#define OP(e, s) { SPELL_OPERATOR, U s },
62#define TK(e, s) { s, U STRINGX (e) },
8206c799 63static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
93c80368
NB
64#undef OP
65#undef TK
66
/* Look up the spelling category / recorded name for TOKEN's type.  */
67#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
68#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
/* Rewind the read pointer to the saved backup position.  Requires a
   local variable named `buffer' in the using function.  */
480709cc 69#define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
f2d5f0cc 70
87062813
NB
71static void handle_newline PARAMS ((cpp_reader *));
72static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
29401c30 73static cppchar_t get_effective_char PARAMS ((cpp_reader *));
0d9f234d 74
041c3194 75static int skip_block_comment PARAMS ((cpp_reader *));
cbcff6df 76static int skip_line_comment PARAMS ((cpp_reader *));
0d9f234d 77static void adjust_column PARAMS ((cpp_reader *));
4d6baafa 78static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
2c3fcba6
ZW
79static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
80static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
81 const U_CHAR *));
93c80368
NB
82static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
83static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
0d9f234d 84static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
93c80368 85static void unterminated PARAMS ((cpp_reader *, int));
87062813 86static bool trigraph_p PARAMS ((cpp_reader *));
0d9f234d 87static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
93c80368 88static int name_p PARAMS ((cpp_reader *, const cpp_string *));
62729350
NB
89static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
90 const unsigned char *, unsigned int *));
5fddcffc 91static tokenrun *next_tokenrun PARAMS ((tokenrun *));
f617b8e2 92
c8a96070 93static unsigned int hex_digit_value PARAMS ((unsigned int));
6142088c 94static _cpp_buff *new_buff PARAMS ((size_t));
15dad1d9 95
041c3194 96/* Utility routine:
9e62c811 97
bfb9dc7f
ZW
98 Compares, the token TOKEN to the NUL-terminated string STRING.
99 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
15dad1d9 100
041c3194 101int
bfb9dc7f
ZW
102cpp_ideq (token, string)
103 const cpp_token *token;
041c3194
ZW
104 const char *string;
105{
bfb9dc7f 106 if (token->type != CPP_NAME)
041c3194 107 return 0;
bfb9dc7f 108
a28c5035 109 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
15dad1d9 110}
1368ee70 111
87062813
NB
112/* Call when meeting a newline, assumed to be in buffer->cur[-1].
113 Returns with buffer->cur pointing to the character immediately
114 following the newline (combination). */
115static void
116handle_newline (pfile)
1444f2ed 117 cpp_reader *pfile;
0d9f234d 118{
87062813 119 cpp_buffer *buffer = pfile->buffer;
0d9f234d 120
87062813 121 /* Handle CR-LF and LF-CR. Most other implementations (e.g. java)
4d6baafa
NB
122 only accept CR-LF; maybe we should fall back to that behaviour? */
123 if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
87062813 124 buffer->cur++;
0d9f234d 125
87062813
NB
126 buffer->line_base = buffer->cur;
127 buffer->col_adjust = 0;
128 pfile->line++;
0d9f234d
NB
129}
130
87062813
NB
131/* Subroutine of skip_escaped_newlines; called when a 3-character
132 sequence beginning with "??" is encountered. buffer->cur points to
133 the second '?'.
134
135 Warn if necessary, and returns true if the sequence forms a
136 trigraph and the trigraph should be honoured. */
137static bool
138trigraph_p (pfile)
45b966db 139 cpp_reader *pfile;
45b966db 140{
87062813
NB
141 cpp_buffer *buffer = pfile->buffer;
142 cppchar_t from_char = buffer->cur[1];
143 bool accept;
144
145 if (!_cpp_trigraph_map[from_char])
146 return false;
147
148 accept = CPP_OPTION (pfile, trigraphs);
149
cbcff6df
NB
150 /* Don't warn about trigraphs in comments. */
151 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
45b966db 152 {
041c3194 153 if (accept)
87062813 154 cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 1,
041c3194 155 "trigraph ??%c converted to %c",
0d9f234d
NB
156 (int) from_char,
157 (int) _cpp_trigraph_map[from_char]);
4a5b68a2
NB
158 else if (buffer->cur != buffer->last_Wtrigraphs)
159 {
160 buffer->last_Wtrigraphs = buffer->cur;
67821e3a 161 cpp_warning_with_line (pfile, pfile->line,
87062813 162 CPP_BUF_COL (buffer) - 1,
4a5b68a2
NB
163 "trigraph ??%c ignored", (int) from_char);
164 }
45b966db 165 }
0d9f234d 166
041c3194 167 return accept;
45b966db
ZW
168}
169
87062813 170/* Skips any escaped newlines introduced by '?' or a '\\', assumed to
480709cc
NB
171 lie in buffer->cur[-1]. Returns the next byte, which will be in
172 buffer->cur[-1]. This routine performs preprocessing stages 1 and
173 2 of the ISO C standard. */
0d9f234d 174static cppchar_t
87062813 175skip_escaped_newlines (pfile)
29401c30 176 cpp_reader *pfile;
45b966db 177{
29401c30 178 cpp_buffer *buffer = pfile->buffer;
87062813 179 cppchar_t next = buffer->cur[-1];
29401c30 180
a5c3cccd
NB
181 /* Only do this if we apply stages 1 and 2. */
182 if (!buffer->from_stage3)
041c3194 183 {
a5c3cccd 184 const unsigned char *saved_cur;
87062813 185 cppchar_t next1;
a5c3cccd
NB
186
187 do
0d9f234d 188 {
a5c3cccd 189 if (next == '?')
0d9f234d 190 {
4d6baafa 191 if (buffer->cur[0] != '?' || !trigraph_p (pfile))
87062813 192 break;
a5c3cccd 193
87062813
NB
194 /* Translate the trigraph. */
195 next = _cpp_trigraph_map[buffer->cur[1]];
196 buffer->cur += 2;
4d6baafa 197 if (next != '\\')
a5c3cccd 198 break;
a5c3cccd
NB
199 }
200
4d6baafa
NB
201 if (buffer->cur == buffer->rlimit)
202 break;
203
87062813
NB
204 /* We have a backslash, and room for at least one more
205 character. Skip horizontal whitespace. */
206 saved_cur = buffer->cur;
a5c3cccd 207 do
87062813
NB
208 next1 = *buffer->cur++;
209 while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
041c3194 210
a5c3cccd 211 if (!is_vspace (next1))
0d9f234d 212 {
87062813 213 buffer->cur = saved_cur;
0d9f234d
NB
214 break;
215 }
45b966db 216
87062813
NB
217 if (saved_cur != buffer->cur - 1
218 && !pfile->state.lexing_comment)
29401c30 219 cpp_warning (pfile, "backslash and newline separated by space");
0d9f234d 220
87062813 221 handle_newline (pfile);
480709cc 222 buffer->backup_to = buffer->cur;
87062813
NB
223 if (buffer->cur == buffer->rlimit)
224 {
225 cpp_pedwarn (pfile, "backslash-newline at end of file");
226 next = EOF;
227 }
228 else
229 next = *buffer->cur++;
0d9f234d 230 }
a5c3cccd 231 while (next == '\\' || next == '?');
041c3194 232 }
45b966db 233
0d9f234d 234 return next;
45b966db
ZW
235}
236
0d9f234d 237/* Obtain the next character, after trigraph conversion and skipping
87062813
NB
238 an arbitrarily long string of escaped newlines. The common case of
239 no trigraphs or escaped newlines falls through quickly. On return,
480709cc
NB
240 buffer->backup_to points to where to return to if the character is
241 not to be processed. */
0d9f234d 242static cppchar_t
29401c30
NB
243get_effective_char (pfile)
244 cpp_reader *pfile;
64aaf407 245{
4d6baafa 246 cppchar_t next;
480709cc 247 cpp_buffer *buffer = pfile->buffer;
0d9f234d 248
480709cc 249 buffer->backup_to = buffer->cur;
4d6baafa
NB
250 next = *buffer->cur++;
251 if (__builtin_expect (next == '?' || next == '\\', 0))
252 next = skip_escaped_newlines (pfile);
0d9f234d 253
480709cc 254 return next;
64aaf407
NB
255}
256
0d9f234d
NB
257/* Skip a C-style block comment. We find the end of the comment by
258 seeing if an asterisk is before every '/' we encounter. Returns
259 non-zero if comment terminated by EOF, zero otherwise. */
041c3194
ZW
260static int
261skip_block_comment (pfile)
45b966db
ZW
262 cpp_reader *pfile;
263{
041c3194 264 cpp_buffer *buffer = pfile->buffer;
d8090680 265 cppchar_t c = EOF, prevc = EOF;
0d9f234d 266
cbcff6df 267 pfile->state.lexing_comment = 1;
0d9f234d 268 while (buffer->cur != buffer->rlimit)
45b966db 269 {
0d9f234d
NB
270 prevc = c, c = *buffer->cur++;
271
0d9f234d 272 /* FIXME: For speed, create a new character class of characters
93c80368 273 of interest inside block comments. */
0d9f234d 274 if (c == '?' || c == '\\')
87062813 275 c = skip_escaped_newlines (pfile);
041c3194 276
0d9f234d
NB
277 /* People like decorating comments with '*', so check for '/'
278 instead for efficiency. */
041c3194 279 if (c == '/')
45b966db 280 {
0d9f234d
NB
281 if (prevc == '*')
282 break;
041c3194 283
0d9f234d 284 /* Warn about potential nested comments, but not if the '/'
a1f300c0 285 comes immediately before the true comment delimiter.
041c3194 286 Don't bother to get it right across escaped newlines. */
0d9f234d 287 if (CPP_OPTION (pfile, warn_comments)
87062813
NB
288 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
289 cpp_warning_with_line (pfile,
290 pfile->line, CPP_BUF_COL (buffer),
291 "\"/*\" within comment");
45b966db 292 }
91fcd158 293 else if (is_vspace (c))
87062813 294 handle_newline (pfile);
52fadca8 295 else if (c == '\t')
0d9f234d 296 adjust_column (pfile);
45b966db 297 }
041c3194 298
cbcff6df 299 pfile->state.lexing_comment = 0;
0d9f234d 300 return c != '/' || prevc != '*';
45b966db
ZW
301}
302
480709cc
NB
303/* Skip a C++ line comment, leaving buffer->cur pointing to the
304 terminating newline. Handles escaped newlines. Returns non-zero
305 if a multiline comment. */
041c3194 306static int
cbcff6df
NB
307skip_line_comment (pfile)
308 cpp_reader *pfile;
45b966db 309{
cbcff6df 310 cpp_buffer *buffer = pfile->buffer;
67821e3a 311 unsigned int orig_line = pfile->line;
0d9f234d 312 cppchar_t c;
041c3194 313
cbcff6df 314 pfile->state.lexing_comment = 1;
0d9f234d 315 do
041c3194 316 {
0d9f234d 317 if (buffer->cur == buffer->rlimit)
480709cc 318 goto at_eof;
041c3194 319
0d9f234d
NB
320 c = *buffer->cur++;
321 if (c == '?' || c == '\\')
87062813 322 c = skip_escaped_newlines (pfile);
041c3194 323 }
0d9f234d 324 while (!is_vspace (c));
45b966db 325
480709cc
NB
326 /* Step back over the newline, except at EOF. */
327 buffer->cur--;
328 at_eof:
329
cbcff6df 330 pfile->state.lexing_comment = 0;
67821e3a 331 return orig_line != pfile->line;
041c3194 332}
45b966db 333
0d9f234d
NB
334/* pfile->buffer->cur is one beyond the \t character. Update
335 col_adjust so we track the column correctly. */
52fadca8 336static void
0d9f234d 337adjust_column (pfile)
52fadca8 338 cpp_reader *pfile;
52fadca8 339{
0d9f234d
NB
340 cpp_buffer *buffer = pfile->buffer;
341 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
52fadca8
NB
342
343 /* Round it up to multiple of the tabstop, but subtract 1 since the
344 tab itself occupies a character position. */
0d9f234d
NB
345 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
346 - col % CPP_OPTION (pfile, tabstop)) - 1;
52fadca8
NB
347}
348
0d9f234d
NB
349/* Skips whitespace, saving the next non-whitespace character.
350 Adjusts pfile->col_adjust to account for tabs. Without this,
351 tokens might be assigned an incorrect column. */
4d6baafa 352static int
0d9f234d 353skip_whitespace (pfile, c)
041c3194 354 cpp_reader *pfile;
0d9f234d 355 cppchar_t c;
041c3194
ZW
356{
357 cpp_buffer *buffer = pfile->buffer;
0d9f234d 358 unsigned int warned = 0;
45b966db 359
0d9f234d 360 do
041c3194 361 {
91fcd158
NB
362 /* Horizontal space always OK. */
363 if (c == ' ')
0d9f234d 364 ;
91fcd158 365 else if (c == '\t')
0d9f234d
NB
366 adjust_column (pfile);
367 /* Just \f \v or \0 left. */
91fcd158 368 else if (c == '\0')
041c3194 369 {
4d6baafa
NB
370 if (buffer->cur - 1 == buffer->rlimit)
371 return 0;
91fcd158 372 if (!warned)
0d9f234d
NB
373 {
374 cpp_warning (pfile, "null character(s) ignored");
375 warned = 1;
376 }
45b966db 377 }
93c80368 378 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
67821e3a 379 cpp_pedwarn_with_line (pfile, pfile->line,
91fcd158
NB
380 CPP_BUF_COL (buffer),
381 "%s in preprocessing directive",
382 c == '\f' ? "form feed" : "vertical tab");
0d9f234d 383
0d9f234d 384 c = *buffer->cur++;
45b966db 385 }
ec5c56db 386 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
0d9f234d
NB
387 while (is_nvspace (c));
388
480709cc 389 buffer->cur--;
4d6baafa 390 return 1;
041c3194 391}
45b966db 392
93c80368
NB
393/* See if the characters of a number token are valid in a name (no
394 '.', '+' or '-'). */
395static int
396name_p (pfile, string)
397 cpp_reader *pfile;
398 const cpp_string *string;
399{
400 unsigned int i;
401
402 for (i = 0; i < string->len; i++)
403 if (!is_idchar (string->text[i]))
404 return 0;
405
406 return 1;
407}
408
2c3fcba6
ZW
409/* Parse an identifier, skipping embedded backslash-newlines. This is
410 a critical inner loop. The common case is an identifier which has
411 not been split by backslash-newline, does not contain a dollar
412 sign, and has already been scanned (roughly 10:1 ratio of
413 seen:unseen identifiers in normal code; the distribution is
414 Poisson-like). Second most common case is a new identifier, not
415 split and no dollar sign. The other possibilities are rare and
416 have been relegated to parse_identifier_slow. */
0d9f234d
NB
417
418static cpp_hashnode *
2c3fcba6 419parse_identifier (pfile)
45b966db 420 cpp_reader *pfile;
45b966db 421{
93c80368 422 cpp_hashnode *result;
4d6baafa 423 const U_CHAR *cur;
2c3fcba6
ZW
424
425 /* Fast-path loop. Skim over a normal identifier.
426 N.B. ISIDNUM does not include $. */
4d6baafa
NB
427 cur = pfile->buffer->cur;
428 while (ISIDNUM (*cur))
2c3fcba6 429 cur++;
2c3fcba6
ZW
430
431 /* Check for slow-path cases. */
4d6baafa 432 if (*cur == '?' || *cur == '\\' || *cur == '$')
2c3fcba6
ZW
433 result = parse_identifier_slow (pfile, cur);
434 else
435 {
436 const U_CHAR *base = pfile->buffer->cur - 1;
437 result = (cpp_hashnode *)
438 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
439 pfile->buffer->cur = cur;
440 }
441
442 /* Rarely, identifiers require diagnostics when lexed.
443 XXX Has to be forced out of the fast path. */
444 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
445 && !pfile->state.skipping, 0))
446 {
447 /* It is allowed to poison the same identifier twice. */
448 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
449 cpp_error (pfile, "attempt to use poisoned \"%s\"",
450 NODE_NAME (result));
451
452 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
453 replacement list of a variadic macro. */
454 if (result == pfile->spec_nodes.n__VA_ARGS__
455 && !pfile->state.va_args_ok)
456 cpp_pedwarn (pfile,
457 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
458 }
459
460 return result;
461}
462
463/* Slow path. This handles identifiers which have been split, and
464 identifiers which contain dollar signs. The part of the identifier
465 from PFILE->buffer->cur-1 to CUR has already been scanned. */
466static cpp_hashnode *
467parse_identifier_slow (pfile, cur)
468 cpp_reader *pfile;
469 const U_CHAR *cur;
470{
0d9f234d 471 cpp_buffer *buffer = pfile->buffer;
2c3fcba6 472 const U_CHAR *base = buffer->cur - 1;
2a967f3d 473 struct obstack *stack = &pfile->hash_table->stack;
2c3fcba6
ZW
474 unsigned int c, saw_dollar = 0, len;
475
476 /* Copy the part of the token which is known to be okay. */
477 obstack_grow (stack, base, cur - base);
041c3194 478
2c3fcba6
ZW
479 /* Now process the part which isn't. We are looking at one of
480 '$', '\\', or '?' on entry to this loop. */
481 c = *cur++;
482 buffer->cur = cur;
0d9f234d 483 do
041c3194 484 {
2c3fcba6
ZW
485 while (is_idchar (c))
486 {
487 obstack_1grow (stack, c);
45b966db 488
2c3fcba6
ZW
489 if (c == '$')
490 saw_dollar++;
ba89d661 491
2c3fcba6
ZW
492 c = *buffer->cur++;
493 }
ba89d661 494
0d9f234d 495 /* Potential escaped newline? */
480709cc 496 buffer->backup_to = buffer->cur - 1;
0d9f234d 497 if (c != '?' && c != '\\')
2c3fcba6 498 break;
87062813 499 c = skip_escaped_newlines (pfile);
041c3194 500 }
0d9f234d
NB
501 while (is_idchar (c));
502
4d6baafa 503 /* Step back over the unwanted char. */
480709cc 504 BACKUP ();
93c80368 505
4fe9b91c 506 /* $ is not an identifier character in the standard, but is commonly
0d9f234d
NB
507 accepted as an extension. Don't warn about it in skipped
508 conditional blocks. */
cef0d199 509 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
0d9f234d
NB
510 cpp_pedwarn (pfile, "'$' character(s) in identifier");
511
93c80368 512 /* Identifiers are null-terminated. */
2a967f3d
NB
513 len = obstack_object_size (stack);
514 obstack_1grow (stack, '\0');
93c80368 515
2c3fcba6 516 return (cpp_hashnode *)
2a967f3d 517 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
45b966db
ZW
518}
519
0d9f234d 520/* Parse a number, skipping embedded backslash-newlines. */
45b966db 521static void
93c80368 522parse_number (pfile, number, c, leading_period)
45b966db 523 cpp_reader *pfile;
0d9f234d
NB
524 cpp_string *number;
525 cppchar_t c;
93c80368 526 int leading_period;
45b966db 527{
041c3194 528 cpp_buffer *buffer = pfile->buffer;
93c80368 529 unsigned char *dest, *limit;
45b966db 530
ece54d54
NB
531 dest = BUFF_FRONT (pfile->u_buff);
532 limit = BUFF_LIMIT (pfile->u_buff);
cbcff6df 533
93c80368
NB
534 /* Place a leading period. */
535 if (leading_period)
536 {
ece54d54
NB
537 if (dest == limit)
538 {
8c3b2693 539 _cpp_extend_buff (pfile, &pfile->u_buff, 1);
ece54d54
NB
540 dest = BUFF_FRONT (pfile->u_buff);
541 limit = BUFF_LIMIT (pfile->u_buff);
542 }
93c80368
NB
543 *dest++ = '.';
544 }
545
0d9f234d 546 do
041c3194 547 {
0d9f234d
NB
548 do
549 {
93c80368 550 /* Need room for terminating null. */
ece54d54
NB
551 if ((size_t) (limit - dest) < 2)
552 {
553 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
8c3b2693 554 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
ece54d54
NB
555 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
556 limit = BUFF_LIMIT (pfile->u_buff);
557 }
93c80368 558 *dest++ = c;
0d9f234d 559
0d9f234d
NB
560 c = *buffer->cur++;
561 }
93c80368 562 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
45b966db 563
0d9f234d 564 /* Potential escaped newline? */
480709cc 565 buffer->backup_to = buffer->cur - 1;
0d9f234d
NB
566 if (c != '?' && c != '\\')
567 break;
87062813 568 c = skip_escaped_newlines (pfile);
45b966db 569 }
93c80368 570 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
cbcff6df 571
4d6baafa 572 /* Step back over the unwanted char. */
480709cc 573 BACKUP ();
64aaf407 574
93c80368
NB
575 /* Null-terminate the number. */
576 *dest = '\0';
577
ece54d54 578 number->text = BUFF_FRONT (pfile->u_buff);
93c80368 579 number->len = dest - number->text;
ece54d54 580 BUFF_FRONT (pfile->u_buff) = dest + 1;
0d9f234d
NB
581}
582
583/* Subroutine of parse_string. Emits error for unterminated strings. */
584static void
93c80368 585unterminated (pfile, term)
0d9f234d 586 cpp_reader *pfile;
0d9f234d
NB
587 int term;
588{
589 cpp_error (pfile, "missing terminating %c character", term);
590
50410426 591 if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
041c3194 592 {
50410426 593 cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
0d9f234d 594 "possible start of unterminated string literal");
50410426 595 pfile->mls_line = 0;
041c3194 596 }
45b966db
ZW
597}
598
93c80368
NB
599/* Subroutine of parse_string. */
600static int
601unescaped_terminator_p (pfile, dest)
602 cpp_reader *pfile;
603 const unsigned char *dest;
604{
605 const unsigned char *start, *temp;
606
607 /* In #include-style directives, terminators are not escapeable. */
608 if (pfile->state.angled_headers)
609 return 1;
610
ece54d54 611 start = BUFF_FRONT (pfile->u_buff);
93c80368
NB
612
613 /* An odd number of consecutive backslashes represents an escaped
614 terminator. */
615 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
616 ;
617
618 return ((dest - temp) & 1) == 0;
619}
620
0d9f234d 621/* Parses a string, character constant, or angle-bracketed header file
7868b4a2
NB
622 name. Handles embedded trigraphs and escaped newlines. The stored
623 string is guaranteed NUL-terminated, but it is not guaranteed that
624 this is the first NUL since embedded NULs are preserved.
87062813 625 Multi-line strings are allowed, but they are deprecated.
45b966db 626
87062813
NB
627 When this function returns, buffer->cur points to the next
628 character to be processed. */
041c3194 629static void
0d9f234d 630parse_string (pfile, token, terminator)
45b966db 631 cpp_reader *pfile;
041c3194 632 cpp_token *token;
0d9f234d 633 cppchar_t terminator;
45b966db 634{
041c3194 635 cpp_buffer *buffer = pfile->buffer;
93c80368 636 unsigned char *dest, *limit;
0d9f234d 637 cppchar_t c;
d82fc108 638 bool warned_nulls = false, warned_multi = false;
0d9f234d 639
ece54d54
NB
640 dest = BUFF_FRONT (pfile->u_buff);
641 limit = BUFF_LIMIT (pfile->u_buff);
93c80368 642
0d9f234d 643 for (;;)
45b966db 644 {
87062813 645 /* We need room for another char, possibly the terminating NUL. */
ece54d54
NB
646 if ((size_t) (limit - dest) < 1)
647 {
648 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
8c3b2693 649 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
ece54d54
NB
650 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
651 limit = BUFF_LIMIT (pfile->u_buff);
652 }
7868b4a2 653
0d9f234d 654 /* Handle trigraphs, escaped newlines etc. */
87062813 655 c = *buffer->cur++;
0d9f234d 656 if (c == '?' || c == '\\')
87062813 657 c = skip_escaped_newlines (pfile);
45b966db 658
87062813 659 if (c == terminator)
45b966db 660 {
87062813
NB
661 if (unescaped_terminator_p (pfile, dest))
662 break;
0d9f234d
NB
663 }
664 else if (is_vspace (c))
665 {
666 /* In assembly language, silently terminate string and
667 character literals at end of line. This is a kludge
668 around not knowing where comments are. */
bdb05a7b 669 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
87062813
NB
670 {
671 buffer->cur--;
672 break;
673 }
45b966db 674
0d9f234d
NB
675 /* Character constants and header names may not extend over
676 multiple lines. In Standard C, neither may strings.
677 Unfortunately, we accept multiline strings as an
16eb2788
NB
678 extension, except in #include family directives. */
679 if (terminator != '"' || pfile->state.angled_headers)
45b966db 680 {
93c80368 681 unterminated (pfile, terminator);
87062813 682 buffer->cur--;
0d9f234d 683 break;
45b966db 684 }
45b966db 685
d82fc108
NB
686 if (!warned_multi)
687 {
688 warned_multi = true;
689 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
690 }
691
50410426
NB
692 if (pfile->mls_line == 0)
693 {
694 pfile->mls_line = token->line;
695 pfile->mls_col = token->col;
696 }
0d9f234d 697
87062813
NB
698 handle_newline (pfile);
699 c = '\n';
0d9f234d 700 }
4d6baafa 701 else if (c == '\0')
0d9f234d 702 {
4d6baafa
NB
703 if (buffer->cur - 1 == buffer->rlimit)
704 {
705 unterminated (pfile, terminator);
706 buffer->cur--;
707 break;
708 }
709 if (!warned_nulls)
710 {
711 warned_nulls = true;
712 cpp_warning (pfile, "null character(s) preserved in literal");
713 }
45b966db 714 }
45b966db 715
93c80368 716 *dest++ = c;
45b966db
ZW
717 }
718
7868b4a2 719 *dest = '\0';
45b966db 720
ece54d54
NB
721 token->val.str.text = BUFF_FRONT (pfile->u_buff);
722 token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
723 BUFF_FRONT (pfile->u_buff) = dest + 1;
0d9f234d 724}
041c3194 725
93c80368 726/* The stored comment includes the comment start and any terminator. */
9e62c811 727static void
0d9f234d
NB
728save_comment (pfile, token, from)
729 cpp_reader *pfile;
041c3194
ZW
730 cpp_token *token;
731 const unsigned char *from;
9e62c811 732{
041c3194 733 unsigned char *buffer;
0d9f234d 734 unsigned int len;
0d9f234d 735
1c6d33ef 736 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
480709cc 737
3542203b
NB
738 /* C++ comments probably (not definitely) have moved past a new
739 line, which we don't want to save in the comment. */
480709cc 740 if (is_vspace (pfile->buffer->cur[-1]))
3542203b 741 len--;
ece54d54 742 buffer = _cpp_unaligned_alloc (pfile, len);
041c3194 743
041c3194 744 token->type = CPP_COMMENT;
bfb9dc7f 745 token->val.str.len = len;
0d9f234d 746 token->val.str.text = buffer;
45b966db 747
1c6d33ef
NB
748 buffer[0] = '/';
749 memcpy (buffer + 1, from, len - 1);
0d9f234d 750}
45b966db 751
5fddcffc
NB
752/* Allocate COUNT tokens for RUN. */
753void
754_cpp_init_tokenrun (run, count)
755 tokenrun *run;
756 unsigned int count;
757{
758 run->base = xnewvec (cpp_token, count);
759 run->limit = run->base + count;
760 run->next = NULL;
761}
762
763/* Returns the next tokenrun, or creates one if there is none. */
764static tokenrun *
765next_tokenrun (run)
766 tokenrun *run;
767{
768 if (run->next == NULL)
769 {
770 run->next = xnew (tokenrun);
bdcbe496 771 run->next->prev = run;
5fddcffc
NB
772 _cpp_init_tokenrun (run->next, 250);
773 }
774
775 return run->next;
776}
777
4ed5bcfb
NB
778/* Allocate a single token that is invalidated at the same time as the
779 rest of the tokens on the line. Has its line and col set to the
780 same as the last lexed token, so that diagnostics appear in the
781 right place. */
782cpp_token *
783_cpp_temp_token (pfile)
784 cpp_reader *pfile;
785{
786 cpp_token *old, *result;
787
788 old = pfile->cur_token - 1;
789 if (pfile->cur_token == pfile->cur_run->limit)
790 {
791 pfile->cur_run = next_tokenrun (pfile->cur_run);
792 pfile->cur_token = pfile->cur_run->base;
793 }
794
795 result = pfile->cur_token++;
796 result->line = old->line;
797 result->col = old->col;
798 return result;
799}
800
14baae01
NB
801/* Lex a token into RESULT (external interface). Takes care of issues
802 like directive handling, token lookahead, multiple include
a1f300c0 803 optimization and skipping. */
345894b4
NB
804const cpp_token *
805_cpp_lex_token (pfile)
45b966db 806 cpp_reader *pfile;
5fddcffc 807{
bdcbe496 808 cpp_token *result;
5fddcffc 809
bdcbe496 810 for (;;)
5fddcffc 811 {
bdcbe496 812 if (pfile->cur_token == pfile->cur_run->limit)
5fddcffc 813 {
bdcbe496
NB
814 pfile->cur_run = next_tokenrun (pfile->cur_run);
815 pfile->cur_token = pfile->cur_run->base;
5fddcffc
NB
816 }
817
bdcbe496 818 if (pfile->lookaheads)
14baae01
NB
819 {
820 pfile->lookaheads--;
821 result = pfile->cur_token++;
822 }
bdcbe496 823 else
14baae01 824 result = _cpp_lex_direct (pfile);
bdcbe496
NB
825
826 if (result->flags & BOL)
5fddcffc 827 {
bdcbe496
NB
828 /* Is this a directive. If _cpp_handle_directive returns
829 false, it is an assembler #. */
830 if (result->type == CPP_HASH
831 && !pfile->state.parsing_args
832 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
833 continue;
97293897
NB
834 if (pfile->cb.line_change && !pfile->state.skipping)
835 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
5fddcffc 836 }
5fddcffc 837
bdcbe496
NB
838 /* We don't skip tokens in directives. */
839 if (pfile->state.in_directive)
840 break;
5fddcffc 841
bdcbe496 842 /* Outside a directive, invalidate controlling macros. At file
14baae01 843 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
bdcbe496 844 get here and MI optimisation works. */
5fddcffc 845 pfile->mi_valid = false;
bdcbe496
NB
846
847 if (!pfile->state.skipping || result->type == CPP_EOF)
848 break;
5fddcffc
NB
849 }
850
345894b4 851 return result;
5fddcffc
NB
852}
853
480709cc
NB
/* Helper for the lexer switch: if the next effective character is
   CHAR, consume it and set result->type to THEN_TYPE; otherwise
   rewind with BACKUP () and set result->type to ELSE_TYPE.  Relies on
   `pfile', `buffer' and `result' being in scope where it is used.  */
854#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
855 do { \
856 if (get_effective_char (pfile) == CHAR) \
857 result->type = THEN_TYPE; \
858 else \
859 { \
860 BACKUP (); \
861 result->type = ELSE_TYPE; \
862 } \
863 } while (0)
864
14baae01
NB
865/* Lex a token into pfile->cur_token, which is also incremented, to
866 get diagnostics pointing to the correct location.
867
868 Does not handle issues such as token lookahead, multiple-include
869 optimisation, directives, skipping etc. This function is only
870 suitable for use by _cpp_lex_token, and in special cases like
871 lex_expansion_token which doesn't care for any of these issues.
872
873 When meeting a newline, returns CPP_EOF if parsing a directive,
874 otherwise returns to the start of the token buffer if permissible.
875 Returns the location of the lexed token. */
876cpp_token *
877_cpp_lex_direct (pfile)
5fddcffc 878 cpp_reader *pfile;
45b966db 879{
0d9f234d 880 cppchar_t c;
adb84b42 881 cpp_buffer *buffer;
0d9f234d 882 const unsigned char *comment_start;
14baae01 883 cpp_token *result = pfile->cur_token++;
9ec7291f 884
5fddcffc 885 fresh_line:
adb84b42 886 buffer = pfile->buffer;
bd969772
NB
887 result->flags = buffer->saved_flags;
888 buffer->saved_flags = 0;
5fddcffc 889 update_tokens_line:
1444f2ed 890 result->line = pfile->line;
041c3194 891
5fddcffc 892 skipped_white:
480709cc 893 c = *buffer->cur++;
5fddcffc 894 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
5fddcffc
NB
895
896 trigraph:
0d9f234d 897 switch (c)
45b966db 898 {
4d6baafa
NB
899 case ' ': case '\t': case '\f': case '\v': case '\0':
900 result->flags |= PREV_WHITE;
901 if (skip_whitespace (pfile, c))
902 goto skipped_white;
903
904 /* EOF. */
905 buffer->cur--;
bdcbe496 906 buffer->saved_flags = BOL;
5fddcffc 907 if (!pfile->state.parsing_args && !pfile->state.in_directive)
ef6e958a 908 {
bdcbe496 909 if (buffer->cur != buffer->line_base)
5fddcffc
NB
910 {
911 /* Non-empty files should end in a newline. Don't warn
912 for command line and _Pragma buffers. */
913 if (!buffer->from_stage3)
914 cpp_pedwarn (pfile, "no newline at end of file");
87062813 915 handle_newline (pfile);
7364fdd8 916 }
bdcbe496
NB
917
918 /* Don't pop the last buffer. */
919 if (buffer->prev)
920 {
921 unsigned char stop = buffer->return_at_eof;
922
923 _cpp_pop_buffer (pfile);
924 if (!stop)
925 goto fresh_line;
926 }
ef6e958a 927 }
0d9f234d 928 result->type = CPP_EOF;
5fddcffc 929 break;
45b966db 930
0d9f234d 931 case '\n': case '\r':
87062813 932 handle_newline (pfile);
bdcbe496
NB
933 buffer->saved_flags = BOL;
934 if (! pfile->state.in_directive)
45b966db 935 {
4ed5bcfb
NB
936 if (pfile->state.parsing_args == 2)
937 buffer->saved_flags |= PREV_WHITE;
bdcbe496
NB
938 if (!pfile->keep_tokens)
939 {
940 pfile->cur_run = &pfile->base_run;
941 result = pfile->base_run.base;
942 pfile->cur_token = result + 1;
943 }
944 goto fresh_line;
45b966db 945 }
5fddcffc
NB
946 result->type = CPP_EOF;
947 break;
46d07497 948
0d9f234d
NB
949 case '?':
950 case '\\':
951 /* These could start an escaped newline, or '?' a trigraph. Let
952 skip_escaped_newlines do all the work. */
953 {
67821e3a 954 unsigned int line = pfile->line;
0d9f234d 955
87062813 956 c = skip_escaped_newlines (pfile);
67821e3a 957 if (line != pfile->line)
87062813 958 {
480709cc 959 buffer->cur--;
87062813
NB
960 /* We had at least one escaped newline of some sort.
961 Update the token's line and column. */
5fddcffc 962 goto update_tokens_line;
87062813 963 }
480709cc 964 }
0d9f234d 965
480709cc
NB
966 /* We are either the original '?' or '\\', or a trigraph. */
967 if (c == '?')
0d9f234d 968 result->type = CPP_QUERY;
480709cc
NB
969 else if (c == '\\')
970 goto random_char;
971 else
972 goto trigraph;
0d9f234d 973 break;
46d07497 974
0d9f234d
NB
975 case '0': case '1': case '2': case '3': case '4':
976 case '5': case '6': case '7': case '8': case '9':
977 result->type = CPP_NUMBER;
93c80368 978 parse_number (pfile, &result->val.str, c, 0);
0d9f234d 979 break;
46d07497 980
0abc6a6a
NB
981 case 'L':
982 /* 'L' may introduce wide characters or strings. */
983 {
984 const unsigned char *pos = buffer->cur;
0d9f234d 985
0abc6a6a
NB
986 c = get_effective_char (pfile);
987 if (c == '\'' || c == '"')
988 {
989 result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
990 parse_string (pfile, result, c);
991 break;
992 }
993 buffer->cur = pos;
994 }
995 /* Fall through. */
996
997 start_ident:
0d9f234d
NB
998 case '_':
999 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1000 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1001 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1002 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1003 case 'y': case 'z':
1004 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
0abc6a6a 1005 case 'G': case 'H': case 'I': case 'J': case 'K':
0d9f234d
NB
1006 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1007 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1008 case 'Y': case 'Z':
1009 result->type = CPP_NAME;
2c3fcba6 1010 result->val.node = parse_identifier (pfile);
0d9f234d 1011
0d9f234d 1012 /* Convert named operators to their proper types. */
0abc6a6a 1013 if (result->val.node->flags & NODE_OPERATOR)
0d9f234d
NB
1014 {
1015 result->flags |= NAMED_OP;
93c80368 1016 result->type = result->val.node->value.operator;
0d9f234d
NB
1017 }
1018 break;
1019
1020 case '\'':
1021 case '"':
1022 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
0d9f234d
NB
1023 parse_string (pfile, result, c);
1024 break;
041c3194 1025
0d9f234d 1026 case '/':
1c6d33ef
NB
1027 /* A potential block or line comment. */
1028 comment_start = buffer->cur;
29401c30 1029 c = get_effective_char (pfile);
480709cc 1030
1c6d33ef
NB
1031 if (c == '*')
1032 {
0d9f234d 1033 if (skip_block_comment (pfile))
67821e3a 1034 cpp_error (pfile, "unterminated comment");
0d9f234d 1035 }
480709cc
NB
1036 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1037 || CPP_IN_SYSTEM_HEADER (pfile)))
0d9f234d 1038 {
bdb05a7b
NB
1039 /* Warn about comments only if pedantically GNUC89, and not
1040 in system headers. */
1041 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
a94c1199 1042 && ! buffer->warned_cplusplus_comments)
041c3194 1043 {
1c6d33ef
NB
1044 cpp_pedwarn (pfile,
1045 "C++ style comments are not allowed in ISO C89");
1046 cpp_pedwarn (pfile,
1047 "(this will be reported only once per input file)");
1048 buffer->warned_cplusplus_comments = 1;
1049 }
0d9f234d 1050
01ef6563 1051 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
50410426 1052 cpp_warning (pfile, "multi-line comment");
1c6d33ef 1053 }
480709cc
NB
1054 else if (c == '=')
1055 {
1056 result->type = CPP_DIV_EQ;
1057 break;
1058 }
1059 else
1060 {
1061 BACKUP ();
1062 result->type = CPP_DIV;
1063 break;
1064 }
0d9f234d 1065
1c6d33ef
NB
1066 if (!pfile->state.save_comments)
1067 {
1068 result->flags |= PREV_WHITE;
5fddcffc 1069 goto update_tokens_line;
0d9f234d 1070 }
1c6d33ef
NB
1071
1072 /* Save the comment as a token in its own right. */
1073 save_comment (pfile, result, comment_start);
bdcbe496 1074 break;
0d9f234d
NB
1075
1076 case '<':
1077 if (pfile->state.angled_headers)
1078 {
1079 result->type = CPP_HEADER_NAME;
480709cc
NB
1080 parse_string (pfile, result, '>');
1081 break;
0d9f234d 1082 }
45b966db 1083
29401c30 1084 c = get_effective_char (pfile);
0d9f234d 1085 if (c == '=')
480709cc 1086 result->type = CPP_LESS_EQ;
0d9f234d 1087 else if (c == '<')
480709cc 1088 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
0d9f234d 1089 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
480709cc 1090 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
0d9f234d
NB
1091 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1092 {
480709cc 1093 result->type = CPP_OPEN_SQUARE;
0d9f234d
NB
1094 result->flags |= DIGRAPH;
1095 }
1096 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1097 {
480709cc 1098 result->type = CPP_OPEN_BRACE;
0d9f234d
NB
1099 result->flags |= DIGRAPH;
1100 }
480709cc
NB
1101 else
1102 {
1103 BACKUP ();
1104 result->type = CPP_LESS;
1105 }
0d9f234d
NB
1106 break;
1107
1108 case '>':
29401c30 1109 c = get_effective_char (pfile);
0d9f234d 1110 if (c == '=')
480709cc 1111 result->type = CPP_GREATER_EQ;
0d9f234d 1112 else if (c == '>')
480709cc 1113 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
0d9f234d 1114 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
480709cc
NB
1115 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1116 else
0d9f234d 1117 {
480709cc
NB
1118 BACKUP ();
1119 result->type = CPP_GREATER;
0d9f234d
NB
1120 }
1121 break;
1122
cbcff6df 1123 case '%':
480709cc
NB
1124 c = get_effective_char (pfile);
1125 if (c == '=')
1126 result->type = CPP_MOD_EQ;
1127 else if (CPP_OPTION (pfile, digraphs) && c == ':')
1128 {
1129 result->flags |= DIGRAPH;
1130 result->type = CPP_HASH;
1131 if (get_effective_char (pfile) == '%')
1132 {
1133 const unsigned char *pos = buffer->cur;
1134
1135 if (get_effective_char (pfile) == ':')
1136 result->type = CPP_PASTE;
1137 else
1138 buffer->cur = pos - 1;
1139 }
1140 else
1141 BACKUP ();
1142 }
1143 else if (CPP_OPTION (pfile, digraphs) && c == '>')
1144 {
1145 result->flags |= DIGRAPH;
1146 result->type = CPP_CLOSE_BRACE;
1147 }
1148 else
1149 {
1150 BACKUP ();
1151 result->type = CPP_MOD;
1152 }
0d9f234d
NB
1153 break;
1154
cbcff6df 1155 case '.':
480709cc
NB
1156 result->type = CPP_DOT;
1157 c = get_effective_char (pfile);
1158 if (c == '.')
1159 {
1160 const unsigned char *pos = buffer->cur;
1161
1162 if (get_effective_char (pfile) == '.')
1163 result->type = CPP_ELLIPSIS;
1164 else
1165 buffer->cur = pos - 1;
1166 }
1167 /* All known character sets have 0...9 contiguous. */
0df6c2c7 1168 else if (ISDIGIT (c))
480709cc
NB
1169 {
1170 result->type = CPP_NUMBER;
1171 parse_number (pfile, &result->val.str, c, 1);
1172 }
1173 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1174 result->type = CPP_DOT_STAR;
1175 else
1176 BACKUP ();
0d9f234d 1177 break;
45b966db 1178
0d9f234d 1179 case '+':
29401c30 1180 c = get_effective_char (pfile);
480709cc
NB
1181 if (c == '+')
1182 result->type = CPP_PLUS_PLUS;
1183 else if (c == '=')
1184 result->type = CPP_PLUS_EQ;
1185 else
1186 {
1187 BACKUP ();
1188 result->type = CPP_PLUS;
1189 }
0d9f234d 1190 break;
04e3ec78 1191
0d9f234d 1192 case '-':
29401c30 1193 c = get_effective_char (pfile);
0d9f234d
NB
1194 if (c == '>')
1195 {
480709cc
NB
1196 result->type = CPP_DEREF;
1197 if (CPP_OPTION (pfile, cplusplus))
1198 {
1199 if (get_effective_char (pfile) == '*')
1200 result->type = CPP_DEREF_STAR;
1201 else
1202 BACKUP ();
1203 }
0d9f234d 1204 }
0d9f234d 1205 else if (c == '-')
480709cc
NB
1206 result->type = CPP_MINUS_MINUS;
1207 else if (c == '=')
1208 result->type = CPP_MINUS_EQ;
1209 else
1210 {
1211 BACKUP ();
1212 result->type = CPP_MINUS;
1213 }
0d9f234d 1214 break;
45b966db 1215
0d9f234d 1216 case '&':
29401c30 1217 c = get_effective_char (pfile);
480709cc
NB
1218 if (c == '&')
1219 result->type = CPP_AND_AND;
1220 else if (c == '=')
1221 result->type = CPP_AND_EQ;
1222 else
1223 {
1224 BACKUP ();
1225 result->type = CPP_AND;
1226 }
0d9f234d
NB
1227 break;
1228
0d9f234d 1229 case '|':
29401c30 1230 c = get_effective_char (pfile);
480709cc
NB
1231 if (c == '|')
1232 result->type = CPP_OR_OR;
1233 else if (c == '=')
1234 result->type = CPP_OR_EQ;
1235 else
1236 {
1237 BACKUP ();
1238 result->type = CPP_OR;
1239 }
0d9f234d 1240 break;
45b966db 1241
0d9f234d 1242 case ':':
29401c30 1243 c = get_effective_char (pfile);
0d9f234d 1244 if (c == ':' && CPP_OPTION (pfile, cplusplus))
480709cc 1245 result->type = CPP_SCOPE;
0d9f234d
NB
1246 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1247 {
1248 result->flags |= DIGRAPH;
480709cc
NB
1249 result->type = CPP_CLOSE_SQUARE;
1250 }
1251 else
1252 {
1253 BACKUP ();
1254 result->type = CPP_COLON;
0d9f234d
NB
1255 }
1256 break;
45b966db 1257
480709cc
NB
1258 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1259 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1260 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1261 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1262 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1263
0d9f234d
NB
1264 case '~': result->type = CPP_COMPL; break;
1265 case ',': result->type = CPP_COMMA; break;
1266 case '(': result->type = CPP_OPEN_PAREN; break;
1267 case ')': result->type = CPP_CLOSE_PAREN; break;
1268 case '[': result->type = CPP_OPEN_SQUARE; break;
1269 case ']': result->type = CPP_CLOSE_SQUARE; break;
1270 case '{': result->type = CPP_OPEN_BRACE; break;
1271 case '}': result->type = CPP_CLOSE_BRACE; break;
1272 case ';': result->type = CPP_SEMICOLON; break;
1273
cc937581
ZW
1274 /* @ is a punctuator in Objective C. */
1275 case '@': result->type = CPP_ATSIGN; break;
0d9f234d 1276
0abc6a6a
NB
1277 case '$':
1278 if (CPP_OPTION (pfile, dollars_in_ident))
1279 goto start_ident;
1280 /* Fall through... */
1281
0d9f234d
NB
1282 random_char:
1283 default:
1284 result->type = CPP_OTHER;
6c53ebff 1285 result->val.c = c;
0d9f234d
NB
1286 break;
1287 }
bdcbe496
NB
1288
1289 return result;
0d9f234d
NB
1290}
1291
93c80368
NB
1292/* An upper bound on the number of bytes needed to spell a token,
1293 including preceding whitespace. */
1294unsigned int
1295cpp_token_len (token)
1296 const cpp_token *token;
0d9f234d 1297{
93c80368 1298 unsigned int len;
6d2c2047 1299
93c80368 1300 switch (TOKEN_SPELL (token))
041c3194 1301 {
a28c5035 1302 default: len = 0; break;
47ad4138 1303 case SPELL_NUMBER:
a28c5035
NB
1304 case SPELL_STRING: len = token->val.str.len; break;
1305 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
041c3194 1306 }
47ad4138 1307 /* 1 for whitespace, 4 for comment delimiters. */
93c80368 1308 return len + 5;
6d2c2047
ZW
1309}
1310
041c3194 1311/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885
ZW
1312 already contain the enough space to hold the token's spelling.
1313 Returns a pointer to the character after the last character
1314 written. */
93c80368
NB
1315unsigned char *
1316cpp_spell_token (pfile, token, buffer)
041c3194
ZW
1317 cpp_reader *pfile; /* Would be nice to be rid of this... */
1318 const cpp_token *token;
1319 unsigned char *buffer;
1320{
96be6998 1321 switch (TOKEN_SPELL (token))
041c3194
ZW
1322 {
1323 case SPELL_OPERATOR:
1324 {
1325 const unsigned char *spelling;
1326 unsigned char c;
d6d5f795 1327
041c3194 1328 if (token->flags & DIGRAPH)
37b8524c
JDA
1329 spelling
1330 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
92936ecf
ZW
1331 else if (token->flags & NAMED_OP)
1332 goto spell_ident;
041c3194 1333 else
96be6998 1334 spelling = TOKEN_NAME (token);
041c3194
ZW
1335
1336 while ((c = *spelling++) != '\0')
1337 *buffer++ = c;
1338 }
1339 break;
d6d5f795 1340
47ad4138
ZW
1341 case SPELL_CHAR:
1342 *buffer++ = token->val.c;
1343 break;
1344
1345 spell_ident:
041c3194 1346 case SPELL_IDENT:
a28c5035
NB
1347 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1348 buffer += NODE_LEN (token->val.node);
041c3194 1349 break;
d6d5f795 1350
47ad4138
ZW
1351 case SPELL_NUMBER:
1352 memcpy (buffer, token->val.str.text, token->val.str.len);
1353 buffer += token->val.str.len;
1354 break;
1355
041c3194
ZW
1356 case SPELL_STRING:
1357 {
ba89d661
ZW
1358 int left, right, tag;
1359 switch (token->type)
1360 {
1361 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1362 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
ba89d661
ZW
1363 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1364 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1365 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
47ad4138
ZW
1366 default:
1367 cpp_ice (pfile, "unknown string token %s\n", TOKEN_NAME (token));
1368 return buffer;
ba89d661
ZW
1369 }
1370 if (tag) *buffer++ = tag;
47ad4138 1371 *buffer++ = left;
bfb9dc7f
ZW
1372 memcpy (buffer, token->val.str.text, token->val.str.len);
1373 buffer += token->val.str.len;
47ad4138 1374 *buffer++ = right;
041c3194
ZW
1375 }
1376 break;
d6d5f795 1377
041c3194 1378 case SPELL_NONE:
1f978f5f 1379 cpp_ice (pfile, "unspellable token %s", TOKEN_NAME (token));
041c3194
ZW
1380 break;
1381 }
d6d5f795 1382
041c3194
ZW
1383 return buffer;
1384}
d6d5f795 1385
93c80368
NB
1386/* Returns a token as a null-terminated string. The string is
1387 temporary, and automatically freed later. Useful for diagnostics. */
1388unsigned char *
1389cpp_token_as_text (pfile, token)
c5a04734 1390 cpp_reader *pfile;
041c3194 1391 const cpp_token *token;
c5a04734 1392{
93c80368 1393 unsigned int len = cpp_token_len (token);
ece54d54 1394 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
c5a04734 1395
93c80368
NB
1396 end = cpp_spell_token (pfile, token, start);
1397 end[0] = '\0';
c5a04734 1398
93c80368
NB
1399 return start;
1400}
c5a04734 1401
93c80368
NB
1402/* Used by C front ends. Should really move to using cpp_token_as_text. */
1403const char *
1404cpp_type2name (type)
1405 enum cpp_ttype type;
1406{
1407 return (const char *) token_spellings[type].name;
1408}
c5a04734 1409
4ed5bcfb
NB
1410/* Writes the spelling of token to FP, without any preceding space.
1411 Separated from cpp_spell_token for efficiency - to avoid stdio
1412 double-buffering. */
93c80368
NB
1413void
1414cpp_output_token (token, fp)
1415 const cpp_token *token;
1416 FILE *fp;
1417{
93c80368 1418 switch (TOKEN_SPELL (token))
c5a04734 1419 {
93c80368
NB
1420 case SPELL_OPERATOR:
1421 {
1422 const unsigned char *spelling;
3b681e9d 1423 int c;
c5a04734 1424
93c80368 1425 if (token->flags & DIGRAPH)
37b8524c
JDA
1426 spelling
1427 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
93c80368
NB
1428 else if (token->flags & NAMED_OP)
1429 goto spell_ident;
1430 else
1431 spelling = TOKEN_NAME (token);
041c3194 1432
3b681e9d
ZW
1433 c = *spelling;
1434 do
1435 putc (c, fp);
1436 while ((c = *++spelling) != '\0');
93c80368
NB
1437 }
1438 break;
041c3194 1439
47ad4138
ZW
1440 case SPELL_CHAR:
1441 putc (token->val.c, fp);
1442 break;
1443
93c80368
NB
1444 spell_ident:
1445 case SPELL_IDENT:
3b681e9d 1446 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
93c80368 1447 break;
041c3194 1448
47ad4138
ZW
1449 case SPELL_NUMBER:
1450 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1451 break;
1452
93c80368
NB
1453 case SPELL_STRING:
1454 {
1455 int left, right, tag;
1456 switch (token->type)
1457 {
1458 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1459 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
93c80368
NB
1460 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1461 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1462 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
47ad4138
ZW
1463 default:
1464 fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1465 return;
93c80368
NB
1466 }
1467 if (tag) putc (tag, fp);
47ad4138 1468 putc (left, fp);
93c80368 1469 fwrite (token->val.str.text, 1, token->val.str.len, fp);
47ad4138 1470 putc (right, fp);
93c80368
NB
1471 }
1472 break;
c5a04734 1473
93c80368
NB
1474 case SPELL_NONE:
1475 /* An error, most probably. */
1476 break;
041c3194 1477 }
c5a04734
ZW
1478}
1479
93c80368
NB
1480/* Compare two tokens. */
1481int
1482_cpp_equiv_tokens (a, b)
1483 const cpp_token *a, *b;
c5a04734 1484{
93c80368
NB
1485 if (a->type == b->type && a->flags == b->flags)
1486 switch (TOKEN_SPELL (a))
1487 {
1488 default: /* Keep compiler happy. */
1489 case SPELL_OPERATOR:
1490 return 1;
1491 case SPELL_CHAR:
6c53ebff 1492 return a->val.c == b->val.c; /* Character. */
93c80368 1493 case SPELL_NONE:
56051c0a 1494 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
93c80368
NB
1495 case SPELL_IDENT:
1496 return a->val.node == b->val.node;
47ad4138 1497 case SPELL_NUMBER:
93c80368
NB
1498 case SPELL_STRING:
1499 return (a->val.str.len == b->val.str.len
1500 && !memcmp (a->val.str.text, b->val.str.text,
1501 a->val.str.len));
1502 }
c5a04734 1503
041c3194
ZW
1504 return 0;
1505}
1506
93c80368
NB
1507/* Returns nonzero if a space should be inserted to avoid an
1508 accidental token paste for output. For simplicity, it is
1509 conservative, and occasionally advises a space where one is not
1510 needed, e.g. "." and ".2". */
041c3194 1511
93c80368
NB
1512int
1513cpp_avoid_paste (pfile, token1, token2)
c5a04734 1514 cpp_reader *pfile;
93c80368 1515 const cpp_token *token1, *token2;
c5a04734 1516{
93c80368
NB
1517 enum cpp_ttype a = token1->type, b = token2->type;
1518 cppchar_t c;
c5a04734 1519
93c80368
NB
1520 if (token1->flags & NAMED_OP)
1521 a = CPP_NAME;
1522 if (token2->flags & NAMED_OP)
1523 b = CPP_NAME;
c5a04734 1524
93c80368
NB
1525 c = EOF;
1526 if (token2->flags & DIGRAPH)
37b8524c 1527 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
93c80368
NB
1528 else if (token_spellings[b].category == SPELL_OPERATOR)
1529 c = token_spellings[b].name[0];
c5a04734 1530
93c80368 1531 /* Quickly get everything that can paste with an '='. */
37b8524c 1532 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
93c80368 1533 return 1;
c5a04734 1534
93c80368 1535 switch (a)
c5a04734 1536 {
93c80368
NB
1537 case CPP_GREATER: return c == '>' || c == '?';
1538 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1539 case CPP_PLUS: return c == '+';
1540 case CPP_MINUS: return c == '-' || c == '>';
1541 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1542 case CPP_MOD: return c == ':' || c == '>';
1543 case CPP_AND: return c == '&';
1544 case CPP_OR: return c == '|';
1545 case CPP_COLON: return c == ':' || c == '>';
1546 case CPP_DEREF: return c == '*';
26ec42ee 1547 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
93c80368
NB
1548 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1549 case CPP_NAME: return ((b == CPP_NUMBER
1550 && name_p (pfile, &token2->val.str))
1551 || b == CPP_NAME
1552 || b == CPP_CHAR || b == CPP_STRING); /* L */
1553 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1554 || c == '.' || c == '+' || c == '-');
1555 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
6c53ebff 1556 && token1->val.c == '@'
93c80368
NB
1557 && (b == CPP_NAME || b == CPP_STRING));
1558 default: break;
c5a04734 1559 }
c5a04734 1560
417f3e3a 1561 return 0;
c5a04734
ZW
1562}
1563
93c80368 1564/* Output all the remaining tokens on the current line, and a newline
4ed5bcfb
NB
1565 character, to FP. Leading whitespace is removed. If there are
1566 macros, special token padding is not performed. */
c5a04734 1567void
93c80368 1568cpp_output_line (pfile, fp)
c5a04734 1569 cpp_reader *pfile;
93c80368 1570 FILE *fp;
c5a04734 1571{
4ed5bcfb 1572 const cpp_token *token;
96be6998 1573
4ed5bcfb
NB
1574 token = cpp_get_token (pfile);
1575 while (token->type != CPP_EOF)
96be6998 1576 {
4ed5bcfb
NB
1577 cpp_output_token (token, fp);
1578 token = cpp_get_token (pfile);
1579 if (token->flags & PREV_WHITE)
1580 putc (' ', fp);
96be6998
ZW
1581 }
1582
93c80368 1583 putc ('\n', fp);
041c3194 1584}
c5a04734 1585
/* Return the numeric value of the hexadecimal digit C.  Aborts if C
   is not a hexadecimal digit; callers are expected to have checked.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1596
62729350
NB
1597/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1598 failure if cpplib is not parsing C++ or C99. Such failure is
1599 silent, and no variables are updated. Otherwise returns 0, and
1600 warns if -Wtraditional.
c8a96070
NB
1601
1602 [lex.charset]: The character designated by the universal character
1603 name \UNNNNNNNN is that character whose character short name in
1604 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1605 universal character name \uNNNN is that character whose character
1606 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1607 for a universal character name is less than 0x20 or in the range
1608 0x7F-0x9F (inclusive), or if the universal character name
1609 designates a character in the basic source character set, then the
1610 program is ill-formed.
1611
1612 We assume that wchar_t is Unicode, so we don't need to do any
62729350 1613 mapping. Is this ever wrong?
c8a96070 1614
62729350
NB
1615 PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1616 LIMIT is the end of the string or charconst. PSTR is updated to
1617 point after the UCS on return, and the UCS is written into PC. */
1618
1619static int
1620maybe_read_ucs (pfile, pstr, limit, pc)
c8a96070
NB
1621 cpp_reader *pfile;
1622 const unsigned char **pstr;
1623 const unsigned char *limit;
62729350 1624 unsigned int *pc;
c8a96070
NB
1625{
1626 const unsigned char *p = *pstr;
62729350
NB
1627 unsigned int code = 0;
1628 unsigned int c = *pc, length;
1629
1630 /* Only attempt to interpret a UCS for C++ and C99. */
1631 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1632 return 1;
c8a96070 1633
62729350
NB
1634 if (CPP_WTRADITIONAL (pfile))
1635 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
c8a96070 1636
f8710242
NB
1637 length = (c == 'u' ? 4: 8);
1638
1639 if ((size_t) (limit - p) < length)
1640 {
1641 cpp_error (pfile, "incomplete universal-character-name");
1642 /* Skip to the end to avoid more diagnostics. */
1643 p = limit;
1644 }
1645 else
1646 {
1647 for (; length; length--, p++)
c8a96070 1648 {
f8710242
NB
1649 c = *p;
1650 if (ISXDIGIT (c))
1651 code = (code << 4) + hex_digit_value (c);
1652 else
1653 {
1654 cpp_error (pfile,
1655 "non-hex digit '%c' in universal-character-name", c);
1656 /* We shouldn't skip in case there are multibyte chars. */
1657 break;
1658 }
c8a96070 1659 }
c8a96070
NB
1660 }
1661
1662#ifdef TARGET_EBCDIC
1663 cpp_error (pfile, "universal-character-name on EBCDIC target");
1664 code = 0x3f; /* EBCDIC invalid character */
1665#else
f8710242
NB
1666 /* True extended characters are OK. */
1667 if (code >= 0xa0
1668 && !(code & 0x80000000)
1669 && !(code >= 0xD800 && code <= 0xDFFF))
1670 ;
1671 /* The standard permits $, @ and ` to be specified as UCNs. We use
1672 hex escapes so that this also works with EBCDIC hosts. */
1673 else if (code == 0x24 || code == 0x40 || code == 0x60)
1674 ;
1675 /* Don't give another error if one occurred above. */
1676 else if (length == 0)
1677 cpp_error (pfile, "universal-character-name out of range");
c8a96070
NB
1678#endif
1679
1680 *pstr = p;
62729350
NB
1681 *pc = code;
1682 return 0;
c8a96070
NB
1683}
1684
1685/* Interpret an escape sequence, and return its value. PSTR points to
1686 the input pointer, which is just after the backslash. LIMIT is how
62729350
NB
1687 much text we have. MASK is a bitmask for the precision for the
1688 destination type (char or wchar_t). TRADITIONAL, if true, does not
1689 interpret escapes that did not exist in traditional C.
c8a96070 1690
62729350
NB
1691 Handles all relevant diagnostics. */
1692
1693unsigned int
1694cpp_parse_escape (pfile, pstr, limit, mask, traditional)
c8a96070
NB
1695 cpp_reader *pfile;
1696 const unsigned char **pstr;
1697 const unsigned char *limit;
62729350 1698 unsigned HOST_WIDE_INT mask;
c8a96070
NB
1699 int traditional;
1700{
1701 int unknown = 0;
1702 const unsigned char *str = *pstr;
1703 unsigned int c = *str++;
1704
1705 switch (c)
1706 {
1707 case '\\': case '\'': case '"': case '?': break;
1708 case 'b': c = TARGET_BS; break;
1709 case 'f': c = TARGET_FF; break;
1710 case 'n': c = TARGET_NEWLINE; break;
1711 case 'r': c = TARGET_CR; break;
1712 case 't': c = TARGET_TAB; break;
1713 case 'v': c = TARGET_VT; break;
1714
1715 case '(': case '{': case '[': case '%':
1716 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1717 '\%' is used to prevent SCCS from getting confused. */
1718 unknown = CPP_PEDANTIC (pfile);
1719 break;
1720
1721 case 'a':
1722 if (CPP_WTRADITIONAL (pfile))
1723 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1724 if (!traditional)
1725 c = TARGET_BELL;
1726 break;
1727
1728 case 'e': case 'E':
1729 if (CPP_PEDANTIC (pfile))
1730 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1731 c = TARGET_ESC;
1732 break;
1733
c8a96070 1734 case 'u': case 'U':
62729350 1735 unknown = maybe_read_ucs (pfile, &str, limit, &c);
c8a96070
NB
1736 break;
1737
1738 case 'x':
1739 if (CPP_WTRADITIONAL (pfile))
1740 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1741
1742 if (!traditional)
1743 {
1744 unsigned int i = 0, overflow = 0;
1745 int digits_found = 0;
1746
1747 while (str < limit)
1748 {
1749 c = *str;
1750 if (! ISXDIGIT (c))
1751 break;
1752 str++;
1753 overflow |= i ^ (i << 4 >> 4);
1754 i = (i << 4) + hex_digit_value (c);
1755 digits_found = 1;
1756 }
1757
1758 if (!digits_found)
1759 cpp_error (pfile, "\\x used with no following hex digits");
1760
1761 if (overflow | (i != (i & mask)))
1762 {
1763 cpp_pedwarn (pfile, "hex escape sequence out of range");
1764 i &= mask;
1765 }
1766 c = i;
1767 }
1768 break;
1769
1770 case '0': case '1': case '2': case '3':
1771 case '4': case '5': case '6': case '7':
1772 {
1773 unsigned int i = c - '0';
1774 int count = 0;
1775
1776 while (str < limit && ++count < 3)
1777 {
1778 c = *str;
1779 if (c < '0' || c > '7')
1780 break;
1781 str++;
1782 i = (i << 3) + c - '0';
1783 }
1784
1785 if (i != (i & mask))
1786 {
1787 cpp_pedwarn (pfile, "octal escape sequence out of range");
1788 i &= mask;
1789 }
1790 c = i;
1791 }
1792 break;
1793
1794 default:
1795 unknown = 1;
1796 break;
1797 }
1798
1799 if (unknown)
1800 {
1801 if (ISGRAPH (c))
1802 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1803 else
1804 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1805 }
1806
62729350
NB
1807 if (c > mask)
1808 cpp_pedwarn (pfile, "escape sequence out of range for character");
1809
c8a96070
NB
1810 *pstr = str;
1811 return c;
1812}
1813
/* Unless the configuration has already defined them, the maximum
   character type sizes default to the ordinary type sizes.  */
#ifndef MAX_CHAR_TYPE_SIZE
#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

#ifndef MAX_WCHAR_TYPE_SIZE
#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
1821
1822/* Interpret a (possibly wide) character constant in TOKEN.
1823 WARN_MULTI warns about multi-character charconsts, if not
1824 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1825 that did not exist in traditional C. PCHARS_SEEN points to a
1826 variable that is filled in with the number of characters seen. */
1827HOST_WIDE_INT
1828cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1829 cpp_reader *pfile;
1830 const cpp_token *token;
1831 int warn_multi;
1832 int traditional;
1833 unsigned int *pchars_seen;
1834{
1835 const unsigned char *str = token->val.str.text;
1836 const unsigned char *limit = str + token->val.str.len;
1837 unsigned int chars_seen = 0;
1838 unsigned int width, max_chars, c;
2a967f3d
NB
1839 unsigned HOST_WIDE_INT mask;
1840 HOST_WIDE_INT result = 0;
c8a96070
NB
1841
1842#ifdef MULTIBYTE_CHARS
1843 (void) local_mbtowc (NULL, NULL, 0);
1844#endif
1845
1846 /* Width in bits. */
1847 if (token->type == CPP_CHAR)
1848 width = MAX_CHAR_TYPE_SIZE;
1849 else
1850 width = MAX_WCHAR_TYPE_SIZE;
1851
1852 if (width < HOST_BITS_PER_WIDE_INT)
1853 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1854 else
1855 mask = ~0;
1856 max_chars = HOST_BITS_PER_WIDE_INT / width;
1857
1858 while (str < limit)
1859 {
1860#ifdef MULTIBYTE_CHARS
1861 wchar_t wc;
1862 int char_len;
1863
1864 char_len = local_mbtowc (&wc, str, limit - str);
1865 if (char_len == -1)
1866 {
1867 cpp_warning (pfile, "ignoring invalid multibyte character");
1868 c = *str++;
1869 }
1870 else
1871 {
1872 str += char_len;
1873 c = wc;
1874 }
1875#else
1876 c = *str++;
1877#endif
1878
1879 if (c == '\\')
62729350 1880 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
c8a96070
NB
1881
1882#ifdef MAP_CHARACTER
1883 if (ISPRINT (c))
1884 c = MAP_CHARACTER (c);
1885#endif
1886
1887 /* Merge character into result; ignore excess chars. */
1888 if (++chars_seen <= max_chars)
1889 {
1890 if (width < HOST_BITS_PER_WIDE_INT)
1891 result = (result << width) | (c & mask);
1892 else
1893 result = c;
1894 }
1895 }
1896
1897 if (chars_seen == 0)
1898 cpp_error (pfile, "empty character constant");
1899 else if (chars_seen > max_chars)
1900 {
1901 chars_seen = max_chars;
f8710242 1902 cpp_warning (pfile, "character constant too long");
c8a96070
NB
1903 }
1904 else if (chars_seen > 1 && !traditional && warn_multi)
1905 cpp_warning (pfile, "multi-character character constant");
1906
1907 /* If char type is signed, sign-extend the constant. The
1908 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
1909 if (token->type == CPP_CHAR && chars_seen)
1910 {
1911 unsigned int nbits = chars_seen * width;
c8a96070 1912
e1e7d56b 1913 mask = (unsigned HOST_WIDE_INT) ~0 >> (HOST_BITS_PER_WIDE_INT - nbits);
c8a96070
NB
1914 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
1915 || ((result >> (nbits - 1)) & 1) == 0)
1916 result &= mask;
1917 else
1918 result |= ~mask;
1919 }
1920
1921 *pchars_seen = chars_seen;
1922 return result;
1923}
1924
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is MIN_EXTRA plus twice the
   number of bytes between BUFF's cur and limit pointers.  Every
   macro argument is parenthesized so that an expression argument
   cannot regroup with the surrounding arithmetic.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1939
/* Exists only so that its layout can be measured: a single char
   followed by a union of a double and a pointer.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

/* The offset of the union within struct dummy, used as the default
   alignment for allocations.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))

/* Round SIZE up to a multiple of ALIGN.  The mask arithmetic needs
   ALIGN to be a power of two.  */
#define CPP_ALIGN(size, align) (((size) + ((align) - 1)) & ~((align) - 1))
1952
c9e7a609
NB
1953/* Create a new allocation buffer. Place the control block at the end
1954 of the buffer, so that buffer overflows will cause immediate chaos. */
b8af0ca5
NB
1955static _cpp_buff *
1956new_buff (len)
6142088c 1957 size_t len;
b8af0ca5
NB
1958{
1959 _cpp_buff *result;
ece54d54 1960 unsigned char *base;
b8af0ca5 1961
1e013d2e
NB
1962 if (len < MIN_BUFF_SIZE)
1963 len = MIN_BUFF_SIZE;
b8af0ca5
NB
1964 len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
1965
1966 base = xmalloc (len + sizeof (_cpp_buff));
1967 result = (_cpp_buff *) (base + len);
1968 result->base = base;
1969 result->cur = base;
1970 result->limit = base + len;
1971 result->next = NULL;
1972 return result;
1973}
1974
1975/* Place a chain of unwanted allocation buffers on the free list. */
1976void
1977_cpp_release_buff (pfile, buff)
1978 cpp_reader *pfile;
1979 _cpp_buff *buff;
1980{
1981 _cpp_buff *end = buff;
1982
1983 while (end->next)
1984 end = end->next;
1985 end->next = pfile->free_buffs;
1986 pfile->free_buffs = buff;
1987}
1988
1989/* Return a free buffer of size at least MIN_SIZE. */
1990_cpp_buff *
1991_cpp_get_buff (pfile, min_size)
1992 cpp_reader *pfile;
6142088c 1993 size_t min_size;
b8af0ca5
NB
1994{
1995 _cpp_buff *result, **p;
1996
1997 for (p = &pfile->free_buffs;; p = &(*p)->next)
1998 {
6142088c 1999 size_t size;
1e013d2e
NB
2000
2001 if (*p == NULL)
b8af0ca5 2002 return new_buff (min_size);
1e013d2e
NB
2003 result = *p;
2004 size = result->limit - result->base;
2005 /* Return a buffer that's big enough, but don't waste one that's
2006 way too big. */
34f5271d 2007 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
b8af0ca5
NB
2008 break;
2009 }
2010
2011 *p = result->next;
2012 result->next = NULL;
2013 result->cur = result->base;
2014 return result;
2015}
2016
4fe9b91c 2017/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
2018 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2019 the excess bytes to the new buffer. Chains the new buffer after
2020 BUFF, and returns the new buffer. */
b8af0ca5 2021_cpp_buff *
8c3b2693 2022_cpp_append_extend_buff (pfile, buff, min_extra)
b8af0ca5
NB
2023 cpp_reader *pfile;
2024 _cpp_buff *buff;
6142088c 2025 size_t min_extra;
b8af0ca5 2026{
6142088c 2027 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
8c3b2693 2028 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
b8af0ca5 2029
8c3b2693
NB
2030 buff->next = new_buff;
2031 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2032 return new_buff;
2033}
2034
4fe9b91c 2035/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
2036 remaining bytes of the buffer pointed to by BUFF, and at least
2037 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2038 Chains the new buffer before the buffer pointed to by BUFF, and
2039 updates the pointer to point to the new buffer. */
2040void
2041_cpp_extend_buff (pfile, pbuff, min_extra)
2042 cpp_reader *pfile;
2043 _cpp_buff **pbuff;
2044 size_t min_extra;
2045{
2046 _cpp_buff *new_buff, *old_buff = *pbuff;
2047 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2048
2049 new_buff = _cpp_get_buff (pfile, size);
2050 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2051 new_buff->next = old_buff;
2052 *pbuff = new_buff;
b8af0ca5
NB
2053}
2054
2055/* Free a chain of buffers starting at BUFF. */
2056void
2057_cpp_free_buff (buff)
2058 _cpp_buff *buff;
2059{
2060 _cpp_buff *next;
2061
2062 for (; buff; buff = next)
2063 {
2064 next = buff->next;
2065 free (buff->base);
2066 }
2067}
417f3e3a 2068
ece54d54
NB
2069/* Allocate permanent, unaligned storage of length LEN. */
2070unsigned char *
2071_cpp_unaligned_alloc (pfile, len)
2072 cpp_reader *pfile;
2073 size_t len;
2074{
2075 _cpp_buff *buff = pfile->u_buff;
2076 unsigned char *result = buff->cur;
2077
2078 if (len > (size_t) (buff->limit - result))
2079 {
2080 buff = _cpp_get_buff (pfile, len);
2081 buff->next = pfile->u_buff;
2082 pfile->u_buff = buff;
2083 result = buff->cur;
2084 }
2085
2086 buff->cur = result + len;
2087 return result;
2088}
2089
87062813
NB
2090/* Allocate permanent, unaligned storage of length LEN from a_buff.
2091 That buffer is used for growing allocations when saving macro
2092 replacement lists in a #define, and when parsing an answer to an
2093 assertion in #assert, #unassert or #if (and therefore possibly
2094 whilst expanding macros). It therefore must not be used by any
2095 code that they might call: specifically the lexer and the guts of
2096 the macro expander.
2097
2098 All existing other uses clearly fit this restriction: storing
2099 registered pragmas during initialization. */
93c80368 2100unsigned char *
8c3b2693
NB
2101_cpp_aligned_alloc (pfile, len)
2102 cpp_reader *pfile;
2103 size_t len;
3fef5b2b 2104{
8c3b2693
NB
2105 _cpp_buff *buff = pfile->a_buff;
2106 unsigned char *result = buff->cur;
3fef5b2b 2107
8c3b2693 2108 if (len > (size_t) (buff->limit - result))
3fef5b2b 2109 {
8c3b2693
NB
2110 buff = _cpp_get_buff (pfile, len);
2111 buff->next = pfile->a_buff;
2112 pfile->a_buff = buff;
2113 result = buff->cur;
3fef5b2b 2114 }
041c3194 2115
8c3b2693 2116 buff->cur = result + len;
93c80368 2117 return result;
041c3194 2118}
This page took 0.693542 seconds and 5 git commands to generate.