]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
ia64.c (ia64_expand_load_address): Ensure correct mode for symbol address.
[gcc.git] / gcc / cpplex.c
CommitLineData
45b966db 1/* CPP Library - lexical analysis.
5d8ebbd8 2 Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
45b966db
ZW
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23#include "config.h"
24#include "system.h"
45b966db
ZW
25#include "cpplib.h"
26#include "cpphash.h"
27
c8a96070
NB
28#ifdef MULTIBYTE_CHARS
29#include "mbchar.h"
30#include <locale.h>
31#endif
32
93c80368
NB
/* Spelling categories for tokens.  Tokens with SPELL_STRING store
   their spelling in the token list, and its length in the
   token->val.name.len.  The numeric values are spelled out because
   the category is stored directly in the token_spellings table.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_CHAR = 1,
  SPELL_IDENT = 2,
  SPELL_NUMBER = 3,
  SPELL_STRING = 4,
  SPELL_NONE = 5
};

/* One entry of the per-token-type spelling table: how the token is
   spelled (CATEGORY) and the text associated with the type (NAME).  */
struct token_spelling
{
  enum spell_type category;
  const unsigned char *name;
};
50
8206c799
ZW
51static const unsigned char *const digraph_spellings[] =
52{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
93c80368
NB
53
54#define OP(e, s) { SPELL_OPERATOR, U s },
55#define TK(e, s) { s, U STRINGX (e) },
8206c799 56static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
93c80368
NB
57#undef OP
58#undef TK
59
60#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
61#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
480709cc 62#define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
f2d5f0cc 63
87062813
NB
64static void handle_newline PARAMS ((cpp_reader *));
65static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
29401c30 66static cppchar_t get_effective_char PARAMS ((cpp_reader *));
0d9f234d 67
041c3194 68static int skip_block_comment PARAMS ((cpp_reader *));
cbcff6df 69static int skip_line_comment PARAMS ((cpp_reader *));
0d9f234d 70static void adjust_column PARAMS ((cpp_reader *));
4d6baafa 71static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
2c3fcba6 72static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
562a5c27 73static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
df383483 74 unsigned int *));
10cf9bde 75static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
562a5c27 76static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
0d9f234d 77static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
87062813 78static bool trigraph_p PARAMS ((cpp_reader *));
562a5c27 79static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
477cdac7 80 cppchar_t));
004cb263 81static bool continue_after_nul PARAMS ((cpp_reader *));
93c80368 82static int name_p PARAMS ((cpp_reader *, const cpp_string *));
62729350 83static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
625458d0 84 const unsigned char *, cppchar_t *));
5fddcffc 85static tokenrun *next_tokenrun PARAMS ((tokenrun *));
f617b8e2 86
c8a96070 87static unsigned int hex_digit_value PARAMS ((unsigned int));
6142088c 88static _cpp_buff *new_buff PARAMS ((size_t));
15dad1d9 89
041c3194 90/* Utility routine:
9e62c811 91
bfb9dc7f
ZW
92 Compares, the token TOKEN to the NUL-terminated string STRING.
93 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
041c3194 94int
bfb9dc7f
ZW
95cpp_ideq (token, string)
96 const cpp_token *token;
041c3194
ZW
97 const char *string;
98{
bfb9dc7f 99 if (token->type != CPP_NAME)
041c3194 100 return 0;
bfb9dc7f 101
562a5c27 102 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
15dad1d9 103}
1368ee70 104
87062813
NB
105/* Call when meeting a newline, assumed to be in buffer->cur[-1].
106 Returns with buffer->cur pointing to the character immediately
107 following the newline (combination). */
108static void
109handle_newline (pfile)
1444f2ed 110 cpp_reader *pfile;
0d9f234d 111{
87062813 112 cpp_buffer *buffer = pfile->buffer;
0d9f234d 113
87062813 114 /* Handle CR-LF and LF-CR. Most other implementations (e.g. java)
8d9afc4e 115 only accept CR-LF; maybe we should fall back to that behavior? */
4d6baafa 116 if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
87062813 117 buffer->cur++;
0d9f234d 118
87062813
NB
119 buffer->line_base = buffer->cur;
120 buffer->col_adjust = 0;
121 pfile->line++;
0d9f234d
NB
122}
123
87062813
NB
124/* Subroutine of skip_escaped_newlines; called when a 3-character
125 sequence beginning with "??" is encountered. buffer->cur points to
126 the second '?'.
127
128 Warn if necessary, and returns true if the sequence forms a
8d9afc4e 129 trigraph and the trigraph should be honored. */
87062813
NB
130static bool
131trigraph_p (pfile)
45b966db 132 cpp_reader *pfile;
45b966db 133{
87062813
NB
134 cpp_buffer *buffer = pfile->buffer;
135 cppchar_t from_char = buffer->cur[1];
136 bool accept;
137
138 if (!_cpp_trigraph_map[from_char])
139 return false;
140
141 accept = CPP_OPTION (pfile, trigraphs);
142
cbcff6df
NB
143 /* Don't warn about trigraphs in comments. */
144 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
45b966db 145 {
041c3194 146 if (accept)
ebef4e8c
NB
147 cpp_error_with_line (pfile, DL_WARNING,
148 pfile->line, CPP_BUF_COL (buffer) - 1,
149 "trigraph ??%c converted to %c",
150 (int) from_char,
151 (int) _cpp_trigraph_map[from_char]);
4a5b68a2
NB
152 else if (buffer->cur != buffer->last_Wtrigraphs)
153 {
154 buffer->last_Wtrigraphs = buffer->cur;
ebef4e8c
NB
155 cpp_error_with_line (pfile, DL_WARNING,
156 pfile->line, CPP_BUF_COL (buffer) - 1,
157 "trigraph ??%c ignored", (int) from_char);
4a5b68a2 158 }
45b966db 159 }
0d9f234d 160
041c3194 161 return accept;
45b966db
ZW
162}
163
87062813 164/* Skips any escaped newlines introduced by '?' or a '\\', assumed to
480709cc
NB
165 lie in buffer->cur[-1]. Returns the next byte, which will be in
166 buffer->cur[-1]. This routine performs preprocessing stages 1 and
167 2 of the ISO C standard. */
0d9f234d 168static cppchar_t
87062813 169skip_escaped_newlines (pfile)
29401c30 170 cpp_reader *pfile;
45b966db 171{
29401c30 172 cpp_buffer *buffer = pfile->buffer;
87062813 173 cppchar_t next = buffer->cur[-1];
29401c30 174
a5c3cccd
NB
175 /* Only do this if we apply stages 1 and 2. */
176 if (!buffer->from_stage3)
041c3194 177 {
a5c3cccd 178 const unsigned char *saved_cur;
87062813 179 cppchar_t next1;
a5c3cccd
NB
180
181 do
0d9f234d 182 {
a5c3cccd 183 if (next == '?')
0d9f234d 184 {
4d6baafa 185 if (buffer->cur[0] != '?' || !trigraph_p (pfile))
87062813 186 break;
a5c3cccd 187
87062813
NB
188 /* Translate the trigraph. */
189 next = _cpp_trigraph_map[buffer->cur[1]];
190 buffer->cur += 2;
4d6baafa 191 if (next != '\\')
a5c3cccd 192 break;
a5c3cccd
NB
193 }
194
4d6baafa
NB
195 if (buffer->cur == buffer->rlimit)
196 break;
197
87062813
NB
198 /* We have a backslash, and room for at least one more
199 character. Skip horizontal whitespace. */
200 saved_cur = buffer->cur;
a5c3cccd 201 do
87062813
NB
202 next1 = *buffer->cur++;
203 while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
041c3194 204
a5c3cccd 205 if (!is_vspace (next1))
0d9f234d 206 {
87062813 207 buffer->cur = saved_cur;
0d9f234d
NB
208 break;
209 }
45b966db 210
87062813
NB
211 if (saved_cur != buffer->cur - 1
212 && !pfile->state.lexing_comment)
ebef4e8c
NB
213 cpp_error (pfile, DL_WARNING,
214 "backslash and newline separated by space");
0d9f234d 215
87062813 216 handle_newline (pfile);
480709cc 217 buffer->backup_to = buffer->cur;
87062813
NB
218 if (buffer->cur == buffer->rlimit)
219 {
ebef4e8c
NB
220 cpp_error (pfile, DL_PEDWARN,
221 "backslash-newline at end of file");
87062813
NB
222 next = EOF;
223 }
224 else
225 next = *buffer->cur++;
0d9f234d 226 }
a5c3cccd 227 while (next == '\\' || next == '?');
041c3194 228 }
45b966db 229
0d9f234d 230 return next;
45b966db
ZW
231}
232
0d9f234d 233/* Obtain the next character, after trigraph conversion and skipping
87062813
NB
234 an arbitrarily long string of escaped newlines. The common case of
235 no trigraphs or escaped newlines falls through quickly. On return,
480709cc
NB
236 buffer->backup_to points to where to return to if the character is
237 not to be processed. */
0d9f234d 238static cppchar_t
29401c30
NB
239get_effective_char (pfile)
240 cpp_reader *pfile;
64aaf407 241{
4d6baafa 242 cppchar_t next;
480709cc 243 cpp_buffer *buffer = pfile->buffer;
0d9f234d 244
480709cc 245 buffer->backup_to = buffer->cur;
4d6baafa
NB
246 next = *buffer->cur++;
247 if (__builtin_expect (next == '?' || next == '\\', 0))
248 next = skip_escaped_newlines (pfile);
0d9f234d 249
df383483 250 return next;
64aaf407
NB
251}
252
0d9f234d
NB
253/* Skip a C-style block comment. We find the end of the comment by
254 seeing if an asterisk is before every '/' we encounter. Returns
da7d8304 255 nonzero if comment terminated by EOF, zero otherwise. */
041c3194
ZW
256static int
257skip_block_comment (pfile)
45b966db
ZW
258 cpp_reader *pfile;
259{
041c3194 260 cpp_buffer *buffer = pfile->buffer;
d8090680 261 cppchar_t c = EOF, prevc = EOF;
0d9f234d 262
cbcff6df 263 pfile->state.lexing_comment = 1;
0d9f234d 264 while (buffer->cur != buffer->rlimit)
45b966db 265 {
0d9f234d
NB
266 prevc = c, c = *buffer->cur++;
267
0d9f234d 268 /* FIXME: For speed, create a new character class of characters
93c80368 269 of interest inside block comments. */
0d9f234d 270 if (c == '?' || c == '\\')
87062813 271 c = skip_escaped_newlines (pfile);
041c3194 272
0d9f234d
NB
273 /* People like decorating comments with '*', so check for '/'
274 instead for efficiency. */
041c3194 275 if (c == '/')
45b966db 276 {
0d9f234d
NB
277 if (prevc == '*')
278 break;
041c3194 279
0d9f234d 280 /* Warn about potential nested comments, but not if the '/'
a1f300c0 281 comes immediately before the true comment delimiter.
041c3194 282 Don't bother to get it right across escaped newlines. */
0d9f234d 283 if (CPP_OPTION (pfile, warn_comments)
87062813 284 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
ebef4e8c
NB
285 cpp_error_with_line (pfile, DL_WARNING,
286 pfile->line, CPP_BUF_COL (buffer),
287 "\"/*\" within comment");
45b966db 288 }
91fcd158 289 else if (is_vspace (c))
87062813 290 handle_newline (pfile);
52fadca8 291 else if (c == '\t')
0d9f234d 292 adjust_column (pfile);
45b966db 293 }
041c3194 294
cbcff6df 295 pfile->state.lexing_comment = 0;
0d9f234d 296 return c != '/' || prevc != '*';
45b966db
ZW
297}
298
480709cc 299/* Skip a C++ line comment, leaving buffer->cur pointing to the
da7d8304 300 terminating newline. Handles escaped newlines. Returns nonzero
480709cc 301 if a multiline comment. */
041c3194 302static int
cbcff6df
NB
303skip_line_comment (pfile)
304 cpp_reader *pfile;
45b966db 305{
cbcff6df 306 cpp_buffer *buffer = pfile->buffer;
67821e3a 307 unsigned int orig_line = pfile->line;
0d9f234d 308 cppchar_t c;
64cdc383
MH
309#ifdef MULTIBYTE_CHARS
310 wchar_t wc;
311 int char_len;
312#endif
041c3194 313
cbcff6df 314 pfile->state.lexing_comment = 1;
64cdc383
MH
315#ifdef MULTIBYTE_CHARS
316 /* Reset multibyte conversion state. */
317 (void) local_mbtowc (NULL, NULL, 0);
318#endif
0d9f234d 319 do
041c3194 320 {
0d9f234d 321 if (buffer->cur == buffer->rlimit)
480709cc 322 goto at_eof;
041c3194 323
64cdc383
MH
324#ifdef MULTIBYTE_CHARS
325 char_len = local_mbtowc (&wc, (const char *) buffer->cur,
326 buffer->rlimit - buffer->cur);
327 if (char_len == -1)
328 {
329 cpp_error (pfile, DL_WARNING,
330 "ignoring invalid multibyte character");
331 char_len = 1;
332 c = *buffer->cur++;
333 }
334 else
335 {
336 buffer->cur += char_len;
337 c = wc;
338 }
339#else
0d9f234d 340 c = *buffer->cur++;
64cdc383 341#endif
0d9f234d 342 if (c == '?' || c == '\\')
87062813 343 c = skip_escaped_newlines (pfile);
041c3194 344 }
0d9f234d 345 while (!is_vspace (c));
45b966db 346
480709cc
NB
347 /* Step back over the newline, except at EOF. */
348 buffer->cur--;
349 at_eof:
350
cbcff6df 351 pfile->state.lexing_comment = 0;
67821e3a 352 return orig_line != pfile->line;
041c3194 353}
45b966db 354
0d9f234d
NB
355/* pfile->buffer->cur is one beyond the \t character. Update
356 col_adjust so we track the column correctly. */
52fadca8 357static void
0d9f234d 358adjust_column (pfile)
52fadca8 359 cpp_reader *pfile;
52fadca8 360{
0d9f234d
NB
361 cpp_buffer *buffer = pfile->buffer;
362 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
52fadca8
NB
363
364 /* Round it up to multiple of the tabstop, but subtract 1 since the
365 tab itself occupies a character position. */
0d9f234d
NB
366 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
367 - col % CPP_OPTION (pfile, tabstop)) - 1;
52fadca8
NB
368}
369
0d9f234d
NB
370/* Skips whitespace, saving the next non-whitespace character.
371 Adjusts pfile->col_adjust to account for tabs. Without this,
372 tokens might be assigned an incorrect column. */
4d6baafa 373static int
0d9f234d 374skip_whitespace (pfile, c)
041c3194 375 cpp_reader *pfile;
0d9f234d 376 cppchar_t c;
041c3194
ZW
377{
378 cpp_buffer *buffer = pfile->buffer;
0d9f234d 379 unsigned int warned = 0;
45b966db 380
0d9f234d 381 do
041c3194 382 {
91fcd158
NB
383 /* Horizontal space always OK. */
384 if (c == ' ')
0d9f234d 385 ;
91fcd158 386 else if (c == '\t')
0d9f234d
NB
387 adjust_column (pfile);
388 /* Just \f \v or \0 left. */
91fcd158 389 else if (c == '\0')
041c3194 390 {
4d6baafa
NB
391 if (buffer->cur - 1 == buffer->rlimit)
392 return 0;
91fcd158 393 if (!warned)
0d9f234d 394 {
ebef4e8c 395 cpp_error (pfile, DL_WARNING, "null character(s) ignored");
0d9f234d
NB
396 warned = 1;
397 }
45b966db 398 }
93c80368 399 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
ebef4e8c
NB
400 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
401 CPP_BUF_COL (buffer),
402 "%s in preprocessing directive",
403 c == '\f' ? "form feed" : "vertical tab");
0d9f234d 404
0d9f234d 405 c = *buffer->cur++;
45b966db 406 }
ec5c56db 407 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
0d9f234d
NB
408 while (is_nvspace (c));
409
480709cc 410 buffer->cur--;
4d6baafa 411 return 1;
041c3194 412}
45b966db 413
93c80368
NB
414/* See if the characters of a number token are valid in a name (no
415 '.', '+' or '-'). */
416static int
417name_p (pfile, string)
418 cpp_reader *pfile;
419 const cpp_string *string;
420{
421 unsigned int i;
422
423 for (i = 0; i < string->len; i++)
424 if (!is_idchar (string->text[i]))
425 return 0;
426
df383483 427 return 1;
93c80368
NB
428}
429
2c3fcba6
ZW
430/* Parse an identifier, skipping embedded backslash-newlines. This is
431 a critical inner loop. The common case is an identifier which has
432 not been split by backslash-newline, does not contain a dollar
433 sign, and has already been scanned (roughly 10:1 ratio of
434 seen:unseen identifiers in normal code; the distribution is
435 Poisson-like). Second most common case is a new identifier, not
436 split and no dollar sign. The other possibilities are rare and
10cf9bde 437 have been relegated to parse_slow. */
0d9f234d 438static cpp_hashnode *
2c3fcba6 439parse_identifier (pfile)
45b966db 440 cpp_reader *pfile;
45b966db 441{
93c80368 442 cpp_hashnode *result;
562a5c27 443 const uchar *cur, *base;
2c3fcba6
ZW
444
445 /* Fast-path loop. Skim over a normal identifier.
446 N.B. ISIDNUM does not include $. */
4d6baafa
NB
447 cur = pfile->buffer->cur;
448 while (ISIDNUM (*cur))
2c3fcba6 449 cur++;
2c3fcba6
ZW
450
451 /* Check for slow-path cases. */
4d6baafa 452 if (*cur == '?' || *cur == '\\' || *cur == '$')
10cf9bde
NB
453 {
454 unsigned int len;
455
456 base = parse_slow (pfile, cur, 0, &len);
457 result = (cpp_hashnode *)
458 ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
459 }
2c3fcba6
ZW
460 else
461 {
10cf9bde
NB
462 base = pfile->buffer->cur - 1;
463 pfile->buffer->cur = cur;
2c3fcba6
ZW
464 result = (cpp_hashnode *)
465 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
2c3fcba6
ZW
466 }
467
468 /* Rarely, identifiers require diagnostics when lexed.
469 XXX Has to be forced out of the fast path. */
470 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
471 && !pfile->state.skipping, 0))
472 {
473 /* It is allowed to poison the same identifier twice. */
474 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
ebef4e8c 475 cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
2c3fcba6
ZW
476 NODE_NAME (result));
477
478 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
479 replacement list of a variadic macro. */
480 if (result == pfile->spec_nodes.n__VA_ARGS__
481 && !pfile->state.va_args_ok)
ebef4e8c 482 cpp_error (pfile, DL_PEDWARN,
2c3fcba6
ZW
483 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
484 }
485
486 return result;
487}
488
10cf9bde
NB
489/* Slow path. This handles numbers and identifiers which have been
490 split, or contain dollar signs. The part of the token from
491 PFILE->buffer->cur-1 to CUR has already been scanned. NUMBER_P is
492 1 if it's a number, and 2 if it has a leading period. Returns a
493 pointer to the token's NUL-terminated spelling in permanent
494 storage, and sets PLEN to its length. */
562a5c27 495static uchar *
10cf9bde 496parse_slow (pfile, cur, number_p, plen)
2c3fcba6 497 cpp_reader *pfile;
562a5c27 498 const uchar *cur;
10cf9bde
NB
499 int number_p;
500 unsigned int *plen;
2c3fcba6 501{
0d9f234d 502 cpp_buffer *buffer = pfile->buffer;
562a5c27 503 const uchar *base = buffer->cur - 1;
2a967f3d 504 struct obstack *stack = &pfile->hash_table->stack;
10cf9bde
NB
505 unsigned int c, prevc, saw_dollar = 0;
506
507 /* Place any leading period. */
508 if (number_p == 2)
509 obstack_1grow (stack, '.');
2c3fcba6
ZW
510
511 /* Copy the part of the token which is known to be okay. */
512 obstack_grow (stack, base, cur - base);
041c3194 513
2c3fcba6
ZW
514 /* Now process the part which isn't. We are looking at one of
515 '$', '\\', or '?' on entry to this loop. */
10cf9bde 516 prevc = cur[-1];
2c3fcba6
ZW
517 c = *cur++;
518 buffer->cur = cur;
10cf9bde 519 for (;;)
041c3194 520 {
10cf9bde
NB
521 /* Potential escaped newline? */
522 buffer->backup_to = buffer->cur - 1;
523 if (c == '?' || c == '\\')
524 c = skip_escaped_newlines (pfile);
525
526 if (!is_idchar (c))
527 {
528 if (!number_p)
529 break;
530 if (c != '.' && !VALID_SIGN (c, prevc))
531 break;
532 }
533
534 /* Handle normal identifier characters in this loop. */
535 do
df383483 536 {
10cf9bde 537 prevc = c;
df383483 538 obstack_1grow (stack, c);
45b966db 539
df383483
KH
540 if (c == '$')
541 saw_dollar++;
ba89d661 542
df383483
KH
543 c = *buffer->cur++;
544 }
10cf9bde 545 while (is_idchar (c));
041c3194 546 }
0d9f234d 547
4d6baafa 548 /* Step back over the unwanted char. */
480709cc 549 BACKUP ();
93c80368 550
4fe9b91c 551 /* $ is not an identifier character in the standard, but is commonly
0d9f234d
NB
552 accepted as an extension. Don't warn about it in skipped
553 conditional blocks. */
cef0d199 554 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
ebef4e8c 555 cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
0d9f234d 556
10cf9bde
NB
557 /* Identifiers and numbers are null-terminated. */
558 *plen = obstack_object_size (stack);
2a967f3d 559 obstack_1grow (stack, '\0');
10cf9bde 560 return obstack_finish (stack);
45b966db
ZW
561}
562
5d8ebbd8 563/* Parse a number, beginning with character C, skipping embedded
da7d8304 564 backslash-newlines. LEADING_PERIOD is nonzero if there was a "."
5d8ebbd8 565 before C. Place the result in NUMBER. */
45b966db 566static void
10cf9bde 567parse_number (pfile, number, leading_period)
45b966db 568 cpp_reader *pfile;
0d9f234d 569 cpp_string *number;
93c80368 570 int leading_period;
45b966db 571{
562a5c27 572 const uchar *cur;
45b966db 573
10cf9bde
NB
574 /* Fast-path loop. Skim over a normal number.
575 N.B. ISIDNUM does not include $. */
576 cur = pfile->buffer->cur;
577 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
578 cur++;
cbcff6df 579
10cf9bde
NB
580 /* Check for slow-path cases. */
581 if (*cur == '?' || *cur == '\\' || *cur == '$')
582 number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
583 else
041c3194 584 {
562a5c27
NB
585 const uchar *base = pfile->buffer->cur - 1;
586 uchar *dest;
0d9f234d 587
10cf9bde
NB
588 number->len = cur - base + leading_period;
589 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
590 dest[number->len] = '\0';
591 number->text = dest;
45b966db 592
10cf9bde
NB
593 if (leading_period)
594 *dest++ = '.';
595 memcpy (dest, base, cur - base);
596 pfile->buffer->cur = cur;
45b966db 597 }
0d9f234d
NB
598}
599
93c80368
NB
600/* Subroutine of parse_string. */
601static int
602unescaped_terminator_p (pfile, dest)
603 cpp_reader *pfile;
604 const unsigned char *dest;
605{
606 const unsigned char *start, *temp;
607
608 /* In #include-style directives, terminators are not escapeable. */
609 if (pfile->state.angled_headers)
610 return 1;
611
ece54d54 612 start = BUFF_FRONT (pfile->u_buff);
93c80368
NB
613
614 /* An odd number of consecutive backslashes represents an escaped
615 terminator. */
616 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
617 ;
618
619 return ((dest - temp) & 1) == 0;
620}
621
0d9f234d 622/* Parses a string, character constant, or angle-bracketed header file
7868b4a2
NB
623 name. Handles embedded trigraphs and escaped newlines. The stored
624 string is guaranteed NUL-terminated, but it is not guaranteed that
625 this is the first NUL since embedded NULs are preserved.
45b966db 626
87062813
NB
627 When this function returns, buffer->cur points to the next
628 character to be processed. */
041c3194 629static void
0d9f234d 630parse_string (pfile, token, terminator)
45b966db 631 cpp_reader *pfile;
041c3194 632 cpp_token *token;
0d9f234d 633 cppchar_t terminator;
45b966db 634{
041c3194 635 cpp_buffer *buffer = pfile->buffer;
93c80368 636 unsigned char *dest, *limit;
0d9f234d 637 cppchar_t c;
d4e6133f 638 bool warned_nulls = false;
64cdc383
MH
639#ifdef MULTIBYTE_CHARS
640 wchar_t wc;
641 int char_len;
642#endif
0d9f234d 643
ece54d54
NB
644 dest = BUFF_FRONT (pfile->u_buff);
645 limit = BUFF_LIMIT (pfile->u_buff);
93c80368 646
64cdc383
MH
647#ifdef MULTIBYTE_CHARS
648 /* Reset multibyte conversion state. */
649 (void) local_mbtowc (NULL, NULL, 0);
650#endif
0d9f234d 651 for (;;)
45b966db 652 {
87062813 653 /* We need room for another char, possibly the terminating NUL. */
ece54d54
NB
654 if ((size_t) (limit - dest) < 1)
655 {
656 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
8c3b2693 657 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
ece54d54
NB
658 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
659 limit = BUFF_LIMIT (pfile->u_buff);
660 }
7868b4a2 661
64cdc383
MH
662#ifdef MULTIBYTE_CHARS
663 char_len = local_mbtowc (&wc, (const char *) buffer->cur,
664 buffer->rlimit - buffer->cur);
665 if (char_len == -1)
666 {
667 cpp_error (pfile, DL_WARNING,
df383483 668 "ignoring invalid multibyte character");
64cdc383
MH
669 char_len = 1;
670 c = *buffer->cur++;
671 }
672 else
673 {
674 buffer->cur += char_len;
675 c = wc;
676 }
677#else
87062813 678 c = *buffer->cur++;
64cdc383
MH
679#endif
680
681 /* Handle trigraphs, escaped newlines etc. */
0d9f234d 682 if (c == '?' || c == '\\')
87062813 683 c = skip_escaped_newlines (pfile);
45b966db 684
87062813 685 if (c == terminator)
45b966db 686 {
87062813
NB
687 if (unescaped_terminator_p (pfile, dest))
688 break;
0d9f234d
NB
689 }
690 else if (is_vspace (c))
691 {
d4e6133f
NB
692 /* No string literal may extend over multiple lines. In
693 assembly language, suppress the error except for <>
694 includes. This is a kludge around not knowing where
695 comments are. */
696 unterminated:
697 if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
ebef4e8c 698 cpp_error (pfile, DL_ERROR, "missing terminating %c character",
625458d0 699 (int) terminator);
d4e6133f
NB
700 buffer->cur--;
701 break;
0d9f234d 702 }
4d6baafa 703 else if (c == '\0')
0d9f234d 704 {
4d6baafa 705 if (buffer->cur - 1 == buffer->rlimit)
d4e6133f 706 goto unterminated;
4d6baafa
NB
707 if (!warned_nulls)
708 {
709 warned_nulls = true;
ebef4e8c
NB
710 cpp_error (pfile, DL_WARNING,
711 "null character(s) preserved in literal");
4d6baafa 712 }
45b966db 713 }
64cdc383
MH
714#ifdef MULTIBYTE_CHARS
715 if (char_len > 1)
716 {
717 for ( ; char_len > 0; --char_len)
718 *dest++ = (*buffer->cur - char_len);
719 }
720 else
721#endif
722 *dest++ = c;
45b966db
ZW
723 }
724
7868b4a2 725 *dest = '\0';
45b966db 726
ece54d54
NB
727 token->val.str.text = BUFF_FRONT (pfile->u_buff);
728 token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
729 BUFF_FRONT (pfile->u_buff) = dest + 1;
0d9f234d 730}
041c3194 731
93c80368 732/* The stored comment includes the comment start and any terminator. */
9e62c811 733static void
477cdac7 734save_comment (pfile, token, from, type)
0d9f234d 735 cpp_reader *pfile;
041c3194
ZW
736 cpp_token *token;
737 const unsigned char *from;
477cdac7 738 cppchar_t type;
9e62c811 739{
041c3194 740 unsigned char *buffer;
477cdac7 741 unsigned int len, clen;
df383483 742
1c6d33ef 743 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
480709cc 744
3542203b
NB
745 /* C++ comments probably (not definitely) have moved past a new
746 line, which we don't want to save in the comment. */
480709cc 747 if (is_vspace (pfile->buffer->cur[-1]))
3542203b 748 len--;
477cdac7
JT
749
750 /* If we are currently in a directive, then we need to store all
751 C++ comments as C comments internally, and so we need to
752 allocate a little extra space in that case.
753
754 Note that the only time we encounter a directive here is
755 when we are saving comments in a "#define". */
756 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
757
758 buffer = _cpp_unaligned_alloc (pfile, clen);
df383483 759
041c3194 760 token->type = CPP_COMMENT;
477cdac7 761 token->val.str.len = clen;
0d9f234d 762 token->val.str.text = buffer;
45b966db 763
1c6d33ef
NB
764 buffer[0] = '/';
765 memcpy (buffer + 1, from, len - 1);
477cdac7 766
1eeeb6a4 767 /* Finish conversion to a C comment, if necessary. */
477cdac7
JT
768 if (pfile->state.in_directive && type == '/')
769 {
770 buffer[1] = '*';
771 buffer[clen - 2] = '*';
772 buffer[clen - 1] = '/';
773 }
0d9f234d 774}
45b966db 775
5fddcffc
NB
776/* Allocate COUNT tokens for RUN. */
777void
778_cpp_init_tokenrun (run, count)
779 tokenrun *run;
780 unsigned int count;
781{
782 run->base = xnewvec (cpp_token, count);
783 run->limit = run->base + count;
784 run->next = NULL;
785}
786
787/* Returns the next tokenrun, or creates one if there is none. */
788static tokenrun *
789next_tokenrun (run)
790 tokenrun *run;
791{
792 if (run->next == NULL)
793 {
794 run->next = xnew (tokenrun);
bdcbe496 795 run->next->prev = run;
5fddcffc
NB
796 _cpp_init_tokenrun (run->next, 250);
797 }
798
799 return run->next;
800}
801
4ed5bcfb
NB
802/* Allocate a single token that is invalidated at the same time as the
803 rest of the tokens on the line. Has its line and col set to the
804 same as the last lexed token, so that diagnostics appear in the
805 right place. */
806cpp_token *
807_cpp_temp_token (pfile)
808 cpp_reader *pfile;
809{
810 cpp_token *old, *result;
811
812 old = pfile->cur_token - 1;
813 if (pfile->cur_token == pfile->cur_run->limit)
814 {
815 pfile->cur_run = next_tokenrun (pfile->cur_run);
816 pfile->cur_token = pfile->cur_run->base;
817 }
818
819 result = pfile->cur_token++;
820 result->line = old->line;
821 result->col = old->col;
822 return result;
823}
824
14baae01
NB
825/* Lex a token into RESULT (external interface). Takes care of issues
826 like directive handling, token lookahead, multiple include
a1f300c0 827 optimization and skipping. */
345894b4
NB
828const cpp_token *
829_cpp_lex_token (pfile)
45b966db 830 cpp_reader *pfile;
5fddcffc 831{
bdcbe496 832 cpp_token *result;
5fddcffc 833
bdcbe496 834 for (;;)
5fddcffc 835 {
bdcbe496 836 if (pfile->cur_token == pfile->cur_run->limit)
5fddcffc 837 {
bdcbe496
NB
838 pfile->cur_run = next_tokenrun (pfile->cur_run);
839 pfile->cur_token = pfile->cur_run->base;
5fddcffc
NB
840 }
841
bdcbe496 842 if (pfile->lookaheads)
14baae01
NB
843 {
844 pfile->lookaheads--;
845 result = pfile->cur_token++;
846 }
bdcbe496 847 else
14baae01 848 result = _cpp_lex_direct (pfile);
bdcbe496
NB
849
850 if (result->flags & BOL)
5fddcffc 851 {
bdcbe496
NB
852 /* Is this a directive. If _cpp_handle_directive returns
853 false, it is an assembler #. */
854 if (result->type == CPP_HASH
e808ec9c
NB
855 /* 6.10.3 p 11: Directives in a list of macro arguments
856 gives undefined behavior. This implementation
857 handles the directive as normal. */
858 && pfile->state.parsing_args != 1
bdcbe496
NB
859 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
860 continue;
97293897
NB
861 if (pfile->cb.line_change && !pfile->state.skipping)
862 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
5fddcffc 863 }
5fddcffc 864
bdcbe496
NB
865 /* We don't skip tokens in directives. */
866 if (pfile->state.in_directive)
867 break;
5fddcffc 868
bdcbe496 869 /* Outside a directive, invalidate controlling macros. At file
14baae01 870 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
bdcbe496 871 get here and MI optimisation works. */
5fddcffc 872 pfile->mi_valid = false;
bdcbe496
NB
873
874 if (!pfile->state.skipping || result->type == CPP_EOF)
875 break;
5fddcffc
NB
876 }
877
345894b4 878 return result;
5fddcffc
NB
879}
880
004cb263
NB
881/* A NUL terminates the current buffer. For ISO preprocessing this is
882 EOF, but for traditional preprocessing it indicates we need a line
883 refill. Returns TRUE to continue preprocessing a new buffer, FALSE
884 to return a CPP_EOF to the caller. */
885static bool
886continue_after_nul (pfile)
887 cpp_reader *pfile;
888{
889 cpp_buffer *buffer = pfile->buffer;
890 bool more = false;
df383483 891
004cb263
NB
892 buffer->saved_flags = BOL;
893 if (CPP_OPTION (pfile, traditional))
1a76916c
NB
894 {
895 if (pfile->state.in_directive)
896 return false;
897
898 _cpp_remove_overlay (pfile);
899 more = _cpp_read_logical_line_trad (pfile);
900 _cpp_overlay_buffer (pfile, pfile->out.base,
901 pfile->out.cur - pfile->out.base);
902 pfile->line = pfile->out.first_line;
903 }
004cb263
NB
904 else
905 {
906 /* Stop parsing arguments with a CPP_EOF. When we finally come
907 back here, do the work of popping the buffer. */
908 if (!pfile->state.parsing_args)
909 {
910 if (buffer->cur != buffer->line_base)
911 {
912 /* Non-empty files should end in a newline. Don't warn
913 for command line and _Pragma buffers. */
914 if (!buffer->from_stage3)
915 cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
916 handle_newline (pfile);
917 }
918
919 /* Similarly, finish an in-progress directive with CPP_EOF
920 before popping the buffer. */
921 if (!pfile->state.in_directive && buffer->prev)
922 {
923 more = !buffer->return_at_eof;
924 _cpp_pop_buffer (pfile);
925 }
926 }
927 }
928
929 return more;
930}
931
480709cc
NB
/* Helper for two-character operators.  Reads the next effective
   character; if it equals CHAR the token type becomes THEN_TYPE,
   otherwise the character is pushed back with BACKUP () and the type
   becomes ELSE_TYPE.  Expects `pfile' and `result' to be in scope at
   the point of use.  Arguments are parenthesised so that arbitrary
   expressions can be passed safely.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)	\
  do {						\
    if (get_effective_char (pfile) == (CHAR))	\
      result->type = (THEN_TYPE);		\
    else					\
      {						\
	BACKUP ();				\
	result->type = (ELSE_TYPE);		\
      }						\
  } while (0)
942
14baae01
NB
943/* Lex a token into pfile->cur_token, which is also incremented, to
944 get diagnostics pointing to the correct location.
945
946 Does not handle issues such as token lookahead, multiple-include
947 optimisation, directives, skipping etc. This function is only
948 suitable for use by _cpp_lex_token, and in special cases like
949 lex_expansion_token which doesn't care for any of these issues.
950
951 When meeting a newline, returns CPP_EOF if parsing a directive,
952 otherwise returns to the start of the token buffer if permissible.
953 Returns the location of the lexed token. */
954cpp_token *
955_cpp_lex_direct (pfile)
5fddcffc 956 cpp_reader *pfile;
45b966db 957{
0d9f234d 958 cppchar_t c;
adb84b42 959 cpp_buffer *buffer;
0d9f234d 960 const unsigned char *comment_start;
14baae01 961 cpp_token *result = pfile->cur_token++;
9ec7291f 962
 /* fresh_line: (re)load the current buffer -- continue_after_nul may
    have popped one -- and move the flags saved for the first token
    of the line into RESULT, clearing the saved copy. */
5fddcffc 963 fresh_line:
adb84b42 964 buffer = pfile->buffer;
bd969772
NB
965 result->flags = buffer->saved_flags;
966 buffer->saved_flags = 0;
 /* update_tokens_line: stamp the token with the reader's current
    line; re-entered after escaped newlines and skipped comments. */
5fddcffc 967 update_tokens_line:
1444f2ed 968 result->line = pfile->line;
041c3194 969
 /* skipped_white: consume one character; the column is computed from
    the already-advanced buffer->cur. */
5fddcffc 970 skipped_white:
480709cc 971 c = *buffer->cur++;
5fddcffc 972 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
5fddcffc
NB
973
974 trigraph:
0d9f234d 975 switch (c)
45b966db 976 {
4d6baafa
NB
977 case ' ': case '\t': case '\f': case '\v': case '\0':
978 result->flags |= PREV_WHITE;
979 if (skip_whitespace (pfile, c))
980 goto skipped_white;
981
004cb263 982 /* End of buffer. */
 /* Back up over the character just consumed before asking
    continue_after_nul whether there is more input to lex. */
4d6baafa 983 buffer->cur--;
004cb263
NB
984 if (continue_after_nul (pfile))
985 goto fresh_line;
0d9f234d 986 result->type = CPP_EOF;
5fddcffc 987 break;
45b966db 988
0d9f234d 989 case '\n': case '\r':
87062813 990 handle_newline (pfile);
bdcbe496
NB
991 buffer->saved_flags = BOL;
992 if (! pfile->state.in_directive)
45b966db 993 {
4ed5bcfb
NB
994 if (pfile->state.parsing_args == 2)
995 buffer->saved_flags |= PREV_WHITE;
 /* Unless tokens have to be kept, rewind to the first slot of
    the base token run so the next line reuses the token buffer. */
bdcbe496
NB
996 if (!pfile->keep_tokens)
997 {
998 pfile->cur_run = &pfile->base_run;
999 result = pfile->base_run.base;
1000 pfile->cur_token = result + 1;
1001 }
1002 goto fresh_line;
45b966db 1003 }
 /* Inside a directive the newline ends it: report CPP_EOF. */
5fddcffc
NB
1004 result->type = CPP_EOF;
1005 break;
46d07497 1006
0d9f234d
NB
1007 case '?':
1008 case '\\':
1009 /* These could start an escaped newline, or '?' a trigraph. Let
1010 skip_escaped_newlines do all the work. */
1011 {
67821e3a 1012 unsigned int line = pfile->line;
0d9f234d 1013
87062813 1014 c = skip_escaped_newlines (pfile);
67821e3a 1015 if (line != pfile->line)
87062813 1016 {
480709cc 1017 buffer->cur--;
87062813
NB
1018 /* We had at least one escaped newline of some sort.
1019 Update the token's line and column. */
5fddcffc 1020 goto update_tokens_line;
87062813 1021 }
480709cc 1022 }
0d9f234d 1023
480709cc
NB
1024 /* We are either the original '?' or '\\', or a trigraph. */
1025 if (c == '?')
0d9f234d 1026 result->type = CPP_QUERY;
480709cc
NB
1027 else if (c == '\\')
1028 goto random_char;
1029 else
1030 goto trigraph;
0d9f234d 1031 break;
46d07497 1032
0d9f234d
NB
1033 case '0': case '1': case '2': case '3': case '4':
1034 case '5': case '6': case '7': case '8': case '9':
1035 result->type = CPP_NUMBER;
10cf9bde 1036 parse_number (pfile, &result->val.str, 0);
0d9f234d 1037 break;
46d07497 1038
0abc6a6a
NB
1039 case 'L':
1040 /* 'L' may introduce wide characters or strings. */
df383483
KH
1041 {
1042 const unsigned char *pos = buffer->cur;
0d9f234d 1043
df383483
KH
1044 c = get_effective_char (pfile);
1045 if (c == '\'' || c == '"')
1046 {
1047 result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1048 parse_string (pfile, result, c);
1049 break;
1050 }
 /* Not a wide literal: restore the read position and lex the
    'L' as the start of an ordinary identifier. */
1051 buffer->cur = pos;
1052 }
1053 /* Fall through. */
0abc6a6a
NB
1054
1055 start_ident:
0d9f234d
NB
1056 case '_':
1057 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1058 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1059 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1060 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1061 case 'y': case 'z':
1062 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
0abc6a6a 1063 case 'G': case 'H': case 'I': case 'J': case 'K':
0d9f234d
NB
1064 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1065 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1066 case 'Y': case 'Z':
1067 result->type = CPP_NAME;
2c3fcba6 1068 result->val.node = parse_identifier (pfile);
0d9f234d 1069
0d9f234d 1070 /* Convert named operators to their proper types. */
 /* Note that val.node is left pointing at the identifier node;
    only the type and the NAMED_OP flag change. */
0abc6a6a 1071 if (result->val.node->flags & NODE_OPERATOR)
0d9f234d
NB
1072 {
1073 result->flags |= NAMED_OP;
93c80368 1074 result->type = result->val.node->value.operator;
0d9f234d
NB
1075 }
1076 break;
1077
1078 case '\'':
1079 case '"':
1080 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
0d9f234d
NB
1081 parse_string (pfile, result, c);
1082 break;
041c3194 1083
0d9f234d 1084 case '/':
1c6d33ef
NB
1085 /* A potential block or line comment. */
1086 comment_start = buffer->cur;
29401c30 1087 c = get_effective_char (pfile);
480709cc 1088
1c6d33ef
NB
1089 if (c == '*')
1090 {
0d9f234d 1091 if (skip_block_comment (pfile))
ebef4e8c 1092 cpp_error (pfile, DL_ERROR, "unterminated comment");
0d9f234d 1093 }
480709cc
NB
1094 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1095 || CPP_IN_SYSTEM_HEADER (pfile)))
0d9f234d 1096 {
bdb05a7b
NB
1097 /* Warn about comments only if pedantically GNUC89, and not
1098 in system headers. */
1099 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
a94c1199 1100 && ! buffer->warned_cplusplus_comments)
041c3194 1101 {
ebef4e8c 1102 cpp_error (pfile, DL_PEDWARN,
56508306 1103 "C++ style comments are not allowed in ISO C90");
ebef4e8c
NB
1104 cpp_error (pfile, DL_PEDWARN,
1105 "(this will be reported only once per input file)");
1c6d33ef
NB
1106 buffer->warned_cplusplus_comments = 1;
1107 }
0d9f234d 1108
01ef6563 1109 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
ebef4e8c 1110 cpp_error (pfile, DL_WARNING, "multi-line comment");
1c6d33ef 1111 }
480709cc
NB
1112 else if (c == '=')
1113 {
1114 result->type = CPP_DIV_EQ;
1115 break;
1116 }
1117 else
1118 {
1119 BACKUP ();
1120 result->type = CPP_DIV;
1121 break;
1122 }
0d9f234d 1123
 /* Only reached after a comment has been skipped.  Unless comments
    are being saved it counts as whitespace before the next token. */
1c6d33ef
NB
1124 if (!pfile->state.save_comments)
1125 {
1126 result->flags |= PREV_WHITE;
5fddcffc 1127 goto update_tokens_line;
0d9f234d 1128 }
1c6d33ef
NB
1129
1130 /* Save the comment as a token in its own right. */
477cdac7 1131 save_comment (pfile, result, comment_start, c);
bdcbe496 1132 break;
0d9f234d
NB
1133
1134 case '<':
 /* When angled header names are expected, everything up to the
    closing '>' is lexed as one CPP_HEADER_NAME token. */
1135 if (pfile->state.angled_headers)
1136 {
1137 result->type = CPP_HEADER_NAME;
480709cc
NB
1138 parse_string (pfile, result, '>');
1139 break;
0d9f234d 1140 }
45b966db 1141
29401c30 1142 c = get_effective_char (pfile);
0d9f234d 1143 if (c == '=')
480709cc 1144 result->type = CPP_LESS_EQ;
0d9f234d 1145 else if (c == '<')
480709cc 1146 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
0d9f234d 1147 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
480709cc 1148 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
0d9f234d
NB
1149 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1150 {
480709cc 1151 result->type = CPP_OPEN_SQUARE;
0d9f234d
NB
1152 result->flags |= DIGRAPH;
1153 }
1154 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1155 {
480709cc 1156 result->type = CPP_OPEN_BRACE;
0d9f234d
NB
1157 result->flags |= DIGRAPH;
1158 }
480709cc
NB
1159 else
1160 {
1161 BACKUP ();
1162 result->type = CPP_LESS;
1163 }
0d9f234d
NB
1164 break;
1165
1166 case '>':
29401c30 1167 c = get_effective_char (pfile);
0d9f234d 1168 if (c == '=')
480709cc 1169 result->type = CPP_GREATER_EQ;
0d9f234d 1170 else if (c == '>')
480709cc 1171 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
0d9f234d 1172 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
480709cc
NB
1173 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1174 else
0d9f234d 1175 {
480709cc
NB
1176 BACKUP ();
1177 result->type = CPP_GREATER;
0d9f234d
NB
1178 }
1179 break;
1180
cbcff6df 1181 case '%':
480709cc
NB
1182 c = get_effective_char (pfile);
1183 if (c == '=')
1184 result->type = CPP_MOD_EQ;
1185 else if (CPP_OPTION (pfile, digraphs) && c == ':')
1186 {
1187 result->flags |= DIGRAPH;
1188 result->type = CPP_HASH;
1189 if (get_effective_char (pfile) == '%')
1190 {
1191 const unsigned char *pos = buffer->cur;
1192
1193 if (get_effective_char (pfile) == ':')
1194 result->type = CPP_PASTE;
1195 else
 /* Not "%:%:"; rewind to one before the position saved
    after the second '%', presumably so that '%' starts
    the next token. */
1196 buffer->cur = pos - 1;
1197 }
1198 else
1199 BACKUP ();
1200 }
1201 else if (CPP_OPTION (pfile, digraphs) && c == '>')
1202 {
1203 result->flags |= DIGRAPH;
1204 result->type = CPP_CLOSE_BRACE;
1205 }
1206 else
1207 {
1208 BACKUP ();
1209 result->type = CPP_MOD;
1210 }
0d9f234d
NB
1211 break;
1212
cbcff6df 1213 case '.':
480709cc
NB
1214 result->type = CPP_DOT;
1215 c = get_effective_char (pfile);
1216 if (c == '.')
1217 {
1218 const unsigned char *pos = buffer->cur;
1219
1220 if (get_effective_char (pfile) == '.')
1221 result->type = CPP_ELLIPSIS;
1222 else
1223 buffer->cur = pos - 1;
1224 }
1225 /* All known character sets have 0...9 contiguous. */
0df6c2c7 1226 else if (ISDIGIT (c))
480709cc
NB
1227 {
1228 result->type = CPP_NUMBER;
10cf9bde 1229 parse_number (pfile, &result->val.str, 1);
480709cc
NB
1230 }
1231 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1232 result->type = CPP_DOT_STAR;
1233 else
1234 BACKUP ();
0d9f234d 1235 break;
45b966db 1236
0d9f234d 1237 case '+':
29401c30 1238 c = get_effective_char (pfile);
480709cc
NB
1239 if (c == '+')
1240 result->type = CPP_PLUS_PLUS;
1241 else if (c == '=')
1242 result->type = CPP_PLUS_EQ;
1243 else
1244 {
1245 BACKUP ();
1246 result->type = CPP_PLUS;
1247 }
0d9f234d 1248 break;
04e3ec78 1249
0d9f234d 1250 case '-':
29401c30 1251 c = get_effective_char (pfile);
0d9f234d
NB
1252 if (c == '>')
1253 {
480709cc
NB
1254 result->type = CPP_DEREF;
1255 if (CPP_OPTION (pfile, cplusplus))
1256 {
1257 if (get_effective_char (pfile) == '*')
1258 result->type = CPP_DEREF_STAR;
1259 else
1260 BACKUP ();
1261 }
0d9f234d 1262 }
0d9f234d 1263 else if (c == '-')
480709cc
NB
1264 result->type = CPP_MINUS_MINUS;
1265 else if (c == '=')
1266 result->type = CPP_MINUS_EQ;
1267 else
1268 {
1269 BACKUP ();
1270 result->type = CPP_MINUS;
1271 }
0d9f234d 1272 break;
45b966db 1273
0d9f234d 1274 case '&':
29401c30 1275 c = get_effective_char (pfile);
480709cc
NB
1276 if (c == '&')
1277 result->type = CPP_AND_AND;
1278 else if (c == '=')
1279 result->type = CPP_AND_EQ;
1280 else
1281 {
1282 BACKUP ();
1283 result->type = CPP_AND;
1284 }
0d9f234d 1285 break;
df383483 1286
0d9f234d 1287 case '|':
29401c30 1288 c = get_effective_char (pfile);
480709cc
NB
1289 if (c == '|')
1290 result->type = CPP_OR_OR;
1291 else if (c == '=')
1292 result->type = CPP_OR_EQ;
1293 else
1294 {
1295 BACKUP ();
1296 result->type = CPP_OR;
1297 }
0d9f234d 1298 break;
45b966db 1299
0d9f234d 1300 case ':':
29401c30 1301 c = get_effective_char (pfile);
0d9f234d 1302 if (c == ':' && CPP_OPTION (pfile, cplusplus))
480709cc 1303 result->type = CPP_SCOPE;
0d9f234d
NB
1304 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1305 {
1306 result->flags |= DIGRAPH;
480709cc
NB
1307 result->type = CPP_CLOSE_SQUARE;
1308 }
1309 else
1310 {
1311 BACKUP ();
1312 result->type = CPP_COLON;
0d9f234d
NB
1313 }
1314 break;
45b966db 1315
480709cc
NB
1316 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1317 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1318 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1319 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1320 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1321
0d9f234d
NB
1322 case '~': result->type = CPP_COMPL; break;
1323 case ',': result->type = CPP_COMMA; break;
1324 case '(': result->type = CPP_OPEN_PAREN; break;
1325 case ')': result->type = CPP_CLOSE_PAREN; break;
1326 case '[': result->type = CPP_OPEN_SQUARE; break;
1327 case ']': result->type = CPP_CLOSE_SQUARE; break;
1328 case '{': result->type = CPP_OPEN_BRACE; break;
1329 case '}': result->type = CPP_CLOSE_BRACE; break;
1330 case ';': result->type = CPP_SEMICOLON; break;
1331
cc937581
ZW
1332 /* @ is a punctuator in Objective C. */
1333 case '@': result->type = CPP_ATSIGN; break;
0d9f234d 1334
0abc6a6a
NB
1335 case '$':
1336 if (CPP_OPTION (pfile, dollars_in_ident))
1337 goto start_ident;
1338 /* Fall through... */
1339
 /* Anything not recognised above becomes a CPP_OTHER token that
    carries the character itself. */
0d9f234d
NB
1340 random_char:
1341 default:
1342 result->type = CPP_OTHER;
6c53ebff 1343 result->val.c = c;
0d9f234d
NB
1344 break;
1345 }
bdcbe496
NB
1346
1347 return result;
0d9f234d
NB
1348}
1349
5d8ebbd8 1350/* An upper bound on the number of bytes needed to spell TOKEN,
93c80368
NB
1351 including preceding whitespace. */
1352unsigned int
1353cpp_token_len (token)
1354 const cpp_token *token;
0d9f234d 1355{
93c80368 1356 unsigned int len;
6d2c2047 1357
93c80368 1358 switch (TOKEN_SPELL (token))
041c3194 1359 {
a28c5035 1360 default: len = 0; break;
47ad4138 1361 case SPELL_NUMBER:
a28c5035
NB
1362 case SPELL_STRING: len = token->val.str.len; break;
1363 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
041c3194 1364 }
47ad4138 1365 /* 1 for whitespace, 4 for comment delimiters. */
93c80368 1366 return len + 5;
6d2c2047
ZW
1367}
1368
041c3194 1369/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885
ZW
1370 already contain the enough space to hold the token's spelling.
1371 Returns a pointer to the character after the last character
1372 written. */
93c80368
NB
1373unsigned char *
1374cpp_spell_token (pfile, token, buffer)
041c3194
ZW
1375 cpp_reader *pfile; /* Would be nice to be rid of this... */
1376 const cpp_token *token;
1377 unsigned char *buffer;
1378{
96be6998 1379 switch (TOKEN_SPELL (token))
041c3194
ZW
1380 {
1381 case SPELL_OPERATOR:
1382 {
1383 const unsigned char *spelling;
1384 unsigned char c;
d6d5f795 1385
041c3194 1386 if (token->flags & DIGRAPH)
37b8524c
JDA
1387 spelling
1388 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
92936ecf
ZW
1389 else if (token->flags & NAMED_OP)
1390 goto spell_ident;
041c3194 1391 else
96be6998 1392 spelling = TOKEN_NAME (token);
df383483 1393
041c3194
ZW
1394 while ((c = *spelling++) != '\0')
1395 *buffer++ = c;
1396 }
1397 break;
d6d5f795 1398
47ad4138
ZW
1399 case SPELL_CHAR:
1400 *buffer++ = token->val.c;
1401 break;
1402
1403 spell_ident:
041c3194 1404 case SPELL_IDENT:
a28c5035
NB
1405 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1406 buffer += NODE_LEN (token->val.node);
041c3194 1407 break;
d6d5f795 1408
47ad4138
ZW
1409 case SPELL_NUMBER:
1410 memcpy (buffer, token->val.str.text, token->val.str.len);
1411 buffer += token->val.str.len;
1412 break;
1413
041c3194
ZW
1414 case SPELL_STRING:
1415 {
ba89d661
ZW
1416 int left, right, tag;
1417 switch (token->type)
1418 {
1419 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1420 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
ba89d661
ZW
1421 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1422 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1423 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
47ad4138 1424 default:
ebef4e8c
NB
1425 cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1426 TOKEN_NAME (token));
47ad4138 1427 return buffer;
ba89d661
ZW
1428 }
1429 if (tag) *buffer++ = tag;
47ad4138 1430 *buffer++ = left;
bfb9dc7f
ZW
1431 memcpy (buffer, token->val.str.text, token->val.str.len);
1432 buffer += token->val.str.len;
47ad4138 1433 *buffer++ = right;
041c3194
ZW
1434 }
1435 break;
d6d5f795 1436
041c3194 1437 case SPELL_NONE:
ebef4e8c 1438 cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
041c3194
ZW
1439 break;
1440 }
d6d5f795 1441
041c3194
ZW
1442 return buffer;
1443}
d6d5f795 1444
5d8ebbd8
NB
1445/* Returns TOKEN spelt as a null-terminated string. The string is
1446 freed when the reader is destroyed. Useful for diagnostics. */
93c80368
NB
1447unsigned char *
1448cpp_token_as_text (pfile, token)
c5a04734 1449 cpp_reader *pfile;
041c3194 1450 const cpp_token *token;
c5a04734 1451{
93c80368 1452 unsigned int len = cpp_token_len (token);
ece54d54 1453 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
c5a04734 1454
93c80368
NB
1455 end = cpp_spell_token (pfile, token, start);
1456 end[0] = '\0';
c5a04734 1457
93c80368
NB
1458 return start;
1459}
c5a04734 1460
5d8ebbd8
NB
1461/* Used by C front ends, which really should move to using
1462 cpp_token_as_text. */
93c80368
NB
1463const char *
1464cpp_type2name (type)
1465 enum cpp_ttype type;
1466{
1467 return (const char *) token_spellings[type].name;
1468}
c5a04734 1469
4ed5bcfb
NB
1470/* Writes the spelling of token to FP, without any preceding space.
1471 Separated from cpp_spell_token for efficiency - to avoid stdio
1472 double-buffering. */
93c80368
NB
1473void
1474cpp_output_token (token, fp)
1475 const cpp_token *token;
1476 FILE *fp;
1477{
93c80368 1478 switch (TOKEN_SPELL (token))
c5a04734 1479 {
93c80368
NB
1480 case SPELL_OPERATOR:
1481 {
1482 const unsigned char *spelling;
3b681e9d 1483 int c;
c5a04734 1484
93c80368 1485 if (token->flags & DIGRAPH)
37b8524c
JDA
1486 spelling
1487 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
93c80368
NB
1488 else if (token->flags & NAMED_OP)
1489 goto spell_ident;
1490 else
1491 spelling = TOKEN_NAME (token);
041c3194 1492
3b681e9d
ZW
1493 c = *spelling;
1494 do
1495 putc (c, fp);
1496 while ((c = *++spelling) != '\0');
93c80368
NB
1497 }
1498 break;
041c3194 1499
47ad4138
ZW
1500 case SPELL_CHAR:
1501 putc (token->val.c, fp);
1502 break;
1503
93c80368
NB
1504 spell_ident:
1505 case SPELL_IDENT:
3b681e9d 1506 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
93c80368 1507 break;
041c3194 1508
47ad4138
ZW
1509 case SPELL_NUMBER:
1510 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1511 break;
1512
93c80368
NB
1513 case SPELL_STRING:
1514 {
1515 int left, right, tag;
1516 switch (token->type)
1517 {
1518 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1519 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
93c80368
NB
1520 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1521 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1522 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
47ad4138
ZW
1523 default:
1524 fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1525 return;
93c80368
NB
1526 }
1527 if (tag) putc (tag, fp);
47ad4138 1528 putc (left, fp);
93c80368 1529 fwrite (token->val.str.text, 1, token->val.str.len, fp);
47ad4138 1530 putc (right, fp);
93c80368
NB
1531 }
1532 break;
c5a04734 1533
93c80368
NB
1534 case SPELL_NONE:
1535 /* An error, most probably. */
1536 break;
041c3194 1537 }
c5a04734
ZW
1538}
1539
93c80368
NB
1540/* Compare two tokens. */
1541int
1542_cpp_equiv_tokens (a, b)
1543 const cpp_token *a, *b;
c5a04734 1544{
93c80368
NB
1545 if (a->type == b->type && a->flags == b->flags)
1546 switch (TOKEN_SPELL (a))
1547 {
1548 default: /* Keep compiler happy. */
1549 case SPELL_OPERATOR:
1550 return 1;
1551 case SPELL_CHAR:
6c53ebff 1552 return a->val.c == b->val.c; /* Character. */
93c80368 1553 case SPELL_NONE:
56051c0a 1554 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
93c80368
NB
1555 case SPELL_IDENT:
1556 return a->val.node == b->val.node;
47ad4138 1557 case SPELL_NUMBER:
93c80368
NB
1558 case SPELL_STRING:
1559 return (a->val.str.len == b->val.str.len
1560 && !memcmp (a->val.str.text, b->val.str.text,
1561 a->val.str.len));
1562 }
c5a04734 1563
041c3194
ZW
1564 return 0;
1565}
1566
93c80368
NB
1567/* Returns nonzero if a space should be inserted to avoid an
1568 accidental token paste for output. For simplicity, it is
1569 conservative, and occasionally advises a space where one is not
1570 needed, e.g. "." and ".2". */
93c80368
NB
1571int
1572cpp_avoid_paste (pfile, token1, token2)
c5a04734 1573 cpp_reader *pfile;
93c80368 1574 const cpp_token *token1, *token2;
c5a04734 1575{
93c80368
NB
1576 enum cpp_ttype a = token1->type, b = token2->type;
1577 cppchar_t c;
c5a04734 1578
93c80368
NB
1579 if (token1->flags & NAMED_OP)
1580 a = CPP_NAME;
1581 if (token2->flags & NAMED_OP)
1582 b = CPP_NAME;
c5a04734 1583
93c80368
NB
1584 c = EOF;
1585 if (token2->flags & DIGRAPH)
37b8524c 1586 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
93c80368
NB
1587 else if (token_spellings[b].category == SPELL_OPERATOR)
1588 c = token_spellings[b].name[0];
c5a04734 1589
93c80368 1590 /* Quickly get everything that can paste with an '='. */
37b8524c 1591 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
93c80368 1592 return 1;
c5a04734 1593
93c80368 1594 switch (a)
c5a04734 1595 {
93c80368
NB
1596 case CPP_GREATER: return c == '>' || c == '?';
1597 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1598 case CPP_PLUS: return c == '+';
1599 case CPP_MINUS: return c == '-' || c == '>';
1600 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1601 case CPP_MOD: return c == ':' || c == '>';
1602 case CPP_AND: return c == '&';
1603 case CPP_OR: return c == '|';
1604 case CPP_COLON: return c == ':' || c == '>';
1605 case CPP_DEREF: return c == '*';
26ec42ee 1606 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
93c80368
NB
1607 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1608 case CPP_NAME: return ((b == CPP_NUMBER
1609 && name_p (pfile, &token2->val.str))
1610 || b == CPP_NAME
1611 || b == CPP_CHAR || b == CPP_STRING); /* L */
1612 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1613 || c == '.' || c == '+' || c == '-');
1614 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
6c53ebff 1615 && token1->val.c == '@'
93c80368
NB
1616 && (b == CPP_NAME || b == CPP_STRING));
1617 default: break;
c5a04734 1618 }
c5a04734 1619
417f3e3a 1620 return 0;
c5a04734
ZW
1621}
1622
93c80368 1623/* Output all the remaining tokens on the current line, and a newline
4ed5bcfb
NB
1624 character, to FP. Leading whitespace is removed. If there are
1625 macros, special token padding is not performed. */
c5a04734 1626void
93c80368 1627cpp_output_line (pfile, fp)
c5a04734 1628 cpp_reader *pfile;
93c80368 1629 FILE *fp;
c5a04734 1630{
4ed5bcfb 1631 const cpp_token *token;
96be6998 1632
4ed5bcfb
NB
1633 token = cpp_get_token (pfile);
1634 while (token->type != CPP_EOF)
96be6998 1635 {
4ed5bcfb
NB
1636 cpp_output_token (token, fp);
1637 token = cpp_get_token (pfile);
1638 if (token->flags & PREV_WHITE)
1639 putc (' ', fp);
96be6998
ZW
1640 }
1641
93c80368 1642 putc ('\n', fp);
041c3194 1643}
c5a04734 1644
c8a96070
NB
/* Return the numeric value of the hexadecimal digit C.  Aborts if C
   is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1655
62729350
NB
1656/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1657 failure if cpplib is not parsing C++ or C99. Such failure is
1658 silent, and no variables are updated. Otherwise returns 0, and
1659 warns if -Wtraditional.
c8a96070
NB
1660
1661 [lex.charset]: The character designated by the universal character
1662 name \UNNNNNNNN is that character whose character short name in
1663 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1664 universal character name \uNNNN is that character whose character
1665 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1666 for a universal character name is less than 0x20 or in the range
1667 0x7F-0x9F (inclusive), or if the universal character name
1668 designates a character in the basic source character set, then the
1669 program is ill-formed.
1670
1671 We assume that wchar_t is Unicode, so we don't need to do any
62729350 1672 mapping. Is this ever wrong?
c8a96070 1673
62729350
NB
1674 PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1675 LIMIT is the end of the string or charconst. PSTR is updated to
1676 point after the UCS on return, and the UCS is written into PC. */
1677
1678static int
1679maybe_read_ucs (pfile, pstr, limit, pc)
c8a96070
NB
1680 cpp_reader *pfile;
1681 const unsigned char **pstr;
1682 const unsigned char *limit;
625458d0 1683 cppchar_t *pc;
c8a96070
NB
1684{
1685 const unsigned char *p = *pstr;
62729350
NB
1686 unsigned int code = 0;
1687 unsigned int c = *pc, length;
1688
1689 /* Only attempt to interpret a UCS for C++ and C99. */
1690 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1691 return 1;
c8a96070 1692
62729350 1693 if (CPP_WTRADITIONAL (pfile))
ebef4e8c
NB
1694 cpp_error (pfile, DL_WARNING,
1695 "the meaning of '\\%c' is different in traditional C", c);
c8a96070 1696
f8710242
NB
1697 length = (c == 'u' ? 4: 8);
1698
1699 if ((size_t) (limit - p) < length)
1700 {
ebef4e8c 1701 cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
f8710242
NB
1702 /* Skip to the end to avoid more diagnostics. */
1703 p = limit;
1704 }
1705 else
1706 {
1707 for (; length; length--, p++)
c8a96070 1708 {
f8710242
NB
1709 c = *p;
1710 if (ISXDIGIT (c))
1711 code = (code << 4) + hex_digit_value (c);
1712 else
1713 {
ebef4e8c 1714 cpp_error (pfile, DL_ERROR,
f8710242
NB
1715 "non-hex digit '%c' in universal-character-name", c);
1716 /* We shouldn't skip in case there are multibyte chars. */
1717 break;
1718 }
c8a96070 1719 }
c8a96070
NB
1720 }
1721
1722#ifdef TARGET_EBCDIC
ebef4e8c 1723 cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
c8a96070
NB
1724 code = 0x3f; /* EBCDIC invalid character */
1725#else
f8710242
NB
1726 /* True extended characters are OK. */
1727 if (code >= 0xa0
1728 && !(code & 0x80000000)
1729 && !(code >= 0xD800 && code <= 0xDFFF))
1730 ;
1731 /* The standard permits $, @ and ` to be specified as UCNs. We use
1732 hex escapes so that this also works with EBCDIC hosts. */
1733 else if (code == 0x24 || code == 0x40 || code == 0x60)
1734 ;
1735 /* Don't give another error if one occurred above. */
1736 else if (length == 0)
ebef4e8c 1737 cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
c8a96070
NB
1738#endif
1739
1740 *pstr = p;
62729350
NB
1741 *pc = code;
1742 return 0;
c8a96070
NB
1743}
1744
4268e8bb
NB
1745/* Returns the value of an escape sequence, truncated to the correct
1746 target precision. PSTR points to the input pointer, which is just
1747 after the backslash. LIMIT is how much text we have. WIDE is true
1748 if the escape sequence is part of a wide character constant or
1749 string literal. Handles all relevant diagnostics. */
1750cppchar_t
1751cpp_parse_escape (pfile, pstr, limit, wide)
c8a96070
NB
1752 cpp_reader *pfile;
1753 const unsigned char **pstr;
1754 const unsigned char *limit;
4268e8bb 1755 int wide;
c8a96070
NB
1756{
1757 int unknown = 0;
1758 const unsigned char *str = *pstr;
4268e8bb
NB
1759 cppchar_t c, mask;
1760 unsigned int width;
1761
1762 if (wide)
1763 width = CPP_OPTION (pfile, wchar_precision);
1764 else
1765 width = CPP_OPTION (pfile, char_precision);
1766 if (width < BITS_PER_CPPCHAR_T)
1767 mask = ((cppchar_t) 1 << width) - 1;
1768 else
1769 mask = ~0;
c8a96070 1770
4268e8bb 1771 c = *str++;
c8a96070
NB
1772 switch (c)
1773 {
1774 case '\\': case '\'': case '"': case '?': break;
1775 case 'b': c = TARGET_BS; break;
1776 case 'f': c = TARGET_FF; break;
1777 case 'n': c = TARGET_NEWLINE; break;
1778 case 'r': c = TARGET_CR; break;
1779 case 't': c = TARGET_TAB; break;
1780 case 'v': c = TARGET_VT; break;
1781
1782 case '(': case '{': case '[': case '%':
1783 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1784 '\%' is used to prevent SCCS from getting confused. */
1785 unknown = CPP_PEDANTIC (pfile);
1786 break;
1787
1788 case 'a':
1789 if (CPP_WTRADITIONAL (pfile))
ebef4e8c
NB
1790 cpp_error (pfile, DL_WARNING,
1791 "the meaning of '\\a' is different in traditional C");
001e3fee 1792 c = TARGET_BELL;
c8a96070
NB
1793 break;
1794
1795 case 'e': case 'E':
1796 if (CPP_PEDANTIC (pfile))
ebef4e8c 1797 cpp_error (pfile, DL_PEDWARN,
625458d0 1798 "non-ISO-standard escape sequence, '\\%c'", (int) c);
c8a96070
NB
1799 c = TARGET_ESC;
1800 break;
df383483 1801
c8a96070 1802 case 'u': case 'U':
62729350 1803 unknown = maybe_read_ucs (pfile, &str, limit, &c);
c8a96070
NB
1804 break;
1805
1806 case 'x':
1807 if (CPP_WTRADITIONAL (pfile))
ebef4e8c
NB
1808 cpp_error (pfile, DL_WARNING,
1809 "the meaning of '\\x' is different in traditional C");
c8a96070 1810
df383483
KH
1811 {
1812 cppchar_t i = 0, overflow = 0;
1813 int digits_found = 0;
c8a96070 1814
df383483
KH
1815 while (str < limit)
1816 {
1817 c = *str;
1818 if (! ISXDIGIT (c))
1819 break;
1820 str++;
1821 overflow |= i ^ (i << 4 >> 4);
1822 i = (i << 4) + hex_digit_value (c);
1823 digits_found = 1;
1824 }
c8a96070 1825
df383483
KH
1826 if (!digits_found)
1827 cpp_error (pfile, DL_ERROR,
ebef4e8c 1828 "\\x used with no following hex digits");
c8a96070 1829
df383483
KH
1830 if (overflow | (i != (i & mask)))
1831 {
1832 cpp_error (pfile, DL_PEDWARN,
1833 "hex escape sequence out of range");
1834 i &= mask;
1835 }
1836 c = i;
1837 }
c8a96070
NB
1838 break;
1839
1840 case '0': case '1': case '2': case '3':
1841 case '4': case '5': case '6': case '7':
1842 {
4268e8bb
NB
1843 size_t count = 0;
1844 cppchar_t i = c - '0';
c8a96070
NB
1845
1846 while (str < limit && ++count < 3)
1847 {
1848 c = *str;
1849 if (c < '0' || c > '7')
1850 break;
1851 str++;
1852 i = (i << 3) + c - '0';
1853 }
1854
1855 if (i != (i & mask))
1856 {
ebef4e8c
NB
1857 cpp_error (pfile, DL_PEDWARN,
1858 "octal escape sequence out of range");
c8a96070
NB
1859 i &= mask;
1860 }
1861 c = i;
1862 }
1863 break;
1864
1865 default:
1866 unknown = 1;
1867 break;
1868 }
1869
1870 if (unknown)
1871 {
1872 if (ISGRAPH (c))
625458d0
NB
1873 cpp_error (pfile, DL_PEDWARN,
1874 "unknown escape sequence '\\%c'", (int) c);
c8a96070 1875 else
625458d0
NB
1876 cpp_error (pfile, DL_PEDWARN,
1877 "unknown escape sequence: '\\%03o'", (int) c);
c8a96070
NB
1878 }
1879
62729350 1880 if (c > mask)
4268e8bb 1881 {
639e8b0c 1882 cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
4268e8bb
NB
1883 c &= mask;
1884 }
62729350 1885
c8a96070
NB
1886 *pstr = str;
1887 return c;
1888}
1889
c8a96070 1890/* Interpret a (possibly wide) character constant in TOKEN.
4268e8bb
NB
1891 WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN
1892 points to a variable that is filled in with the number of
1893 characters seen, and UNSIGNEDP to a variable that indicates whether
1894 the result has signed type. */
1895cppchar_t
a5a49440 1896cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
c8a96070
NB
1897 cpp_reader *pfile;
1898 const cpp_token *token;
c8a96070 1899 unsigned int *pchars_seen;
4268e8bb 1900 int *unsignedp;
c8a96070
NB
1901{
1902 const unsigned char *str = token->val.str.text;
1903 const unsigned char *limit = str + token->val.str.len;
1904 unsigned int chars_seen = 0;
639e8b0c 1905 size_t width, max_chars;
4268e8bb 1906 cppchar_t c, mask, result = 0;
a47ed310 1907 bool unsigned_p;
c8a96070
NB
1908
1909#ifdef MULTIBYTE_CHARS
1910 (void) local_mbtowc (NULL, NULL, 0);
1911#endif
1912
1913 /* Width in bits. */
1914 if (token->type == CPP_CHAR)
a47ed310 1915 {
4268e8bb 1916 width = CPP_OPTION (pfile, char_precision);
2443d4e1 1917 max_chars = CPP_OPTION (pfile, int_precision) / width;
44a147ad 1918 unsigned_p = CPP_OPTION (pfile, unsigned_char);
a47ed310 1919 }
c8a96070 1920 else
a47ed310 1921 {
4268e8bb 1922 width = CPP_OPTION (pfile, wchar_precision);
2443d4e1 1923 max_chars = 1;
44a147ad 1924 unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
a47ed310 1925 }
c8a96070 1926
4268e8bb
NB
1927 if (width < BITS_PER_CPPCHAR_T)
1928 mask = ((cppchar_t) 1 << width) - 1;
c8a96070
NB
1929 else
1930 mask = ~0;
c8a96070
NB
1931
1932 while (str < limit)
1933 {
1934#ifdef MULTIBYTE_CHARS
1935 wchar_t wc;
1936 int char_len;
1937
1938 char_len = local_mbtowc (&wc, str, limit - str);
1939 if (char_len == -1)
1940 {
ebef4e8c
NB
1941 cpp_error (pfile, DL_WARNING,
1942 "ignoring invalid multibyte character");
c8a96070
NB
1943 c = *str++;
1944 }
1945 else
1946 {
1947 str += char_len;
1948 c = wc;
1949 }
1950#else
1951 c = *str++;
1952#endif
1953
1954 if (c == '\\')
4268e8bb 1955 c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
c8a96070
NB
1956
1957#ifdef MAP_CHARACTER
1958 if (ISPRINT (c))
1959 c = MAP_CHARACTER (c);
1960#endif
df383483 1961
639e8b0c
NB
1962 chars_seen++;
1963
a5a49440
NB
1964 /* Truncate the character, scale the result and merge the two. */
1965 c &= mask;
639e8b0c 1966 if (width < BITS_PER_CPPCHAR_T)
a5a49440 1967 result = (result << width) | c;
639e8b0c
NB
1968 else
1969 result = c;
c8a96070
NB
1970 }
1971
1972 if (chars_seen == 0)
ebef4e8c 1973 cpp_error (pfile, DL_ERROR, "empty character constant");
639e8b0c 1974 else if (chars_seen > 1)
c8a96070 1975 {
639e8b0c
NB
1976 /* Multichar charconsts are of type int and therefore signed. */
1977 unsigned_p = 0;
a5a49440 1978
639e8b0c
NB
1979 if (chars_seen > max_chars)
1980 {
1981 chars_seen = max_chars;
1982 cpp_error (pfile, DL_WARNING,
1983 "character constant too long for its type");
1984 }
a5a49440 1985 else if (CPP_OPTION (pfile, warn_multichar))
639e8b0c 1986 cpp_error (pfile, DL_WARNING, "multi-character character constant");
c8a96070
NB
1987 }
1988
b9e2d17b
NB
1989 /* Sign-extend or truncate the constant to cppchar_t. The value is
1990 in WIDTH bits, but for multi-char charconsts it's value is the
1991 full target type's width. */
1992 if (chars_seen > 1)
1993 width *= max_chars;
1994 if (width < BITS_PER_CPPCHAR_T)
a5a49440 1995 {
b9e2d17b
NB
1996 mask = ((cppchar_t) 1 << width) - 1;
1997 if (unsigned_p || !(result & (1 << (width - 1))))
1998 result &= mask;
1999 else
2000 result |= ~mask;
a5a49440
NB
2001 }
2002
c8a96070 2003 *pchars_seen = chars_seen;
4268e8bb 2004 *unsignedp = unsigned_p;
c8a96070
NB
2005 return result;
2006}
2007
1e013d2e
NB
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest data area new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the distance from BUFF's cur
   pointer to its limit.  Every macro argument is parenthesized so
   that arbitrary expressions may be passed safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2022
c9e7a609
NB
2023/* Create a new allocation buffer. Place the control block at the end
2024 of the buffer, so that buffer overflows will cause immediate chaos. */
b8af0ca5
NB
2025static _cpp_buff *
2026new_buff (len)
6142088c 2027 size_t len;
b8af0ca5
NB
2028{
2029 _cpp_buff *result;
ece54d54 2030 unsigned char *base;
b8af0ca5 2031
1e013d2e
NB
2032 if (len < MIN_BUFF_SIZE)
2033 len = MIN_BUFF_SIZE;
c70f6ed3 2034 len = CPP_ALIGN (len);
b8af0ca5
NB
2035
2036 base = xmalloc (len + sizeof (_cpp_buff));
2037 result = (_cpp_buff *) (base + len);
2038 result->base = base;
2039 result->cur = base;
2040 result->limit = base + len;
2041 result->next = NULL;
2042 return result;
2043}
2044
2045/* Place a chain of unwanted allocation buffers on the free list. */
2046void
2047_cpp_release_buff (pfile, buff)
2048 cpp_reader *pfile;
2049 _cpp_buff *buff;
2050{
2051 _cpp_buff *end = buff;
2052
2053 while (end->next)
2054 end = end->next;
2055 end->next = pfile->free_buffs;
2056 pfile->free_buffs = buff;
2057}
2058
2059/* Return a free buffer of size at least MIN_SIZE. */
2060_cpp_buff *
2061_cpp_get_buff (pfile, min_size)
2062 cpp_reader *pfile;
6142088c 2063 size_t min_size;
b8af0ca5
NB
2064{
2065 _cpp_buff *result, **p;
2066
2067 for (p = &pfile->free_buffs;; p = &(*p)->next)
2068 {
6142088c 2069 size_t size;
1e013d2e
NB
2070
2071 if (*p == NULL)
b8af0ca5 2072 return new_buff (min_size);
1e013d2e
NB
2073 result = *p;
2074 size = result->limit - result->base;
2075 /* Return a buffer that's big enough, but don't waste one that's
2076 way too big. */
34f5271d 2077 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
b8af0ca5
NB
2078 break;
2079 }
2080
2081 *p = result->next;
2082 result->next = NULL;
2083 result->cur = result->base;
2084 return result;
2085}
2086
4fe9b91c 2087/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
2088 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2089 the excess bytes to the new buffer. Chains the new buffer after
2090 BUFF, and returns the new buffer. */
b8af0ca5 2091_cpp_buff *
8c3b2693 2092_cpp_append_extend_buff (pfile, buff, min_extra)
b8af0ca5
NB
2093 cpp_reader *pfile;
2094 _cpp_buff *buff;
6142088c 2095 size_t min_extra;
b8af0ca5 2096{
6142088c 2097 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
8c3b2693 2098 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
b8af0ca5 2099
8c3b2693
NB
2100 buff->next = new_buff;
2101 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2102 return new_buff;
2103}
2104
4fe9b91c 2105/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
2106 remaining bytes of the buffer pointed to by BUFF, and at least
2107 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2108 Chains the new buffer before the buffer pointed to by BUFF, and
2109 updates the pointer to point to the new buffer. */
2110void
2111_cpp_extend_buff (pfile, pbuff, min_extra)
2112 cpp_reader *pfile;
2113 _cpp_buff **pbuff;
2114 size_t min_extra;
2115{
2116 _cpp_buff *new_buff, *old_buff = *pbuff;
2117 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2118
2119 new_buff = _cpp_get_buff (pfile, size);
2120 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2121 new_buff->next = old_buff;
2122 *pbuff = new_buff;
b8af0ca5
NB
2123}
2124
2125/* Free a chain of buffers starting at BUFF. */
2126void
2127_cpp_free_buff (buff)
2128 _cpp_buff *buff;
2129{
2130 _cpp_buff *next;
2131
2132 for (; buff; buff = next)
2133 {
2134 next = buff->next;
2135 free (buff->base);
2136 }
2137}
417f3e3a 2138
ece54d54
NB
2139/* Allocate permanent, unaligned storage of length LEN. */
2140unsigned char *
2141_cpp_unaligned_alloc (pfile, len)
2142 cpp_reader *pfile;
2143 size_t len;
2144{
2145 _cpp_buff *buff = pfile->u_buff;
2146 unsigned char *result = buff->cur;
2147
2148 if (len > (size_t) (buff->limit - result))
2149 {
2150 buff = _cpp_get_buff (pfile, len);
2151 buff->next = pfile->u_buff;
2152 pfile->u_buff = buff;
2153 result = buff->cur;
2154 }
2155
2156 buff->cur = result + len;
2157 return result;
2158}
2159
87062813
NB
2160/* Allocate permanent, unaligned storage of length LEN from a_buff.
2161 That buffer is used for growing allocations when saving macro
2162 replacement lists in a #define, and when parsing an answer to an
2163 assertion in #assert, #unassert or #if (and therefore possibly
2164 whilst expanding macros). It therefore must not be used by any
2165 code that they might call: specifically the lexer and the guts of
2166 the macro expander.
2167
2168 All existing other uses clearly fit this restriction: storing
2169 registered pragmas during initialization. */
93c80368 2170unsigned char *
8c3b2693
NB
2171_cpp_aligned_alloc (pfile, len)
2172 cpp_reader *pfile;
2173 size_t len;
3fef5b2b 2174{
8c3b2693
NB
2175 _cpp_buff *buff = pfile->a_buff;
2176 unsigned char *result = buff->cur;
3fef5b2b 2177
8c3b2693 2178 if (len > (size_t) (buff->limit - result))
3fef5b2b 2179 {
8c3b2693
NB
2180 buff = _cpp_get_buff (pfile, len);
2181 buff->next = pfile->a_buff;
2182 pfile->a_buff = buff;
2183 result = buff->cur;
3fef5b2b 2184 }
041c3194 2185
8c3b2693 2186 buff->cur = result + len;
93c80368 2187 return result;
041c3194 2188}
This page took 0.881753 seconds and 5 git commands to generate.