]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
Install proper version.
[gcc.git] / gcc / cpplex.c
CommitLineData
/* CPP Library - lexical analysis.
   Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
   Contributed by Per Bothner, 1994-95.
   Based on CCCP program by Paul Rubin, June 1986
   Adapted to ANSI C, Richard Stallman, Jan 1987
   Broken out to separate file, Zack Weinberg, Mar 2000

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
21
22#include "config.h"
23#include "system.h"
4977bab6
ZW
24#include "coretypes.h"
25#include "tm.h"
45b966db
ZW
26#include "cpplib.h"
27#include "cpphash.h"
28
/* How a token's spelling is obtained.  Tokens with SPELL_STRING store
   their spelling in the token list, and its length in the
   token->val.name.len.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_CHAR,
  SPELL_IDENT,
  SPELL_NUMBER,
  SPELL_STRING,
  SPELL_NONE
};
40
93c80368 41struct token_spelling
f9a0e96c 42{
93c80368
NB
43 enum spell_type category;
44 const unsigned char *name;
f9a0e96c
ZW
45};
46
/* Spellings of the digraph tokens.  */
static const unsigned char *const digraph_spellings[] =
{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
93c80368
NB
49
50#define OP(e, s) { SPELL_OPERATOR, U s },
51#define TK(e, s) { s, U STRINGX (e) },
8206c799 52static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
93c80368
NB
53#undef OP
54#undef TK
55
56#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
57#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
480709cc 58#define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
f2d5f0cc 59
26aea073 60static void add_line_note PARAMS ((cpp_buffer *, const uchar *, unsigned int));
29401c30 61static cppchar_t get_effective_char PARAMS ((cpp_reader *));
0d9f234d 62
cbcff6df 63static int skip_line_comment PARAMS ((cpp_reader *));
26aea073 64static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
2c3fcba6 65static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
562a5c27 66static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
df383483 67 unsigned int *));
10cf9bde 68static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
562a5c27 69static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
0d9f234d 70static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
562a5c27 71static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
477cdac7 72 cppchar_t));
93c80368 73static int name_p PARAMS ((cpp_reader *, const cpp_string *));
62729350 74static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
625458d0 75 const unsigned char *, cppchar_t *));
5fddcffc 76static tokenrun *next_tokenrun PARAMS ((tokenrun *));
f617b8e2 77
c8a96070 78static unsigned int hex_digit_value PARAMS ((unsigned int));
6142088c 79static _cpp_buff *new_buff PARAMS ((size_t));
15dad1d9 80
9d10c9a9 81
041c3194 82/* Utility routine:
9e62c811 83
bfb9dc7f
ZW
84 Compares, the token TOKEN to the NUL-terminated string STRING.
85 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
041c3194 86int
bfb9dc7f
ZW
87cpp_ideq (token, string)
88 const cpp_token *token;
041c3194
ZW
89 const char *string;
90{
bfb9dc7f 91 if (token->type != CPP_NAME)
041c3194 92 return 0;
bfb9dc7f 93
562a5c27 94 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
15dad1d9 95}
1368ee70 96
26aea073
NB
97/* Record a note TYPE at byte POS into the current cleaned logical
98 line. */
87062813 99static void
26aea073
NB
100add_line_note (buffer, pos, type)
101 cpp_buffer *buffer;
102 const uchar *pos;
103 unsigned int type;
0d9f234d 104{
26aea073
NB
105 if (buffer->notes_used == buffer->notes_cap)
106 {
107 buffer->notes_cap = buffer->notes_cap * 2 + 200;
108 buffer->notes = (_cpp_line_note *)
109 xrealloc (buffer->notes, buffer->notes_cap * sizeof (_cpp_line_note));
110 }
0d9f234d 111
26aea073
NB
112 buffer->notes[buffer->notes_used].pos = pos;
113 buffer->notes[buffer->notes_used].type = type;
114 buffer->notes_used++;
0d9f234d
NB
115}
116
26aea073
NB
117/* Returns with a logical line that contains no escaped newlines or
118 trigraphs. This is a time-critical inner loop. */
119void
120_cpp_clean_line (pfile)
45b966db 121 cpp_reader *pfile;
45b966db 122{
26aea073
NB
123 cpp_buffer *buffer;
124 const uchar *s;
125 uchar c, *d, *p;
87062813 126
26aea073
NB
127 buffer = pfile->buffer;
128 buffer->cur_note = buffer->notes_used = 0;
129 buffer->cur = buffer->line_base = buffer->next_line;
130 buffer->need_line = false;
131 s = buffer->next_line - 1;
87062813 132
26aea073 133 if (!buffer->from_stage3)
45b966db 134 {
26aea073
NB
135 d = (uchar *) s;
136
137 for (;;)
4a5b68a2 138 {
26aea073
NB
139 c = *++s;
140 *++d = c;
141
142 if (c == '\n' || c == '\r')
143 {
144 /* Handle DOS line endings. */
145 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
146 s++;
147 if (s == buffer->rlimit)
148 break;
149
150 /* Escaped? */
151 p = d;
152 while (p != buffer->next_line && is_nvspace (p[-1]))
153 p--;
154 if (p == buffer->next_line || p[-1] != '\\')
155 break;
156
157 add_line_note (buffer, p - 1,
158 p != d ? NOTE_ESC_SPACE_NL: NOTE_ESC_NL);
159 d = p - 2;
160 buffer->next_line = p - 1;
161 }
162 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
163 {
164 /* Add a note regardless, for the benefit of -Wtrigraphs. */
165 add_line_note (buffer, d, NOTE_TRIGRAPH);
166 if (CPP_OPTION (pfile, trigraphs))
167 {
168 *d = _cpp_trigraph_map[s[2]];
169 s += 2;
170 }
171 }
4a5b68a2 172 }
45b966db 173 }
26aea073
NB
174 else
175 {
176 do
177 s++;
178 while (*s != '\n' && *s != '\r');
179 d = (uchar *) s;
180
181 /* Handle DOS line endings. */
182 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
183 s++;
184 }
0d9f234d 185
26aea073
NB
186 *d = '\n';
187 add_line_note (buffer, d + 1, NOTE_NEWLINE);
188 buffer->next_line = s + 1;
45b966db
ZW
189}
190
26aea073
NB
191/* Process the notes created by add_line_note as far as the current
192 location. */
193void
194_cpp_process_line_notes (pfile, in_comment)
29401c30 195 cpp_reader *pfile;
26aea073 196 int in_comment;
45b966db 197{
29401c30
NB
198 cpp_buffer *buffer = pfile->buffer;
199
26aea073 200 for (;;)
041c3194 201 {
26aea073
NB
202 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
203 unsigned int col;
a5c3cccd 204
26aea073
NB
205 if (note->pos > buffer->cur)
206 break;
a5c3cccd 207
26aea073
NB
208 buffer->cur_note++;
209 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
4d6baafa 210
26aea073
NB
211 switch (note->type)
212 {
213 case NOTE_NEWLINE:
214 /* This note is a kind of sentinel we should never reach. */
215 abort ();
041c3194 216
26aea073
NB
217 case NOTE_TRIGRAPH:
218 if (!in_comment && CPP_OPTION (pfile, warn_trigraphs))
0d9f234d 219 {
26aea073
NB
220 if (CPP_OPTION (pfile, trigraphs))
221 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
222 "trigraph converted to %c",
223 (int) note->pos[0]);
224 else
225 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
226 "trigraph ??%c ignored",
227 (int) note->pos[2]);
0d9f234d 228 }
26aea073 229 break;
45b966db 230
26aea073
NB
231 case NOTE_ESC_SPACE_NL:
232 if (!in_comment)
233 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
234 "backslash and newline separated by space");
235 /* Fall through... */
236 case NOTE_ESC_NL:
237 if (buffer->next_line > buffer->rlimit)
87062813 238 {
26aea073
NB
239 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, col,
240 "backslash-newline at end of file");
241 /* Prevent "no newline at end of file" warning. */
242 buffer->next_line = buffer->rlimit;
87062813 243 }
26aea073
NB
244
245 buffer->line_base = note->pos;
246 pfile->line++;
0d9f234d 247 }
041c3194 248 }
45b966db
ZW
249}
250
0d9f234d 251/* Obtain the next character, after trigraph conversion and skipping
87062813
NB
252 an arbitrarily long string of escaped newlines. The common case of
253 no trigraphs or escaped newlines falls through quickly. On return,
480709cc
NB
254 buffer->backup_to points to where to return to if the character is
255 not to be processed. */
0d9f234d 256static cppchar_t
29401c30
NB
257get_effective_char (pfile)
258 cpp_reader *pfile;
64aaf407 259{
480709cc 260 cpp_buffer *buffer = pfile->buffer;
0d9f234d 261
480709cc 262 buffer->backup_to = buffer->cur;
26aea073 263 return *buffer->cur++;
64aaf407
NB
264}
265
0d9f234d
NB
266/* Skip a C-style block comment. We find the end of the comment by
267 seeing if an asterisk is before every '/' we encounter. Returns
da7d8304 268 nonzero if comment terminated by EOF, zero otherwise. */
26aea073
NB
269bool
270_cpp_skip_block_comment (pfile)
45b966db
ZW
271 cpp_reader *pfile;
272{
041c3194 273 cpp_buffer *buffer = pfile->buffer;
26aea073 274 cppchar_t c;
0d9f234d 275
26aea073
NB
276 if (*buffer->cur == '/')
277 buffer->cur++;
0d9f234d 278
26aea073
NB
279 for (;;)
280 {
281 c = *buffer->cur++;
041c3194 282
0d9f234d
NB
283 /* People like decorating comments with '*', so check for '/'
284 instead for efficiency. */
041c3194 285 if (c == '/')
45b966db 286 {
26aea073 287 if (buffer->cur[-2] == '*')
0d9f234d 288 break;
041c3194 289
0d9f234d 290 /* Warn about potential nested comments, but not if the '/'
a1f300c0 291 comes immediately before the true comment delimiter.
041c3194 292 Don't bother to get it right across escaped newlines. */
0d9f234d 293 if (CPP_OPTION (pfile, warn_comments)
87062813 294 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
ebef4e8c
NB
295 cpp_error_with_line (pfile, DL_WARNING,
296 pfile->line, CPP_BUF_COL (buffer),
297 "\"/*\" within comment");
45b966db 298 }
26aea073
NB
299 else if (c == '\n')
300 {
301 buffer->cur--;
302 _cpp_process_line_notes (pfile, true);
303 if (buffer->next_line >= buffer->rlimit)
304 return true;
305 _cpp_clean_line (pfile);
306 pfile->line++;
307 }
45b966db 308 }
041c3194 309
26aea073 310 return false;
45b966db
ZW
311}
312
480709cc 313/* Skip a C++ line comment, leaving buffer->cur pointing to the
da7d8304 314 terminating newline. Handles escaped newlines. Returns nonzero
480709cc 315 if a multiline comment. */
041c3194 316static int
cbcff6df
NB
317skip_line_comment (pfile)
318 cpp_reader *pfile;
45b966db 319{
cbcff6df 320 cpp_buffer *buffer = pfile->buffer;
67821e3a 321 unsigned int orig_line = pfile->line;
041c3194 322
26aea073
NB
323 while (*buffer->cur != '\n')
324 buffer->cur++;
480709cc 325
26aea073 326 _cpp_process_line_notes (pfile, true);
67821e3a 327 return orig_line != pfile->line;
041c3194 328}
45b966db 329
26aea073 330/* Skips whitespace, saving the next non-whitespace character. */
52fadca8 331static void
0d9f234d 332skip_whitespace (pfile, c)
041c3194 333 cpp_reader *pfile;
0d9f234d 334 cppchar_t c;
041c3194
ZW
335{
336 cpp_buffer *buffer = pfile->buffer;
f7d151fb 337 bool saw_NUL = false;
45b966db 338
0d9f234d 339 do
041c3194 340 {
91fcd158 341 /* Horizontal space always OK. */
26aea073 342 if (c == ' ' || c == '\t')
0d9f234d 343 ;
0d9f234d 344 /* Just \f \v or \0 left. */
91fcd158 345 else if (c == '\0')
f7d151fb 346 saw_NUL = true;
93c80368 347 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
ebef4e8c
NB
348 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
349 CPP_BUF_COL (buffer),
350 "%s in preprocessing directive",
351 c == '\f' ? "form feed" : "vertical tab");
0d9f234d 352
0d9f234d 353 c = *buffer->cur++;
45b966db 354 }
ec5c56db 355 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
0d9f234d
NB
356 while (is_nvspace (c));
357
f7d151fb
NB
358 if (saw_NUL)
359 cpp_error (pfile, DL_WARNING, "null character(s) ignored");
360
480709cc 361 buffer->cur--;
041c3194 362}
45b966db 363
93c80368
NB
364/* See if the characters of a number token are valid in a name (no
365 '.', '+' or '-'). */
366static int
367name_p (pfile, string)
368 cpp_reader *pfile;
369 const cpp_string *string;
370{
371 unsigned int i;
372
373 for (i = 0; i < string->len; i++)
374 if (!is_idchar (string->text[i]))
375 return 0;
376
df383483 377 return 1;
93c80368
NB
378}
379
2c3fcba6
ZW
380/* Parse an identifier, skipping embedded backslash-newlines. This is
381 a critical inner loop. The common case is an identifier which has
382 not been split by backslash-newline, does not contain a dollar
383 sign, and has already been scanned (roughly 10:1 ratio of
384 seen:unseen identifiers in normal code; the distribution is
385 Poisson-like). Second most common case is a new identifier, not
386 split and no dollar sign. The other possibilities are rare and
10cf9bde 387 have been relegated to parse_slow. */
0d9f234d 388static cpp_hashnode *
2c3fcba6 389parse_identifier (pfile)
45b966db 390 cpp_reader *pfile;
45b966db 391{
93c80368 392 cpp_hashnode *result;
562a5c27 393 const uchar *cur, *base;
2c3fcba6
ZW
394
395 /* Fast-path loop. Skim over a normal identifier.
396 N.B. ISIDNUM does not include $. */
4d6baafa
NB
397 cur = pfile->buffer->cur;
398 while (ISIDNUM (*cur))
2c3fcba6 399 cur++;
2c3fcba6
ZW
400
401 /* Check for slow-path cases. */
26aea073 402 if (*cur == '$')
10cf9bde
NB
403 {
404 unsigned int len;
405
406 base = parse_slow (pfile, cur, 0, &len);
407 result = (cpp_hashnode *)
408 ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
409 }
2c3fcba6
ZW
410 else
411 {
10cf9bde
NB
412 base = pfile->buffer->cur - 1;
413 pfile->buffer->cur = cur;
2c3fcba6
ZW
414 result = (cpp_hashnode *)
415 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
2c3fcba6
ZW
416 }
417
418 /* Rarely, identifiers require diagnostics when lexed.
419 XXX Has to be forced out of the fast path. */
420 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
421 && !pfile->state.skipping, 0))
422 {
423 /* It is allowed to poison the same identifier twice. */
424 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
ebef4e8c 425 cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
2c3fcba6
ZW
426 NODE_NAME (result));
427
428 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
429 replacement list of a variadic macro. */
430 if (result == pfile->spec_nodes.n__VA_ARGS__
431 && !pfile->state.va_args_ok)
ebef4e8c 432 cpp_error (pfile, DL_PEDWARN,
2c3fcba6
ZW
433 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
434 }
435
436 return result;
437}
438
10cf9bde
NB
439/* Slow path. This handles numbers and identifiers which have been
440 split, or contain dollar signs. The part of the token from
441 PFILE->buffer->cur-1 to CUR has already been scanned. NUMBER_P is
442 1 if it's a number, and 2 if it has a leading period. Returns a
443 pointer to the token's NUL-terminated spelling in permanent
444 storage, and sets PLEN to its length. */
562a5c27 445static uchar *
10cf9bde 446parse_slow (pfile, cur, number_p, plen)
2c3fcba6 447 cpp_reader *pfile;
562a5c27 448 const uchar *cur;
10cf9bde
NB
449 int number_p;
450 unsigned int *plen;
2c3fcba6 451{
0d9f234d 452 cpp_buffer *buffer = pfile->buffer;
562a5c27 453 const uchar *base = buffer->cur - 1;
2a967f3d 454 struct obstack *stack = &pfile->hash_table->stack;
10cf9bde
NB
455 unsigned int c, prevc, saw_dollar = 0;
456
457 /* Place any leading period. */
458 if (number_p == 2)
459 obstack_1grow (stack, '.');
2c3fcba6
ZW
460
461 /* Copy the part of the token which is known to be okay. */
462 obstack_grow (stack, base, cur - base);
041c3194 463
2c3fcba6
ZW
464 /* Now process the part which isn't. We are looking at one of
465 '$', '\\', or '?' on entry to this loop. */
10cf9bde 466 prevc = cur[-1];
2c3fcba6
ZW
467 c = *cur++;
468 buffer->cur = cur;
10cf9bde 469 for (;;)
041c3194 470 {
10cf9bde
NB
471 /* Potential escaped newline? */
472 buffer->backup_to = buffer->cur - 1;
10cf9bde
NB
473
474 if (!is_idchar (c))
475 {
476 if (!number_p)
477 break;
478 if (c != '.' && !VALID_SIGN (c, prevc))
479 break;
480 }
481
482 /* Handle normal identifier characters in this loop. */
483 do
df383483 484 {
10cf9bde 485 prevc = c;
df383483 486 obstack_1grow (stack, c);
45b966db 487
df383483
KH
488 if (c == '$')
489 saw_dollar++;
ba89d661 490
df383483
KH
491 c = *buffer->cur++;
492 }
10cf9bde 493 while (is_idchar (c));
041c3194 494 }
0d9f234d 495
4d6baafa 496 /* Step back over the unwanted char. */
480709cc 497 BACKUP ();
93c80368 498
4fe9b91c 499 /* $ is not an identifier character in the standard, but is commonly
0d9f234d
NB
500 accepted as an extension. Don't warn about it in skipped
501 conditional blocks. */
cef0d199 502 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
ebef4e8c 503 cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
0d9f234d 504
10cf9bde
NB
505 /* Identifiers and numbers are null-terminated. */
506 *plen = obstack_object_size (stack);
2a967f3d 507 obstack_1grow (stack, '\0');
10cf9bde 508 return obstack_finish (stack);
45b966db
ZW
509}
510
5d8ebbd8 511/* Parse a number, beginning with character C, skipping embedded
da7d8304 512 backslash-newlines. LEADING_PERIOD is nonzero if there was a "."
5d8ebbd8 513 before C. Place the result in NUMBER. */
45b966db 514static void
10cf9bde 515parse_number (pfile, number, leading_period)
45b966db 516 cpp_reader *pfile;
0d9f234d 517 cpp_string *number;
93c80368 518 int leading_period;
45b966db 519{
562a5c27 520 const uchar *cur;
45b966db 521
10cf9bde
NB
522 /* Fast-path loop. Skim over a normal number.
523 N.B. ISIDNUM does not include $. */
524 cur = pfile->buffer->cur;
525 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
526 cur++;
cbcff6df 527
10cf9bde 528 /* Check for slow-path cases. */
26aea073 529 if (*cur == '$')
10cf9bde
NB
530 number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
531 else
041c3194 532 {
562a5c27
NB
533 const uchar *base = pfile->buffer->cur - 1;
534 uchar *dest;
0d9f234d 535
10cf9bde
NB
536 number->len = cur - base + leading_period;
537 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
538 dest[number->len] = '\0';
539 number->text = dest;
45b966db 540
10cf9bde
NB
541 if (leading_period)
542 *dest++ = '.';
543 memcpy (dest, base, cur - base);
544 pfile->buffer->cur = cur;
45b966db 545 }
0d9f234d
NB
546}
547
93c80368
NB
548/* Subroutine of parse_string. */
549static int
550unescaped_terminator_p (pfile, dest)
551 cpp_reader *pfile;
552 const unsigned char *dest;
553{
554 const unsigned char *start, *temp;
555
95bd1dd7 556 /* In #include-style directives, terminators are not escapable. */
93c80368
NB
557 if (pfile->state.angled_headers)
558 return 1;
559
ece54d54 560 start = BUFF_FRONT (pfile->u_buff);
93c80368
NB
561
562 /* An odd number of consecutive backslashes represents an escaped
563 terminator. */
564 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
565 ;
566
567 return ((dest - temp) & 1) == 0;
568}
569
0d9f234d 570/* Parses a string, character constant, or angle-bracketed header file
7868b4a2
NB
571 name. Handles embedded trigraphs and escaped newlines. The stored
572 string is guaranteed NUL-terminated, but it is not guaranteed that
573 this is the first NUL since embedded NULs are preserved.
45b966db 574
87062813
NB
575 When this function returns, buffer->cur points to the next
576 character to be processed. */
041c3194 577static void
0d9f234d 578parse_string (pfile, token, terminator)
45b966db 579 cpp_reader *pfile;
041c3194 580 cpp_token *token;
0d9f234d 581 cppchar_t terminator;
45b966db 582{
041c3194 583 cpp_buffer *buffer = pfile->buffer;
93c80368 584 unsigned char *dest, *limit;
0d9f234d 585 cppchar_t c;
d4e6133f 586 bool warned_nulls = false;
0d9f234d 587
ece54d54
NB
588 dest = BUFF_FRONT (pfile->u_buff);
589 limit = BUFF_LIMIT (pfile->u_buff);
93c80368 590
0d9f234d 591 for (;;)
45b966db 592 {
87062813 593 /* We need room for another char, possibly the terminating NUL. */
ece54d54
NB
594 if ((size_t) (limit - dest) < 1)
595 {
596 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
8c3b2693 597 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
ece54d54
NB
598 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
599 limit = BUFF_LIMIT (pfile->u_buff);
600 }
7868b4a2 601
87062813 602 c = *buffer->cur++;
45b966db 603
87062813 604 if (c == terminator)
45b966db 605 {
87062813
NB
606 if (unescaped_terminator_p (pfile, dest))
607 break;
0d9f234d 608 }
26aea073 609 else if (c == '\n')
0d9f234d 610 {
d4e6133f
NB
611 /* No string literal may extend over multiple lines. In
612 assembly language, suppress the error except for <>
613 includes. This is a kludge around not knowing where
614 comments are. */
d4e6133f 615 if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
ebef4e8c 616 cpp_error (pfile, DL_ERROR, "missing terminating %c character",
625458d0 617 (int) terminator);
d4e6133f
NB
618 buffer->cur--;
619 break;
0d9f234d 620 }
4d6baafa 621 else if (c == '\0')
0d9f234d 622 {
4d6baafa
NB
623 if (!warned_nulls)
624 {
625 warned_nulls = true;
ebef4e8c
NB
626 cpp_error (pfile, DL_WARNING,
627 "null character(s) preserved in literal");
4d6baafa 628 }
45b966db 629 }
64cdc383 630 *dest++ = c;
45b966db
ZW
631 }
632
7868b4a2 633 *dest = '\0';
45b966db 634
ece54d54
NB
635 token->val.str.text = BUFF_FRONT (pfile->u_buff);
636 token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
637 BUFF_FRONT (pfile->u_buff) = dest + 1;
0d9f234d 638}
041c3194 639
93c80368 640/* The stored comment includes the comment start and any terminator. */
9e62c811 641static void
477cdac7 642save_comment (pfile, token, from, type)
0d9f234d 643 cpp_reader *pfile;
041c3194
ZW
644 cpp_token *token;
645 const unsigned char *from;
477cdac7 646 cppchar_t type;
9e62c811 647{
041c3194 648 unsigned char *buffer;
477cdac7 649 unsigned int len, clen;
df383483 650
1c6d33ef 651 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
480709cc 652
3542203b
NB
653 /* C++ comments probably (not definitely) have moved past a new
654 line, which we don't want to save in the comment. */
480709cc 655 if (is_vspace (pfile->buffer->cur[-1]))
3542203b 656 len--;
477cdac7
JT
657
658 /* If we are currently in a directive, then we need to store all
659 C++ comments as C comments internally, and so we need to
660 allocate a little extra space in that case.
661
662 Note that the only time we encounter a directive here is
663 when we are saving comments in a "#define". */
664 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
665
666 buffer = _cpp_unaligned_alloc (pfile, clen);
df383483 667
041c3194 668 token->type = CPP_COMMENT;
477cdac7 669 token->val.str.len = clen;
0d9f234d 670 token->val.str.text = buffer;
45b966db 671
1c6d33ef
NB
672 buffer[0] = '/';
673 memcpy (buffer + 1, from, len - 1);
477cdac7 674
1eeeb6a4 675 /* Finish conversion to a C comment, if necessary. */
477cdac7
JT
676 if (pfile->state.in_directive && type == '/')
677 {
678 buffer[1] = '*';
679 buffer[clen - 2] = '*';
680 buffer[clen - 1] = '/';
681 }
0d9f234d 682}
45b966db 683
5fddcffc
NB
684/* Allocate COUNT tokens for RUN. */
685void
686_cpp_init_tokenrun (run, count)
687 tokenrun *run;
688 unsigned int count;
689{
690 run->base = xnewvec (cpp_token, count);
691 run->limit = run->base + count;
692 run->next = NULL;
693}
694
695/* Returns the next tokenrun, or creates one if there is none. */
696static tokenrun *
697next_tokenrun (run)
698 tokenrun *run;
699{
700 if (run->next == NULL)
701 {
702 run->next = xnew (tokenrun);
bdcbe496 703 run->next->prev = run;
5fddcffc
NB
704 _cpp_init_tokenrun (run->next, 250);
705 }
706
707 return run->next;
708}
709
4ed5bcfb
NB
710/* Allocate a single token that is invalidated at the same time as the
711 rest of the tokens on the line. Has its line and col set to the
712 same as the last lexed token, so that diagnostics appear in the
713 right place. */
714cpp_token *
715_cpp_temp_token (pfile)
716 cpp_reader *pfile;
717{
718 cpp_token *old, *result;
719
720 old = pfile->cur_token - 1;
721 if (pfile->cur_token == pfile->cur_run->limit)
722 {
723 pfile->cur_run = next_tokenrun (pfile->cur_run);
724 pfile->cur_token = pfile->cur_run->base;
725 }
726
727 result = pfile->cur_token++;
728 result->line = old->line;
729 result->col = old->col;
730 return result;
731}
732
14baae01
NB
733/* Lex a token into RESULT (external interface). Takes care of issues
734 like directive handling, token lookahead, multiple include
a1f300c0 735 optimization and skipping. */
345894b4
NB
736const cpp_token *
737_cpp_lex_token (pfile)
45b966db 738 cpp_reader *pfile;
5fddcffc 739{
bdcbe496 740 cpp_token *result;
5fddcffc 741
bdcbe496 742 for (;;)
5fddcffc 743 {
bdcbe496 744 if (pfile->cur_token == pfile->cur_run->limit)
5fddcffc 745 {
bdcbe496
NB
746 pfile->cur_run = next_tokenrun (pfile->cur_run);
747 pfile->cur_token = pfile->cur_run->base;
5fddcffc
NB
748 }
749
bdcbe496 750 if (pfile->lookaheads)
14baae01
NB
751 {
752 pfile->lookaheads--;
753 result = pfile->cur_token++;
754 }
bdcbe496 755 else
14baae01 756 result = _cpp_lex_direct (pfile);
bdcbe496
NB
757
758 if (result->flags & BOL)
5fddcffc 759 {
bdcbe496
NB
760 /* Is this a directive. If _cpp_handle_directive returns
761 false, it is an assembler #. */
762 if (result->type == CPP_HASH
e808ec9c
NB
763 /* 6.10.3 p 11: Directives in a list of macro arguments
764 gives undefined behavior. This implementation
765 handles the directive as normal. */
766 && pfile->state.parsing_args != 1
bdcbe496
NB
767 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
768 continue;
97293897
NB
769 if (pfile->cb.line_change && !pfile->state.skipping)
770 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
5fddcffc 771 }
5fddcffc 772
bdcbe496
NB
773 /* We don't skip tokens in directives. */
774 if (pfile->state.in_directive)
775 break;
5fddcffc 776
bdcbe496 777 /* Outside a directive, invalidate controlling macros. At file
14baae01 778 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
bdcbe496 779 get here and MI optimisation works. */
5fddcffc 780 pfile->mi_valid = false;
bdcbe496
NB
781
782 if (!pfile->state.skipping || result->type == CPP_EOF)
783 break;
5fddcffc
NB
784 }
785
345894b4 786 return result;
5fddcffc
NB
787}
788
26aea073
NB
789/* Returns true if a fresh line has been loaded. */
790bool
791_cpp_get_fresh_line (pfile)
004cb263
NB
792 cpp_reader *pfile;
793{
26aea073
NB
794 /* We can't get a new line until we leave the current directive. */
795 if (pfile->state.in_directive)
796 return false;
df383483 797
26aea073 798 for (;;)
1a76916c 799 {
26aea073 800 cpp_buffer *buffer = pfile->buffer;
1a76916c 801
26aea073
NB
802 if (!buffer->need_line)
803 return true;
804
805 if (buffer->next_line < buffer->rlimit)
004cb263 806 {
26aea073
NB
807 _cpp_clean_line (pfile);
808 return true;
809 }
004cb263 810
26aea073
NB
811 /* First, get out of parsing arguments state. */
812 if (pfile->state.parsing_args)
813 return false;
814
815 /* End of buffer. Non-empty files should end in a newline. */
816 if (buffer->buf != buffer->rlimit
817 && buffer->next_line > buffer->rlimit
818 && !buffer->from_stage3)
819 {
820 /* Only warn once. */
821 buffer->next_line = buffer->rlimit;
822 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line - 1,
823 CPP_BUF_COLUMN (buffer, buffer->cur),
824 "no newline at end of file");
825 }
826
827 if (buffer->return_at_eof)
828 {
829 buffer->return_at_eof = false;
830 return false;
004cb263 831 }
004cb263 832
26aea073
NB
833 if (!buffer->prev)
834 return false;
835
836 _cpp_pop_buffer (pfile);
837 }
004cb263
NB
838}
839
/* If the next character is CHAR, consume it and give the token in
   the local variable RESULT the type THEN_TYPE; otherwise back up
   and give it ELSE_TYPE.  Relies on local variables PFILE, BUFFER
   and RESULT being in scope.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do {                                          \
    if (get_effective_char (pfile) == (CHAR))   \
      result->type = (THEN_TYPE);               \
    else                                        \
      {                                         \
        BACKUP ();                              \
        result->type = (ELSE_TYPE);             \
      }                                         \
  } while (0)
850
14baae01
NB
851/* Lex a token into pfile->cur_token, which is also incremented, to
852 get diagnostics pointing to the correct location.
853
854 Does not handle issues such as token lookahead, multiple-include
855 optimisation, directives, skipping etc. This function is only
856 suitable for use by _cpp_lex_token, and in special cases like
857 lex_expansion_token which doesn't care for any of these issues.
858
859 When meeting a newline, returns CPP_EOF if parsing a directive,
860 otherwise returns to the start of the token buffer if permissible.
861 Returns the location of the lexed token. */
862cpp_token *
863_cpp_lex_direct (pfile)
5fddcffc 864 cpp_reader *pfile;
45b966db 865{
0d9f234d 866 cppchar_t c;
adb84b42 867 cpp_buffer *buffer;
0d9f234d 868 const unsigned char *comment_start;
14baae01 869 cpp_token *result = pfile->cur_token++;
9ec7291f 870
5fddcffc 871 fresh_line:
26aea073
NB
872 result->flags = 0;
873 if (pfile->buffer->need_line)
874 {
875 if (!_cpp_get_fresh_line (pfile))
876 {
877 result->type = CPP_EOF;
878 return result;
879 }
880 if (!pfile->keep_tokens)
881 {
882 pfile->cur_run = &pfile->base_run;
883 result = pfile->base_run.base;
884 pfile->cur_token = result + 1;
885 }
886 result->flags = BOL;
887 if (pfile->state.parsing_args == 2)
888 result->flags |= PREV_WHITE;
889 }
adb84b42 890 buffer = pfile->buffer;
5fddcffc 891 update_tokens_line:
1444f2ed 892 result->line = pfile->line;
041c3194 893
5fddcffc 894 skipped_white:
26aea073
NB
895 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
896 && !pfile->overlaid_buffer)
897 {
898 _cpp_process_line_notes (pfile, false);
899 result->line = pfile->line;
900 }
480709cc 901 c = *buffer->cur++;
5fddcffc 902 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
5fddcffc 903
0d9f234d 904 switch (c)
45b966db 905 {
4d6baafa
NB
906 case ' ': case '\t': case '\f': case '\v': case '\0':
907 result->flags |= PREV_WHITE;
26aea073
NB
908 skip_whitespace (pfile, c);
909 goto skipped_white;
0d9f234d 910
26aea073
NB
911 case '\n':
912 pfile->line++;
913 buffer->need_line = true;
914 goto fresh_line;
46d07497 915
0d9f234d
NB
916 case '0': case '1': case '2': case '3': case '4':
917 case '5': case '6': case '7': case '8': case '9':
918 result->type = CPP_NUMBER;
10cf9bde 919 parse_number (pfile, &result->val.str, 0);
0d9f234d 920 break;
46d07497 921
0abc6a6a
NB
922 case 'L':
923 /* 'L' may introduce wide characters or strings. */
df383483
KH
924 {
925 const unsigned char *pos = buffer->cur;
0d9f234d 926
df383483
KH
927 c = get_effective_char (pfile);
928 if (c == '\'' || c == '"')
929 {
930 result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
931 parse_string (pfile, result, c);
932 break;
933 }
934 buffer->cur = pos;
935 }
936 /* Fall through. */
0abc6a6a
NB
937
938 start_ident:
0d9f234d
NB
939 case '_':
940 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
941 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
942 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
943 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
944 case 'y': case 'z':
945 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
0abc6a6a 946 case 'G': case 'H': case 'I': case 'J': case 'K':
0d9f234d
NB
947 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
948 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
949 case 'Y': case 'Z':
950 result->type = CPP_NAME;
2c3fcba6 951 result->val.node = parse_identifier (pfile);
0d9f234d 952
0d9f234d 953 /* Convert named operators to their proper types. */
0abc6a6a 954 if (result->val.node->flags & NODE_OPERATOR)
0d9f234d
NB
955 {
956 result->flags |= NAMED_OP;
4977bab6 957 result->type = result->val.node->directive_index;
0d9f234d
NB
958 }
959 break;
960
961 case '\'':
962 case '"':
963 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
0d9f234d
NB
964 parse_string (pfile, result, c);
965 break;
041c3194 966
0d9f234d 967 case '/':
1c6d33ef
NB
968 /* A potential block or line comment. */
969 comment_start = buffer->cur;
29401c30 970 c = get_effective_char (pfile);
480709cc 971
1c6d33ef
NB
972 if (c == '*')
973 {
26aea073 974 if (_cpp_skip_block_comment (pfile))
ebef4e8c 975 cpp_error (pfile, DL_ERROR, "unterminated comment");
0d9f234d 976 }
480709cc
NB
977 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
978 || CPP_IN_SYSTEM_HEADER (pfile)))
0d9f234d 979 {
bdb05a7b
NB
980 /* Warn about comments only if pedantically GNUC89, and not
981 in system headers. */
982 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
a94c1199 983 && ! buffer->warned_cplusplus_comments)
041c3194 984 {
ebef4e8c 985 cpp_error (pfile, DL_PEDWARN,
56508306 986 "C++ style comments are not allowed in ISO C90");
ebef4e8c
NB
987 cpp_error (pfile, DL_PEDWARN,
988 "(this will be reported only once per input file)");
1c6d33ef
NB
989 buffer->warned_cplusplus_comments = 1;
990 }
0d9f234d 991
01ef6563 992 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
ebef4e8c 993 cpp_error (pfile, DL_WARNING, "multi-line comment");
1c6d33ef 994 }
480709cc
NB
995 else if (c == '=')
996 {
997 result->type = CPP_DIV_EQ;
998 break;
999 }
1000 else
1001 {
1002 BACKUP ();
1003 result->type = CPP_DIV;
1004 break;
1005 }
0d9f234d 1006
1c6d33ef
NB
1007 if (!pfile->state.save_comments)
1008 {
1009 result->flags |= PREV_WHITE;
5fddcffc 1010 goto update_tokens_line;
0d9f234d 1011 }
1c6d33ef
NB
1012
1013 /* Save the comment as a token in its own right. */
477cdac7 1014 save_comment (pfile, result, comment_start, c);
bdcbe496 1015 break;
0d9f234d
NB
1016
1017 case '<':
1018 if (pfile->state.angled_headers)
1019 {
1020 result->type = CPP_HEADER_NAME;
480709cc
NB
1021 parse_string (pfile, result, '>');
1022 break;
0d9f234d 1023 }
45b966db 1024
29401c30 1025 c = get_effective_char (pfile);
0d9f234d 1026 if (c == '=')
480709cc 1027 result->type = CPP_LESS_EQ;
0d9f234d 1028 else if (c == '<')
480709cc 1029 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
0d9f234d 1030 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
480709cc 1031 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
0d9f234d
NB
1032 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1033 {
480709cc 1034 result->type = CPP_OPEN_SQUARE;
0d9f234d
NB
1035 result->flags |= DIGRAPH;
1036 }
1037 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1038 {
480709cc 1039 result->type = CPP_OPEN_BRACE;
0d9f234d
NB
1040 result->flags |= DIGRAPH;
1041 }
480709cc
NB
1042 else
1043 {
1044 BACKUP ();
1045 result->type = CPP_LESS;
1046 }
0d9f234d
NB
1047 break;
1048
1049 case '>':
29401c30 1050 c = get_effective_char (pfile);
0d9f234d 1051 if (c == '=')
480709cc 1052 result->type = CPP_GREATER_EQ;
0d9f234d 1053 else if (c == '>')
480709cc 1054 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
0d9f234d 1055 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
480709cc
NB
1056 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1057 else
0d9f234d 1058 {
480709cc
NB
1059 BACKUP ();
1060 result->type = CPP_GREATER;
0d9f234d
NB
1061 }
1062 break;
1063
cbcff6df 1064 case '%':
480709cc
NB
1065 c = get_effective_char (pfile);
1066 if (c == '=')
1067 result->type = CPP_MOD_EQ;
1068 else if (CPP_OPTION (pfile, digraphs) && c == ':')
1069 {
1070 result->flags |= DIGRAPH;
1071 result->type = CPP_HASH;
1072 if (get_effective_char (pfile) == '%')
1073 {
1074 const unsigned char *pos = buffer->cur;
1075
1076 if (get_effective_char (pfile) == ':')
1077 result->type = CPP_PASTE;
1078 else
1079 buffer->cur = pos - 1;
1080 }
1081 else
1082 BACKUP ();
1083 }
1084 else if (CPP_OPTION (pfile, digraphs) && c == '>')
1085 {
1086 result->flags |= DIGRAPH;
1087 result->type = CPP_CLOSE_BRACE;
1088 }
1089 else
1090 {
1091 BACKUP ();
1092 result->type = CPP_MOD;
1093 }
0d9f234d
NB
1094 break;
1095
cbcff6df 1096 case '.':
480709cc
NB
1097 result->type = CPP_DOT;
1098 c = get_effective_char (pfile);
1099 if (c == '.')
1100 {
1101 const unsigned char *pos = buffer->cur;
1102
1103 if (get_effective_char (pfile) == '.')
1104 result->type = CPP_ELLIPSIS;
1105 else
1106 buffer->cur = pos - 1;
1107 }
1108 /* All known character sets have 0...9 contiguous. */
0df6c2c7 1109 else if (ISDIGIT (c))
480709cc
NB
1110 {
1111 result->type = CPP_NUMBER;
10cf9bde 1112 parse_number (pfile, &result->val.str, 1);
480709cc
NB
1113 }
1114 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1115 result->type = CPP_DOT_STAR;
1116 else
1117 BACKUP ();
0d9f234d 1118 break;
45b966db 1119
0d9f234d 1120 case '+':
29401c30 1121 c = get_effective_char (pfile);
480709cc
NB
1122 if (c == '+')
1123 result->type = CPP_PLUS_PLUS;
1124 else if (c == '=')
1125 result->type = CPP_PLUS_EQ;
1126 else
1127 {
1128 BACKUP ();
1129 result->type = CPP_PLUS;
1130 }
0d9f234d 1131 break;
04e3ec78 1132
0d9f234d 1133 case '-':
29401c30 1134 c = get_effective_char (pfile);
0d9f234d
NB
1135 if (c == '>')
1136 {
480709cc
NB
1137 result->type = CPP_DEREF;
1138 if (CPP_OPTION (pfile, cplusplus))
1139 {
1140 if (get_effective_char (pfile) == '*')
1141 result->type = CPP_DEREF_STAR;
1142 else
1143 BACKUP ();
1144 }
0d9f234d 1145 }
0d9f234d 1146 else if (c == '-')
480709cc
NB
1147 result->type = CPP_MINUS_MINUS;
1148 else if (c == '=')
1149 result->type = CPP_MINUS_EQ;
1150 else
1151 {
1152 BACKUP ();
1153 result->type = CPP_MINUS;
1154 }
0d9f234d 1155 break;
45b966db 1156
0d9f234d 1157 case '&':
29401c30 1158 c = get_effective_char (pfile);
480709cc
NB
1159 if (c == '&')
1160 result->type = CPP_AND_AND;
1161 else if (c == '=')
1162 result->type = CPP_AND_EQ;
1163 else
1164 {
1165 BACKUP ();
1166 result->type = CPP_AND;
1167 }
0d9f234d 1168 break;
df383483 1169
0d9f234d 1170 case '|':
29401c30 1171 c = get_effective_char (pfile);
480709cc
NB
1172 if (c == '|')
1173 result->type = CPP_OR_OR;
1174 else if (c == '=')
1175 result->type = CPP_OR_EQ;
1176 else
1177 {
1178 BACKUP ();
1179 result->type = CPP_OR;
1180 }
0d9f234d 1181 break;
45b966db 1182
0d9f234d 1183 case ':':
29401c30 1184 c = get_effective_char (pfile);
0d9f234d 1185 if (c == ':' && CPP_OPTION (pfile, cplusplus))
480709cc 1186 result->type = CPP_SCOPE;
0d9f234d
NB
1187 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1188 {
1189 result->flags |= DIGRAPH;
480709cc
NB
1190 result->type = CPP_CLOSE_SQUARE;
1191 }
1192 else
1193 {
1194 BACKUP ();
1195 result->type = CPP_COLON;
0d9f234d
NB
1196 }
1197 break;
45b966db 1198
480709cc
NB
1199 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1200 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1201 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1202 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1203 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1204
26aea073 1205 case '?': result->type = CPP_QUERY; break;
0d9f234d
NB
1206 case '~': result->type = CPP_COMPL; break;
1207 case ',': result->type = CPP_COMMA; break;
1208 case '(': result->type = CPP_OPEN_PAREN; break;
1209 case ')': result->type = CPP_CLOSE_PAREN; break;
1210 case '[': result->type = CPP_OPEN_SQUARE; break;
1211 case ']': result->type = CPP_CLOSE_SQUARE; break;
1212 case '{': result->type = CPP_OPEN_BRACE; break;
1213 case '}': result->type = CPP_CLOSE_BRACE; break;
1214 case ';': result->type = CPP_SEMICOLON; break;
1215
40f03658 1216 /* @ is a punctuator in Objective-C. */
cc937581 1217 case '@': result->type = CPP_ATSIGN; break;
0d9f234d 1218
0abc6a6a
NB
1219 case '$':
1220 if (CPP_OPTION (pfile, dollars_in_ident))
1221 goto start_ident;
1222 /* Fall through... */
1223
0d9f234d
NB
1224 default:
1225 result->type = CPP_OTHER;
6c53ebff 1226 result->val.c = c;
0d9f234d
NB
1227 break;
1228 }
bdcbe496
NB
1229
1230 return result;
0d9f234d
NB
1231}
1232
5d8ebbd8 1233/* An upper bound on the number of bytes needed to spell TOKEN,
93c80368
NB
1234 including preceding whitespace. */
1235unsigned int
1236cpp_token_len (token)
1237 const cpp_token *token;
0d9f234d 1238{
93c80368 1239 unsigned int len;
6d2c2047 1240
93c80368 1241 switch (TOKEN_SPELL (token))
041c3194 1242 {
a28c5035 1243 default: len = 0; break;
47ad4138 1244 case SPELL_NUMBER:
a28c5035
NB
1245 case SPELL_STRING: len = token->val.str.len; break;
1246 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
041c3194 1247 }
47ad4138 1248 /* 1 for whitespace, 4 for comment delimiters. */
93c80368 1249 return len + 5;
6d2c2047
ZW
1250}
1251
041c3194 1252/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885
ZW
1253 already contain the enough space to hold the token's spelling.
1254 Returns a pointer to the character after the last character
1255 written. */
93c80368
NB
1256unsigned char *
1257cpp_spell_token (pfile, token, buffer)
041c3194
ZW
1258 cpp_reader *pfile; /* Would be nice to be rid of this... */
1259 const cpp_token *token;
1260 unsigned char *buffer;
1261{
96be6998 1262 switch (TOKEN_SPELL (token))
041c3194
ZW
1263 {
1264 case SPELL_OPERATOR:
1265 {
1266 const unsigned char *spelling;
1267 unsigned char c;
d6d5f795 1268
041c3194 1269 if (token->flags & DIGRAPH)
37b8524c
JDA
1270 spelling
1271 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
92936ecf
ZW
1272 else if (token->flags & NAMED_OP)
1273 goto spell_ident;
041c3194 1274 else
96be6998 1275 spelling = TOKEN_NAME (token);
df383483 1276
041c3194
ZW
1277 while ((c = *spelling++) != '\0')
1278 *buffer++ = c;
1279 }
1280 break;
d6d5f795 1281
47ad4138
ZW
1282 case SPELL_CHAR:
1283 *buffer++ = token->val.c;
1284 break;
1285
1286 spell_ident:
041c3194 1287 case SPELL_IDENT:
a28c5035
NB
1288 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1289 buffer += NODE_LEN (token->val.node);
041c3194 1290 break;
d6d5f795 1291
47ad4138
ZW
1292 case SPELL_NUMBER:
1293 memcpy (buffer, token->val.str.text, token->val.str.len);
1294 buffer += token->val.str.len;
1295 break;
1296
041c3194
ZW
1297 case SPELL_STRING:
1298 {
ba89d661
ZW
1299 int left, right, tag;
1300 switch (token->type)
1301 {
1302 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1303 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
ba89d661
ZW
1304 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1305 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1306 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
47ad4138 1307 default:
ebef4e8c
NB
1308 cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1309 TOKEN_NAME (token));
47ad4138 1310 return buffer;
ba89d661
ZW
1311 }
1312 if (tag) *buffer++ = tag;
47ad4138 1313 *buffer++ = left;
bfb9dc7f
ZW
1314 memcpy (buffer, token->val.str.text, token->val.str.len);
1315 buffer += token->val.str.len;
47ad4138 1316 *buffer++ = right;
041c3194
ZW
1317 }
1318 break;
d6d5f795 1319
041c3194 1320 case SPELL_NONE:
ebef4e8c 1321 cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
041c3194
ZW
1322 break;
1323 }
d6d5f795 1324
041c3194
ZW
1325 return buffer;
1326}
d6d5f795 1327
5d8ebbd8
NB
1328/* Returns TOKEN spelt as a null-terminated string. The string is
1329 freed when the reader is destroyed. Useful for diagnostics. */
93c80368
NB
1330unsigned char *
1331cpp_token_as_text (pfile, token)
c5a04734 1332 cpp_reader *pfile;
041c3194 1333 const cpp_token *token;
c5a04734 1334{
93c80368 1335 unsigned int len = cpp_token_len (token);
ece54d54 1336 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
c5a04734 1337
93c80368
NB
1338 end = cpp_spell_token (pfile, token, start);
1339 end[0] = '\0';
c5a04734 1340
93c80368
NB
1341 return start;
1342}
c5a04734 1343
5d8ebbd8
NB
1344/* Used by C front ends, which really should move to using
1345 cpp_token_as_text. */
93c80368
NB
1346const char *
1347cpp_type2name (type)
1348 enum cpp_ttype type;
1349{
1350 return (const char *) token_spellings[type].name;
1351}
c5a04734 1352
4ed5bcfb
NB
1353/* Writes the spelling of token to FP, without any preceding space.
1354 Separated from cpp_spell_token for efficiency - to avoid stdio
1355 double-buffering. */
93c80368
NB
1356void
1357cpp_output_token (token, fp)
1358 const cpp_token *token;
1359 FILE *fp;
1360{
93c80368 1361 switch (TOKEN_SPELL (token))
c5a04734 1362 {
93c80368
NB
1363 case SPELL_OPERATOR:
1364 {
1365 const unsigned char *spelling;
3b681e9d 1366 int c;
c5a04734 1367
93c80368 1368 if (token->flags & DIGRAPH)
37b8524c
JDA
1369 spelling
1370 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
93c80368
NB
1371 else if (token->flags & NAMED_OP)
1372 goto spell_ident;
1373 else
1374 spelling = TOKEN_NAME (token);
041c3194 1375
3b681e9d
ZW
1376 c = *spelling;
1377 do
1378 putc (c, fp);
1379 while ((c = *++spelling) != '\0');
93c80368
NB
1380 }
1381 break;
041c3194 1382
47ad4138
ZW
1383 case SPELL_CHAR:
1384 putc (token->val.c, fp);
1385 break;
1386
93c80368
NB
1387 spell_ident:
1388 case SPELL_IDENT:
3b681e9d 1389 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
93c80368 1390 break;
041c3194 1391
47ad4138
ZW
1392 case SPELL_NUMBER:
1393 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1394 break;
1395
93c80368
NB
1396 case SPELL_STRING:
1397 {
1398 int left, right, tag;
1399 switch (token->type)
1400 {
1401 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1402 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
93c80368
NB
1403 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1404 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1405 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
47ad4138
ZW
1406 default:
1407 fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1408 return;
93c80368
NB
1409 }
1410 if (tag) putc (tag, fp);
47ad4138 1411 putc (left, fp);
93c80368 1412 fwrite (token->val.str.text, 1, token->val.str.len, fp);
47ad4138 1413 putc (right, fp);
93c80368
NB
1414 }
1415 break;
c5a04734 1416
93c80368
NB
1417 case SPELL_NONE:
1418 /* An error, most probably. */
1419 break;
041c3194 1420 }
c5a04734
ZW
1421}
1422
93c80368
NB
1423/* Compare two tokens. */
1424int
1425_cpp_equiv_tokens (a, b)
1426 const cpp_token *a, *b;
c5a04734 1427{
93c80368
NB
1428 if (a->type == b->type && a->flags == b->flags)
1429 switch (TOKEN_SPELL (a))
1430 {
1431 default: /* Keep compiler happy. */
1432 case SPELL_OPERATOR:
1433 return 1;
1434 case SPELL_CHAR:
6c53ebff 1435 return a->val.c == b->val.c; /* Character. */
93c80368 1436 case SPELL_NONE:
56051c0a 1437 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
93c80368
NB
1438 case SPELL_IDENT:
1439 return a->val.node == b->val.node;
47ad4138 1440 case SPELL_NUMBER:
93c80368
NB
1441 case SPELL_STRING:
1442 return (a->val.str.len == b->val.str.len
1443 && !memcmp (a->val.str.text, b->val.str.text,
1444 a->val.str.len));
1445 }
c5a04734 1446
041c3194
ZW
1447 return 0;
1448}
1449
93c80368
NB
1450/* Returns nonzero if a space should be inserted to avoid an
1451 accidental token paste for output. For simplicity, it is
1452 conservative, and occasionally advises a space where one is not
1453 needed, e.g. "." and ".2". */
93c80368
NB
1454int
1455cpp_avoid_paste (pfile, token1, token2)
c5a04734 1456 cpp_reader *pfile;
93c80368 1457 const cpp_token *token1, *token2;
c5a04734 1458{
93c80368
NB
1459 enum cpp_ttype a = token1->type, b = token2->type;
1460 cppchar_t c;
c5a04734 1461
93c80368
NB
1462 if (token1->flags & NAMED_OP)
1463 a = CPP_NAME;
1464 if (token2->flags & NAMED_OP)
1465 b = CPP_NAME;
c5a04734 1466
93c80368
NB
1467 c = EOF;
1468 if (token2->flags & DIGRAPH)
37b8524c 1469 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
93c80368
NB
1470 else if (token_spellings[b].category == SPELL_OPERATOR)
1471 c = token_spellings[b].name[0];
c5a04734 1472
93c80368 1473 /* Quickly get everything that can paste with an '='. */
37b8524c 1474 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
93c80368 1475 return 1;
c5a04734 1476
93c80368 1477 switch (a)
c5a04734 1478 {
93c80368
NB
1479 case CPP_GREATER: return c == '>' || c == '?';
1480 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1481 case CPP_PLUS: return c == '+';
1482 case CPP_MINUS: return c == '-' || c == '>';
1483 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1484 case CPP_MOD: return c == ':' || c == '>';
1485 case CPP_AND: return c == '&';
1486 case CPP_OR: return c == '|';
1487 case CPP_COLON: return c == ':' || c == '>';
1488 case CPP_DEREF: return c == '*';
26ec42ee 1489 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
93c80368
NB
1490 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1491 case CPP_NAME: return ((b == CPP_NUMBER
1492 && name_p (pfile, &token2->val.str))
1493 || b == CPP_NAME
1494 || b == CPP_CHAR || b == CPP_STRING); /* L */
1495 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1496 || c == '.' || c == '+' || c == '-');
1497 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
6c53ebff 1498 && token1->val.c == '@'
93c80368
NB
1499 && (b == CPP_NAME || b == CPP_STRING));
1500 default: break;
c5a04734 1501 }
c5a04734 1502
417f3e3a 1503 return 0;
c5a04734
ZW
1504}
1505
93c80368 1506/* Output all the remaining tokens on the current line, and a newline
4ed5bcfb
NB
1507 character, to FP. Leading whitespace is removed. If there are
1508 macros, special token padding is not performed. */
c5a04734 1509void
93c80368 1510cpp_output_line (pfile, fp)
c5a04734 1511 cpp_reader *pfile;
93c80368 1512 FILE *fp;
c5a04734 1513{
4ed5bcfb 1514 const cpp_token *token;
96be6998 1515
4ed5bcfb
NB
1516 token = cpp_get_token (pfile);
1517 while (token->type != CPP_EOF)
96be6998 1518 {
4ed5bcfb
NB
1519 cpp_output_token (token, fp);
1520 token = cpp_get_token (pfile);
1521 if (token->flags & PREV_WHITE)
1522 putc (' ', fp);
96be6998
ZW
1523 }
1524
93c80368 1525 putc ('\n', fp);
041c3194 1526}
c5a04734 1527
c8a96070
NB
/* Returns the value of the hexadecimal digit C.  Aborts if C is not
   a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1538
62729350
NB
1539/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1540 failure if cpplib is not parsing C++ or C99. Such failure is
1541 silent, and no variables are updated. Otherwise returns 0, and
1542 warns if -Wtraditional.
c8a96070
NB
1543
1544 [lex.charset]: The character designated by the universal character
1545 name \UNNNNNNNN is that character whose character short name in
1546 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1547 universal character name \uNNNN is that character whose character
1548 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1549 for a universal character name is less than 0x20 or in the range
1550 0x7F-0x9F (inclusive), or if the universal character name
1551 designates a character in the basic source character set, then the
1552 program is ill-formed.
1553
1554 We assume that wchar_t is Unicode, so we don't need to do any
62729350 1555 mapping. Is this ever wrong?
c8a96070 1556
62729350
NB
1557 PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1558 LIMIT is the end of the string or charconst. PSTR is updated to
1559 point after the UCS on return, and the UCS is written into PC. */
1560
1561static int
1562maybe_read_ucs (pfile, pstr, limit, pc)
c8a96070
NB
1563 cpp_reader *pfile;
1564 const unsigned char **pstr;
1565 const unsigned char *limit;
625458d0 1566 cppchar_t *pc;
c8a96070
NB
1567{
1568 const unsigned char *p = *pstr;
62729350
NB
1569 unsigned int code = 0;
1570 unsigned int c = *pc, length;
1571
1572 /* Only attempt to interpret a UCS for C++ and C99. */
1573 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1574 return 1;
c8a96070 1575
62729350 1576 if (CPP_WTRADITIONAL (pfile))
ebef4e8c
NB
1577 cpp_error (pfile, DL_WARNING,
1578 "the meaning of '\\%c' is different in traditional C", c);
c8a96070 1579
f8710242
NB
1580 length = (c == 'u' ? 4: 8);
1581
1582 if ((size_t) (limit - p) < length)
1583 {
ebef4e8c 1584 cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
f8710242
NB
1585 /* Skip to the end to avoid more diagnostics. */
1586 p = limit;
1587 }
1588 else
1589 {
1590 for (; length; length--, p++)
c8a96070 1591 {
f8710242
NB
1592 c = *p;
1593 if (ISXDIGIT (c))
1594 code = (code << 4) + hex_digit_value (c);
1595 else
1596 {
ebef4e8c 1597 cpp_error (pfile, DL_ERROR,
f8710242
NB
1598 "non-hex digit '%c' in universal-character-name", c);
1599 /* We shouldn't skip in case there are multibyte chars. */
1600 break;
1601 }
c8a96070 1602 }
c8a96070
NB
1603 }
1604
783e2989
NB
1605 if (CPP_OPTION (pfile, EBCDIC))
1606 {
1607 cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
1608 code = 0x3f; /* EBCDIC invalid character */
1609 }
1610 /* True extended characters are OK. */
1611 else if (code >= 0xa0
1612 && !(code & 0x80000000)
1613 && !(code >= 0xD800 && code <= 0xDFFF))
f8710242
NB
1614 ;
1615 /* The standard permits $, @ and ` to be specified as UCNs. We use
1616 hex escapes so that this also works with EBCDIC hosts. */
1617 else if (code == 0x24 || code == 0x40 || code == 0x60)
1618 ;
1619 /* Don't give another error if one occurred above. */
1620 else if (length == 0)
ebef4e8c 1621 cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
c8a96070
NB
1622
1623 *pstr = p;
62729350
NB
1624 *pc = code;
1625 return 0;
c8a96070
NB
1626}
1627
4268e8bb
NB
1628/* Returns the value of an escape sequence, truncated to the correct
1629 target precision. PSTR points to the input pointer, which is just
1630 after the backslash. LIMIT is how much text we have. WIDE is true
1631 if the escape sequence is part of a wide character constant or
1632 string literal. Handles all relevant diagnostics. */
1633cppchar_t
1634cpp_parse_escape (pfile, pstr, limit, wide)
c8a96070
NB
1635 cpp_reader *pfile;
1636 const unsigned char **pstr;
1637 const unsigned char *limit;
4268e8bb 1638 int wide;
c8a96070 1639{
783e2989
NB
1640 /* Values of \a \b \e \f \n \r \t \v respectively. */
1641 static const uchar ascii[] = { 7, 8, 27, 12, 10, 13, 9, 11 };
1642 static const uchar ebcdic[] = { 47, 22, 39, 12, 21, 13, 5, 11 };
1643
c8a96070 1644 int unknown = 0;
783e2989 1645 const unsigned char *str = *pstr, *charconsts;
4268e8bb
NB
1646 cppchar_t c, mask;
1647 unsigned int width;
1648
783e2989
NB
1649 if (CPP_OPTION (pfile, EBCDIC))
1650 charconsts = ebcdic;
1651 else
1652 charconsts = ascii;
1653
4268e8bb
NB
1654 if (wide)
1655 width = CPP_OPTION (pfile, wchar_precision);
1656 else
1657 width = CPP_OPTION (pfile, char_precision);
1658 if (width < BITS_PER_CPPCHAR_T)
1659 mask = ((cppchar_t) 1 << width) - 1;
1660 else
1661 mask = ~0;
c8a96070 1662
4268e8bb 1663 c = *str++;
c8a96070
NB
1664 switch (c)
1665 {
1666 case '\\': case '\'': case '"': case '?': break;
783e2989
NB
1667 case 'b': c = charconsts[1]; break;
1668 case 'f': c = charconsts[3]; break;
1669 case 'n': c = charconsts[4]; break;
1670 case 'r': c = charconsts[5]; break;
1671 case 't': c = charconsts[6]; break;
1672 case 'v': c = charconsts[7]; break;
c8a96070
NB
1673
1674 case '(': case '{': case '[': case '%':
1675 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1676 '\%' is used to prevent SCCS from getting confused. */
1677 unknown = CPP_PEDANTIC (pfile);
1678 break;
1679
1680 case 'a':
1681 if (CPP_WTRADITIONAL (pfile))
ebef4e8c
NB
1682 cpp_error (pfile, DL_WARNING,
1683 "the meaning of '\\a' is different in traditional C");
783e2989 1684 c = charconsts[0];
c8a96070
NB
1685 break;
1686
1687 case 'e': case 'E':
1688 if (CPP_PEDANTIC (pfile))
ebef4e8c 1689 cpp_error (pfile, DL_PEDWARN,
625458d0 1690 "non-ISO-standard escape sequence, '\\%c'", (int) c);
783e2989 1691 c = charconsts[2];
c8a96070 1692 break;
df383483 1693
c8a96070 1694 case 'u': case 'U':
62729350 1695 unknown = maybe_read_ucs (pfile, &str, limit, &c);
c8a96070
NB
1696 break;
1697
1698 case 'x':
1699 if (CPP_WTRADITIONAL (pfile))
ebef4e8c
NB
1700 cpp_error (pfile, DL_WARNING,
1701 "the meaning of '\\x' is different in traditional C");
c8a96070 1702
df383483
KH
1703 {
1704 cppchar_t i = 0, overflow = 0;
1705 int digits_found = 0;
c8a96070 1706
df383483
KH
1707 while (str < limit)
1708 {
1709 c = *str;
1710 if (! ISXDIGIT (c))
1711 break;
1712 str++;
1713 overflow |= i ^ (i << 4 >> 4);
1714 i = (i << 4) + hex_digit_value (c);
1715 digits_found = 1;
1716 }
c8a96070 1717
df383483
KH
1718 if (!digits_found)
1719 cpp_error (pfile, DL_ERROR,
ebef4e8c 1720 "\\x used with no following hex digits");
c8a96070 1721
df383483
KH
1722 if (overflow | (i != (i & mask)))
1723 {
1724 cpp_error (pfile, DL_PEDWARN,
1725 "hex escape sequence out of range");
1726 i &= mask;
1727 }
1728 c = i;
1729 }
c8a96070
NB
1730 break;
1731
1732 case '0': case '1': case '2': case '3':
1733 case '4': case '5': case '6': case '7':
1734 {
4268e8bb
NB
1735 size_t count = 0;
1736 cppchar_t i = c - '0';
c8a96070
NB
1737
1738 while (str < limit && ++count < 3)
1739 {
1740 c = *str;
1741 if (c < '0' || c > '7')
1742 break;
1743 str++;
1744 i = (i << 3) + c - '0';
1745 }
1746
1747 if (i != (i & mask))
1748 {
ebef4e8c
NB
1749 cpp_error (pfile, DL_PEDWARN,
1750 "octal escape sequence out of range");
c8a96070
NB
1751 i &= mask;
1752 }
1753 c = i;
1754 }
1755 break;
1756
1757 default:
1758 unknown = 1;
1759 break;
1760 }
1761
1762 if (unknown)
1763 {
1764 if (ISGRAPH (c))
625458d0
NB
1765 cpp_error (pfile, DL_PEDWARN,
1766 "unknown escape sequence '\\%c'", (int) c);
c8a96070 1767 else
625458d0
NB
1768 cpp_error (pfile, DL_PEDWARN,
1769 "unknown escape sequence: '\\%03o'", (int) c);
c8a96070
NB
1770 }
1771
62729350 1772 if (c > mask)
4268e8bb 1773 {
639e8b0c 1774 cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
4268e8bb
NB
1775 c &= mask;
1776 }
62729350 1777
c8a96070
NB
1778 *pstr = str;
1779 return c;
1780}
1781
c8a96070 1782/* Interpret a (possibly wide) character constant in TOKEN.
4268e8bb
NB
1783 WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN
1784 points to a variable that is filled in with the number of
1785 characters seen, and UNSIGNEDP to a variable that indicates whether
1786 the result has signed type. */
1787cppchar_t
a5a49440 1788cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
c8a96070
NB
1789 cpp_reader *pfile;
1790 const cpp_token *token;
c8a96070 1791 unsigned int *pchars_seen;
4268e8bb 1792 int *unsignedp;
c8a96070
NB
1793{
1794 const unsigned char *str = token->val.str.text;
1795 const unsigned char *limit = str + token->val.str.len;
1796 unsigned int chars_seen = 0;
639e8b0c 1797 size_t width, max_chars;
4268e8bb 1798 cppchar_t c, mask, result = 0;
a47ed310 1799 bool unsigned_p;
c8a96070 1800
c8a96070
NB
1801 /* Width in bits. */
1802 if (token->type == CPP_CHAR)
a47ed310 1803 {
4268e8bb 1804 width = CPP_OPTION (pfile, char_precision);
2443d4e1 1805 max_chars = CPP_OPTION (pfile, int_precision) / width;
44a147ad 1806 unsigned_p = CPP_OPTION (pfile, unsigned_char);
a47ed310 1807 }
c8a96070 1808 else
a47ed310 1809 {
4268e8bb 1810 width = CPP_OPTION (pfile, wchar_precision);
2443d4e1 1811 max_chars = 1;
44a147ad 1812 unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
a47ed310 1813 }
c8a96070 1814
4268e8bb
NB
1815 if (width < BITS_PER_CPPCHAR_T)
1816 mask = ((cppchar_t) 1 << width) - 1;
c8a96070
NB
1817 else
1818 mask = ~0;
c8a96070
NB
1819
1820 while (str < limit)
1821 {
c8a96070 1822 c = *str++;
c8a96070
NB
1823
1824 if (c == '\\')
4268e8bb 1825 c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
c8a96070
NB
1826
1827#ifdef MAP_CHARACTER
1828 if (ISPRINT (c))
1829 c = MAP_CHARACTER (c);
1830#endif
df383483 1831
639e8b0c
NB
1832 chars_seen++;
1833
a5a49440
NB
1834 /* Truncate the character, scale the result and merge the two. */
1835 c &= mask;
639e8b0c 1836 if (width < BITS_PER_CPPCHAR_T)
a5a49440 1837 result = (result << width) | c;
639e8b0c
NB
1838 else
1839 result = c;
c8a96070
NB
1840 }
1841
1842 if (chars_seen == 0)
ebef4e8c 1843 cpp_error (pfile, DL_ERROR, "empty character constant");
639e8b0c 1844 else if (chars_seen > 1)
c8a96070 1845 {
639e8b0c
NB
1846 /* Multichar charconsts are of type int and therefore signed. */
1847 unsigned_p = 0;
a5a49440 1848
639e8b0c
NB
1849 if (chars_seen > max_chars)
1850 {
1851 chars_seen = max_chars;
1852 cpp_error (pfile, DL_WARNING,
1853 "character constant too long for its type");
1854 }
a5a49440 1855 else if (CPP_OPTION (pfile, warn_multichar))
639e8b0c 1856 cpp_error (pfile, DL_WARNING, "multi-character character constant");
c8a96070
NB
1857 }
1858
b9e2d17b
NB
1859 /* Sign-extend or truncate the constant to cppchar_t. The value is
1860 in WIDTH bits, but for multi-char charconsts it's value is the
1861 full target type's width. */
1862 if (chars_seen > 1)
1863 width *= max_chars;
1864 if (width < BITS_PER_CPPCHAR_T)
a5a49440 1865 {
b9e2d17b
NB
1866 mask = ((cppchar_t) 1 << width) - 1;
1867 if (unsigned_p || !(result & (1 << (width - 1))))
1868 result &= mask;
1869 else
1870 result |= ~mask;
a5a49440
NB
1871 }
1872
c8a96070 1873 *pchars_seen = chars_seen;
4268e8bb 1874 *unsignedp = unsigned_p;
c8a96070
NB
1875 return result;
1876}
1877
1e013d2e
NB
/* Memory buffers.  Changing the constant and the two sizing formulas
   below can have a dramatic effect on performance.  The values here
   are reasonable defaults, but might be tuned.  If you adjust them,
   be sure to test across a range of uses of cpplib, including heavy
   nested function-like macro expansion.  Also check the change in
   peak memory usage (NJAMD is a good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND gives the largest free buffer considered
   acceptable for a request of MIN_SIZE bytes.  EXTENDED_BUFF_SIZE
   gives the size to request when extending BUFF by MIN_EXTRA bytes:
   MIN_EXTRA plus twice the bytes between BUFF's cur and limit.  Every
   macro argument is parenthesised so that expression arguments
   expand safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1892
c9e7a609
NB
1893/* Create a new allocation buffer. Place the control block at the end
1894 of the buffer, so that buffer overflows will cause immediate chaos. */
b8af0ca5
NB
1895static _cpp_buff *
1896new_buff (len)
6142088c 1897 size_t len;
b8af0ca5
NB
1898{
1899 _cpp_buff *result;
ece54d54 1900 unsigned char *base;
b8af0ca5 1901
1e013d2e
NB
1902 if (len < MIN_BUFF_SIZE)
1903 len = MIN_BUFF_SIZE;
c70f6ed3 1904 len = CPP_ALIGN (len);
b8af0ca5
NB
1905
1906 base = xmalloc (len + sizeof (_cpp_buff));
1907 result = (_cpp_buff *) (base + len);
1908 result->base = base;
1909 result->cur = base;
1910 result->limit = base + len;
1911 result->next = NULL;
1912 return result;
1913}
1914
1915/* Place a chain of unwanted allocation buffers on the free list. */
1916void
1917_cpp_release_buff (pfile, buff)
1918 cpp_reader *pfile;
1919 _cpp_buff *buff;
1920{
1921 _cpp_buff *end = buff;
1922
1923 while (end->next)
1924 end = end->next;
1925 end->next = pfile->free_buffs;
1926 pfile->free_buffs = buff;
1927}
1928
1929/* Return a free buffer of size at least MIN_SIZE. */
1930_cpp_buff *
1931_cpp_get_buff (pfile, min_size)
1932 cpp_reader *pfile;
6142088c 1933 size_t min_size;
b8af0ca5
NB
1934{
1935 _cpp_buff *result, **p;
1936
1937 for (p = &pfile->free_buffs;; p = &(*p)->next)
1938 {
6142088c 1939 size_t size;
1e013d2e
NB
1940
1941 if (*p == NULL)
b8af0ca5 1942 return new_buff (min_size);
1e013d2e
NB
1943 result = *p;
1944 size = result->limit - result->base;
1945 /* Return a buffer that's big enough, but don't waste one that's
1946 way too big. */
34f5271d 1947 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
b8af0ca5
NB
1948 break;
1949 }
1950
1951 *p = result->next;
1952 result->next = NULL;
1953 result->cur = result->base;
1954 return result;
1955}
1956
4fe9b91c 1957/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
1958 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1959 the excess bytes to the new buffer. Chains the new buffer after
1960 BUFF, and returns the new buffer. */
b8af0ca5 1961_cpp_buff *
8c3b2693 1962_cpp_append_extend_buff (pfile, buff, min_extra)
b8af0ca5
NB
1963 cpp_reader *pfile;
1964 _cpp_buff *buff;
6142088c 1965 size_t min_extra;
b8af0ca5 1966{
6142088c 1967 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
8c3b2693 1968 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
b8af0ca5 1969
8c3b2693
NB
1970 buff->next = new_buff;
1971 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1972 return new_buff;
1973}
1974
4fe9b91c 1975/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
1976 remaining bytes of the buffer pointed to by BUFF, and at least
1977 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1978 Chains the new buffer before the buffer pointed to by BUFF, and
1979 updates the pointer to point to the new buffer. */
1980void
1981_cpp_extend_buff (pfile, pbuff, min_extra)
1982 cpp_reader *pfile;
1983 _cpp_buff **pbuff;
1984 size_t min_extra;
1985{
1986 _cpp_buff *new_buff, *old_buff = *pbuff;
1987 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1988
1989 new_buff = _cpp_get_buff (pfile, size);
1990 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1991 new_buff->next = old_buff;
1992 *pbuff = new_buff;
b8af0ca5
NB
1993}
1994
1995/* Free a chain of buffers starting at BUFF. */
1996void
1997_cpp_free_buff (buff)
1998 _cpp_buff *buff;
1999{
2000 _cpp_buff *next;
2001
2002 for (; buff; buff = next)
2003 {
2004 next = buff->next;
2005 free (buff->base);
2006 }
2007}
417f3e3a 2008
ece54d54
NB
2009/* Allocate permanent, unaligned storage of length LEN. */
2010unsigned char *
2011_cpp_unaligned_alloc (pfile, len)
2012 cpp_reader *pfile;
2013 size_t len;
2014{
2015 _cpp_buff *buff = pfile->u_buff;
2016 unsigned char *result = buff->cur;
2017
2018 if (len > (size_t) (buff->limit - result))
2019 {
2020 buff = _cpp_get_buff (pfile, len);
2021 buff->next = pfile->u_buff;
2022 pfile->u_buff = buff;
2023 result = buff->cur;
2024 }
2025
2026 buff->cur = result + len;
2027 return result;
2028}
2029
87062813
NB
2030/* Allocate permanent, unaligned storage of length LEN from a_buff.
2031 That buffer is used for growing allocations when saving macro
2032 replacement lists in a #define, and when parsing an answer to an
2033 assertion in #assert, #unassert or #if (and therefore possibly
2034 whilst expanding macros). It therefore must not be used by any
2035 code that they might call: specifically the lexer and the guts of
2036 the macro expander.
2037
2038 All existing other uses clearly fit this restriction: storing
2039 registered pragmas during initialization. */
93c80368 2040unsigned char *
8c3b2693
NB
2041_cpp_aligned_alloc (pfile, len)
2042 cpp_reader *pfile;
2043 size_t len;
3fef5b2b 2044{
8c3b2693
NB
2045 _cpp_buff *buff = pfile->a_buff;
2046 unsigned char *result = buff->cur;
3fef5b2b 2047
8c3b2693 2048 if (len > (size_t) (buff->limit - result))
3fef5b2b 2049 {
8c3b2693
NB
2050 buff = _cpp_get_buff (pfile, len);
2051 buff->next = pfile->a_buff;
2052 pfile->a_buff = buff;
2053 result = buff->cur;
3fef5b2b 2054 }
041c3194 2055
8c3b2693 2056 buff->cur = result + len;
93c80368 2057 return result;
041c3194 2058}
This page took 0.979904 seconds and 5 git commands to generate.