1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009
3 Free Software Foundation, Inc.
4 Contributed by Per Bothner, 1994-95.
5 Based on CCCP program by Paul Rubin, June 1986
6 Adapted to ANSI C, Richard Stallman, Jan 1987
7 Broken out to separate file, Zack Weinberg, Mar 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 3, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
38 enum spell_type category
;
39 const unsigned char *name
;
42 static const unsigned char *const digraph_spellings
[] =
43 { UC
"%:", UC
"%:%:", UC
"<:", UC
":>", UC
"<%", UC
"%>" };
45 #define OP(e, s) { SPELL_OPERATOR, UC s },
46 #define TK(e, s) { SPELL_ ## s, UC #e },
47 static const struct token_spelling token_spellings
[N_TTYPES
] = { TTYPE_TABLE
};
51 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
52 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
54 static void add_line_note (cpp_buffer
*, const uchar
*, unsigned int);
55 static int skip_line_comment (cpp_reader
*);
56 static void skip_whitespace (cpp_reader
*, cppchar_t
);
57 static void lex_string (cpp_reader
*, cpp_token
*, const uchar
*);
58 static void save_comment (cpp_reader
*, cpp_token
*, const uchar
*, cppchar_t
);
59 static void store_comment (cpp_reader
*, cpp_token
*);
60 static void create_literal (cpp_reader
*, cpp_token
*, const uchar
*,
61 unsigned int, enum cpp_ttype
);
62 static bool warn_in_comment (cpp_reader
*, _cpp_line_note
*);
63 static int name_p (cpp_reader
*, const cpp_string
*);
64 static tokenrun
*next_tokenrun (tokenrun
*);
66 static _cpp_buff
*new_buff (size_t);
71 Compares the token TOKEN to the NUL-terminated string STRING.
72 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
74 cpp_ideq (const cpp_token
*token
, const char *string
)
76 if (token
->type
!= CPP_NAME
)
79 return !ustrcmp (NODE_NAME (token
->val
.node
.node
), (const uchar
*) string
);
82 /* Record a note TYPE at byte POS into the current cleaned logical
85 add_line_note (cpp_buffer
*buffer
, const uchar
*pos
, unsigned int type
)
87 if (buffer
->notes_used
== buffer
->notes_cap
)
89 buffer
->notes_cap
= buffer
->notes_cap
* 2 + 200;
90 buffer
->notes
= XRESIZEVEC (_cpp_line_note
, buffer
->notes
,
94 buffer
->notes
[buffer
->notes_used
].pos
= pos
;
95 buffer
->notes
[buffer
->notes_used
].type
= type
;
99 /* Returns with a logical line that contains no escaped newlines or
100 trigraphs. This is a time-critical inner loop. */
102 _cpp_clean_line (cpp_reader
*pfile
)
108 buffer
= pfile
->buffer
;
109 buffer
->cur_note
= buffer
->notes_used
= 0;
110 buffer
->cur
= buffer
->line_base
= buffer
->next_line
;
111 buffer
->need_line
= false;
112 s
= buffer
->next_line
- 1;
114 if (!buffer
->from_stage3
)
116 const uchar
*pbackslash
= NULL
;
118 /* Short circuit for the common case of an un-escaped line with
119 no trigraphs. The primary win here is by not writing any
120 data back to memory until we have to. */
124 if (__builtin_expect (c
== '\n', false)
125 || __builtin_expect (c
== '\r', false))
129 if (__builtin_expect (s
== buffer
->rlimit
, false))
132 /* DOS line ending? */
133 if (__builtin_expect (c
== '\r', false)
137 if (s
== buffer
->rlimit
)
141 if (__builtin_expect (pbackslash
== NULL
, true))
144 /* Check for escaped newline. */
146 while (is_nvspace (p
[-1]))
148 if (p
- 1 != pbackslash
)
151 /* Have an escaped newline; process it and proceed to
153 add_line_note (buffer
, p
- 1, p
!= d
? ' ' : '\\');
155 buffer
->next_line
= p
- 1;
158 if (__builtin_expect (c
== '\\', false))
160 else if (__builtin_expect (c
== '?', false)
161 && __builtin_expect (s
[1] == '?', false)
162 && _cpp_trigraph_map
[s
[2]])
164 /* Have a trigraph. We may or may not have to convert
165 it. Add a line note regardless, for -Wtrigraphs. */
166 add_line_note (buffer
, s
, s
[2]);
167 if (CPP_OPTION (pfile
, trigraphs
))
169 /* We do, and that means we have to switch to the
172 *d
= _cpp_trigraph_map
[s
[2]];
185 if (c
== '\n' || c
== '\r')
187 /* Handle DOS line endings. */
188 if (c
== '\r' && s
!= buffer
->rlimit
&& s
[1] == '\n')
190 if (s
== buffer
->rlimit
)
195 while (p
!= buffer
->next_line
&& is_nvspace (p
[-1]))
197 if (p
== buffer
->next_line
|| p
[-1] != '\\')
200 add_line_note (buffer
, p
- 1, p
!= d
? ' ': '\\');
202 buffer
->next_line
= p
- 1;
204 else if (c
== '?' && s
[1] == '?' && _cpp_trigraph_map
[s
[2]])
206 /* Add a note regardless, for the benefit of -Wtrigraphs. */
207 add_line_note (buffer
, d
, s
[2]);
208 if (CPP_OPTION (pfile
, trigraphs
))
210 *d
= _cpp_trigraph_map
[s
[2]];
220 while (*s
!= '\n' && *s
!= '\r');
223 /* Handle DOS line endings. */
224 if (*s
== '\r' && s
!= buffer
->rlimit
&& s
[1] == '\n')
230 /* A sentinel note that should never be processed. */
231 add_line_note (buffer
, d
+ 1, '\n');
232 buffer
->next_line
= s
+ 1;
235 /* Return true if the trigraph indicated by NOTE should be warned
236 about in a comment. */
238 warn_in_comment (cpp_reader
*pfile
, _cpp_line_note
*note
)
242 /* Within comments we don't warn about trigraphs, unless the
243 trigraph forms an escaped newline, as that may change
245 if (note
->type
!= '/')
248 /* If -trigraphs, then this was an escaped newline iff the next note
250 if (CPP_OPTION (pfile
, trigraphs
))
251 return note
[1].pos
== note
->pos
;
253 /* Otherwise, see if this forms an escaped newline. */
255 while (is_nvspace (*p
))
258 /* There might have been escaped newlines between the trigraph and the
259 newline we found. Hence the position test. */
260 return (*p
== '\n' && p
< note
[1].pos
);
263 /* Process the notes created by add_line_note as far as the current
266 _cpp_process_line_notes (cpp_reader
*pfile
, int in_comment
)
268 cpp_buffer
*buffer
= pfile
->buffer
;
272 _cpp_line_note
*note
= &buffer
->notes
[buffer
->cur_note
];
275 if (note
->pos
> buffer
->cur
)
279 col
= CPP_BUF_COLUMN (buffer
, note
->pos
+ 1);
281 if (note
->type
== '\\' || note
->type
== ' ')
283 if (note
->type
== ' ' && !in_comment
)
284 cpp_error_with_line (pfile
, CPP_DL_WARNING
, pfile
->line_table
->highest_line
, col
,
285 "backslash and newline separated by space");
287 if (buffer
->next_line
> buffer
->rlimit
)
289 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line_table
->highest_line
, col
,
290 "backslash-newline at end of file");
291 /* Prevent "no newline at end of file" warning. */
292 buffer
->next_line
= buffer
->rlimit
;
295 buffer
->line_base
= note
->pos
;
296 CPP_INCREMENT_LINE (pfile
, 0);
298 else if (_cpp_trigraph_map
[note
->type
])
300 if (CPP_OPTION (pfile
, warn_trigraphs
)
301 && (!in_comment
|| warn_in_comment (pfile
, note
)))
303 if (CPP_OPTION (pfile
, trigraphs
))
304 cpp_error_with_line (pfile
, CPP_DL_WARNING
, pfile
->line_table
->highest_line
, col
,
305 "trigraph ??%c converted to %c",
307 (int) _cpp_trigraph_map
[note
->type
]);
311 (pfile
, CPP_DL_WARNING
, pfile
->line_table
->highest_line
, col
,
312 "trigraph ??%c ignored, use -trigraphs to enable",
322 /* Skip a C-style block comment. We find the end of the comment by
323 seeing if an asterisk is before every '/' we encounter. Returns
324 nonzero if comment terminated by EOF, zero otherwise.
326 Buffer->cur points to the initial asterisk of the comment. */
328 _cpp_skip_block_comment (cpp_reader
*pfile
)
330 cpp_buffer
*buffer
= pfile
->buffer
;
331 const uchar
*cur
= buffer
->cur
;
340 /* People like decorating comments with '*', so check for '/'
341 instead for efficiency. */
349 /* Warn about potential nested comments, but not if the '/'
350 comes immediately before the true comment delimiter.
351 Don't bother to get it right across escaped newlines. */
352 if (CPP_OPTION (pfile
, warn_comments
)
353 && cur
[0] == '*' && cur
[1] != '/')
356 cpp_error_with_line (pfile
, CPP_DL_WARNING
,
357 pfile
->line_table
->highest_line
, CPP_BUF_COL (buffer
),
358 "\"/*\" within comment");
364 buffer
->cur
= cur
- 1;
365 _cpp_process_line_notes (pfile
, true);
366 if (buffer
->next_line
>= buffer
->rlimit
)
368 _cpp_clean_line (pfile
);
370 cols
= buffer
->next_line
- buffer
->line_base
;
371 CPP_INCREMENT_LINE (pfile
, cols
);
378 _cpp_process_line_notes (pfile
, true);
382 /* Skip a C++ line comment, leaving buffer->cur pointing to the
383 terminating newline. Handles escaped newlines. Returns nonzero
384 if a multiline comment. */
386 skip_line_comment (cpp_reader
*pfile
)
388 cpp_buffer
*buffer
= pfile
->buffer
;
389 source_location orig_line
= pfile
->line_table
->highest_line
;
391 while (*buffer
->cur
!= '\n')
394 _cpp_process_line_notes (pfile
, true);
395 return orig_line
!= pfile
->line_table
->highest_line
;
398 /* Skips whitespace, saving the next non-whitespace character. */
400 skip_whitespace (cpp_reader
*pfile
, cppchar_t c
)
402 cpp_buffer
*buffer
= pfile
->buffer
;
403 bool saw_NUL
= false;
407 /* Horizontal space always OK. */
408 if (c
== ' ' || c
== '\t')
410 /* Just \f \v or \0 left. */
413 else if (pfile
->state
.in_directive
&& CPP_PEDANTIC (pfile
))
414 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line_table
->highest_line
,
415 CPP_BUF_COL (buffer
),
416 "%s in preprocessing directive",
417 c
== '\f' ? "form feed" : "vertical tab");
421 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
422 while (is_nvspace (c
));
425 cpp_error (pfile
, CPP_DL_WARNING
, "null character(s) ignored");
430 /* See if the characters of a number token are valid in a name (no
433 name_p (cpp_reader
*pfile
, const cpp_string
*string
)
437 for (i
= 0; i
< string
->len
; i
++)
438 if (!is_idchar (string
->text
[i
]))
444 /* After parsing an identifier or other sequence, produce a warning about
445 sequences not in NFC/NFKC. */
447 warn_about_normalization (cpp_reader
*pfile
,
448 const cpp_token
*token
,
449 const struct normalize_state
*s
)
451 if (CPP_OPTION (pfile
, warn_normalize
) < NORMALIZE_STATE_RESULT (s
)
452 && !pfile
->state
.skipping
)
454 /* Make sure that the token is printed using UCNs, even
455 if we'd otherwise happily print UTF-8. */
456 unsigned char *buf
= XNEWVEC (unsigned char, cpp_token_len (token
));
459 sz
= cpp_spell_token (pfile
, token
, buf
, false) - buf
;
460 if (NORMALIZE_STATE_RESULT (s
) == normalized_C
)
461 cpp_error_with_line (pfile
, CPP_DL_WARNING
, token
->src_loc
, 0,
462 "`%.*s' is not in NFKC", (int) sz
, buf
);
464 cpp_error_with_line (pfile
, CPP_DL_WARNING
, token
->src_loc
, 0,
465 "`%.*s' is not in NFC", (int) sz
, buf
);
469 /* Returns TRUE if the sequence starting at buffer->cur is valid in
470 an identifier. FIRST is TRUE if this starts an identifier. */
472 forms_identifier_p (cpp_reader
*pfile
, int first
,
473 struct normalize_state
*state
)
475 cpp_buffer
*buffer
= pfile
->buffer
;
477 if (*buffer
->cur
== '$')
479 if (!CPP_OPTION (pfile
, dollars_in_ident
))
483 if (CPP_OPTION (pfile
, warn_dollars
) && !pfile
->state
.skipping
)
485 CPP_OPTION (pfile
, warn_dollars
) = 0;
486 cpp_error (pfile
, CPP_DL_PEDWARN
, "'$' in identifier or number");
492 /* Is this a syntactically valid UCN? */
493 if (CPP_OPTION (pfile
, extended_identifiers
)
494 && *buffer
->cur
== '\\'
495 && (buffer
->cur
[1] == 'u' || buffer
->cur
[1] == 'U'))
498 if (_cpp_valid_ucn (pfile
, &buffer
->cur
, buffer
->rlimit
, 1 + !first
,
507 /* Lex an identifier starting at BUFFER->CUR - 1. */
508 static cpp_hashnode
*
509 lex_identifier (cpp_reader
*pfile
, const uchar
*base
, bool starts_ucn
,
510 struct normalize_state
*nst
)
512 cpp_hashnode
*result
;
515 unsigned int hash
= HT_HASHSTEP (0, *base
);
517 cur
= pfile
->buffer
->cur
;
519 while (ISIDNUM (*cur
))
521 hash
= HT_HASHSTEP (hash
, *cur
);
524 pfile
->buffer
->cur
= cur
;
525 if (starts_ucn
|| forms_identifier_p (pfile
, false, nst
))
527 /* Slower version for identifiers containing UCNs (or $). */
529 while (ISIDNUM (*pfile
->buffer
->cur
))
531 pfile
->buffer
->cur
++;
532 NORMALIZE_STATE_UPDATE_IDNUM (nst
);
534 } while (forms_identifier_p (pfile
, false, nst
));
535 result
= _cpp_interpret_identifier (pfile
, base
,
536 pfile
->buffer
->cur
- base
);
541 hash
= HT_HASHFINISH (hash
, len
);
543 result
= CPP_HASHNODE (ht_lookup_with_hash (pfile
->hash_table
,
544 base
, len
, hash
, HT_ALLOC
));
547 /* Rarely, identifiers require diagnostics when lexed. */
548 if (__builtin_expect ((result
->flags
& NODE_DIAGNOSTIC
)
549 && !pfile
->state
.skipping
, 0))
551 /* It is allowed to poison the same identifier twice. */
552 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
553 cpp_error (pfile
, CPP_DL_ERROR
, "attempt to use poisoned \"%s\"",
556 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
557 replacement list of a variadic macro. */
558 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
559 && !pfile
->state
.va_args_ok
)
560 cpp_error (pfile
, CPP_DL_PEDWARN
,
561 "__VA_ARGS__ can only appear in the expansion"
562 " of a C99 variadic macro");
564 /* For -Wc++-compat, warn about use of C++ named operators. */
565 if (result
->flags
& NODE_WARN_OPERATOR
)
566 cpp_error (pfile
, CPP_DL_WARNING
,
567 "identifier \"%s\" is a special operator name in C++",
574 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
576 lex_number (cpp_reader
*pfile
, cpp_string
*number
,
577 struct normalize_state
*nst
)
583 base
= pfile
->buffer
->cur
- 1;
586 cur
= pfile
->buffer
->cur
;
588 /* N.B. ISIDNUM does not include $. */
589 while (ISIDNUM (*cur
) || *cur
== '.' || VALID_SIGN (*cur
, cur
[-1]))
592 NORMALIZE_STATE_UPDATE_IDNUM (nst
);
595 pfile
->buffer
->cur
= cur
;
597 while (forms_identifier_p (pfile
, false, nst
));
599 number
->len
= cur
- base
;
600 dest
= _cpp_unaligned_alloc (pfile
, number
->len
+ 1);
601 memcpy (dest
, base
, number
->len
);
602 dest
[number
->len
] = '\0';
606 /* Create a token of type TYPE with a literal spelling.

   PFILE is the reader whose allocator supplies the storage, TOKEN is
   the token being filled in, and BASE / LEN delimit the spelling to
   copy.  The spelling is duplicated rather than referenced, and the
   copy and its length are recorded in TOKEN->val.str. */
608 create_literal (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
,
609 unsigned int len
, enum cpp_ttype type
)
/* Reserve LEN + 1 bytes.  NOTE(review): the extra byte is presumably
   for a terminating NUL; the statement that would write it is not
   present in this view -- confirm.  */
611 uchar
*dest
= _cpp_unaligned_alloc (pfile
, len
+ 1);
/* Copy exactly LEN bytes of the spelling into the new storage.  */
613 memcpy (dest
, base
, len
);
/* Record the copied spelling; the stored length is LEN, i.e. it does
   not count the extra byte reserved above.  */
616 token
->val
.str
.len
= len
;
617 token
->val
.str
.text
= dest
;
620 /* Lexes a raw string. The stored string contains the spelling, including
621 double quotes, delimiter string, '[' and ']', any leading
622 'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the
623 literal, or CPP_OTHER if it was not properly terminated.
625 The spelling is NUL-terminated, but it is not guaranteed that this
626 is the first NUL since embedded NULs are preserved. */
629 lex_raw_string (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
,
632 source_location saw_NUL
= 0;
633 const uchar
*raw_prefix
;
634 unsigned int raw_prefix_len
= 0;
636 size_t total_len
= 0;
637 _cpp_buff
*first_buff
= NULL
, *last_buff
= NULL
;
639 type
= (*base
== 'L' ? CPP_WSTRING
:
640 *base
== 'U' ? CPP_STRING32
:
641 *base
== 'u' ? (base
[1] == '8' ? CPP_UTF8STRING
: CPP_STRING16
)
644 raw_prefix
= cur
+ 1;
645 while (raw_prefix_len
< 16)
647 switch (raw_prefix
[raw_prefix_len
])
649 case ' ': case '[': case ']': case '\t':
650 case '\v': case '\f': case '\n': default:
652 /* Basic source charset except the above chars. */
653 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
654 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
655 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
656 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
658 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
659 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
660 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
661 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
663 case '0': case '1': case '2': case '3': case '4': case '5':
664 case '6': case '7': case '8': case '9':
665 case '_': case '{': case '}': case '#': case '(': case ')':
666 case '<': case '>': case '%': case ':': case ';': case '.':
667 case '?': case '*': case '+': case '-': case '/': case '^':
668 case '&': case '|': case '~': case '!': case '=': case ',':
669 case '\\': case '"': case '\'':
676 if (raw_prefix
[raw_prefix_len
] != '[')
678 int col
= CPP_BUF_COLUMN (pfile
->buffer
, raw_prefix
+ raw_prefix_len
)
680 if (raw_prefix_len
== 16)
681 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
, col
,
682 "raw string delimiter longer than 16 characters");
684 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
, col
,
685 "invalid character '%c' in raw string delimiter",
686 (int) raw_prefix
[raw_prefix_len
]);
687 pfile
->buffer
->cur
= raw_prefix
- 1;
688 create_literal (pfile
, token
, base
, raw_prefix
- 1 - base
, CPP_OTHER
);
692 cur
= raw_prefix
+ raw_prefix_len
+ 1;
695 cppchar_t c
= *cur
++;
698 && strncmp ((const char *) cur
, (const char *) raw_prefix
,
700 && cur
[raw_prefix_len
] == '"')
702 cur
+= raw_prefix_len
+ 1;
707 if (pfile
->state
.in_directive
708 || pfile
->state
.parsing_args
709 || pfile
->state
.in_deferred_pragma
)
713 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
, 0,
714 "unterminated raw string");
718 /* raw strings allow embedded non-escaped newlines, which
719 complicates this routine a lot. */
720 if (first_buff
== NULL
)
722 total_len
= cur
- base
;
723 first_buff
= last_buff
= _cpp_get_buff (pfile
, total_len
);
724 memcpy (BUFF_FRONT (last_buff
), base
, total_len
);
725 raw_prefix
= BUFF_FRONT (last_buff
) + (raw_prefix
- base
);
726 BUFF_FRONT (last_buff
) += total_len
;
730 size_t len
= cur
- base
;
731 size_t cur_len
= len
> BUFF_ROOM (last_buff
)
732 ? BUFF_ROOM (last_buff
) : len
;
735 memcpy (BUFF_FRONT (last_buff
), base
, cur_len
);
736 BUFF_FRONT (last_buff
) += cur_len
;
739 last_buff
= _cpp_append_extend_buff (pfile
, last_buff
,
741 memcpy (BUFF_FRONT (last_buff
), base
+ cur_len
,
743 BUFF_FRONT (last_buff
) += len
- cur_len
;
747 if (pfile
->buffer
->cur
< pfile
->buffer
->rlimit
)
748 CPP_INCREMENT_LINE (pfile
, 0);
749 pfile
->buffer
->need_line
= true;
751 if (!_cpp_get_fresh_line (pfile
))
753 source_location src_loc
= token
->src_loc
;
754 token
->type
= CPP_EOF
;
755 /* Tell the compiler the line number of the EOF token. */
756 token
->src_loc
= pfile
->line_table
->highest_line
;
758 if (first_buff
!= NULL
)
759 _cpp_release_buff (pfile
, first_buff
);
760 cpp_error_with_line (pfile
, CPP_DL_ERROR
, src_loc
, 0,
761 "unterminated raw string");
765 cur
= base
= pfile
->buffer
->cur
;
767 else if (c
== '\0' && !saw_NUL
)
768 LINEMAP_POSITION_FOR_COLUMN (saw_NUL
, pfile
->line_table
,
769 CPP_BUF_COLUMN (pfile
->buffer
, cur
));
772 if (saw_NUL
&& !pfile
->state
.skipping
)
773 cpp_error_with_line (pfile
, CPP_DL_WARNING
, saw_NUL
, 0,
774 "null character(s) preserved in literal");
776 pfile
->buffer
->cur
= cur
;
777 if (first_buff
== NULL
)
778 create_literal (pfile
, token
, base
, cur
- base
, type
);
781 uchar
*dest
= _cpp_unaligned_alloc (pfile
, total_len
+ (cur
- base
) + 1);
784 token
->val
.str
.len
= total_len
+ (cur
- base
);
785 token
->val
.str
.text
= dest
;
786 last_buff
= first_buff
;
787 while (last_buff
!= NULL
)
789 memcpy (dest
, last_buff
->base
,
790 BUFF_FRONT (last_buff
) - last_buff
->base
);
791 dest
+= BUFF_FRONT (last_buff
) - last_buff
->base
;
792 last_buff
= last_buff
->next
;
794 _cpp_release_buff (pfile
, first_buff
);
795 memcpy (dest
, base
, cur
- base
);
796 dest
[cur
- base
] = '\0';
800 /* Lexes a string, character constant, or angle-bracketed header file
801 name. The stored string contains the spelling, including opening
802 quote and any leading 'L', 'u', 'U' or 'u8' and optional
803 'R' modifier. It sets TOKEN's type to the type of the literal, or CPP_OTHER
804 if it was not properly terminated, or CPP_LESS for an unterminated
805 header name which must be relexed as normal tokens.
807 The spelling is NUL-terminated, but it is not guaranteed that this
808 is the first NUL since embedded NULs are preserved. */
810 lex_string (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
)
812 bool saw_NUL
= false;
814 cppchar_t terminator
;
819 if (terminator
== 'L' || terminator
== 'U')
821 else if (terminator
== 'u')
824 if (terminator
== '8')
827 if (terminator
== 'R')
829 lex_raw_string (pfile
, token
, base
, cur
);
832 if (terminator
== '"')
833 type
= (*base
== 'L' ? CPP_WSTRING
:
834 *base
== 'U' ? CPP_STRING32
:
835 *base
== 'u' ? (base
[1] == '8' ? CPP_UTF8STRING
: CPP_STRING16
)
837 else if (terminator
== '\'')
838 type
= (*base
== 'L' ? CPP_WCHAR
:
839 *base
== 'U' ? CPP_CHAR32
:
840 *base
== 'u' ? CPP_CHAR16
: CPP_CHAR
);
842 terminator
= '>', type
= CPP_HEADER_NAME
;
846 cppchar_t c
= *cur
++;
848 /* In #include-style directives, terminators are not escapable. */
849 if (c
== '\\' && !pfile
->state
.angled_headers
&& *cur
!= '\n')
851 else if (c
== terminator
)
856 /* Unmatched quotes always yield undefined behavior, but
857 greedy lexing means that what appears to be an unterminated
858 header name may actually be a legitimate sequence of tokens. */
859 if (terminator
== '>')
861 token
->type
= CPP_LESS
;
871 if (saw_NUL
&& !pfile
->state
.skipping
)
872 cpp_error (pfile
, CPP_DL_WARNING
,
873 "null character(s) preserved in literal");
875 if (type
== CPP_OTHER
&& CPP_OPTION (pfile
, lang
) != CLK_ASM
)
876 cpp_error (pfile
, CPP_DL_PEDWARN
, "missing terminating %c character",
879 pfile
->buffer
->cur
= cur
;
880 create_literal (pfile
, token
, base
, cur
- base
, type
);
883 /* Return the comment table. The client may not make any assumption
884 about the ordering of the table.

   The value returned is the address of the `comments' member of PFILE,
   so it refers to the reader's own table rather than to a copy. */
886 cpp_get_comments (cpp_reader
*pfile
)
888 return &pfile
->comments
;
891 /* Append a comment to the end of the comment table.

   The table is PFILE->comments.  The text of TOKEN (token->val.str) is
   duplicated into freshly xmalloc'd, NUL-terminated storage, and
   TOKEN's src_loc is stored next to it.  The entries array is created
   on first use and doubled whenever it is full. */
893 store_comment (cpp_reader
*pfile
, cpp_token
*token
)
/* Nothing allocated yet: create the entries array with room for 256
   comments.  */
897 if (pfile
->comments
.allocated
== 0)
899 pfile
->comments
.allocated
= 256;
900 pfile
->comments
.entries
= (cpp_comment
*) xmalloc
901 (pfile
->comments
.allocated
* sizeof (cpp_comment
));
/* Every slot is in use: double the capacity and xrealloc the entries
   array to the new size.  */
904 if (pfile
->comments
.count
== pfile
->comments
.allocated
)
906 pfile
->comments
.allocated
*= 2;
907 pfile
->comments
.entries
= (cpp_comment
*) xrealloc
908 (pfile
->comments
.entries
,
909 pfile
->comments
.allocated
* sizeof (cpp_comment
));
/* Length of the comment text held in the token.  */
912 len
= token
->val
.str
.len
;
914 /* Copy comment. Note, token may not be NUL terminated, so allocate
   LEN + 1 bytes and add the terminator explicitly after the copy. */
915 pfile
->comments
.entries
[pfile
->comments
.count
].comment
=
916 (char *) xmalloc (sizeof (char) * (len
+ 1));
917 memcpy (pfile
->comments
.entries
[pfile
->comments
.count
].comment
,
918 token
->val
.str
.text
, len
);
919 pfile
->comments
.entries
[pfile
->comments
.count
].comment
[len
] = '\0';
921 /* Set source location. */
922 pfile
->comments
.entries
[pfile
->comments
.count
].sloc
= token
->src_loc
;
924 /* Increment the count of entries in the comment table. */
925 pfile
->comments
.count
++;
928 /* The stored comment includes the comment start and any terminator. */
930 save_comment (cpp_reader
*pfile
, cpp_token
*token
, const unsigned char *from
,
933 unsigned char *buffer
;
934 unsigned int len
, clen
;
936 len
= pfile
->buffer
->cur
- from
+ 1; /* + 1 for the initial '/'. */
938 /* C++ comments probably (not definitely) have moved past a new
939 line, which we don't want to save in the comment. */
940 if (is_vspace (pfile
->buffer
->cur
[-1]))
943 /* If we are currently in a directive, then we need to store all
944 C++ comments as C comments internally, and so we need to
945 allocate a little extra space in that case.
947 Note that the only time we encounter a directive here is
948 when we are saving comments in a "#define". */
949 clen
= (pfile
->state
.in_directive
&& type
== '/') ? len
+ 2 : len
;
951 buffer
= _cpp_unaligned_alloc (pfile
, clen
);
953 token
->type
= CPP_COMMENT
;
954 token
->val
.str
.len
= clen
;
955 token
->val
.str
.text
= buffer
;
958 memcpy (buffer
+ 1, from
, len
- 1);
960 /* Finish conversion to a C comment, if necessary. */
961 if (pfile
->state
.in_directive
&& type
== '/')
964 buffer
[clen
- 2] = '*';
965 buffer
[clen
- 1] = '/';
968 /* Finally store this comment for use by clients of libcpp. */
969 store_comment (pfile
, token
);
972 /* Allocate COUNT tokens for RUN.

   On return RUN->base points at a newly allocated array of COUNT
   cpp_token objects and RUN->limit points one element past its end. */
974 _cpp_init_tokenrun (tokenrun
*run
, unsigned int count
)
/* XNEWVEC allocates storage for COUNT objects of type cpp_token.  */
976 run
->base
= XNEWVEC (cpp_token
, count
);
/* One-past-the-end marker; callers compare the current token pointer
   against it to detect a full run.  */
977 run
->limit
= run
->base
+ count
;
981 /* Returns the next tokenrun, or creates one if there is none.

   A run is created only when RUN->next is NULL: it is allocated with
   XNEW, linked back to RUN through its `prev' member, and given room
   for 250 tokens by _cpp_init_tokenrun. */
983 next_tokenrun (tokenrun
*run
)
/* No successor yet: allocate one and chain it after RUN.  */
985 if (run
->next
== NULL
)
987 run
->next
= XNEW (tokenrun
);
/* Back-link so the list of runs can be walked in both directions.  */
988 run
->next
->prev
= run
;
/* Size the new run at 250 tokens.  */
989 _cpp_init_tokenrun (run
->next
, 250);
995 /* Look ahead in the input stream. */
997 cpp_peek_token (cpp_reader
*pfile
, int index
)
999 cpp_context
*context
= pfile
->context
;
1000 const cpp_token
*peektok
;
1003 /* First, scan through any pending cpp_context objects. */
1004 while (context
->prev
)
1006 ptrdiff_t sz
= (context
->direct_p
1007 ? LAST (context
).token
- FIRST (context
).token
1008 : LAST (context
).ptoken
- FIRST (context
).ptoken
);
1010 if (index
< (int) sz
)
1011 return (context
->direct_p
1012 ? FIRST (context
).token
+ index
1013 : *(FIRST (context
).ptoken
+ index
));
1016 context
= context
->prev
;
1019 /* We will have to read some new tokens after all (and do so
1020 without invalidating preceding tokens). */
1022 pfile
->keep_tokens
++;
1026 peektok
= _cpp_lex_token (pfile
);
1027 if (peektok
->type
== CPP_EOF
)
1032 _cpp_backup_tokens_direct (pfile
, count
+ 1);
1033 pfile
->keep_tokens
--;
1038 /* Allocate a single token that is invalidated at the same time as the
1039 rest of the tokens on the line. Has its line and col set to the
1040 same as the last lexed token, so that diagnostics appear in the
1043 _cpp_temp_token (cpp_reader
*pfile
)
1045 cpp_token
*old
, *result
;
1046 ptrdiff_t sz
= pfile
->cur_run
->limit
- pfile
->cur_token
;
1047 ptrdiff_t la
= (ptrdiff_t) pfile
->lookaheads
;
1049 old
= pfile
->cur_token
- 1;
1050 /* Any pre-existing lookaheads must not be clobbered. */
1055 tokenrun
*next
= next_tokenrun (pfile
->cur_run
);
1058 memmove (next
->base
+ 1, next
->base
,
1059 (la
- sz
) * sizeof (cpp_token
));
1061 next
->base
[0] = pfile
->cur_run
->limit
[-1];
1065 memmove (pfile
->cur_token
+ 1, pfile
->cur_token
,
1066 MIN (la
, sz
- 1) * sizeof (cpp_token
));
1069 if (!sz
&& pfile
->cur_token
== pfile
->cur_run
->limit
)
1071 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
1072 pfile
->cur_token
= pfile
->cur_run
->base
;
1075 result
= pfile
->cur_token
++;
1076 result
->src_loc
= old
->src_loc
;
1080 /* Lex a token into RESULT (external interface). Takes care of issues
1081 like directive handling, token lookahead, multiple include
1082 optimization and skipping. */
1084 _cpp_lex_token (cpp_reader
*pfile
)
1090 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
1092 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
1093 pfile
->cur_token
= pfile
->cur_run
->base
;
1095 /* We assume that the current token is somewhere in the current
1097 if (pfile
->cur_token
< pfile
->cur_run
->base
1098 || pfile
->cur_token
>= pfile
->cur_run
->limit
)
1101 if (pfile
->lookaheads
)
1103 pfile
->lookaheads
--;
1104 result
= pfile
->cur_token
++;
1107 result
= _cpp_lex_direct (pfile
);
1109 if (result
->flags
& BOL
)
1111 /* Is this a directive? If _cpp_handle_directive returns
1112 false, it is an assembler #. */
1113 if (result
->type
== CPP_HASH
1114 /* 6.10.3 p 11: Directives in a list of macro arguments
1115 gives undefined behavior. This implementation
1116 handles the directive as normal. */
1117 && pfile
->state
.parsing_args
!= 1)
1119 if (_cpp_handle_directive (pfile
, result
->flags
& PREV_WHITE
))
1121 if (pfile
->directive_result
.type
== CPP_PADDING
)
1123 result
= &pfile
->directive_result
;
1126 else if (pfile
->state
.in_deferred_pragma
)
1127 result
= &pfile
->directive_result
;
1129 if (pfile
->cb
.line_change
&& !pfile
->state
.skipping
)
1130 pfile
->cb
.line_change (pfile
, result
, pfile
->state
.parsing_args
);
1133 /* We don't skip tokens in directives. */
1134 if (pfile
->state
.in_directive
|| pfile
->state
.in_deferred_pragma
)
1137 /* Outside a directive, invalidate controlling macros. At file
1138 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1139 get here and MI optimization works. */
1140 pfile
->mi_valid
= false;
1142 if (!pfile
->state
.skipping
|| result
->type
== CPP_EOF
)
1149 /* Returns true if a fresh line has been loaded. */
1151 _cpp_get_fresh_line (cpp_reader
*pfile
)
1155 /* We can't get a new line until we leave the current directive. */
1156 if (pfile
->state
.in_directive
)
1161 cpp_buffer
*buffer
= pfile
->buffer
;
1163 if (!buffer
->need_line
)
1166 if (buffer
->next_line
< buffer
->rlimit
)
1168 _cpp_clean_line (pfile
);
1172 /* First, get out of parsing arguments state. */
1173 if (pfile
->state
.parsing_args
)
1176 /* End of buffer. Non-empty files should end in a newline. */
1177 if (buffer
->buf
!= buffer
->rlimit
1178 && buffer
->next_line
> buffer
->rlimit
1179 && !buffer
->from_stage3
)
1181 /* Clip to buffer size. */
1182 buffer
->next_line
= buffer
->rlimit
;
1185 return_at_eof
= buffer
->return_at_eof
;
1186 _cpp_pop_buffer (pfile
);
1187 if (pfile
->buffer
== NULL
|| return_at_eof
)
1192 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
1195 result->type = ELSE_TYPE; \
1196 if (*buffer->cur == CHAR) \
1197 buffer->cur++, result->type = THEN_TYPE; \
1201 /* Lex a token into pfile->cur_token, which is also incremented, to
1202 get diagnostics pointing to the correct location.
1204 Does not handle issues such as token lookahead, multiple-include
1205 optimization, directives, skipping etc. This function is only
1206 suitable for use by _cpp_lex_token, and in special cases like
1207 lex_expansion_token which doesn't care for any of these issues.
1209 When meeting a newline, returns CPP_EOF if parsing a directive,
1210 otherwise returns to the start of the token buffer if permissible.
1211 Returns a pointer to the lexed token. */
1213 _cpp_lex_direct (cpp_reader
*pfile
)
1217 const unsigned char *comment_start
;
1218 cpp_token
*result
= pfile
->cur_token
++;
1222 buffer
= pfile
->buffer
;
1223 if (buffer
->need_line
)
1225 if (pfile
->state
.in_deferred_pragma
)
1227 result
->type
= CPP_PRAGMA_EOL
;
1228 pfile
->state
.in_deferred_pragma
= false;
1229 if (!pfile
->state
.pragma_allow_expansion
)
1230 pfile
->state
.prevent_expansion
--;
1233 if (!_cpp_get_fresh_line (pfile
))
1235 result
->type
= CPP_EOF
;
1236 if (!pfile
->state
.in_directive
)
1238 /* Tell the compiler the line number of the EOF token. */
1239 result
->src_loc
= pfile
->line_table
->highest_line
;
1240 result
->flags
= BOL
;
1244 if (!pfile
->keep_tokens
)
1246 pfile
->cur_run
= &pfile
->base_run
;
1247 result
= pfile
->base_run
.base
;
1248 pfile
->cur_token
= result
+ 1;
1250 result
->flags
= BOL
;
1251 if (pfile
->state
.parsing_args
== 2)
1252 result
->flags
|= PREV_WHITE
;
1254 buffer
= pfile
->buffer
;
1256 result
->src_loc
= pfile
->line_table
->highest_line
;
1259 if (buffer
->cur
>= buffer
->notes
[buffer
->cur_note
].pos
1260 && !pfile
->overlaid_buffer
)
1262 _cpp_process_line_notes (pfile
, false);
1263 result
->src_loc
= pfile
->line_table
->highest_line
;
1267 LINEMAP_POSITION_FOR_COLUMN (result
->src_loc
, pfile
->line_table
,
1268 CPP_BUF_COLUMN (buffer
, buffer
->cur
));
1272 case ' ': case '\t': case '\f': case '\v': case '\0':
1273 result
->flags
|= PREV_WHITE
;
1274 skip_whitespace (pfile
, c
);
1278 if (buffer
->cur
< buffer
->rlimit
)
1279 CPP_INCREMENT_LINE (pfile
, 0);
1280 buffer
->need_line
= true;
1283 case '0': case '1': case '2': case '3': case '4':
1284 case '5': case '6': case '7': case '8': case '9':
1286 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
1287 result
->type
= CPP_NUMBER
;
1288 lex_number (pfile
, &result
->val
.str
, &nst
);
1289 warn_about_normalization (pfile
, result
, &nst
);
1297 /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
1298 wide strings or raw strings. */
1299 if (c
== 'L' || CPP_OPTION (pfile
, uliterals
))
1301 if ((*buffer
->cur
== '\'' && c
!= 'R')
1302 || *buffer
->cur
== '"'
1303 || (*buffer
->cur
== 'R'
1305 && buffer
->cur
[1] == '"'
1306 && CPP_OPTION (pfile
, uliterals
))
1307 || (*buffer
->cur
== '8'
1309 && (buffer
->cur
[1] == '"'
1310 || (buffer
->cur
[1] == 'R' && buffer
->cur
[2] == '"'))))
1312 lex_string (pfile
, result
, buffer
->cur
- 1);
1319 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1320 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1321 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1322 case 's': case 't': case 'v': case 'w': case 'x':
1324 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1325 case 'G': case 'H': case 'I': case 'J': case 'K':
1326 case 'M': case 'N': case 'O': case 'P': case 'Q':
1327 case 'S': case 'T': case 'V': case 'W': case 'X':
1329 result
->type
= CPP_NAME
;
1331 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
1332 result
->val
.node
.node
= lex_identifier (pfile
, buffer
->cur
- 1, false,
1334 warn_about_normalization (pfile
, result
, &nst
);
1337 /* Convert named operators to their proper types. */
1338 if (result
->val
.node
.node
->flags
& NODE_OPERATOR
)
1340 result
->flags
|= NAMED_OP
;
1341 result
->type
= (enum cpp_ttype
) result
->val
.node
.node
->directive_index
;
1347 lex_string (pfile
, result
, buffer
->cur
- 1);
1351 /* A potential block or line comment. */
1352 comment_start
= buffer
->cur
;
1357 if (_cpp_skip_block_comment (pfile
))
1358 cpp_error (pfile
, CPP_DL_ERROR
, "unterminated comment");
1360 else if (c
== '/' && (CPP_OPTION (pfile
, cplusplus_comments
)
1361 || cpp_in_system_header (pfile
)))
1363 /* Warn about comments only if pedantically GNUC89, and not
1364 in system headers. */
1365 if (CPP_OPTION (pfile
, lang
) == CLK_GNUC89
&& CPP_PEDANTIC (pfile
)
1366 && ! buffer
->warned_cplusplus_comments
)
1368 cpp_error (pfile
, CPP_DL_PEDWARN
,
1369 "C++ style comments are not allowed in ISO C90");
1370 cpp_error (pfile
, CPP_DL_PEDWARN
,
1371 "(this will be reported only once per input file)");
1372 buffer
->warned_cplusplus_comments
= 1;
1375 if (skip_line_comment (pfile
) && CPP_OPTION (pfile
, warn_comments
))
1376 cpp_error (pfile
, CPP_DL_WARNING
, "multi-line comment");
1381 result
->type
= CPP_DIV_EQ
;
1386 result
->type
= CPP_DIV
;
1390 if (!pfile
->state
.save_comments
)
1392 result
->flags
|= PREV_WHITE
;
1393 goto update_tokens_line
;
1396 /* Save the comment as a token in its own right. */
1397 save_comment (pfile
, result
, comment_start
, c
);
1401 if (pfile
->state
.angled_headers
)
1403 lex_string (pfile
, result
, buffer
->cur
- 1);
1404 if (result
->type
!= CPP_LESS
)
1408 result
->type
= CPP_LESS
;
1409 if (*buffer
->cur
== '=')
1410 buffer
->cur
++, result
->type
= CPP_LESS_EQ
;
1411 else if (*buffer
->cur
== '<')
1414 IF_NEXT_IS ('=', CPP_LSHIFT_EQ
, CPP_LSHIFT
);
1416 else if (CPP_OPTION (pfile
, digraphs
))
1418 if (*buffer
->cur
== ':')
1421 result
->flags
|= DIGRAPH
;
1422 result
->type
= CPP_OPEN_SQUARE
;
1424 else if (*buffer
->cur
== '%')
1427 result
->flags
|= DIGRAPH
;
1428 result
->type
= CPP_OPEN_BRACE
;
1434 result
->type
= CPP_GREATER
;
1435 if (*buffer
->cur
== '=')
1436 buffer
->cur
++, result
->type
= CPP_GREATER_EQ
;
1437 else if (*buffer
->cur
== '>')
1440 IF_NEXT_IS ('=', CPP_RSHIFT_EQ
, CPP_RSHIFT
);
1445 result
->type
= CPP_MOD
;
1446 if (*buffer
->cur
== '=')
1447 buffer
->cur
++, result
->type
= CPP_MOD_EQ
;
1448 else if (CPP_OPTION (pfile
, digraphs
))
1450 if (*buffer
->cur
== ':')
1453 result
->flags
|= DIGRAPH
;
1454 result
->type
= CPP_HASH
;
1455 if (*buffer
->cur
== '%' && buffer
->cur
[1] == ':')
1456 buffer
->cur
+= 2, result
->type
= CPP_PASTE
, result
->val
.token_no
= 0;
1458 else if (*buffer
->cur
== '>')
1461 result
->flags
|= DIGRAPH
;
1462 result
->type
= CPP_CLOSE_BRACE
;
1468 result
->type
= CPP_DOT
;
1469 if (ISDIGIT (*buffer
->cur
))
1471 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
1472 result
->type
= CPP_NUMBER
;
1473 lex_number (pfile
, &result
->val
.str
, &nst
);
1474 warn_about_normalization (pfile
, result
, &nst
);
1476 else if (*buffer
->cur
== '.' && buffer
->cur
[1] == '.')
1477 buffer
->cur
+= 2, result
->type
= CPP_ELLIPSIS
;
1478 else if (*buffer
->cur
== '*' && CPP_OPTION (pfile
, cplusplus
))
1479 buffer
->cur
++, result
->type
= CPP_DOT_STAR
;
1483 result
->type
= CPP_PLUS
;
1484 if (*buffer
->cur
== '+')
1485 buffer
->cur
++, result
->type
= CPP_PLUS_PLUS
;
1486 else if (*buffer
->cur
== '=')
1487 buffer
->cur
++, result
->type
= CPP_PLUS_EQ
;
1491 result
->type
= CPP_MINUS
;
1492 if (*buffer
->cur
== '>')
1495 result
->type
= CPP_DEREF
;
1496 if (*buffer
->cur
== '*' && CPP_OPTION (pfile
, cplusplus
))
1497 buffer
->cur
++, result
->type
= CPP_DEREF_STAR
;
1499 else if (*buffer
->cur
== '-')
1500 buffer
->cur
++, result
->type
= CPP_MINUS_MINUS
;
1501 else if (*buffer
->cur
== '=')
1502 buffer
->cur
++, result
->type
= CPP_MINUS_EQ
;
1506 result
->type
= CPP_AND
;
1507 if (*buffer
->cur
== '&')
1508 buffer
->cur
++, result
->type
= CPP_AND_AND
;
1509 else if (*buffer
->cur
== '=')
1510 buffer
->cur
++, result
->type
= CPP_AND_EQ
;
1514 result
->type
= CPP_OR
;
1515 if (*buffer
->cur
== '|')
1516 buffer
->cur
++, result
->type
= CPP_OR_OR
;
1517 else if (*buffer
->cur
== '=')
1518 buffer
->cur
++, result
->type
= CPP_OR_EQ
;
1522 result
->type
= CPP_COLON
;
1523 if (*buffer
->cur
== ':' && CPP_OPTION (pfile
, cplusplus
))
1524 buffer
->cur
++, result
->type
= CPP_SCOPE
;
1525 else if (*buffer
->cur
== '>' && CPP_OPTION (pfile
, digraphs
))
1528 result
->flags
|= DIGRAPH
;
1529 result
->type
= CPP_CLOSE_SQUARE
;
1533 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ
, CPP_MULT
); break;
1534 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ
, CPP_EQ
); break;
1535 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ
, CPP_NOT
); break;
1536 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ
, CPP_XOR
); break;
1537 case '#': IF_NEXT_IS ('#', CPP_PASTE
, CPP_HASH
); result
->val
.token_no
= 0; break;
1539 case '?': result
->type
= CPP_QUERY
; break;
1540 case '~': result
->type
= CPP_COMPL
; break;
1541 case ',': result
->type
= CPP_COMMA
; break;
1542 case '(': result
->type
= CPP_OPEN_PAREN
; break;
1543 case ')': result
->type
= CPP_CLOSE_PAREN
; break;
1544 case '[': result
->type
= CPP_OPEN_SQUARE
; break;
1545 case ']': result
->type
= CPP_CLOSE_SQUARE
; break;
1546 case '{': result
->type
= CPP_OPEN_BRACE
; break;
1547 case '}': result
->type
= CPP_CLOSE_BRACE
; break;
1548 case ';': result
->type
= CPP_SEMICOLON
; break;
1550 /* @ is a punctuator in Objective-C. */
1551 case '@': result
->type
= CPP_ATSIGN
; break;
1556 const uchar
*base
= --buffer
->cur
;
1557 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
1559 if (forms_identifier_p (pfile
, true, &nst
))
1561 result
->type
= CPP_NAME
;
1562 result
->val
.node
.node
= lex_identifier (pfile
, base
, true, &nst
);
1563 warn_about_normalization (pfile
, result
, &nst
);
1570 create_literal (pfile
, result
, buffer
->cur
- 1, 1, CPP_OTHER
);
1577 /* An upper bound on the number of bytes needed to spell TOKEN.
1578 Does not include preceding whitespace. */
1580 cpp_token_len (const cpp_token
*token
)
1584 switch (TOKEN_SPELL (token
))
1586 default: len
= 6; break;
1587 case SPELL_LITERAL
: len
= token
->val
.str
.len
; break;
1588 case SPELL_IDENT
: len
= NODE_LEN (token
->val
.node
.node
) * 10; break;
/* Parse UTF-8 out of NAME and place a \U escape in BUFFER.
   Return the number of bytes read out of NAME.  (There are always
   10 bytes written to BUFFER: a backslash, 'U' and eight lower-case
   hex digits.  No terminating NUL is written.)  */

static int
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  int j;
  int ucn_len = 0;
  int ucn_len_c;
  unsigned t;
  unsigned long utf32;

  /* Compute the length of the UTF-8 sequence: one byte for every
     leading 1 bit of the first byte.  */
  for (t = *name; t & 0x80; t <<= 1)
    ucn_len++;

  /* The payload bits of the lead byte are those below the length
     marker.  */
  utf32 = *name & (0x7F >> ucn_len);
  for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++)
    {
      utf32 = (utf32 << 6) | (*++name & 0x3F);

      /* Ill-formed UTF-8: a continuation byte must look like 10xxxxxx.  */
      if ((*name & ~0x3F) != 0x80)
        abort ();
    }

  *buffer++ = '\\';
  *buffer++ = 'U';
  for (j = 7; j >= 0; j--)
    *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF];
  return ucn_len;
}
1628 /* Given a token TYPE corresponding to a digraph, return a pointer to
1629 the spelling of the digraph. */
1630 static const unsigned char *
1631 cpp_digraph2name (enum cpp_ttype type
)
1633 return digraph_spellings
[(int) type
- (int) CPP_FIRST_DIGRAPH
];
1636 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1637 already contain the enough space to hold the token's spelling.
1638 Returns a pointer to the character after the last character written.
1639 FORSTRING is true if this is to be the spelling after translation
1640 phase 1 (this is different for UCNs).
1641 FIXME: Would be nice if we didn't need the PFILE argument. */
1643 cpp_spell_token (cpp_reader
*pfile
, const cpp_token
*token
,
1644 unsigned char *buffer
, bool forstring
)
1646 switch (TOKEN_SPELL (token
))
1648 case SPELL_OPERATOR
:
1650 const unsigned char *spelling
;
1653 if (token
->flags
& DIGRAPH
)
1654 spelling
= cpp_digraph2name (token
->type
);
1655 else if (token
->flags
& NAMED_OP
)
1658 spelling
= TOKEN_NAME (token
);
1660 while ((c
= *spelling
++) != '\0')
1669 memcpy (buffer
, NODE_NAME (token
->val
.node
.node
),
1670 NODE_LEN (token
->val
.node
.node
));
1671 buffer
+= NODE_LEN (token
->val
.node
.node
);
1676 const unsigned char * name
= NODE_NAME (token
->val
.node
.node
);
1678 for (i
= 0; i
< NODE_LEN (token
->val
.node
.node
); i
++)
1679 if (name
[i
] & ~0x7F)
1681 i
+= utf8_to_ucn (buffer
, name
+ i
) - 1;
1685 *buffer
++ = NODE_NAME (token
->val
.node
.node
)[i
];
1690 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
1691 buffer
+= token
->val
.str
.len
;
1695 cpp_error (pfile
, CPP_DL_ICE
,
1696 "unspellable token %s", TOKEN_NAME (token
));
1703 /* Returns TOKEN spelt as a null-terminated string. The string is
1704 freed when the reader is destroyed. Useful for diagnostics. */
1706 cpp_token_as_text (cpp_reader
*pfile
, const cpp_token
*token
)
1708 unsigned int len
= cpp_token_len (token
) + 1;
1709 unsigned char *start
= _cpp_unaligned_alloc (pfile
, len
), *end
;
1711 end
= cpp_spell_token (pfile
, token
, start
, false);
1717 /* Returns a pointer to a string which spells the token defined by
1718 TYPE and FLAGS. Used by C front ends, which really should move to
1719 using cpp_token_as_text. */
1721 cpp_type2name (enum cpp_ttype type
, unsigned char flags
)
1723 if (flags
& DIGRAPH
)
1724 return (const char *) cpp_digraph2name (type
);
1725 else if (flags
& NAMED_OP
)
1726 return cpp_named_operator2name (type
);
1728 return (const char *) token_spellings
[type
].name
;
1731 /* Writes the spelling of token to FP, without any preceding space.
1732 Separated from cpp_spell_token for efficiency - to avoid stdio
1733 double-buffering. */
1735 cpp_output_token (const cpp_token
*token
, FILE *fp
)
1737 switch (TOKEN_SPELL (token
))
1739 case SPELL_OPERATOR
:
1741 const unsigned char *spelling
;
1744 if (token
->flags
& DIGRAPH
)
1745 spelling
= cpp_digraph2name (token
->type
);
1746 else if (token
->flags
& NAMED_OP
)
1749 spelling
= TOKEN_NAME (token
);
1754 while ((c
= *++spelling
) != '\0');
1762 const unsigned char * name
= NODE_NAME (token
->val
.node
.node
);
1764 for (i
= 0; i
< NODE_LEN (token
->val
.node
.node
); i
++)
1765 if (name
[i
] & ~0x7F)
1767 unsigned char buffer
[10];
1768 i
+= utf8_to_ucn (buffer
, name
+ i
) - 1;
1769 fwrite (buffer
, 1, 10, fp
);
1772 fputc (NODE_NAME (token
->val
.node
.node
)[i
], fp
);
1777 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
1781 /* An error, most probably. */
1786 /* Compare two tokens. */
1788 _cpp_equiv_tokens (const cpp_token
*a
, const cpp_token
*b
)
1790 if (a
->type
== b
->type
&& a
->flags
== b
->flags
)
1791 switch (TOKEN_SPELL (a
))
1793 default: /* Keep compiler happy. */
1794 case SPELL_OPERATOR
:
1795 /* token_no is used to track where multiple consecutive ##
1796 tokens were originally located. */
1797 return (a
->type
!= CPP_PASTE
|| a
->val
.token_no
== b
->val
.token_no
);
1799 return (a
->type
!= CPP_MACRO_ARG
1800 || a
->val
.macro_arg
.arg_no
== b
->val
.macro_arg
.arg_no
);
1802 return a
->val
.node
.node
== b
->val
.node
.node
;
1804 return (a
->val
.str
.len
== b
->val
.str
.len
1805 && !memcmp (a
->val
.str
.text
, b
->val
.str
.text
,
1812 /* Returns nonzero if a space should be inserted to avoid an
1813 accidental token paste for output. For simplicity, it is
1814 conservative, and occasionally advises a space where one is not
1815 needed, e.g. "." and ".2". */
1817 cpp_avoid_paste (cpp_reader
*pfile
, const cpp_token
*token1
,
1818 const cpp_token
*token2
)
1820 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
1823 if (token1
->flags
& NAMED_OP
)
1825 if (token2
->flags
& NAMED_OP
)
1829 if (token2
->flags
& DIGRAPH
)
1830 c
= digraph_spellings
[(int) b
- (int) CPP_FIRST_DIGRAPH
][0];
1831 else if (token_spellings
[b
].category
== SPELL_OPERATOR
)
1832 c
= token_spellings
[b
].name
[0];
1834 /* Quickly get everything that can paste with an '='. */
1835 if ((int) a
<= (int) CPP_LAST_EQ
&& c
== '=')
1840 case CPP_GREATER
: return c
== '>';
1841 case CPP_LESS
: return c
== '<' || c
== '%' || c
== ':';
1842 case CPP_PLUS
: return c
== '+';
1843 case CPP_MINUS
: return c
== '-' || c
== '>';
1844 case CPP_DIV
: return c
== '/' || c
== '*'; /* Comments. */
1845 case CPP_MOD
: return c
== ':' || c
== '>';
1846 case CPP_AND
: return c
== '&';
1847 case CPP_OR
: return c
== '|';
1848 case CPP_COLON
: return c
== ':' || c
== '>';
1849 case CPP_DEREF
: return c
== '*';
1850 case CPP_DOT
: return c
== '.' || c
== '%' || b
== CPP_NUMBER
;
1851 case CPP_HASH
: return c
== '#' || c
== '%'; /* Digraph form. */
1852 case CPP_NAME
: return ((b
== CPP_NUMBER
1853 && name_p (pfile
, &token2
->val
.str
))
1855 || b
== CPP_CHAR
|| b
== CPP_STRING
); /* L */
1856 case CPP_NUMBER
: return (b
== CPP_NUMBER
|| b
== CPP_NAME
1857 || c
== '.' || c
== '+' || c
== '-');
1859 case CPP_OTHER
: return ((token1
->val
.str
.text
[0] == '\\'
1861 || (CPP_OPTION (pfile
, objc
)
1862 && token1
->val
.str
.text
[0] == '@'
1863 && (b
== CPP_NAME
|| b
== CPP_STRING
)));
1870 /* Output all the remaining tokens on the current line, and a newline
1871 character, to FP. Leading whitespace is removed. If there are
1872 macros, special token padding is not performed. */
1874 cpp_output_line (cpp_reader
*pfile
, FILE *fp
)
1876 const cpp_token
*token
;
1878 token
= cpp_get_token (pfile
);
1879 while (token
->type
!= CPP_EOF
)
1881 cpp_output_token (token
, fp
);
1882 token
= cpp_get_token (pfile
);
1883 if (token
->flags
& PREV_WHITE
)
1890 /* Return a string representation of all the remaining tokens on the
1891 current line. The result is allocated using xmalloc and must be
1892 freed by the caller. */
1894 cpp_output_line_to_string (cpp_reader
*pfile
, const unsigned char *dir_name
)
1896 const cpp_token
*token
;
1897 unsigned int out
= dir_name
? ustrlen (dir_name
) : 0;
1898 unsigned int alloced
= 120 + out
;
1899 unsigned char *result
= (unsigned char *) xmalloc (alloced
);
1901 /* If DIR_NAME is empty, there are no initial contents. */
1904 sprintf ((char *) result
, "#%s ", dir_name
);
1908 token
= cpp_get_token (pfile
);
1909 while (token
->type
!= CPP_EOF
)
1911 unsigned char *last
;
1912 /* Include room for a possible space and the terminating nul. */
1913 unsigned int len
= cpp_token_len (token
) + 2;
1915 if (out
+ len
> alloced
)
1918 if (out
+ len
> alloced
)
1919 alloced
= out
+ len
;
1920 result
= (unsigned char *) xrealloc (result
, alloced
);
1923 last
= cpp_spell_token (pfile
, token
, &result
[out
], 0);
1924 out
= last
- result
;
1926 token
= cpp_get_token (pfile
);
1927 if (token
->flags
& PREV_WHITE
)
1928 result
[out
++] = ' ';
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
/* MIN_EXTRA is parenthesized so that an expression argument cannot
   bind to the surrounding '+'.  */
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1950 /* Create a new allocation buffer. Place the control block at the end
1951 of the buffer, so that buffer overflows will cause immediate chaos. */
1953 new_buff (size_t len
)
1956 unsigned char *base
;
1958 if (len
< MIN_BUFF_SIZE
)
1959 len
= MIN_BUFF_SIZE
;
1960 len
= CPP_ALIGN (len
);
1962 base
= XNEWVEC (unsigned char, len
+ sizeof (_cpp_buff
));
1963 result
= (_cpp_buff
*) (base
+ len
);
1964 result
->base
= base
;
1966 result
->limit
= base
+ len
;
1967 result
->next
= NULL
;
1971 /* Place a chain of unwanted allocation buffers on the free list. */
1973 _cpp_release_buff (cpp_reader
*pfile
, _cpp_buff
*buff
)
1975 _cpp_buff
*end
= buff
;
1979 end
->next
= pfile
->free_buffs
;
1980 pfile
->free_buffs
= buff
;
1983 /* Return a free buffer of size at least MIN_SIZE. */
1985 _cpp_get_buff (cpp_reader
*pfile
, size_t min_size
)
1987 _cpp_buff
*result
, **p
;
1989 for (p
= &pfile
->free_buffs
;; p
= &(*p
)->next
)
1994 return new_buff (min_size
);
1996 size
= result
->limit
- result
->base
;
1997 /* Return a buffer that's big enough, but don't waste one that's
1999 if (size
>= min_size
&& size
<= BUFF_SIZE_UPPER_BOUND (min_size
))
2004 result
->next
= NULL
;
2005 result
->cur
= result
->base
;
2009 /* Creates a new buffer with enough space to hold the uncommitted
2010 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2011 the excess bytes to the new buffer. Chains the new buffer after
2012 BUFF, and returns the new buffer. */
2014 _cpp_append_extend_buff (cpp_reader
*pfile
, _cpp_buff
*buff
, size_t min_extra
)
2016 size_t size
= EXTENDED_BUFF_SIZE (buff
, min_extra
);
2017 _cpp_buff
*new_buff
= _cpp_get_buff (pfile
, size
);
2019 buff
->next
= new_buff
;
2020 memcpy (new_buff
->base
, buff
->cur
, BUFF_ROOM (buff
));
2024 /* Creates a new buffer with enough space to hold the uncommitted
2025 remaining bytes of the buffer pointed to by BUFF, and at least
2026 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2027 Chains the new buffer before the buffer pointed to by BUFF, and
2028 updates the pointer to point to the new buffer. */
2030 _cpp_extend_buff (cpp_reader
*pfile
, _cpp_buff
**pbuff
, size_t min_extra
)
2032 _cpp_buff
*new_buff
, *old_buff
= *pbuff
;
2033 size_t size
= EXTENDED_BUFF_SIZE (old_buff
, min_extra
);
2035 new_buff
= _cpp_get_buff (pfile
, size
);
2036 memcpy (new_buff
->base
, old_buff
->cur
, BUFF_ROOM (old_buff
));
2037 new_buff
->next
= old_buff
;
2041 /* Free a chain of buffers starting at BUFF. */
2043 _cpp_free_buff (_cpp_buff
*buff
)
2047 for (; buff
; buff
= next
)
2054 /* Allocate permanent, unaligned storage of length LEN. */
2056 _cpp_unaligned_alloc (cpp_reader
*pfile
, size_t len
)
2058 _cpp_buff
*buff
= pfile
->u_buff
;
2059 unsigned char *result
= buff
->cur
;
2061 if (len
> (size_t) (buff
->limit
- result
))
2063 buff
= _cpp_get_buff (pfile
, len
);
2064 buff
->next
= pfile
->u_buff
;
2065 pfile
->u_buff
= buff
;
2069 buff
->cur
= result
+ len
;
2073 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2074 That buffer is used for growing allocations when saving macro
2075 replacement lists in a #define, and when parsing an answer to an
2076 assertion in #assert, #unassert or #if (and therefore possibly
2077 whilst expanding macros). It therefore must not be used by any
2078 code that they might call: specifically the lexer and the guts of
2081 All existing other uses clearly fit this restriction: storing
2082 registered pragmas during initialization. */
2084 _cpp_aligned_alloc (cpp_reader
*pfile
, size_t len
)
2086 _cpp_buff
*buff
= pfile
->a_buff
;
2087 unsigned char *result
= buff
->cur
;
2089 if (len
> (size_t) (buff
->limit
- result
))
2091 buff
= _cpp_get_buff (pfile
, len
);
2092 buff
->next
= pfile
->a_buff
;
2093 pfile
->a_buff
= buff
;
2097 buff
->cur
= result
+ len
;
2101 /* Say which field of TOK is in use. */
2103 enum cpp_token_fld_kind
2104 cpp_token_val_index (cpp_token
*tok
)
2106 switch (TOKEN_SPELL (tok
))
2109 return CPP_TOKEN_FLD_NODE
;
2111 return CPP_TOKEN_FLD_STR
;
2112 case SPELL_OPERATOR
:
2113 if (tok
->type
== CPP_PASTE
)
2114 return CPP_TOKEN_FLD_TOKEN_NO
;
2116 return CPP_TOKEN_FLD_NONE
;
2118 if (tok
->type
== CPP_MACRO_ARG
)
2119 return CPP_TOKEN_FLD_ARG_NO
;
2120 else if (tok
->type
== CPP_PADDING
)
2121 return CPP_TOKEN_FLD_SOURCE
;
2122 else if (tok
->type
== CPP_PRAGMA
)
2123 return CPP_TOKEN_FLD_PRAGMA
;
2124 /* else fall through */
2126 return CPP_TOKEN_FLD_NONE
;