1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000-2021 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
37 enum spell_type category
;
38 const unsigned char *name
;
41 static const unsigned char *const digraph_spellings
[] =
42 { UC
"%:", UC
"%:%:", UC
"<:", UC
":>", UC
"<%", UC
"%>" };
44 #define OP(e, s) { SPELL_OPERATOR, UC s },
45 #define TK(e, s) { SPELL_ ## s, UC #e },
46 static const struct token_spelling token_spellings
[N_TTYPES
] = { TTYPE_TABLE
};
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
53 static void add_line_note (cpp_buffer
*, const uchar
*, unsigned int);
54 static int skip_line_comment (cpp_reader
*);
55 static void skip_whitespace (cpp_reader
*, cppchar_t
);
56 static void lex_string (cpp_reader
*, cpp_token
*, const uchar
*);
57 static void save_comment (cpp_reader
*, cpp_token
*, const uchar
*, cppchar_t
);
58 static void store_comment (cpp_reader
*, cpp_token
*);
59 static void create_literal (cpp_reader
*, cpp_token
*, const uchar
*,
60 unsigned int, enum cpp_ttype
);
61 static bool warn_in_comment (cpp_reader
*, _cpp_line_note
*);
62 static int name_p (cpp_reader
*, const cpp_string
*);
63 static tokenrun
*next_tokenrun (tokenrun
*);
65 static _cpp_buff
*new_buff (size_t);
70 Compares the token TOKEN to the NUL-terminated string STRING.
71 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
73 cpp_ideq (const cpp_token
*token
, const char *string
)
75 if (token
->type
!= CPP_NAME
)
78 return !ustrcmp (NODE_NAME (token
->val
.node
.node
), (const uchar
*) string
);
81 /* Record a note TYPE at byte POS into the current cleaned logical
84 add_line_note (cpp_buffer
*buffer
, const uchar
*pos
, unsigned int type
)
86 if (buffer
->notes_used
== buffer
->notes_cap
)
88 buffer
->notes_cap
= buffer
->notes_cap
* 2 + 200;
89 buffer
->notes
= XRESIZEVEC (_cpp_line_note
, buffer
->notes
,
93 buffer
->notes
[buffer
->notes_used
].pos
= pos
;
94 buffer
->notes
[buffer
->notes_used
].type
= type
;
99 /* Fast path to find line special characters using optimized character
100 scanning algorithms. Anything complicated falls back to the slow
101 path below. Since this loop is very hot it's worth doing these kinds
104 One of the paths through the ifdefs should provide
106 const uchar *search_line_fast (const uchar *s, const uchar *end);
108 Between S and END, search for \n, \r, \\, ?. Return a pointer to
111 Note that the last character of the buffer is *always* a newline,
112 as forced by _cpp_convert_input. This fact can be used to avoid
113 explicitly looking for the end of the buffer. */
115 /* Configure gives us an ifdef test. */
116 #ifndef WORDS_BIGENDIAN
117 #define WORDS_BIGENDIAN 0
120 /* We'd like the largest integer that fits into a register. There's nothing
121 in <stdint.h> that gives us that. For most hosts this is unsigned long,
122 but MS decided on an LLP64 model. Thankfully when building with GCC we
123 can get the "real" word size. */
125 typedef unsigned int word_type
__attribute__((__mode__(__word__
)));
127 typedef unsigned long word_type
;
130 /* The code below is only expecting sizes 4 or 8.
131 Die at compile-time if this expectation is violated. */
132 typedef char check_word_type_size
133 [(sizeof(word_type
) == 8 || sizeof(word_type
) == 4) * 2 - 1];
135 /* Return VAL with the first N bytes forced to values that won't match one
136 of the interesting characters. Note that NUL is not interesting. */
138 static inline word_type
139 acc_char_mask_misalign (word_type val
, unsigned int n
)
149 /* Return X replicated to all byte positions within WORD_TYPE. */
151 static inline word_type
152 acc_char_replicate (uchar x
)
156 ret
= (x
<< 24) | (x
<< 16) | (x
<< 8) | x
;
157 if (sizeof(word_type
) == 8)
158 ret
= (ret
<< 16 << 16) | ret
;
162 /* Return non-zero if some byte of VAL is (probably) C. */
164 static inline word_type
165 acc_char_cmp (word_type val
, word_type c
)
167 #if defined(__GNUC__) && defined(__alpha__)
168 /* We can get exact results using a compare-bytes instruction.
169 Get (val == c) via (0 >= (val ^ c)). */
170 return __builtin_alpha_cmpbge (0, val
^ c
);
172 word_type magic
= 0x7efefefeU
;
173 if (sizeof(word_type
) == 8)
174 magic
= (magic
<< 16 << 16) | 0xfefefefeU
;
178 return ((val
+ magic
) ^ ~val
) & ~magic
;
182 /* Given the result of acc_char_cmp is non-zero, return the index of
183 the found character. If this was a false positive, return -1. */
186 acc_char_index (word_type cmp ATTRIBUTE_UNUSED
,
187 word_type val ATTRIBUTE_UNUSED
)
189 #if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
190 /* The cmpbge instruction sets *bits* of the result corresponding to
191 matches in the bytes with no false positives. */
192 return __builtin_ctzl (cmp
);
196 /* ??? It would be nice to force unrolling here,
197 and have all of these constants folded. */
198 for (i
= 0; i
< sizeof(word_type
); ++i
)
202 c
= (val
>> (sizeof(word_type
) - i
- 1) * 8) & 0xff;
204 c
= (val
>> i
* 8) & 0xff;
206 if (c
== '\n' || c
== '\r' || c
== '\\' || c
== '?')
214 /* A version of the fast scanner using bit fiddling techniques.
216 For 32-bit words, one would normally perform 16 comparisons and
217 16 branches. With this algorithm one performs 24 arithmetic
218 operations and one branch. Whether this is faster with a 32-bit
219 word size is going to be somewhat system dependent.
221 For 64-bit words, we eliminate twice the number of comparisons
222 and branches without increasing the number of arithmetic operations.
223 It's almost certainly going to be a win with 64-bit word size. */
225 static const uchar
* search_line_acc_char (const uchar
*, const uchar
*)
229 search_line_acc_char (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
231 const word_type repl_nl
= acc_char_replicate ('\n');
232 const word_type repl_cr
= acc_char_replicate ('\r');
233 const word_type repl_bs
= acc_char_replicate ('\\');
234 const word_type repl_qm
= acc_char_replicate ('?');
236 unsigned int misalign
;
240 /* Align the buffer. Mask out any bytes from before the beginning. */
241 p
= (word_type
*)((uintptr_t)s
& -sizeof(word_type
));
243 misalign
= (uintptr_t)s
& (sizeof(word_type
) - 1);
245 val
= acc_char_mask_misalign (val
, misalign
);
250 t
= acc_char_cmp (val
, repl_nl
);
251 t
|= acc_char_cmp (val
, repl_cr
);
252 t
|= acc_char_cmp (val
, repl_bs
);
253 t
|= acc_char_cmp (val
, repl_qm
);
255 if (__builtin_expect (t
!= 0, 0))
257 int i
= acc_char_index (t
, val
);
259 return (const uchar
*)p
+ i
;
266 /* Disable on Solaris 2/x86 until the following problem can be properly
269 The Solaris 10+ assembler tags objects with the instruction set
270 extensions used, so SSE4.2 executables cannot run on machines that
271 don't support that extension. */
273 #if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 || !defined(__PIC__)) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
275 /* Replicated character data to be shared between implementations.
276 Recall that outside of a context with vector support we can't
277 define compatible vector types, therefore these are all defined
278 in terms of raw characters. */
279 static const char repl_chars
[4][16] __attribute__((aligned(16))) = {
280 { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
281 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
282 { '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
283 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
284 { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
285 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
286 { '?', '?', '?', '?', '?', '?', '?', '?',
287 '?', '?', '?', '?', '?', '?', '?', '?' },
290 /* A version of the fast scanner using MMX vectorized byte compare insns.
292 This uses the PMOVMSKB instruction which was introduced with "MMX2",
293 which was packaged into SSE1; it is also present in the AMD MMX
294 extension. Mark the function as using "sse" so that we emit a real
295 "emms" instruction, rather than the 3dNOW "femms" instruction. */
299 __attribute__((__target__("sse")))
301 search_line_mmx (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
303 typedef char v8qi
__attribute__ ((__vector_size__ (8)));
304 typedef int __m64
__attribute__ ((__vector_size__ (8), __may_alias__
));
306 const v8qi repl_nl
= *(const v8qi
*)repl_chars
[0];
307 const v8qi repl_cr
= *(const v8qi
*)repl_chars
[1];
308 const v8qi repl_bs
= *(const v8qi
*)repl_chars
[2];
309 const v8qi repl_qm
= *(const v8qi
*)repl_chars
[3];
311 unsigned int misalign
, found
, mask
;
315 /* Align the source pointer. While MMX doesn't generate unaligned data
316 faults, this allows us to safely scan to the end of the buffer without
317 reading beyond the end of the last page. */
318 misalign
= (uintptr_t)s
& 7;
319 p
= (const v8qi
*)((uintptr_t)s
& -8);
322 /* Create a mask for the bytes that are valid within the first
323 8-byte block. The idea here is that the AND with the mask
324 within the loop is "free", since we need some AND or TEST
325 insn in order to set the flags for the branch anyway. */
326 mask
= -1u << misalign
;
328 /* Main loop processing 8 bytes at a time. */
336 t
= __builtin_ia32_pcmpeqb(data
, repl_nl
);
337 c
= __builtin_ia32_pcmpeqb(data
, repl_cr
);
338 t
= (v8qi
) __builtin_ia32_por ((__m64
)t
, (__m64
)c
);
339 c
= __builtin_ia32_pcmpeqb(data
, repl_bs
);
340 t
= (v8qi
) __builtin_ia32_por ((__m64
)t
, (__m64
)c
);
341 c
= __builtin_ia32_pcmpeqb(data
, repl_qm
);
342 t
= (v8qi
) __builtin_ia32_por ((__m64
)t
, (__m64
)c
);
343 found
= __builtin_ia32_pmovmskb (t
);
348 __builtin_ia32_emms ();
350 /* FOUND contains 1 in bits for which we matched a relevant
351 character. Conversion to the byte index is trivial. */
352 found
= __builtin_ctz(found
);
353 return (const uchar
*)p
+ found
;
356 /* A version of the fast scanner using SSE2 vectorized byte compare insns. */
360 __attribute__((__target__("sse2")))
362 search_line_sse2 (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
364 typedef char v16qi
__attribute__ ((__vector_size__ (16)));
366 const v16qi repl_nl
= *(const v16qi
*)repl_chars
[0];
367 const v16qi repl_cr
= *(const v16qi
*)repl_chars
[1];
368 const v16qi repl_bs
= *(const v16qi
*)repl_chars
[2];
369 const v16qi repl_qm
= *(const v16qi
*)repl_chars
[3];
371 unsigned int misalign
, found
, mask
;
375 /* Align the source pointer. */
376 misalign
= (uintptr_t)s
& 15;
377 p
= (const v16qi
*)((uintptr_t)s
& -16);
380 /* Create a mask for the bytes that are valid within the first
381 16-byte block. The idea here is that the AND with the mask
382 within the loop is "free", since we need some AND or TEST
383 insn in order to set the flags for the branch anyway. */
384 mask
= -1u << misalign
;
386 /* Main loop processing 16 bytes at a time. */
395 t
|= data
== repl_cr
;
396 t
|= data
== repl_bs
;
397 t
|= data
== repl_qm
;
398 found
= __builtin_ia32_pmovmskb128 (t
);
403 /* FOUND contains 1 in bits for which we matched a relevant
404 character. Conversion to the byte index is trivial. */
405 found
= __builtin_ctz(found
);
406 return (const uchar
*)p
+ found
;
410 /* A version of the fast scanner using SSE 4.2 vectorized string insns. */
414 __attribute__((__target__("sse4.2")))
416 search_line_sse42 (const uchar
*s
, const uchar
*end
)
418 typedef char v16qi
__attribute__ ((__vector_size__ (16)));
419 static const v16qi search
= { '\n', '\r', '?', '\\' };
421 uintptr_t si
= (uintptr_t)s
;
424 /* Check for unaligned input. */
429 if (__builtin_expect (end
- s
< 16, 0)
430 && __builtin_expect ((si
& 0xfff) > 0xff0, 0))
432 /* There are fewer than 16 bytes left in the buffer, and fewer
433 than 16 bytes left on the page. Reading 16 bytes at this
434 point might generate a spurious page fault. Defer to the
435 SSE2 implementation, which already handles alignment. */
436 return search_line_sse2 (s
, end
);
439 /* ??? The builtin doesn't understand that the PCMPESTRI read from
440 memory need not be aligned. */
441 sv
= __builtin_ia32_loaddqu ((const char *) s
);
442 index
= __builtin_ia32_pcmpestri128 (search
, 4, sv
, 16, 0);
444 if (__builtin_expect (index
< 16, 0))
447 /* Advance the pointer to an aligned address. We will re-scan a
448 few bytes, but we no longer need to care about reading past the
449 end of a page, since we're guaranteed a match. */
450 s
= (const uchar
*)((si
+ 15) & -16);
453 /* Main loop, processing 16 bytes at a time. */
454 #ifdef __GCC_ASM_FLAG_OUTPUTS__
459 /* By using inline assembly instead of the builtin,
460 we can use the result, as well as the flags set. */
461 __asm ("%vpcmpestri\t$0, %2, %3"
462 : "=c"(index
), "=@ccc"(f
)
463 : "m"(*s
), "x"(search
), "a"(4), "d"(16));
471 /* By doing the whole loop in inline assembly,
472 we can make proper use of the flags set. */
473 __asm ( ".balign 16\n"
475 " %vpcmpestri\t$0, (%1), %2\n"
477 : "=&c"(index
), "+r"(s
)
478 : "x"(search
), "a"(4), "d"(16));
486 /* Work around out-dated assemblers without sse4 support. */
487 #define search_line_sse42 search_line_sse2
490 /* Check the CPU capabilities. */
492 #include "../gcc/config/i386/cpuid.h"
494 typedef const uchar
* (*search_line_fast_type
) (const uchar
*, const uchar
*);
495 static search_line_fast_type search_line_fast
;
497 #define HAVE_init_vectorized_lexer 1
499 init_vectorized_lexer (void)
501 unsigned dummy
, ecx
= 0, edx
= 0;
502 search_line_fast_type impl
= search_line_acc_char
;
505 #if defined(__SSE4_2__)
507 #elif defined(__SSE2__)
509 #elif defined(__SSE__)
514 impl
= search_line_sse42
;
515 else if (__get_cpuid (1, &dummy
, &dummy
, &ecx
, &edx
) || minimum
== 2)
517 if (minimum
== 3 || (ecx
& bit_SSE4_2
))
518 impl
= search_line_sse42
;
519 else if (minimum
== 2 || (edx
& bit_SSE2
))
520 impl
= search_line_sse2
;
521 else if (minimum
== 1 || (edx
& bit_SSE
))
522 impl
= search_line_mmx
;
524 else if (__get_cpuid (0x80000001, &dummy
, &dummy
, &dummy
, &edx
))
527 || (edx
& (bit_MMXEXT
| bit_CMOV
)) == (bit_MMXEXT
| bit_CMOV
))
528 impl
= search_line_mmx
;
531 search_line_fast
= impl
;
534 #elif (GCC_VERSION >= 4005) && defined(_ARCH_PWR8) && defined(__ALTIVEC__)
536 /* A version of the fast scanner using AltiVec vectorized byte compares
537 and VSX unaligned loads (when VSX is available). This is otherwise
538 the same as the AltiVec version. */
540 ATTRIBUTE_NO_SANITIZE_UNDEFINED
542 search_line_fast (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
544 typedef __attribute__((altivec(vector
))) unsigned char vc
;
547 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
548 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
551 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
552 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
555 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
556 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
559 '?', '?', '?', '?', '?', '?', '?', '?',
560 '?', '?', '?', '?', '?', '?', '?', '?',
562 const vc zero
= { 0 };
566 /* Main loop processing 16 bytes at a time. */
569 vc m_nl
, m_cr
, m_bs
, m_qm
;
571 data
= __builtin_vec_vsx_ld (0, s
);
574 m_nl
= (vc
) __builtin_vec_cmpeq(data
, repl_nl
);
575 m_cr
= (vc
) __builtin_vec_cmpeq(data
, repl_cr
);
576 m_bs
= (vc
) __builtin_vec_cmpeq(data
, repl_bs
);
577 m_qm
= (vc
) __builtin_vec_cmpeq(data
, repl_qm
);
578 t
= (m_nl
| m_cr
) | (m_bs
| m_qm
);
580 /* T now contains 0xff in bytes for which we matched one of the relevant
581 characters. We want to exit the loop if any byte in T is non-zero.
582 Below is the expansion of vec_any_ne(t, zero). */
584 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t
, zero
));
586 /* Restore s to point to the 16 bytes we just processed. */
590 #define N (sizeof(vc) / sizeof(long))
594 /* Statically assert that N is 2 or 4. */
595 unsigned long l
[(N
== 2 || N
== 4) ? N
: -1];
597 unsigned long l
, i
= 0;
601 /* Find the first word of T that is non-zero. */
608 s
+= sizeof(unsigned long);
612 s
+= sizeof(unsigned long);
618 s
+= sizeof(unsigned long);
622 /* L now contains 0xff in bytes for which we matched one of the
623 relevant characters. We can find the byte index by finding
624 its bit index and dividing by 8. */
625 #ifdef __BIG_ENDIAN__
626 l
= __builtin_clzl(l
) >> 3;
628 l
= __builtin_ctzl(l
) >> 3;
636 #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
638 /* A version of the fast scanner using AltiVec vectorized byte compares.
639 This cannot be used for little endian because vec_lvsl/lvsr are
640 deprecated for little endian and the code won't work properly. */
641 /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
642 so we can't compile this function without -maltivec on the command line
643 (or implied by some other switch). */
646 search_line_fast (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
648 typedef __attribute__((altivec(vector
))) unsigned char vc
;
651 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
652 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
655 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
656 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
659 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
660 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
663 '?', '?', '?', '?', '?', '?', '?', '?',
664 '?', '?', '?', '?', '?', '?', '?', '?',
667 -1, -1, -1, -1, -1, -1, -1, -1,
668 -1, -1, -1, -1, -1, -1, -1, -1,
670 const vc zero
= { 0 };
674 /* Altivec loads automatically mask addresses with -16. This lets us
675 issue the first load as early as possible. */
676 data
= __builtin_vec_ld(0, (const vc
*)s
);
678 /* Discard bytes before the beginning of the buffer. Do this by
679 beginning with all ones and shifting in zeros according to the
680 mis-alignment. The LVSR instruction pulls the exact shift we
681 want from the address. */
682 mask
= __builtin_vec_lvsr(0, s
);
683 mask
= __builtin_vec_perm(zero
, ones
, mask
);
686 /* While altivec loads mask addresses, we still need to align S so
687 that the offset we compute at the end is correct. */
688 s
= (const uchar
*)((uintptr_t)s
& -16);
690 /* Main loop processing 16 bytes at a time. */
694 vc m_nl
, m_cr
, m_bs
, m_qm
;
697 data
= __builtin_vec_ld(0, (const vc
*)s
);
700 m_nl
= (vc
) __builtin_vec_cmpeq(data
, repl_nl
);
701 m_cr
= (vc
) __builtin_vec_cmpeq(data
, repl_cr
);
702 m_bs
= (vc
) __builtin_vec_cmpeq(data
, repl_bs
);
703 m_qm
= (vc
) __builtin_vec_cmpeq(data
, repl_qm
);
704 t
= (m_nl
| m_cr
) | (m_bs
| m_qm
);
706 /* T now contains 0xff in bytes for which we matched one of the relevant
707 characters. We want to exit the loop if any byte in T is non-zero.
708 Below is the expansion of vec_any_ne(t, zero). */
710 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t
, zero
));
713 #define N (sizeof(vc) / sizeof(long))
717 /* Statically assert that N is 2 or 4. */
718 unsigned long l
[(N
== 2 || N
== 4) ? N
: -1];
720 unsigned long l
, i
= 0;
724 /* Find the first word of T that is non-zero. */
731 s
+= sizeof(unsigned long);
735 s
+= sizeof(unsigned long);
741 s
+= sizeof(unsigned long);
745 /* L now contains 0xff in bytes for which we matched one of the
746 relevant characters. We can find the byte index by finding
747 its bit index and dividing by 8. */
748 l
= __builtin_clzl(l
) >> 3;
755 #elif defined (__ARM_NEON) && defined (__ARM_64BIT_STATE)
756 #include "arm_neon.h"
758 /* This doesn't have to be the exact page size, but no system may use
759 a size smaller than this. ARMv8 requires a minimum page size of
760 4k. The impact of being conservative here is a small number of
761 cases will take the slightly slower entry path into the main
764 #define AARCH64_MIN_PAGE_SIZE 4096
767 search_line_fast (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
769 const uint8x16_t repl_nl
= vdupq_n_u8 ('\n');
770 const uint8x16_t repl_cr
= vdupq_n_u8 ('\r');
771 const uint8x16_t repl_bs
= vdupq_n_u8 ('\\');
772 const uint8x16_t repl_qm
= vdupq_n_u8 ('?');
773 const uint8x16_t xmask
= (uint8x16_t
) vdupq_n_u64 (0x8040201008040201ULL
);
775 #ifdef __ARM_BIG_ENDIAN
776 const int16x8_t shift
= {8, 8, 8, 8, 0, 0, 0, 0};
778 const int16x8_t shift
= {0, 0, 0, 0, 8, 8, 8, 8};
788 /* Align the source pointer. */
789 p
= (const uint8_t *)((uintptr_t)s
& -16);
791 /* Assuming random string start positions, with a 4k page size we'll take
792 the slow path about 0.37% of the time. */
793 if (__builtin_expect ((AARCH64_MIN_PAGE_SIZE
794 - (((uintptr_t) s
) & (AARCH64_MIN_PAGE_SIZE
- 1)))
797 /* Slow path: the string starts near a possible page boundary. */
798 uint32_t misalign
, mask
;
800 misalign
= (uintptr_t)s
& 15;
801 mask
= (-1u << misalign
) & 0xffff;
803 t
= vceqq_u8 (data
, repl_nl
);
804 u
= vceqq_u8 (data
, repl_cr
);
805 v
= vorrq_u8 (t
, vceqq_u8 (data
, repl_bs
));
806 w
= vorrq_u8 (u
, vceqq_u8 (data
, repl_qm
));
808 t
= vandq_u8 (t
, xmask
);
810 m
= vshlq_u16 (m
, shift
);
811 found
= vaddvq_u16 (m
);
814 return (const uchar
*)p
+ __builtin_ctz (found
);
818 data
= vld1q_u8 ((const uint8_t *) s
);
819 t
= vceqq_u8 (data
, repl_nl
);
820 u
= vceqq_u8 (data
, repl_cr
);
821 v
= vorrq_u8 (t
, vceqq_u8 (data
, repl_bs
));
822 w
= vorrq_u8 (u
, vceqq_u8 (data
, repl_qm
));
824 if (__builtin_expect (vpaddd_u64 ((uint64x2_t
)t
) != 0, 0))
832 t
= vceqq_u8 (data
, repl_nl
);
833 u
= vceqq_u8 (data
, repl_cr
);
834 v
= vorrq_u8 (t
, vceqq_u8 (data
, repl_bs
));
835 w
= vorrq_u8 (u
, vceqq_u8 (data
, repl_qm
));
837 } while (!vpaddd_u64 ((uint64x2_t
)t
));
840 /* Now that we've found the terminating substring, work out precisely where
842 t
= vandq_u8 (t
, xmask
);
844 m
= vshlq_u16 (m
, shift
);
845 found
= vaddvq_u16 (m
);
846 return (((((uintptr_t) p
) < (uintptr_t) s
) ? s
: (const uchar
*)p
)
847 + __builtin_ctz (found
));
850 #elif defined (__ARM_NEON)
851 #include "arm_neon.h"
854 search_line_fast (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
856 const uint8x16_t repl_nl
= vdupq_n_u8 ('\n');
857 const uint8x16_t repl_cr
= vdupq_n_u8 ('\r');
858 const uint8x16_t repl_bs
= vdupq_n_u8 ('\\');
859 const uint8x16_t repl_qm
= vdupq_n_u8 ('?');
860 const uint8x16_t xmask
= (uint8x16_t
) vdupq_n_u64 (0x8040201008040201ULL
);
862 unsigned int misalign
, found
, mask
;
866 /* Align the source pointer. */
867 misalign
= (uintptr_t)s
& 15;
868 p
= (const uint8_t *)((uintptr_t)s
& -16);
871 /* Create a mask for the bytes that are valid within the first
872 16-byte block. The idea here is that the AND with the mask
873 within the loop is "free", since we need some AND or TEST
874 insn in order to set the flags for the branch anyway. */
875 mask
= (-1u << misalign
) & 0xffff;
877 /* Main loop, processing 16 bytes at a time. */
885 uint8x16_t t
, u
, v
, w
;
892 t
= vceqq_u8 (data
, repl_nl
);
893 u
= vceqq_u8 (data
, repl_cr
);
894 v
= vorrq_u8 (t
, vceqq_u8 (data
, repl_bs
));
895 w
= vorrq_u8 (u
, vceqq_u8 (data
, repl_qm
));
896 t
= vandq_u8 (vorrq_u8 (v
, w
), xmask
);
897 l
= vpadd_u8 (vget_low_u8 (t
), vget_high_u8 (t
));
901 found
= vget_lane_u32 ((uint32x2_t
) vorr_u64 ((uint64x1_t
) n
,
902 vshr_n_u64 ((uint64x1_t
) n
, 24)), 0);
907 /* FOUND contains 1 in bits for which we matched a relevant
908 character. Conversion to the byte index is trivial. */
909 found
= __builtin_ctz (found
);
910 return (const uchar
*)p
+ found
;
915 /* We only have one accelerated alternative. Use a direct call so that
916 we encourage inlining. */
918 #define search_line_fast search_line_acc_char
922 /* Initialize the lexer if needed. */
925 _cpp_init_lexer (void)
927 #ifdef HAVE_init_vectorized_lexer
928 init_vectorized_lexer ();
932 /* Returns with a logical line that contains no escaped newlines or
933 trigraphs. This is a time-critical inner loop. */
935 _cpp_clean_line (cpp_reader
*pfile
)
941 buffer
= pfile
->buffer
;
942 buffer
->cur_note
= buffer
->notes_used
= 0;
943 buffer
->cur
= buffer
->line_base
= buffer
->next_line
;
944 buffer
->need_line
= false;
945 s
= buffer
->next_line
;
947 if (!buffer
->from_stage3
)
949 const uchar
*pbackslash
= NULL
;
951 /* Fast path. This is the common case of an un-escaped line with
952 no trigraphs. The primary win here is by not writing any
953 data back to memory until we have to. */
956 /* Perform an optimized search for \n, \r, \\, ?. */
957 s
= search_line_fast (s
, buffer
->rlimit
);
962 /* Record the location of the backslash and continue. */
965 else if (__builtin_expect (c
== '?', 0))
967 if (__builtin_expect (s
[1] == '?', false)
968 && _cpp_trigraph_map
[s
[2]])
970 /* Have a trigraph. We may or may not have to convert
971 it. Add a line note regardless, for -Wtrigraphs. */
972 add_line_note (buffer
, s
, s
[2]);
973 if (CPP_OPTION (pfile
, trigraphs
))
975 /* We do, and that means we have to switch to the
978 *d
= _cpp_trigraph_map
[s
[2]];
983 /* Not a trigraph. Continue on fast-path. */
990 /* This must be \r or \n. We're either done, or we'll be forced
991 to write back to the buffer and continue on the slow path. */
994 if (__builtin_expect (s
== buffer
->rlimit
, false))
997 /* DOS line ending? */
998 if (__builtin_expect (c
== '\r', false) && s
[1] == '\n')
1001 if (s
== buffer
->rlimit
)
1005 if (__builtin_expect (pbackslash
== NULL
, true))
1008 /* Check for escaped newline. */
1010 while (is_nvspace (p
[-1]))
1012 if (p
- 1 != pbackslash
)
1015 /* Have an escaped newline; process it and proceed to
1017 add_line_note (buffer
, p
- 1, p
!= d
? ' ' : '\\');
1019 buffer
->next_line
= p
- 1;
1027 if (c
== '\n' || c
== '\r')
1029 /* Handle DOS line endings. */
1030 if (c
== '\r' && s
!= buffer
->rlimit
&& s
[1] == '\n')
1032 if (s
== buffer
->rlimit
)
1037 while (p
!= buffer
->next_line
&& is_nvspace (p
[-1]))
1039 if (p
== buffer
->next_line
|| p
[-1] != '\\')
1042 add_line_note (buffer
, p
- 1, p
!= d
? ' ': '\\');
1044 buffer
->next_line
= p
- 1;
1046 else if (c
== '?' && s
[1] == '?' && _cpp_trigraph_map
[s
[2]])
1048 /* Add a note regardless, for the benefit of -Wtrigraphs. */
1049 add_line_note (buffer
, d
, s
[2]);
1050 if (CPP_OPTION (pfile
, trigraphs
))
1052 *d
= _cpp_trigraph_map
[s
[2]];
1060 while (*s
!= '\n' && *s
!= '\r')
1064 /* Handle DOS line endings. */
1065 if (*s
== '\r' && s
+ 1 != buffer
->rlimit
&& s
[1] == '\n')
1071 /* A sentinel note that should never be processed. */
1072 add_line_note (buffer
, d
+ 1, '\n');
1073 buffer
->next_line
= s
+ 1;
1076 /* Return true if the trigraph indicated by NOTE should be warned
1077 about in a comment. */
1079 warn_in_comment (cpp_reader
*pfile
, _cpp_line_note
*note
)
1083 /* Within comments we don't warn about trigraphs, unless the
1084 trigraph forms an escaped newline, as that may change
1086 if (note
->type
!= '/')
1089 /* If -trigraphs, then this was an escaped newline iff the next note
1091 if (CPP_OPTION (pfile
, trigraphs
))
1092 return note
[1].pos
== note
->pos
;
1094 /* Otherwise, see if this forms an escaped newline. */
1096 while (is_nvspace (*p
))
1099 /* There might have been escaped newlines between the trigraph and the
1100 newline we found. Hence the position test. */
1101 return (*p
== '\n' && p
< note
[1].pos
);
1104 /* Process the notes created by add_line_note as far as the current
1107 _cpp_process_line_notes (cpp_reader
*pfile
, int in_comment
)
1109 cpp_buffer
*buffer
= pfile
->buffer
;
1113 _cpp_line_note
*note
= &buffer
->notes
[buffer
->cur_note
];
1116 if (note
->pos
> buffer
->cur
)
1120 col
= CPP_BUF_COLUMN (buffer
, note
->pos
+ 1);
1122 if (note
->type
== '\\' || note
->type
== ' ')
1124 if (note
->type
== ' ' && !in_comment
)
1125 cpp_error_with_line (pfile
, CPP_DL_WARNING
, pfile
->line_table
->highest_line
, col
,
1126 "backslash and newline separated by space");
1128 if (buffer
->next_line
> buffer
->rlimit
)
1130 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line_table
->highest_line
, col
,
1131 "backslash-newline at end of file");
1132 /* Prevent "no newline at end of file" warning. */
1133 buffer
->next_line
= buffer
->rlimit
;
1136 buffer
->line_base
= note
->pos
;
1137 CPP_INCREMENT_LINE (pfile
, 0);
1139 else if (_cpp_trigraph_map
[note
->type
])
1141 if (CPP_OPTION (pfile
, warn_trigraphs
)
1142 && (!in_comment
|| warn_in_comment (pfile
, note
)))
1144 if (CPP_OPTION (pfile
, trigraphs
))
1145 cpp_warning_with_line (pfile
, CPP_W_TRIGRAPHS
,
1146 pfile
->line_table
->highest_line
, col
,
1147 "trigraph ??%c converted to %c",
1149 (int) _cpp_trigraph_map
[note
->type
]);
1152 cpp_warning_with_line
1153 (pfile
, CPP_W_TRIGRAPHS
,
1154 pfile
->line_table
->highest_line
, col
,
1155 "trigraph ??%c ignored, use -trigraphs to enable",
1160 else if (note
->type
== 0)
1161 /* Already processed in lex_raw_string. */;
1167 /* Skip a C-style block comment. We find the end of the comment by
1168 seeing if an asterisk is before every '/' we encounter. Returns
1169 nonzero if comment terminated by EOF, zero otherwise.
1171 Buffer->cur points to the initial asterisk of the comment. */
1173 _cpp_skip_block_comment (cpp_reader
*pfile
)
1175 cpp_buffer
*buffer
= pfile
->buffer
;
1176 const uchar
*cur
= buffer
->cur
;
1185 /* People like decorating comments with '*', so check for '/'
1186 instead for efficiency. */
1194 /* Warn about potential nested comments, but not if the '/'
1195 comes immediately before the true comment delimiter.
1196 Don't bother to get it right across escaped newlines. */
1197 if (CPP_OPTION (pfile
, warn_comments
)
1198 && cur
[0] == '*' && cur
[1] != '/')
1201 cpp_warning_with_line (pfile
, CPP_W_COMMENTS
,
1202 pfile
->line_table
->highest_line
,
1203 CPP_BUF_COL (buffer
),
1204 "\"/*\" within comment");
1210 buffer
->cur
= cur
- 1;
1211 _cpp_process_line_notes (pfile
, true);
1212 if (buffer
->next_line
>= buffer
->rlimit
)
1214 _cpp_clean_line (pfile
);
1216 cols
= buffer
->next_line
- buffer
->line_base
;
1217 CPP_INCREMENT_LINE (pfile
, cols
);
1224 _cpp_process_line_notes (pfile
, true);
1228 /* Skip a C++ line comment, leaving buffer->cur pointing to the
1229 terminating newline. Handles escaped newlines. Returns nonzero
1230 if a multiline comment. */
1232 skip_line_comment (cpp_reader
*pfile
)
1234 cpp_buffer
*buffer
= pfile
->buffer
;
1235 location_t orig_line
= pfile
->line_table
->highest_line
;
1237 while (*buffer
->cur
!= '\n')
1240 _cpp_process_line_notes (pfile
, true);
1241 return orig_line
!= pfile
->line_table
->highest_line
;
1244 /* Skips whitespace, saving the next non-whitespace character. */
1246 skip_whitespace (cpp_reader
*pfile
, cppchar_t c
)
1248 cpp_buffer
*buffer
= pfile
->buffer
;
1249 bool saw_NUL
= false;
1253 /* Horizontal space always OK. */
1254 if (c
== ' ' || c
== '\t')
1256 /* Just \f \v or \0 left. */
1259 else if (pfile
->state
.in_directive
&& CPP_PEDANTIC (pfile
))
1260 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line_table
->highest_line
,
1261 CPP_BUF_COL (buffer
),
1262 "%s in preprocessing directive",
1263 c
== '\f' ? "form feed" : "vertical tab");
1267 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
1268 while (is_nvspace (c
));
1271 cpp_error (pfile
, CPP_DL_WARNING
, "null character(s) ignored");
1276 /* See if the characters of a number token are valid in a name (no
1277 '.', '+' or '-'). */
1279 name_p (cpp_reader
*pfile
, const cpp_string
*string
)
1283 for (i
= 0; i
< string
->len
; i
++)
1284 if (!is_idchar (string
->text
[i
]))
1290 /* After parsing an identifier or other sequence, produce a warning about
1291 sequences not in NFC/NFKC. */
1293 warn_about_normalization (cpp_reader
*pfile
,
1294 const cpp_token
*token
,
1295 const struct normalize_state
*s
)
1297 if (CPP_OPTION (pfile
, warn_normalize
) < NORMALIZE_STATE_RESULT (s
)
1298 && !pfile
->state
.skipping
)
1300 /* Make sure that the token is printed using UCNs, even
1301 if we'd otherwise happily print UTF-8. */
1302 unsigned char *buf
= XNEWVEC (unsigned char, cpp_token_len (token
));
1305 sz
= cpp_spell_token (pfile
, token
, buf
, false) - buf
;
1306 if (NORMALIZE_STATE_RESULT (s
) == normalized_C
)
1307 cpp_warning_with_line (pfile
, CPP_W_NORMALIZE
, token
->src_loc
, 0,
1308 "`%.*s' is not in NFKC", (int) sz
, buf
);
1310 cpp_warning_with_line (pfile
, CPP_W_NORMALIZE
, token
->src_loc
, 0,
1311 "`%.*s' is not in NFC", (int) sz
, buf
);
1316 static const cppchar_t utf8_signifier
= 0xC0;
1318 /* Returns TRUE if the sequence starting at buffer->cur is valid in
1319 an identifier. FIRST is TRUE if this starts an identifier. */
1321 forms_identifier_p (cpp_reader
*pfile
, int first
,
1322 struct normalize_state
*state
)
1324 cpp_buffer
*buffer
= pfile
->buffer
;
1326 if (*buffer
->cur
== '$')
1328 if (!CPP_OPTION (pfile
, dollars_in_ident
))
1332 if (CPP_OPTION (pfile
, warn_dollars
) && !pfile
->state
.skipping
)
1334 CPP_OPTION (pfile
, warn_dollars
) = 0;
1335 cpp_error (pfile
, CPP_DL_PEDWARN
, "'$' in identifier or number");
1341 /* Is this a syntactically valid UCN or a valid UTF-8 char? */
1342 if (CPP_OPTION (pfile
, extended_identifiers
))
1345 if (*buffer
->cur
>= utf8_signifier
)
1347 if (_cpp_valid_utf8 (pfile
, &buffer
->cur
, buffer
->rlimit
, 1 + !first
,
1351 else if (*buffer
->cur
== '\\'
1352 && (buffer
->cur
[1] == 'u' || buffer
->cur
[1] == 'U'))
1355 if (_cpp_valid_ucn (pfile
, &buffer
->cur
, buffer
->rlimit
, 1 + !first
,
1356 state
, &s
, NULL
, NULL
))
1365 /* Helper function to issue error about improper __VA_OPT__ use. */
1367 maybe_va_opt_error (cpp_reader
*pfile
)
1369 if (CPP_PEDANTIC (pfile
) && !CPP_OPTION (pfile
, va_opt
))
1371 /* __VA_OPT__ should not be accepted at all, but allow it in
1373 if (!_cpp_in_system_header (pfile
))
1374 cpp_error (pfile
, CPP_DL_PEDWARN
,
1375 "__VA_OPT__ is not available until C++20");
1377 else if (!pfile
->state
.va_args_ok
)
1379 /* __VA_OPT__ should only appear in the replacement list of a
1381 cpp_error (pfile
, CPP_DL_PEDWARN
,
1382 "__VA_OPT__ can only appear in the expansion"
1383 " of a C++20 variadic macro");
1387 /* Helper function to get the cpp_hashnode of the identifier BASE. */
1388 static cpp_hashnode
*
1389 lex_identifier_intern (cpp_reader
*pfile
, const uchar
*base
)
1391 cpp_hashnode
*result
;
1394 unsigned int hash
= HT_HASHSTEP (0, *base
);
1397 while (ISIDNUM (*cur
))
1399 hash
= HT_HASHSTEP (hash
, *cur
);
1403 hash
= HT_HASHFINISH (hash
, len
);
1404 result
= CPP_HASHNODE (ht_lookup_with_hash (pfile
->hash_table
,
1405 base
, len
, hash
, HT_ALLOC
));
1407 /* Rarely, identifiers require diagnostics when lexed. */
1408 if (__builtin_expect ((result
->flags
& NODE_DIAGNOSTIC
)
1409 && !pfile
->state
.skipping
, 0))
1411 /* It is allowed to poison the same identifier twice. */
1412 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
1413 cpp_error (pfile
, CPP_DL_ERROR
, "attempt to use poisoned \"%s\"",
1414 NODE_NAME (result
));
1416 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1417 replacement list of a variadic macro. */
1418 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
1419 && !pfile
->state
.va_args_ok
)
1421 if (CPP_OPTION (pfile
, cplusplus
))
1422 cpp_error (pfile
, CPP_DL_PEDWARN
,
1423 "__VA_ARGS__ can only appear in the expansion"
1424 " of a C++11 variadic macro");
1426 cpp_error (pfile
, CPP_DL_PEDWARN
,
1427 "__VA_ARGS__ can only appear in the expansion"
1428 " of a C99 variadic macro");
1431 if (result
== pfile
->spec_nodes
.n__VA_OPT__
)
1432 maybe_va_opt_error (pfile
);
1434 /* For -Wc++-compat, warn about use of C++ named operators. */
1435 if (result
->flags
& NODE_WARN_OPERATOR
)
1436 cpp_warning (pfile
, CPP_W_CXX_OPERATOR_NAMES
,
1437 "identifier \"%s\" is a special operator name in C++",
1438 NODE_NAME (result
));
1444 /* Get the cpp_hashnode of an identifier specified by NAME in
1445 the current cpp_reader object. If none is found, NULL is returned. */
1447 _cpp_lex_identifier (cpp_reader
*pfile
, const char *name
)
1449 cpp_hashnode
*result
;
1450 result
= lex_identifier_intern (pfile
, (uchar
*) name
);
1454 /* Lex an identifier starting at BUFFER->CUR - 1. */
1455 static cpp_hashnode
*
1456 lex_identifier (cpp_reader
*pfile
, const uchar
*base
, bool starts_ucn
,
1457 struct normalize_state
*nst
, cpp_hashnode
**spelling
)
1459 cpp_hashnode
*result
;
1462 unsigned int hash
= HT_HASHSTEP (0, *base
);
1464 cur
= pfile
->buffer
->cur
;
1467 while (ISIDNUM (*cur
))
1469 hash
= HT_HASHSTEP (hash
, *cur
);
1472 NORMALIZE_STATE_UPDATE_IDNUM (nst
, *(cur
- 1));
1474 pfile
->buffer
->cur
= cur
;
1475 if (starts_ucn
|| forms_identifier_p (pfile
, false, nst
))
1477 /* Slower version for identifiers containing UCNs
1478 or extended chars (including $). */
1480 while (ISIDNUM (*pfile
->buffer
->cur
))
1482 NORMALIZE_STATE_UPDATE_IDNUM (nst
, *pfile
->buffer
->cur
);
1483 pfile
->buffer
->cur
++;
1485 } while (forms_identifier_p (pfile
, false, nst
));
1486 result
= _cpp_interpret_identifier (pfile
, base
,
1487 pfile
->buffer
->cur
- base
);
1488 *spelling
= cpp_lookup (pfile
, base
, pfile
->buffer
->cur
- base
);
1493 hash
= HT_HASHFINISH (hash
, len
);
1495 result
= CPP_HASHNODE (ht_lookup_with_hash (pfile
->hash_table
,
1496 base
, len
, hash
, HT_ALLOC
));
1500 /* Rarely, identifiers require diagnostics when lexed. */
1501 if (__builtin_expect ((result
->flags
& NODE_DIAGNOSTIC
)
1502 && !pfile
->state
.skipping
, 0))
1504 /* It is allowed to poison the same identifier twice. */
1505 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
1506 cpp_error (pfile
, CPP_DL_ERROR
, "attempt to use poisoned \"%s\"",
1507 NODE_NAME (result
));
1509 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1510 replacement list of a variadic macro. */
1511 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
1512 && !pfile
->state
.va_args_ok
)
1514 if (CPP_OPTION (pfile
, cplusplus
))
1515 cpp_error (pfile
, CPP_DL_PEDWARN
,
1516 "__VA_ARGS__ can only appear in the expansion"
1517 " of a C++11 variadic macro");
1519 cpp_error (pfile
, CPP_DL_PEDWARN
,
1520 "__VA_ARGS__ can only appear in the expansion"
1521 " of a C99 variadic macro");
1524 /* __VA_OPT__ should only appear in the replacement list of a
1526 if (result
== pfile
->spec_nodes
.n__VA_OPT__
)
1527 maybe_va_opt_error (pfile
);
1529 /* For -Wc++-compat, warn about use of C++ named operators. */
1530 if (result
->flags
& NODE_WARN_OPERATOR
)
1531 cpp_warning (pfile
, CPP_W_CXX_OPERATOR_NAMES
,
1532 "identifier \"%s\" is a special operator name in C++",
1533 NODE_NAME (result
));
1539 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
1541 lex_number (cpp_reader
*pfile
, cpp_string
*number
,
1542 struct normalize_state
*nst
)
1548 base
= pfile
->buffer
->cur
- 1;
1551 const uchar
*adj_digit_sep
= NULL
;
1552 cur
= pfile
->buffer
->cur
;
1554 /* N.B. ISIDNUM does not include $. */
1555 while (ISIDNUM (*cur
)
1556 || (*cur
== '.' && !DIGIT_SEP (cur
[-1]))
1558 || (VALID_SIGN (*cur
, cur
[-1]) && !DIGIT_SEP (cur
[-2])))
1560 NORMALIZE_STATE_UPDATE_IDNUM (nst
, *cur
);
1561 /* Adjacent digit separators do not form part of the pp-number syntax.
1562 However, they can safely be diagnosed here as an error, since '' is
1563 not a valid preprocessing token. */
1564 if (DIGIT_SEP (*cur
) && DIGIT_SEP (cur
[-1]) && !adj_digit_sep
)
1565 adj_digit_sep
= cur
;
1568 /* A number can't end with a digit separator. */
1569 while (cur
> pfile
->buffer
->cur
&& DIGIT_SEP (cur
[-1]))
1571 if (adj_digit_sep
&& adj_digit_sep
< cur
)
1572 cpp_error (pfile
, CPP_DL_ERROR
, "adjacent digit separators");
1574 pfile
->buffer
->cur
= cur
;
1576 while (forms_identifier_p (pfile
, false, nst
));
1578 number
->len
= cur
- base
;
1579 dest
= _cpp_unaligned_alloc (pfile
, number
->len
+ 1);
1580 memcpy (dest
, base
, number
->len
);
1581 dest
[number
->len
] = '\0';
1582 number
->text
= dest
;
1585 /* Create a token of type TYPE with a literal spelling. */
1587 create_literal (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
,
1588 unsigned int len
, enum cpp_ttype type
)
1591 token
->val
.str
.len
= len
;
1592 token
->val
.str
.text
= cpp_alloc_token_string (pfile
, base
, len
);
1596 cpp_alloc_token_string (cpp_reader
*pfile
,
1597 const unsigned char *ptr
, unsigned len
)
1599 uchar
*dest
= _cpp_unaligned_alloc (pfile
, len
+ 1);
1602 memcpy (dest
, ptr
, len
);
1606 /* A pair of raw buffer pointers. The currently open one is [1], the
1607 first one is [0]. Used for string literal lexing. */
1615 : first (NULL
), last (NULL
), rpos (0), accum (0)
1619 void append (cpp_reader
*, const uchar
*, size_t);
1621 void read_begin (cpp_reader
*);
1622 bool reading_p () const
1624 return rpos
!= NULL
;
1629 if (rpos
== BUFF_FRONT (last
))
1635 /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
1636 sequence from *FIRST_BUFF_P to LAST_BUFF_P. */
1639 lit_accum::append (cpp_reader
*pfile
, const uchar
*base
, size_t len
)
1643 first
= last
= _cpp_get_buff (pfile
, len
);
1644 else if (len
> BUFF_ROOM (last
))
1646 /* There is insufficient room in the buffer. Copy what we can,
1647 and then either extend or create a new one. */
1648 size_t room
= BUFF_ROOM (last
);
1649 memcpy (BUFF_FRONT (last
), base
, room
);
1650 BUFF_FRONT (last
) += room
;
1655 gcc_checking_assert (!rpos
);
1657 last
= _cpp_append_extend_buff (pfile
, last
, len
);
1660 memcpy (BUFF_FRONT (last
), base
, len
);
1661 BUFF_FRONT (last
) += len
;
1666 lit_accum::read_begin (cpp_reader
*pfile
)
1668 /* We never accumulate more than 4 chars to read. */
1669 if (BUFF_ROOM (last
) < 4)
1671 last
= _cpp_append_extend_buff (pfile
, last
, 4);
1672 rpos
= BUFF_FRONT (last
);
1675 /* Returns true if a macro has been defined.
1676 This might not work if compile with -save-temps,
1677 or preprocess separately from compilation. */
1680 is_macro(cpp_reader
*pfile
, const uchar
*base
)
1682 const uchar
*cur
= base
;
1683 if (! ISIDST (*cur
))
1685 unsigned int hash
= HT_HASHSTEP (0, *cur
);
1687 while (ISIDNUM (*cur
))
1689 hash
= HT_HASHSTEP (hash
, *cur
);
1692 hash
= HT_HASHFINISH (hash
, cur
- base
);
1694 cpp_hashnode
*result
= CPP_HASHNODE (ht_lookup_with_hash (pfile
->hash_table
,
1695 base
, cur
- base
, hash
, HT_NO_INSERT
));
1697 return result
&& cpp_macro_p (result
);
1700 /* Returns true if a literal suffix does not have the expected form
1701 and is defined as a macro. */
1704 is_macro_not_literal_suffix(cpp_reader
*pfile
, const uchar
*base
)
1706 /* User-defined literals outside of namespace std must start with a single
1707 underscore, so assume anything of that form really is a UDL suffix.
1708 We don't need to worry about UDLs defined inside namespace std because
1709 their names are reserved, so cannot be used as macro names in valid
1711 if (base
[0] == '_' && base
[1] != '_')
1713 return is_macro (pfile
, base
);
1716 /* Lexes a raw string. The stored string contains the spelling,
1717 including double quotes, delimiter string, '(' and ')', any leading
1718 'L', 'u', 'U' or 'u8' and 'R' modifier. The created token contains
1719 the type of the literal, or CPP_OTHER if it was not properly
1722 BASE is the start of the token. Updates pfile->buffer->cur to just
1723 after the lexed string.
1725 The spelling is NUL-terminated, but it is not guaranteed that this
1726 is the first NUL since embedded NULs are preserved. */
1729 lex_raw_string (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
)
1731 const uchar
*pos
= base
;
1733 /* 'tis a pity this information isn't passed down from the lexer's
1734 initial categorization of the token. */
1735 enum cpp_ttype type
= CPP_STRING
;
1742 else if (*pos
== 'U')
1744 type
= CPP_STRING32
;
1747 else if (*pos
== 'u')
1751 type
= CPP_UTF8STRING
;
1755 type
= CPP_STRING16
;
1759 gcc_checking_assert (pos
[0] == 'R' && pos
[1] == '"');
1762 _cpp_line_note
*note
= &pfile
->buffer
->notes
[pfile
->buffer
->cur_note
];
1764 /* Skip notes before the ". */
1765 while (note
->pos
< pos
)
1771 unsigned prefix_len
= 0;
1777 } phase
= PHASE_PREFIX
;
1781 gcc_checking_assert (note
->pos
>= pos
);
1783 /* Undo any escaped newlines and trigraphs. */
1784 if (!accum
.reading_p () && note
->pos
== pos
)
1789 /* Restore backslash followed by newline. */
1790 accum
.append (pfile
, base
, pos
- base
);
1792 accum
.read_begin (pfile
);
1793 accum
.append (pfile
, UC
"\\", 1);
1796 if (note
->type
== ' ')
1797 /* GNU backslash whitespace newline extension. FIXME
1798 could be any sequence of non-vertical space. When we
1799 can properly restore any such sequence, we should
1800 mark this note as handled so _cpp_process_line_notes
1802 accum
.append (pfile
, UC
" ", 1);
1804 accum
.append (pfile
, UC
"\n", 1);
1809 /* This can happen for ??/<NEWLINE> when trigraphs are not
1810 being interpretted. */
1811 gcc_checking_assert (!CPP_OPTION (pfile
, trigraphs
));
1817 gcc_checking_assert (_cpp_trigraph_map
[note
->type
]);
1819 /* Don't warn about this trigraph in
1820 _cpp_process_line_notes, since trigraphs show up as
1821 trigraphs in raw strings. */
1822 uchar type
= note
->type
;
1825 if (CPP_OPTION (pfile
, trigraphs
))
1827 accum
.append (pfile
, base
, pos
- base
);
1829 accum
.read_begin (pfile
);
1830 accum
.append (pfile
, UC
"??", 2);
1831 accum
.append (pfile
, &type
, 1);
1833 /* ??/ followed by newline gets two line notes, one for
1834 the trigraph and one for the backslash/newline. */
1835 if (type
== '/' && note
[1].pos
== pos
)
1838 gcc_assert (note
->type
== '\\' || note
->type
== ' ');
1839 goto after_backslash
;
1841 /* Skip the replacement character. */
1849 /* Now get a char to process. Either from an expanded note, or
1850 from the line buffer. */
1851 bool read_note
= accum
.reading_p ();
1852 char c
= read_note
? accum
.read_char () : *pos
++;
1854 if (phase
== PHASE_PREFIX
)
1860 prefix
[prefix_len
++] = '"';
1862 else if (prefix_len
< 16
1863 /* Prefix chars are any of the basic character set,
1864 [lex.charset] except for '
1865 ()\\\t\v\f\n'. Optimized for a contiguous
1867 /* Unlike a switch, this collapses down to one or
1868 two shift and bitmask operations on an ASCII
1869 system, with an outlier or two. */
1870 && (('Z' - 'A' == 25
1871 ? ((c
>= 'a' && c
<= 'z') || (c
>= 'A' && c
<= 'Z'))
1873 || (c
>= '0' && c
<= '9')
1874 || c
== '_' || c
== '{' || c
== '}'
1875 || c
== '[' || c
== ']' || c
== '#'
1876 || c
== '<' || c
== '>' || c
== '%'
1877 || c
== ':' || c
== ';' || c
== '.' || c
== '?'
1878 || c
== '*' || c
== '+' || c
== '-' || c
== '/'
1879 || c
== '^' || c
== '&' || c
== '|' || c
== '~'
1880 || c
== '!' || c
== '=' || c
== ','
1881 || c
== '"' || c
== '\''))
1882 prefix
[prefix_len
++] = c
;
1885 /* Something is wrong. */
1886 int col
= CPP_BUF_COLUMN (pfile
->buffer
, pos
) + read_note
;
1887 if (prefix_len
== 16)
1888 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
,
1889 col
, "raw string delimiter longer "
1890 "than 16 characters");
1892 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
,
1893 col
, "invalid new-line in raw "
1894 "string delimiter");
1896 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
,
1897 col
, "invalid character '%c' in "
1898 "raw string delimiter", c
);
1901 /* Continue until we get a close quote, that's probably
1902 the best failure mode. */
1909 if (phase
!= PHASE_NONE
)
1911 if (prefix
[phase
] != c
)
1913 else if (unsigned (phase
+ 1) == prefix_len
)
1917 phase
= Phase (phase
+ 1);
1922 if (!prefix_len
&& c
== '"')
1923 /* Failure mode lexing. */
1925 else if (prefix_len
&& c
== ')')
1926 phase
= PHASE_SUFFIX
;
1927 else if (!read_note
&& c
== '\n')
1930 pfile
->buffer
->cur
= pos
;
1931 if (pfile
->state
.in_directive
1932 || (pfile
->state
.parsing_args
1933 && pfile
->buffer
->next_line
>= pfile
->buffer
->rlimit
))
1935 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
, 0,
1936 "unterminated raw string");
1941 accum
.append (pfile
, base
, pos
- base
+ 1);
1942 _cpp_process_line_notes (pfile
, false);
1944 if (pfile
->buffer
->next_line
< pfile
->buffer
->rlimit
)
1945 CPP_INCREMENT_LINE (pfile
, 0);
1946 pfile
->buffer
->need_line
= true;
1948 if (!_cpp_get_fresh_line (pfile
))
1950 /* We ran out of file and failed to get a line. */
1951 location_t src_loc
= token
->src_loc
;
1952 token
->type
= CPP_EOF
;
1953 /* Tell the compiler the line number of the EOF token. */
1954 token
->src_loc
= pfile
->line_table
->highest_line
;
1957 _cpp_release_buff (pfile
, accum
.first
);
1958 cpp_error_with_line (pfile
, CPP_DL_ERROR
, src_loc
, 0,
1959 "unterminated raw string");
1960 /* Now pop the buffer that _cpp_get_fresh_line did not. */
1961 _cpp_pop_buffer (pfile
);
1965 pos
= base
= pfile
->buffer
->cur
;
1966 note
= &pfile
->buffer
->notes
[pfile
->buffer
->cur_note
];
1970 if (CPP_OPTION (pfile
, user_literals
))
1972 /* If a string format macro, say from inttypes.h, is placed touching
1973 a string literal it could be parsed as a C++11 user-defined string
1974 literal thus breaking the program. */
1975 if (is_macro_not_literal_suffix (pfile
, pos
))
1977 /* Raise a warning, but do not consume subsequent tokens. */
1978 if (CPP_OPTION (pfile
, warn_literal_suffix
) && !pfile
->state
.skipping
)
1979 cpp_warning_with_line (pfile
, CPP_W_LITERAL_SUFFIX
,
1981 "invalid suffix on literal; C++11 requires "
1982 "a space between literal and string macro");
1984 /* Grab user defined literal suffix. */
1985 else if (ISIDST (*pos
))
1987 type
= cpp_userdef_string_add_type (type
);
1990 while (ISIDNUM (*pos
))
1996 pfile
->buffer
->cur
= pos
;
1998 create_literal (pfile
, token
, base
, pos
- base
, type
);
2001 size_t extra_len
= pos
- base
;
2002 uchar
*dest
= _cpp_unaligned_alloc (pfile
, accum
.accum
+ extra_len
+ 1);
2005 token
->val
.str
.len
= accum
.accum
+ extra_len
;
2006 token
->val
.str
.text
= dest
;
2007 for (_cpp_buff
*buf
= accum
.first
; buf
; buf
= buf
->next
)
2009 size_t len
= BUFF_FRONT (buf
) - buf
->base
;
2010 memcpy (dest
, buf
->base
, len
);
2013 _cpp_release_buff (pfile
, accum
.first
);
2014 memcpy (dest
, base
, extra_len
);
2015 dest
[extra_len
] = '\0';
2019 /* Lexes a string, character constant, or angle-bracketed header file
2020 name. The stored string contains the spelling, including opening
2021 quote and any leading 'L', 'u', 'U' or 'u8' and optional
2022 'R' modifier. It returns the type of the literal, or CPP_OTHER
2023 if it was not properly terminated, or CPP_LESS for an unterminated
2024 header name which must be relexed as normal tokens.
2026 The spelling is NUL-terminated, but it is not guaranteed that this
2027 is the first NUL since embedded NULs are preserved. */
2029 lex_string (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
)
2031 bool saw_NUL
= false;
2033 cppchar_t terminator
;
2034 enum cpp_ttype type
;
2037 terminator
= *cur
++;
2038 if (terminator
== 'L' || terminator
== 'U')
2039 terminator
= *cur
++;
2040 else if (terminator
== 'u')
2042 terminator
= *cur
++;
2043 if (terminator
== '8')
2044 terminator
= *cur
++;
2046 if (terminator
== 'R')
2048 lex_raw_string (pfile
, token
, base
);
2051 if (terminator
== '"')
2052 type
= (*base
== 'L' ? CPP_WSTRING
:
2053 *base
== 'U' ? CPP_STRING32
:
2054 *base
== 'u' ? (base
[1] == '8' ? CPP_UTF8STRING
: CPP_STRING16
)
2056 else if (terminator
== '\'')
2057 type
= (*base
== 'L' ? CPP_WCHAR
:
2058 *base
== 'U' ? CPP_CHAR32
:
2059 *base
== 'u' ? (base
[1] == '8' ? CPP_UTF8CHAR
: CPP_CHAR16
)
2062 terminator
= '>', type
= CPP_HEADER_NAME
;
2066 cppchar_t c
= *cur
++;
2068 /* In #include-style directives, terminators are not escapable. */
2069 if (c
== '\\' && !pfile
->state
.angled_headers
&& *cur
!= '\n')
2071 else if (c
== terminator
)
2076 /* Unmatched quotes always yield undefined behavior, but
2077 greedy lexing means that what appears to be an unterminated
2078 header name may actually be a legitimate sequence of tokens. */
2079 if (terminator
== '>')
2081 token
->type
= CPP_LESS
;
2091 if (saw_NUL
&& !pfile
->state
.skipping
)
2092 cpp_error (pfile
, CPP_DL_WARNING
,
2093 "null character(s) preserved in literal");
2095 if (type
== CPP_OTHER
&& CPP_OPTION (pfile
, lang
) != CLK_ASM
)
2096 cpp_error (pfile
, CPP_DL_PEDWARN
, "missing terminating %c character",
2099 if (CPP_OPTION (pfile
, user_literals
))
2101 /* If a string format macro, say from inttypes.h, is placed touching
2102 a string literal it could be parsed as a C++11 user-defined string
2103 literal thus breaking the program. */
2104 if (is_macro_not_literal_suffix (pfile
, cur
))
2106 /* Raise a warning, but do not consume subsequent tokens. */
2107 if (CPP_OPTION (pfile
, warn_literal_suffix
) && !pfile
->state
.skipping
)
2108 cpp_warning_with_line (pfile
, CPP_W_LITERAL_SUFFIX
,
2110 "invalid suffix on literal; C++11 requires "
2111 "a space between literal and string macro");
2113 /* Grab user defined literal suffix. */
2114 else if (ISIDST (*cur
))
2116 type
= cpp_userdef_char_add_type (type
);
2117 type
= cpp_userdef_string_add_type (type
);
2120 while (ISIDNUM (*cur
))
2124 else if (CPP_OPTION (pfile
, cpp_warn_cxx11_compat
)
2125 && is_macro (pfile
, cur
)
2126 && !pfile
->state
.skipping
)
2127 cpp_warning_with_line (pfile
, CPP_W_CXX11_COMPAT
,
2128 token
->src_loc
, 0, "C++11 requires a space "
2129 "between string literal and macro");
2131 pfile
->buffer
->cur
= cur
;
2132 create_literal (pfile
, token
, base
, cur
- base
, type
);
2135 /* Return the comment table. The client may not make any assumption
2136 about the ordering of the table. */
2138 cpp_get_comments (cpp_reader
*pfile
)
2140 return &pfile
->comments
;
2143 /* Append a comment to the end of the comment table. */
2145 store_comment (cpp_reader
*pfile
, cpp_token
*token
)
2149 if (pfile
->comments
.allocated
== 0)
2151 pfile
->comments
.allocated
= 256;
2152 pfile
->comments
.entries
= (cpp_comment
*) xmalloc
2153 (pfile
->comments
.allocated
* sizeof (cpp_comment
));
2156 if (pfile
->comments
.count
== pfile
->comments
.allocated
)
2158 pfile
->comments
.allocated
*= 2;
2159 pfile
->comments
.entries
= (cpp_comment
*) xrealloc
2160 (pfile
->comments
.entries
,
2161 pfile
->comments
.allocated
* sizeof (cpp_comment
));
2164 len
= token
->val
.str
.len
;
2166 /* Copy comment. Note, token may not be NULL terminated. */
2167 pfile
->comments
.entries
[pfile
->comments
.count
].comment
=
2168 (char *) xmalloc (sizeof (char) * (len
+ 1));
2169 memcpy (pfile
->comments
.entries
[pfile
->comments
.count
].comment
,
2170 token
->val
.str
.text
, len
);
2171 pfile
->comments
.entries
[pfile
->comments
.count
].comment
[len
] = '\0';
2173 /* Set source location. */
2174 pfile
->comments
.entries
[pfile
->comments
.count
].sloc
= token
->src_loc
;
2176 /* Increment the count of entries in the comment table. */
2177 pfile
->comments
.count
++;
2180 /* The stored comment includes the comment start and any terminator. */
2182 save_comment (cpp_reader
*pfile
, cpp_token
*token
, const unsigned char *from
,
2185 unsigned char *buffer
;
2186 unsigned int len
, clen
, i
;
2188 len
= pfile
->buffer
->cur
- from
+ 1; /* + 1 for the initial '/'. */
2190 /* C++ comments probably (not definitely) have moved past a new
2191 line, which we don't want to save in the comment. */
2192 if (is_vspace (pfile
->buffer
->cur
[-1]))
2195 /* If we are currently in a directive or in argument parsing, then
2196 we need to store all C++ comments as C comments internally, and
2197 so we need to allocate a little extra space in that case.
2199 Note that the only time we encounter a directive here is
2200 when we are saving comments in a "#define". */
2201 clen
= ((pfile
->state
.in_directive
|| pfile
->state
.parsing_args
)
2202 && type
== '/') ? len
+ 2 : len
;
2204 buffer
= _cpp_unaligned_alloc (pfile
, clen
);
2206 token
->type
= CPP_COMMENT
;
2207 token
->val
.str
.len
= clen
;
2208 token
->val
.str
.text
= buffer
;
2211 memcpy (buffer
+ 1, from
, len
- 1);
2213 /* Finish conversion to a C comment, if necessary. */
2214 if ((pfile
->state
.in_directive
|| pfile
->state
.parsing_args
) && type
== '/')
2217 buffer
[clen
- 2] = '*';
2218 buffer
[clen
- 1] = '/';
2219 /* As there can be in a C++ comments illegal sequences for C comments
2220 we need to filter them out. */
2221 for (i
= 2; i
< (clen
- 2); i
++)
2222 if (buffer
[i
] == '/' && (buffer
[i
- 1] == '*' || buffer
[i
+ 1] == '*'))
2226 /* Finally store this comment for use by clients of libcpp. */
2227 store_comment (pfile
, token
);
2230 /* Returns true if comment at COMMENT_START is a recognized FALLTHROUGH
2234 fallthrough_comment_p (cpp_reader
*pfile
, const unsigned char *comment_start
)
2236 const unsigned char *from
= comment_start
+ 1;
2238 switch (CPP_OPTION (pfile
, cpp_warn_implicit_fallthrough
))
2240 /* For both -Wimplicit-fallthrough=0 and -Wimplicit-fallthrough=5 we
2241 don't recognize any comments. The latter only checks attributes,
2242 the former doesn't warn. */
2246 /* -Wimplicit-fallthrough=1 considers any comment, no matter what
2251 /* -Wimplicit-fallthrough=2 looks for (case insensitive)
2252 .*falls?[ \t-]*thr(u|ough).* regex. */
2253 for (; (size_t) (pfile
->buffer
->cur
- from
) >= sizeof "fallthru" - 1;
2256 /* Is there anything like strpbrk with upper boundary, or
2257 memchr looking for 2 characters rather than just one? */
2258 if (from
[0] != 'f' && from
[0] != 'F')
2260 if (from
[1] != 'a' && from
[1] != 'A')
2262 if (from
[2] != 'l' && from
[2] != 'L')
2264 if (from
[3] != 'l' && from
[3] != 'L')
2266 from
+= sizeof "fall" - 1;
2267 if (from
[0] == 's' || from
[0] == 'S')
2269 while (*from
== ' ' || *from
== '\t' || *from
== '-')
2271 if (from
[0] != 't' && from
[0] != 'T')
2273 if (from
[1] != 'h' && from
[1] != 'H')
2275 if (from
[2] != 'r' && from
[2] != 'R')
2277 if (from
[3] == 'u' || from
[3] == 'U')
2279 if (from
[3] != 'o' && from
[3] != 'O')
2281 if (from
[4] != 'u' && from
[4] != 'U')
2283 if (from
[5] != 'g' && from
[5] != 'G')
2285 if (from
[6] != 'h' && from
[6] != 'H')
2295 /* Whole comment contents:
2299 if (*from
== '-' || *from
== '@')
2301 size_t len
= sizeof "fallthrough" - 1;
2302 if ((size_t) (pfile
->buffer
->cur
- from
- 1) < len
)
2304 if (memcmp (from
+ 1, "fallthrough", len
))
2308 if (from
[len
+ 1] != '@')
2314 /* Whole comment contents (regex):
2315 lint -fallthrough[ \t]*
2317 else if (*from
== 'l')
2319 size_t len
= sizeof "int -fallthrough" - 1;
2320 if ((size_t) (pfile
->buffer
->cur
- from
- 1) < len
)
2322 if (memcmp (from
+ 1, "int -fallthrough", len
))
2325 while (*from
== ' ' || *from
== '\t')
2328 /* Whole comment contents (regex):
2329 [ \t]*FALLTHR(U|OUGH)[ \t]*
2331 else if (CPP_OPTION (pfile
, cpp_warn_implicit_fallthrough
) == 4)
2333 while (*from
== ' ' || *from
== '\t')
2335 if ((size_t) (pfile
->buffer
->cur
- from
) < sizeof "FALLTHRU" - 1)
2337 if (memcmp (from
, "FALLTHR", sizeof "FALLTHR" - 1))
2339 from
+= sizeof "FALLTHR" - 1;
2342 else if ((size_t) (pfile
->buffer
->cur
- from
) < sizeof "OUGH" - 1)
2344 else if (memcmp (from
, "OUGH", sizeof "OUGH" - 1))
2347 from
+= sizeof "OUGH" - 1;
2348 while (*from
== ' ' || *from
== '\t')
2351 /* Whole comment contents (regex):
2352 [ \t.!]*(ELSE,? |INTENTIONAL(LY)? )?FALL(S | |-)?THR(OUGH|U)[ \t.!]*(-[^\n\r]*)?
2353 [ \t.!]*(Else,? |Intentional(ly)? )?Fall((s | |-)[Tt]|t)hr(ough|u)[ \t.!]*(-[^\n\r]*)?
2354 [ \t.!]*([Ee]lse,? |[Ii]ntentional(ly)? )?fall(s | |-)?thr(ough|u)[ \t.!]*(-[^\n\r]*)?
2358 while (*from
== ' ' || *from
== '\t' || *from
== '.' || *from
== '!')
2360 unsigned char f
= *from
;
2361 bool all_upper
= false;
2362 if (f
== 'E' || f
== 'e')
2364 if ((size_t) (pfile
->buffer
->cur
- from
)
2365 < sizeof "else fallthru" - 1)
2367 if (f
== 'E' && memcmp (from
+ 1, "LSE", sizeof "LSE" - 1) == 0)
2369 else if (memcmp (from
+ 1, "lse", sizeof "lse" - 1))
2371 from
+= sizeof "else" - 1;
2377 if (all_upper
&& *from
== 'f')
2379 if (f
== 'e' && *from
== 'F')
2383 else if (f
== 'I' || f
== 'i')
2385 if ((size_t) (pfile
->buffer
->cur
- from
)
2386 < sizeof "intentional fallthru" - 1)
2388 if (f
== 'I' && memcmp (from
+ 1, "NTENTIONAL",
2389 sizeof "NTENTIONAL" - 1) == 0)
2391 else if (memcmp (from
+ 1, "ntentional",
2392 sizeof "ntentional" - 1))
2394 from
+= sizeof "intentional" - 1;
2398 if (all_upper
&& *from
== 'f')
2403 if (memcmp (from
, "LY F", sizeof "LY F" - 1))
2405 from
+= sizeof "LY " - 1;
2409 if (memcmp (from
, "ly ", sizeof "ly " - 1))
2411 from
+= sizeof "ly " - 1;
2413 if (f
== 'i' && *from
== 'F')
2417 if (f
!= 'F' && f
!= 'f')
2419 if ((size_t) (pfile
->buffer
->cur
- from
) < sizeof "fallthru" - 1)
2421 if (f
== 'F' && memcmp (from
+ 1, "ALL", sizeof "ALL" - 1) == 0)
2425 else if (memcmp (from
+ 1, "all", sizeof "all" - 1))
2427 from
+= sizeof "fall" - 1;
2428 if (*from
== (all_upper
? 'S' : 's') && from
[1] == ' ')
2430 else if (*from
== ' ' || *from
== '-')
2432 else if (*from
!= (all_upper
? 'T' : 't'))
2434 if ((f
== 'f' || *from
!= 'T') && (all_upper
|| *from
!= 't'))
2436 if ((size_t) (pfile
->buffer
->cur
- from
) < sizeof "thru" - 1)
2438 if (memcmp (from
+ 1, all_upper
? "HRU" : "hru", sizeof "hru" - 1))
2440 if ((size_t) (pfile
->buffer
->cur
- from
) < sizeof "through" - 1)
2442 if (memcmp (from
+ 1, all_upper
? "HROUGH" : "hrough",
2443 sizeof "hrough" - 1))
2445 from
+= sizeof "through" - 1;
2448 from
+= sizeof "thru" - 1;
2449 while (*from
== ' ' || *from
== '\t' || *from
== '.' || *from
== '!')
2454 if (*comment_start
== '*')
2458 while (*from
&& *from
!= '*'
2459 && *from
!= '\n' && *from
!= '\r')
2461 if (*from
!= '*' || from
[1] == '/')
2468 while (*from
&& *from
!= '\n' && *from
!= '\r')
2472 /* C block comment. */
2473 if (*comment_start
== '*')
2475 if (*from
!= '*' || from
[1] != '/')
2478 /* C++ line comment. */
2479 else if (*from
!= '\n')
2485 /* Allocate COUNT tokens for RUN. */
2487 _cpp_init_tokenrun (tokenrun
*run
, unsigned int count
)
2489 run
->base
= XNEWVEC (cpp_token
, count
);
2490 run
->limit
= run
->base
+ count
;
2494 /* Returns the next tokenrun, or creates one if there is none. */
2496 next_tokenrun (tokenrun
*run
)
2498 if (run
->next
== NULL
)
2500 run
->next
= XNEW (tokenrun
);
2501 run
->next
->prev
= run
;
2502 _cpp_init_tokenrun (run
->next
, 250);
2508 /* Return the number of not yet processed token in a given
2511 _cpp_remaining_tokens_num_in_context (cpp_context
*context
)
2513 if (context
->tokens_kind
== TOKENS_KIND_DIRECT
)
2514 return (LAST (context
).token
- FIRST (context
).token
);
2515 else if (context
->tokens_kind
== TOKENS_KIND_INDIRECT
2516 || context
->tokens_kind
== TOKENS_KIND_EXTENDED
)
2517 return (LAST (context
).ptoken
- FIRST (context
).ptoken
);
2522 /* Returns the token present at index INDEX in a given context. If
2523 INDEX is zero, the next token to be processed is returned. */
2524 static const cpp_token
*
2525 _cpp_token_from_context_at (cpp_context
*context
, int index
)
2527 if (context
->tokens_kind
== TOKENS_KIND_DIRECT
)
2528 return &(FIRST (context
).token
[index
]);
2529 else if (context
->tokens_kind
== TOKENS_KIND_INDIRECT
2530 || context
->tokens_kind
== TOKENS_KIND_EXTENDED
)
2531 return FIRST (context
).ptoken
[index
];
2536 /* Look ahead in the input stream.
   PFILE is the reader and INDEX selects how far ahead to look.
   Pending macro contexts (those that have a prev link) are searched
   first; if INDEX falls within the tokens remaining in one of them,
   that token is returned directly.  Otherwise new tokens are lexed
   with _cpp_lex_token while keep_tokens is raised and the line_change
   callback is suppressed, the lexed tokens are pushed back with
   _cpp_backup_tokens_direct, and the saved state is restored.
   NOTE(review): the declaration and updates of COUNT, the adjustment
   of INDEX between contexts, and the loop around the lexing step are
   not visible in this excerpt.  */
2538 cpp_peek_token (cpp_reader
*pfile
, int index
)
2540 cpp_context
*context
= pfile
->context
;
2541 const cpp_token
*peektok
;
2544 /* First, scan through any pending cpp_context objects. */
2545 while (context
->prev
)
2547 ptrdiff_t sz
= _cpp_remaining_tokens_num_in_context (context
);
/* The wanted token is already available in this context.  */
2549 if (index
< (int) sz
)
2550 return _cpp_token_from_context_at (context
, index
);
2552 context
= context
->prev
;
2555 /* We will have to read some new tokens after all (and do so
2556 without invalidating preceding tokens). */
2558 pfile
->keep_tokens
++;
2560 /* For peeked tokens temporarily disable line_change reporting,
2561 until the tokens are parsed for real. */
2562 void (*line_change
) (cpp_reader
*, const cpp_token
*, int)
2563 = pfile
->cb
.line_change
;
2564 pfile
->cb
.line_change
= NULL
;
2568 peektok
= _cpp_lex_token (pfile
);
2569 if (peektok
->type
== CPP_EOF
)
2574 else if (peektok
->type
== CPP_PRAGMA
)
2576 /* Don't peek past a pragma. */
2577 if (peektok
== &pfile
->directive_result
)
2578 /* Save the pragma in the buffer. */
2579 *pfile
->cur_token
++ = *peektok
;
/* Un-read everything that was lexed for the peek, then undo the
   temporary keep_tokens and line_change changes made above.  */
2586 _cpp_backup_tokens_direct (pfile
, count
- index
);
2587 pfile
->keep_tokens
--;
2588 pfile
->cb
.line_change
= line_change
;
2593 /* Allocate a single token that is invalidated at the same time as the
   rest of the tokens on the line.  Has its line and col set to the
   same as the last lexed token, so that diagnostics appear in the
   right place.  */
/* Hand out one extra token slot at the current lexing position of
   PFILE.  SZ is the number of slots left in the current run after
   cur_token and LA is the number of pending lookahead tokens.  The
   memmove calls shift existing tokens up by one slot, spilling into
   the next token run where needed.  The slot at cur_token is then
   claimed and given the src_loc of the token just before the original
   cur_token position.
   NOTE(review): the conditions guarding the memmove calls and the
   final return are not visible in this excerpt.  */
2598 _cpp_temp_token (cpp_reader
*pfile
)
2600 cpp_token
*old
, *result
;
2601 ptrdiff_t sz
= pfile
->cur_run
->limit
- pfile
->cur_token
;
2602 ptrdiff_t la
= (ptrdiff_t) pfile
->lookaheads
;
/* OLD is the most recently handed-out token; its location is copied
   into the new token at the end.  */
2604 old
= pfile
->cur_token
- 1;
2605 /* Any pre-existing lookaheads must not be clobbered. */
2610 tokenrun
*next
= next_tokenrun (pfile
->cur_run
);
/* Shift the tokens at the start of the next run up by one slot.  */
2613 memmove (next
->base
+ 1, next
->base
,
2614 (la
- sz
) * sizeof (cpp_token
));
/* The last token of the current run moves to the head of the next.  */
2616 next
->base
[0] = pfile
->cur_run
->limit
[-1];
/* Shift the tokens that stay in the current run up by one slot.  */
2620 memmove (pfile
->cur_token
+ 1, pfile
->cur_token
,
2621 MIN (la
, sz
- 1) * sizeof (cpp_token
));
/* No free slot is left in this run: continue in the next one.  */
2624 if (!sz
&& pfile
->cur_token
== pfile
->cur_run
->limit
)
2626 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
2627 pfile
->cur_token
= pfile
->cur_run
->base
;
2630 result
= pfile
->cur_token
++;
2631 result
->src_loc
= old
->src_loc
;
2635 /* We're at the beginning of a logical line (so not in
   directives-mode) and RESULT is a CPP_NAME with NODE_MODULE set.  See
   if we should enter deferred_pragma mode to tokenize the rest of the
   line as a module control-line.
   PFILE is the reader.  RESULT is the token just lexed; it and the
   keyword token may be marked NO_EXPAND and have their nodes replaced
   by the alternate n_modules entries.  Up to two further tokens are
   peeked with _cpp_lex_direct; BACKUP counts them so that they can be
   returned with _cpp_backup_tokens_direct.
   NOTE(review): the increments of BACKUP, the not-a-module exits and
   the branch structure around the final restore are not visible in
   this excerpt.  */
2641 cpp_maybe_module_directive (cpp_reader
*pfile
, cpp_token
*result
)
2643 unsigned backup
= 0; /* Tokens we peeked. */
2644 cpp_hashnode
*node
= result
->val
.node
.node
;
2645 cpp_token
*peek
= result
;
2646 cpp_token
*keyword
= peek
;
2647 cpp_hashnode
*(&n_modules
)[spec_nodes::M_HWM
][2] = pfile
->spec_nodes
.n_modules
;
2648 int header_count
= 0;
2650 /* Make sure the incoming state is as we expect it. This way we
2651 can restore it using constants. */
2652 gcc_checking_assert (!pfile
->state
.in_deferred_pragma
2653 && !pfile
->state
.skipping
2654 && !pfile
->state
.parsing_args
2655 && !pfile
->state
.angled_headers
2656 && (pfile
->state
.save_comments
2657 == !CPP_OPTION (pfile
, discard_comments
)));
2659 /* Enter directives mode sufficiently for peeking. We don't have
2660 to actually set in_directive. */
2661 pfile
->state
.in_deferred_pragma
= true;
2663 /* These two fields are needed to process tokenization in deferred
2664 pragma mode. They are not used outside deferred pragma mode or
2666 pfile
->state
.pragma_allow_expansion
= true;
2667 pfile
->directive_line
= result
->src_loc
;
2669 /* Saving comments is incompatible with directives mode. */
2670 pfile
->state
.save_comments
= 0;
/* A leading export keyword: lex one more token to find the keyword
   that actually selects the control-line kind.  */
2672 if (node
== n_modules
[spec_nodes::M_EXPORT
][0])
2674 peek
= _cpp_lex_direct (pfile
);
2677 if (keyword
->type
!= CPP_NAME
)
2679 node
= keyword
->val
.node
.node
;
2680 if (!(node
->flags
& NODE_MODULE
))
/* HEADER_COUNT stays zero for module and becomes nonzero for the two
   import spellings; later code uses it to tell the two apart.  */
2684 if (node
== n_modules
[spec_nodes::M__IMPORT
][0])
2686 header_count
= backup
+ 2 + 16;
2687 else if (node
== n_modules
[spec_nodes::M_IMPORT
][0])
2689 header_count
= backup
+ 2 + (CPP_OPTION (pfile
, preprocessed
) ? 16 : 0);
2690 else if (node
== n_modules
[spec_nodes::M_MODULE
][0])
2695 /* We've seen [export] {module|import|__import}. Check the next token. */
2697 /* After '{,__}import' a header name may appear. */
2698 pfile
->state
.angled_headers
= true;
2699 peek
= _cpp_lex_direct (pfile
);
2702 /* ... import followed by identifier, ':', '<' or
2703 header-name preprocessing tokens, or module
2704 followed by cpp-identifier, ':' or ';' preprocessing
2705 tokens. C++ keywords are not yet relevant. */
2706 if (peek
->type
== CPP_NAME
2707 || peek
->type
== CPP_COLON
2709 ? (peek
->type
== CPP_LESS
2710 || (peek
->type
== CPP_STRING
&& peek
->val
.str
.text
[0] != 'R')
2711 || peek
->type
== CPP_HEADER_NAME
)
2712 : peek
->type
== CPP_SEMICOLON
))
/* Macro expansion on the rest of the line is allowed only when the
   input is not already preprocessed.  */
2714 pfile
->state
.pragma_allow_expansion
= !CPP_OPTION (pfile
, preprocessed
);
2715 if (!pfile
->state
.pragma_allow_expansion
)
2716 pfile
->state
.prevent_expansion
++;
2718 if (!header_count
&& linemap_included_from
2719 (LINEMAPS_LAST_ORDINARY_MAP (pfile
->line_table
)))
2720 cpp_error_with_line (pfile
, CPP_DL_ERROR
, keyword
->src_loc
, 0,
2721 "module control-line cannot be in included file");
2723 /* The first one or two tokens cannot be macro names. */
2724 for (int ix
= backup
; ix
--;)
2726 cpp_token
*tok
= ix
? keyword
: result
;
2727 cpp_hashnode
*node
= tok
->val
.node
.node
;
2729 /* Don't attempt to expand the token. */
2730 tok
->flags
|= NO_EXPAND
;
2731 if (_cpp_defined_macro_p (node
)
2732 && _cpp_maybe_notify_macro_use (pfile
, node
, tok
->src_loc
)
2733 && !cpp_fun_like_macro_p (node
))
2734 cpp_error_with_line (pfile
, CPP_DL_ERROR
, tok
->src_loc
, 0,
2735 "module control-line \"%s\" cannot be"
2736 " an object-like macro",
2740 /* Map to underbar variants. */
2741 keyword
->val
.node
.node
= n_modules
[header_count
2742 ? spec_nodes::M_IMPORT
2743 : spec_nodes::M_MODULE
][1];
2745 result
->val
.node
.node
= n_modules
[spec_nodes::M_EXPORT
][1];
2747 /* Maybe tell the tokenizer we expect a header-name down the
2749 pfile
->state
.directive_file_token
= header_count
;
2754 /* Drop out of directive mode. */
2755 /* We asserted save_comments had this value upon entry. */
2756 pfile
->state
.save_comments
2757 = !CPP_OPTION (pfile
, discard_comments
);
2758 pfile
->state
.in_deferred_pragma
= false;
2759 /* Do not let this remain on. */
2760 pfile
->state
.angled_headers
= false;
2763 /* In either case we want to backup the peeked tokens. */
2766 /* If we saw EOL, we should drop it, because this isn't a module
2767 control-line after all. */
2768 bool eol
= peek
->type
== CPP_PRAGMA_EOL
;
2769 if (!eol
|| backup
> 1)
2771 /* Put the peeked tokens back. */
2772 _cpp_backup_tokens_direct (pfile
, backup
);
2773 /* But if the last one was an EOL, forget it. */
2775 pfile
->lookaheads
--;
2780 /* Lex a token into RESULT (external interface).  Takes care of issues
   like directive handling, token lookahead, multiple include
   optimization and skipping.
   PFILE is the reader.  A pending lookahead token is reused if there
   is one; otherwise _cpp_lex_direct produces a fresh token.  Tokens
   flagged BOL are examined for a '#' directive, a deferred pragma, or
   a possible module control-line, and the line_change callback is
   invoked when one is installed and the reader is not skipping.
   NOTE(review): the enclosing loop, its exits and the final return
   are not visible in this excerpt.  */
2784 _cpp_lex_token (cpp_reader
*pfile
)
/* The current run is used up: continue at the base of the next run.  */
2790 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
2792 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
2793 pfile
->cur_token
= pfile
->cur_run
->base
;
2795 /* We assume that the current token is somewhere in the current
2797 if (pfile
->cur_token
< pfile
->cur_run
->base
2798 || pfile
->cur_token
>= pfile
->cur_run
->limit
)
2801 if (pfile
->lookaheads
)
2803 pfile
->lookaheads
--;
2804 result
= pfile
->cur_token
++;
2807 result
= _cpp_lex_direct (pfile
);
2809 if (result
->flags
& BOL
)
2811 /* Is this a directive. If _cpp_handle_directive returns
2812 false, it is an assembler #. */
2813 if (result
->type
== CPP_HASH
2814 /* 6.10.3 p 11: Directives in a list of macro arguments
2815 gives undefined behavior. This implementation
2816 handles the directive as normal. */
2817 && pfile
->state
.parsing_args
!= 1)
2819 if (_cpp_handle_directive (pfile
, result
->flags
& PREV_WHITE
))
2821 if (pfile
->directive_result
.type
== CPP_PADDING
)
2823 result
= &pfile
->directive_result
;
2826 else if (pfile
->state
.in_deferred_pragma
)
2827 result
= &pfile
->directive_result
;
2828 else if (result
->type
== CPP_NAME
2829 && (result
->val
.node
.node
->flags
& NODE_MODULE
)
2830 && !pfile
->state
.skipping
2831 /* Unlike regular directives, we do not deal with
2832 tokenizing module directives as macro arguments.
2833 That's not permitted. */
2834 && !pfile
->state
.parsing_args
)
2836 /* P1857. Before macro expansion, At start of logical
2838 /* We don't have to consider lookaheads at this point. */
2839 gcc_checking_assert (!pfile
->lookaheads
);
2841 cpp_maybe_module_directive (pfile
, result
);
/* Tell the client that a new logical line starts here, unless the
   reader is skipping.  */
2844 if (pfile
->cb
.line_change
&& !pfile
->state
.skipping
)
2845 pfile
->cb
.line_change (pfile
, result
, pfile
->state
.parsing_args
);
2848 /* We don't skip tokens in directives. */
2849 if (pfile
->state
.in_directive
|| pfile
->state
.in_deferred_pragma
)
2852 /* Outside a directive, invalidate controlling macros. At file
2853 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
2854 get here and MI optimization works. */
2855 pfile
->mi_valid
= false;
2857 if (!pfile
->state
.skipping
|| result
->type
== CPP_EOF
)
2864 /* Returns true if a fresh line has been loaded.
   PFILE is the reader.  No new line is fetched while a directive is
   being processed.  When the current buffer still has text before
   rlimit, _cpp_clean_line prepares the next line.  At the end of a
   buffer that has a predecessor and is not marked return_at_eof the
   buffer is popped; otherwise the line number is advanced so that
   the EOF token sits on a line of its own.
   NOTE(review): the enclosing loop, the return statements and any
   diagnostic for a missing final newline are not visible in this
   excerpt.  */
2866 _cpp_get_fresh_line (cpp_reader
*pfile
)
2868 /* We can't get a new line until we leave the current directive. */
2869 if (pfile
->state
.in_directive
)
2874 cpp_buffer
*buffer
= pfile
->buffer
;
2876 if (!buffer
->need_line
)
/* Unread text remains in this buffer.  */
2879 if (buffer
->next_line
< buffer
->rlimit
)
2881 _cpp_clean_line (pfile
);
2885 /* First, get out of parsing arguments state. */
2886 if (pfile
->state
.parsing_args
)
2889 /* End of buffer. Non-empty files should end in a newline. */
2890 if (buffer
->buf
!= buffer
->rlimit
2891 && buffer
->next_line
> buffer
->rlimit
2892 && !buffer
->from_stage3
)
2894 /* Clip to buffer size. */
2895 buffer
->next_line
= buffer
->rlimit
;
2898 if (buffer
->prev
&& !buffer
->return_at_eof
)
2899 _cpp_pop_buffer (pfile
);
2902 /* End of translation. Do not pop the buffer yet. Increment
2903 line number so that the EOF token is on a line of its own
2904 (_cpp_lex_direct doesn't increment in that case, because
2905 it's hard for it to distinguish this special case). */
2906 CPP_INCREMENT_LINE (pfile
, 0);
/* IF_NEXT_IS (CHAR, THEN_TYPE, ELSE_TYPE): give RESULT the type
   ELSE_TYPE; if the next unread buffer character equals CHAR, consume
   it and give RESULT the type THEN_TYPE instead.  The expansion
   refers to `result' and `buffer', which must be in scope where the
   macro is used.  */
2912 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
2915 result->type = ELSE_TYPE; \
2916 if (*buffer->cur == CHAR) \
2917 buffer->cur++, result->type = THEN_TYPE; \
2921 /* Lex a token into pfile->cur_token, which is also incremented, to
   get diagnostics pointing to the correct location.

   Does not handle issues such as token lookahead, multiple-include
   optimization, directives, skipping etc.  This function is only
   suitable for use by _cpp_lex_token, and in special cases like
   lex_expansion_token which doesn't care for any of these issues.

   When meeting a newline, returns CPP_EOF if parsing a directive,
   otherwise returns to the start of the token buffer if permissible.
   Returns the location of the lexed token.

   The body first makes sure a line is available, then records the
   token's source location, and then dispatches on the first
   character to classify the token: whitespace, newline, numbers,
   string and character literals, identifiers, comments, and
   punctuators including digraphs.  Finally the token's location is
   widened to a range ending at the current buffer position.
   NOTE(review): the switch statement itself, several case labels,
   the goto labels and the return statements are not visible in this
   excerpt.  */
2933 _cpp_lex_direct (cpp_reader
*pfile
)
2937 const unsigned char *comment_start
;
2938 bool fallthrough_comment
= false;
2939 cpp_token
*result
= pfile
->cur_token
++;
2943 buffer
= pfile
->buffer
;
/* A new line has to be loaded before any token can be lexed.  */
2944 if (buffer
->need_line
)
2946 gcc_assert (!pfile
->state
.in_deferred_pragma
);
2947 if (!_cpp_get_fresh_line (pfile
))
2949 result
->type
= CPP_EOF
;
2950 /* Not a real EOF in a directive or arg parsing -- we refuse
2951 to advance to the next file now, and will once we're out
2953 if (!pfile
->state
.in_directive
&& !pfile
->state
.parsing_args
)
2955 /* Tell the compiler the line number of the EOF token. */
2956 result
->src_loc
= pfile
->line_table
->highest_line
;
2957 result
->flags
= BOL
;
2958 /* Now pop the buffer that _cpp_get_fresh_line did not. */
2959 _cpp_pop_buffer (pfile
);
/* A change of buffer cancels any pending fallthrough comment.  */
2963 if (buffer
!= pfile
->buffer
)
2964 fallthrough_comment
= false;
/* When earlier tokens need not be kept, restart at the base run.  */
2965 if (!pfile
->keep_tokens
)
2967 pfile
->cur_run
= &pfile
->base_run
;
2968 result
= pfile
->base_run
.base
;
2969 pfile
->cur_token
= result
+ 1;
2971 result
->flags
= BOL
;
2972 if (pfile
->state
.parsing_args
== 2)
2973 result
->flags
|= PREV_WHITE
;
2975 buffer
= pfile
->buffer
;
2977 result
->src_loc
= pfile
->line_table
->highest_line
;
/* Process any line notes that have been reached before reading the
   next character.  */
2980 if (buffer
->cur
>= buffer
->notes
[buffer
->cur_note
].pos
2981 && !pfile
->overlaid_buffer
)
2983 _cpp_process_line_notes (pfile
, false);
2984 result
->src_loc
= pfile
->line_table
->highest_line
;
/* A forced location overrides the position computed from the
   buffer column.  */
2988 if (pfile
->forced_token_location
)
2989 result
->src_loc
= pfile
->forced_token_location
;
2991 result
->src_loc
= linemap_position_for_column (pfile
->line_table
,
2992 CPP_BUF_COLUMN (buffer
, buffer
->cur
));
/* Horizontal whitespace and NUL: note it on the token and skip it.  */
2996 case ' ': case '\t': case '\f': case '\v': case '\0':
2997 result
->flags
|= PREV_WHITE
;
2998 skip_whitespace (pfile
, c
);
3002 /* Increment the line, unless this is the last line ... */
3003 if (buffer
->cur
< buffer
->rlimit
3004 /* ... or this is a #include, (where _cpp_stack_file needs to
3005 unwind by one line) ... */
3006 || (pfile
->state
.in_directive
> 1
3007 /* ... except traditional-cpp increments this elsewhere. */
3008 && !CPP_OPTION (pfile
, traditional
)))
3009 CPP_INCREMENT_LINE (pfile
, 0);
3010 buffer
->need_line
= true;
3011 if (pfile
->state
.in_deferred_pragma
)
3013 /* Produce the PRAGMA_EOL on this line. File reading
3014 ensures there is always a \n at end of the buffer, thus
3015 in a deferred pragma we always see CPP_PRAGMA_EOL before
3017 result
->type
= CPP_PRAGMA_EOL
;
3018 result
->flags
&= ~PREV_WHITE
;
3019 pfile
->state
.in_deferred_pragma
= false;
3020 if (!pfile
->state
.pragma_allow_expansion
)
3021 pfile
->state
.prevent_expansion
--;
3026 case '0': case '1': case '2': case '3': case '4':
3027 case '5': case '6': case '7': case '8': case '9':
3029 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
3030 result
->type
= CPP_NUMBER
;
3031 lex_number (pfile
, &result
->val
.str
, &nst
);
3032 warn_about_normalization (pfile
, result
, &nst
);
3040 /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
3041 wide strings or raw strings. */
3042 if (c
== 'L' || CPP_OPTION (pfile
, rliterals
)
3043 || (c
!= 'R' && CPP_OPTION (pfile
, uliterals
)))
3045 if ((*buffer
->cur
== '\'' && c
!= 'R')
3046 || *buffer
->cur
== '"'
3047 || (*buffer
->cur
== 'R'
3049 && buffer
->cur
[1] == '"'
3050 && CPP_OPTION (pfile
, rliterals
))
3051 || (*buffer
->cur
== '8'
3053 && ((buffer
->cur
[1] == '"' || (buffer
->cur
[1] == '\''
3054 && CPP_OPTION (pfile
, utf8_char_literals
)))
3055 || (buffer
->cur
[1] == 'R' && buffer
->cur
[2] == '"'
3056 && CPP_OPTION (pfile
, rliterals
)))))
3058 lex_string (pfile
, result
, buffer
->cur
- 1);
/* Ordinary identifier start characters.  */
3065 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
3066 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
3067 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
3068 case 's': case 't': case 'v': case 'w': case 'x':
3070 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
3071 case 'G': case 'H': case 'I': case 'J': case 'K':
3072 case 'M': case 'N': case 'O': case 'P': case 'Q':
3073 case 'S': case 'T': case 'V': case 'W': case 'X':
3075 result
->type
= CPP_NAME
;
3077 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
3078 result
->val
.node
.node
= lex_identifier (pfile
, buffer
->cur
- 1, false,
3080 &result
->val
.node
.spelling
);
3081 warn_about_normalization (pfile
, result
, &nst
);
3084 /* Convert named operators to their proper types. */
3085 if (result
->val
.node
.node
->flags
& NODE_OPERATOR
)
3087 result
->flags
|= NAMED_OP
;
3088 result
->type
= (enum cpp_ttype
) result
->val
.node
.node
->directive_index
;
3091 /* Signal FALLTHROUGH comment followed by another token. */
3092 if (fallthrough_comment
)
3093 result
->flags
|= PREV_FALLTHROUGH
;
3098 lex_string (pfile
, result
, buffer
->cur
- 1);
3102 /* A potential block or line comment. */
3103 comment_start
= buffer
->cur
;
3108 if (_cpp_skip_block_comment (pfile
))
3109 cpp_error (pfile
, CPP_DL_ERROR
, "unterminated comment");
3111 else if (c
== '/' && ! CPP_OPTION (pfile
, traditional
))
3113 /* Don't warn for system headers. */
3114 if (_cpp_in_system_header (pfile
))
3116 /* Warn about comments if pedantically GNUC89, and not
3117 in system headers. */
3118 else if (CPP_OPTION (pfile
, lang
) == CLK_GNUC89
3119 && CPP_PEDANTIC (pfile
)
3120 && ! buffer
->warned_cplusplus_comments
)
3122 if (cpp_error (pfile
, CPP_DL_PEDWARN
,
3123 "C++ style comments are not allowed in ISO C90"))
3124 cpp_error (pfile
, CPP_DL_NOTE
,
3125 "(this will be reported only once per input file)");
3126 buffer
->warned_cplusplus_comments
= 1;
3128 /* Or if specifically desired via -Wc90-c99-compat. */
3129 else if (CPP_OPTION (pfile
, cpp_warn_c90_c99_compat
) > 0
3130 && ! CPP_OPTION (pfile
, cplusplus
)
3131 && ! buffer
->warned_cplusplus_comments
)
3133 if (cpp_error (pfile
, CPP_DL_WARNING
,
3134 "C++ style comments are incompatible with C90"))
3135 cpp_error (pfile
, CPP_DL_NOTE
,
3136 "(this will be reported only once per input file)");
3137 buffer
->warned_cplusplus_comments
= 1;
3139 /* In C89/C94, C++ style comments are forbidden. */
3140 else if ((CPP_OPTION (pfile
, lang
) == CLK_STDC89
3141 || CPP_OPTION (pfile
, lang
) == CLK_STDC94
))
3143 /* But don't be confused about valid code such as
3144 - // immediately followed by *,
3145 - // in a preprocessing directive,
3146 - // in an #if 0 block. */
3147 if (buffer
->cur
[1] == '*'
3148 || pfile
->state
.in_directive
3149 || pfile
->state
.skipping
)
3151 result
->type
= CPP_DIV
;
3154 else if (! buffer
->warned_cplusplus_comments
)
3156 if (cpp_error (pfile
, CPP_DL_ERROR
,
3157 "C++ style comments are not allowed in "
3159 cpp_error (pfile
, CPP_DL_NOTE
,
3160 "(this will be reported only once per input "
3162 buffer
->warned_cplusplus_comments
= 1;
3165 if (skip_line_comment (pfile
) && CPP_OPTION (pfile
, warn_comments
))
3166 cpp_warning (pfile
, CPP_W_COMMENTS
, "multi-line comment");
3171 result
->type
= CPP_DIV_EQ
;
3176 result
->type
= CPP_DIV
;
/* A comment was skipped: remember whether it was a fallthrough
   marker and pass its text to the comment callback if one is set.  */
3180 if (fallthrough_comment_p (pfile
, comment_start
))
3181 fallthrough_comment
= true;
3183 if (pfile
->cb
.comment
)
3185 size_t len
= pfile
->buffer
->cur
- comment_start
;
3186 pfile
->cb
.comment (pfile
, result
->src_loc
, comment_start
- 1,
/* When comments are not saved the comment counts as whitespace.  */
3190 if (!pfile
->state
.save_comments
)
3192 result
->flags
|= PREV_WHITE
;
3193 goto update_tokens_line
;
3196 if (fallthrough_comment
)
3197 result
->flags
|= PREV_FALLTHROUGH
;
3199 /* Save the comment as a token in its own right. */
3200 save_comment (pfile
, result
, comment_start
, c
);
/* With angled_headers set, lex_string gets the first chance at this
   character; the code below runs if it reports a plain CPP_LESS.  */
3204 if (pfile
->state
.angled_headers
)
3206 lex_string (pfile
, result
, buffer
->cur
- 1);
3207 if (result
->type
!= CPP_LESS
)
3211 result
->type
= CPP_LESS
;
3212 if (*buffer
->cur
== '=')
3214 buffer
->cur
++, result
->type
= CPP_LESS_EQ
;
3215 if (*buffer
->cur
== '>'
3216 && CPP_OPTION (pfile
, cplusplus
)
3217 && CPP_OPTION (pfile
, lang
) >= CLK_GNUCXX20
)
3218 buffer
->cur
++, result
->type
= CPP_SPACESHIP
;
3220 else if (*buffer
->cur
== '<')
3223 IF_NEXT_IS ('=', CPP_LSHIFT_EQ
, CPP_LSHIFT
);
3225 else if (CPP_OPTION (pfile
, digraphs
))
3227 if (*buffer
->cur
== ':')
3229 /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
3230 three characters are <:: and the subsequent character
3231 is neither : nor >, the < is treated as a preprocessor
3232 token by itself". */
3233 if (CPP_OPTION (pfile
, cplusplus
)
3234 && CPP_OPTION (pfile
, lang
) != CLK_CXX98
3235 && CPP_OPTION (pfile
, lang
) != CLK_GNUCXX
3236 && buffer
->cur
[1] == ':'
3237 && buffer
->cur
[2] != ':' && buffer
->cur
[2] != '>')
3241 result
->flags
|= DIGRAPH
;
3242 result
->type
= CPP_OPEN_SQUARE
;
3244 else if (*buffer
->cur
== '%')
3247 result
->flags
|= DIGRAPH
;
3248 result
->type
= CPP_OPEN_BRACE
;
3254 result
->type
= CPP_GREATER
;
3255 if (*buffer
->cur
== '=')
3256 buffer
->cur
++, result
->type
= CPP_GREATER_EQ
;
3257 else if (*buffer
->cur
== '>')
3260 IF_NEXT_IS ('=', CPP_RSHIFT_EQ
, CPP_RSHIFT
);
3265 result
->type
= CPP_MOD
;
3266 if (*buffer
->cur
== '=')
3267 buffer
->cur
++, result
->type
= CPP_MOD_EQ
;
3268 else if (CPP_OPTION (pfile
, digraphs
))
3270 if (*buffer
->cur
== ':')
3273 result
->flags
|= DIGRAPH
;
3274 result
->type
= CPP_HASH
;
3275 if (*buffer
->cur
== '%' && buffer
->cur
[1] == ':')
3276 buffer
->cur
+= 2, result
->type
= CPP_PASTE
, result
->val
.token_no
= 0;
3278 else if (*buffer
->cur
== '>')
3281 result
->flags
|= DIGRAPH
;
3282 result
->type
= CPP_CLOSE_BRACE
;
3288 result
->type
= CPP_DOT
;
3289 if (ISDIGIT (*buffer
->cur
))
3291 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
3292 result
->type
= CPP_NUMBER
;
3293 lex_number (pfile
, &result
->val
.str
, &nst
);
3294 warn_about_normalization (pfile
, result
, &nst
);
3296 else if (*buffer
->cur
== '.' && buffer
->cur
[1] == '.')
3297 buffer
->cur
+= 2, result
->type
= CPP_ELLIPSIS
;
3298 else if (*buffer
->cur
== '*' && CPP_OPTION (pfile
, cplusplus
))
3299 buffer
->cur
++, result
->type
= CPP_DOT_STAR
;
3303 result
->type
= CPP_PLUS
;
3304 if (*buffer
->cur
== '+')
3305 buffer
->cur
++, result
->type
= CPP_PLUS_PLUS
;
3306 else if (*buffer
->cur
== '=')
3307 buffer
->cur
++, result
->type
= CPP_PLUS_EQ
;
3311 result
->type
= CPP_MINUS
;
3312 if (*buffer
->cur
== '>')
3315 result
->type
= CPP_DEREF
;
3316 if (*buffer
->cur
== '*' && CPP_OPTION (pfile
, cplusplus
))
3317 buffer
->cur
++, result
->type
= CPP_DEREF_STAR
;
3319 else if (*buffer
->cur
== '-')
3320 buffer
->cur
++, result
->type
= CPP_MINUS_MINUS
;
3321 else if (*buffer
->cur
== '=')
3322 buffer
->cur
++, result
->type
= CPP_MINUS_EQ
;
3326 result
->type
= CPP_AND
;
3327 if (*buffer
->cur
== '&')
3328 buffer
->cur
++, result
->type
= CPP_AND_AND
;
3329 else if (*buffer
->cur
== '=')
3330 buffer
->cur
++, result
->type
= CPP_AND_EQ
;
3334 result
->type
= CPP_OR
;
3335 if (*buffer
->cur
== '|')
3336 buffer
->cur
++, result
->type
= CPP_OR_OR
;
3337 else if (*buffer
->cur
== '=')
3338 buffer
->cur
++, result
->type
= CPP_OR_EQ
;
3342 result
->type
= CPP_COLON
;
3343 if (*buffer
->cur
== ':' && CPP_OPTION (pfile
, scope
))
3344 buffer
->cur
++, result
->type
= CPP_SCOPE
;
3345 else if (*buffer
->cur
== '>' && CPP_OPTION (pfile
, digraphs
))
3348 result
->flags
|= DIGRAPH
;
3349 result
->type
= CPP_CLOSE_SQUARE
;
/* One- and two-character punctuators with no further variants.  */
3353 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ
, CPP_MULT
); break;
3354 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ
, CPP_EQ
); break;
3355 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ
, CPP_NOT
); break;
3356 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ
, CPP_XOR
); break;
3357 case '#': IF_NEXT_IS ('#', CPP_PASTE
, CPP_HASH
); result
->val
.token_no
= 0; break;
3359 case '?': result
->type
= CPP_QUERY
; break;
3360 case '~': result
->type
= CPP_COMPL
; break;
3361 case ',': result
->type
= CPP_COMMA
; break;
3362 case '(': result
->type
= CPP_OPEN_PAREN
; break;
3363 case ')': result
->type
= CPP_CLOSE_PAREN
; break;
3364 case '[': result
->type
= CPP_OPEN_SQUARE
; break;
3365 case ']': result
->type
= CPP_CLOSE_SQUARE
; break;
3366 case '{': result
->type
= CPP_OPEN_BRACE
; break;
3367 case '}': result
->type
= CPP_CLOSE_BRACE
; break;
3368 case ';': result
->type
= CPP_SEMICOLON
; break;
3370 /* @ is a punctuator in Objective-C. */
3371 case '@': result
->type
= CPP_ATSIGN
; break;
/* Anything else: step back one character so BASE points at it.  */
3375 const uchar
*base
= --buffer
->cur
;
3377 /* Check for an extended identifier ($ or UCN or UTF-8). */
3378 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
3379 if (forms_identifier_p (pfile
, true, &nst
))
3381 result
->type
= CPP_NAME
;
3382 result
->val
.node
.node
= lex_identifier (pfile
, base
, true, &nst
,
3383 &result
->val
.node
.spelling
);
3384 warn_about_normalization (pfile
, result
, &nst
);
3388 /* Otherwise this will form a CPP_OTHER token. Parse valid UTF-8 as a
3391 if (c
>= utf8_signifier
)
3393 const uchar
*pstr
= base
;
3395 if (_cpp_valid_utf8 (pfile
, &pstr
, buffer
->rlimit
, 0, NULL
, &s
))
3398 create_literal (pfile
, result
, base
, buffer
->cur
- base
, CPP_OTHER
);
3404 /* Potentially convert the location of the token to a range. */
3405 if (result
->src_loc
>= RESERVED_LOCATION_COUNT
3406 && result
->type
!= CPP_EOF
)
3408 /* Ensure that any line notes are processed, so that we have the
3409 correct physical line/column for the end-point of the token even
3410 when a logical line is split via one or more backslashes. */
3411 if (buffer
->cur
>= buffer
->notes
[buffer
->cur_note
].pos
3412 && !pfile
->overlaid_buffer
)
3413 _cpp_process_line_notes (pfile
, false);
/* The range starts at the token's recorded location; the other end
   is computed from the current buffer column.  */
3415 source_range tok_range
;
3416 tok_range
.m_start
= result
->src_loc
;
3418 = linemap_position_for_column (pfile
->line_table
,
3419 CPP_BUF_COLUMN (buffer
, buffer
->cur
));
3421 result
->src_loc
= COMBINE_LOCATION_DATA (pfile
->line_table
,
3429 /* An upper bound on the number of bytes needed to spell TOKEN.
   Does not include preceding whitespace.
   The bound depends on the token's spelling category: literals use
   their stored string length, identifiers allow ten bytes for every
   byte of the node name, and every other category is budgeted six
   bytes.  */
3432 cpp_token_len (const cpp_token
*token
)
3436 switch (TOKEN_SPELL (token
))
3438 default: len
= 6; break;
3439 case SPELL_LITERAL
: len
= token
->val
.str
.len
; break;
/* The factor of ten matches the size of the escape that utf8_to_ucn
   writes for an extended character.  */
3440 case SPELL_IDENT
: len
= NODE_LEN (token
->val
.node
.node
) * 10; break;
3446 /* Parse UTF-8 out of NAMEP and place a \U escape in BUFFER.
   Return the number of bytes read out of NAME.  (There are always
   10 bytes written to BUFFER.)
   BUFFER receives the escape and NAME points at the first byte of
   the UTF-8 sequence.  The code point is accumulated in UTF32 and
   emitted as eight lower-case hexadecimal digits.
   NOTE(review): the local declarations, the store of the escape
   prefix, the action taken on ill-formed input and the return
   statement are not visible in this excerpt.  */
3451 utf8_to_ucn (unsigned char *buffer
, const unsigned char *name
)
3457 unsigned long utf32
;
3459 /* Compute the length of the UTF-8 sequence. */
3460 for (t
= *name
; t
& 0x80; t
<<= 1)
/* Keep only the payload bits of the first byte.  */
3463 utf32
= *name
& (0x7F >> ucn_len
);
/* Each following byte contributes its low six bits.  */
3464 for (ucn_len_c
= 1; ucn_len_c
< ucn_len
; ucn_len_c
++)
3466 utf32
= (utf32
<< 6) | (*++name
& 0x3F);
3468 /* Ill-formed UTF-8. */
3469 if ((*name
& ~0x3F) != 0x80)
/* Emit the value as eight hex digits, most significant first.  */
3475 for (j
= 7; j
>= 0; j
--)
3476 *buffer
++ = "0123456789abcdef"[(utf32
>> (4 * j
)) & 0xF];
3480 /* Given a token TYPE corresponding to a digraph, return a pointer to
   the spelling of the digraph.
   The spelling is looked up in digraph_spellings at the offset of
   TYPE from CPP_FIRST_DIGRAPH; no range check on TYPE is made here.  */
3482 static const unsigned char *
3483 cpp_digraph2name (enum cpp_ttype type
)
3485 return digraph_spellings
[(int) type
- (int) CPP_FIRST_DIGRAPH
];
3488 /* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
   The buffer must already contain enough space to hold the
   token's spelling.  Returns a pointer to the character after the
   last character written.
   Bytes of the identifier's name that have bits set above 0x7F are
   converted with utf8_to_ucn; all other bytes are copied unchanged.
   NOTE(review): the advance of BUFFER after a converted sequence and
   the return statement are not visible in this excerpt.  */
3493 _cpp_spell_ident_ucns (unsigned char *buffer
, cpp_hashnode
*ident
)
3496 const unsigned char *name
= NODE_NAME (ident
);
3498 for (i
= 0; i
< NODE_LEN (ident
); i
++)
3499 if (name
[i
] & ~0x7F)
/* Skip the rest of the sequence; the loop adds the final one.  */
3501 i
+= utf8_to_ucn (buffer
, name
+ i
) - 1;
3505 *buffer
++ = name
[i
];
3510 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
   already contain enough space to hold the token's spelling.
   Returns a pointer to the character after the last character written.
   FORSTRING is true if this is to be the spelling after translation
   phase 1 (with the original spelling of extended identifiers), false
   if extended identifiers should always be written using UCNs (there is
   no option for always writing them in the internal UTF-8 form).
   FIXME: Would be nice if we didn't need the PFILE argument.
   PFILE is used here only to report an unspellable token.
   NOTE(review): several case labels, the named-operator branch and
   the return statement are not visible in this excerpt.  */
3519 cpp_spell_token (cpp_reader
*pfile
, const cpp_token
*token
,
3520 unsigned char *buffer
, bool forstring
)
3522 switch (TOKEN_SPELL (token
))
3524 case SPELL_OPERATOR
:
3526 const unsigned char *spelling
;
/* Digraph tokens are spelled from the digraph table rather than
   from the ordinary token name.  */
3529 if (token
->flags
& DIGRAPH
)
3530 spelling
= cpp_digraph2name (token
->type
);
3531 else if (token
->flags
& NAMED_OP
)
3534 spelling
= TOKEN_NAME (token
);
3536 while ((c
= *spelling
++) != '\0')
/* Copy the identifier's recorded spelling node verbatim.  */
3545 memcpy (buffer
, NODE_NAME (token
->val
.node
.spelling
),
3546 NODE_LEN (token
->val
.node
.spelling
));
3547 buffer
+= NODE_LEN (token
->val
.node
.spelling
);
/* Spell the identifier with UCN escapes.  */
3550 buffer
= _cpp_spell_ident_ucns (buffer
, token
->val
.node
.node
);
/* Literal text is copied byte for byte.  */
3554 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
3555 buffer
+= token
->val
.str
.len
;
3559 cpp_error (pfile
, CPP_DL_ICE
,
3560 "unspellable token %s", TOKEN_NAME (token
));
3567 /* Returns TOKEN spelt as a null-terminated string.  The string is
   freed when the reader is destroyed.  Useful for diagnostics.
   The buffer is obtained from _cpp_unaligned_alloc and sized from
   cpp_token_len plus one byte; the spelling is written by
   cpp_spell_token with FORSTRING false.
   NOTE(review): the store of the terminator and the return statement
   are not visible in this excerpt.  */
3570 cpp_token_as_text (cpp_reader
*pfile
, const cpp_token
*token
)
3572 unsigned int len
= cpp_token_len (token
) + 1;
3573 unsigned char *start
= _cpp_unaligned_alloc (pfile
, len
), *end
;
3575 end
= cpp_spell_token (pfile
, token
, start
, false);
3581 /* Returns a pointer to a string which spells the token defined by
   TYPE and FLAGS.  Used by C front ends, which really should move to
   using cpp_token_as_text.
   FLAGS is checked for DIGRAPH first and then for NAMED_OP; when
   neither is set, the name comes from the token_spellings table.  */
3585 cpp_type2name (enum cpp_ttype type
, unsigned char flags
)
3587 if (flags
& DIGRAPH
)
3588 return (const char *) cpp_digraph2name (type
);
3589 else if (flags
& NAMED_OP
)
3590 return cpp_named_operator2name (type
);
3592 return (const char *) token_spellings
[type
].name
;
3595 /* Writes the spelling of token to FP, without any preceding space.
   Separated from cpp_spell_token for efficiency - to avoid stdio
   double-buffering.
   TOKEN is the token to print and FP the destination stream.
   Identifier bytes with bits set above 0x7F are written as 10-byte
   escapes produced by utf8_to_ucn; literal text is written with
   fwrite.
   NOTE(review): several case labels, the character output inside the
   operator loop and the extra output for CPP_HEADER_NAME tokens are
   not visible in this excerpt.  */
3599 cpp_output_token (const cpp_token
*token
, FILE *fp
)
3601 switch (TOKEN_SPELL (token
))
3603 case SPELL_OPERATOR
:
3605 const unsigned char *spelling
;
/* Digraph tokens are spelled from the digraph table rather than
   from the ordinary token name.  */
3608 if (token
->flags
& DIGRAPH
)
3609 spelling
= cpp_digraph2name (token
->type
);
3610 else if (token
->flags
& NAMED_OP
)
3613 spelling
= TOKEN_NAME (token
);
3618 while ((c
= *++spelling
) != '\0');
3626 const unsigned char * name
= NODE_NAME (token
->val
.node
.node
);
3628 for (i
= 0; i
< NODE_LEN (token
->val
.node
.node
); i
++)
3629 if (name
[i
] & ~0x7F)
3631 unsigned char buffer
[10];
/* Skip the rest of the sequence; the loop adds the final one.  */
3632 i
+= utf8_to_ucn (buffer
, name
+ i
) - 1;
3633 fwrite (buffer
, 1, 10, fp
);
3636 fputc (NODE_NAME (token
->val
.node
.node
)[i
], fp
);
3641 if (token
->type
== CPP_HEADER_NAME
)
3643 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
3644 if (token
->type
== CPP_HEADER_NAME
)
3649 /* An error, most probably. */
3654 /* Compare two tokens.
   A and B are compared only when both their types and their flags
   match; the remaining test depends on the spelling category of A.
   Operators match unless they are CPP_PASTE tokens with different
   token_no values, macro arguments must agree in argument number and
   spelling, identifiers must share the same node and spelling
   pointers, and literals must have equal lengths and pass the memcmp
   comparison of their text.
   NOTE(review): several case labels, the end of the memcmp call and
   the result for mismatched type or flags are not visible in this
   excerpt.  */
3656 _cpp_equiv_tokens (const cpp_token
*a
, const cpp_token
*b
)
3658 if (a
->type
== b
->type
&& a
->flags
== b
->flags
)
3659 switch (TOKEN_SPELL (a
))
3661 default: /* Keep compiler happy. */
3662 case SPELL_OPERATOR
:
3663 /* token_no is used to track where multiple consecutive ##
3664 tokens were originally located. */
3665 return (a
->type
!= CPP_PASTE
|| a
->val
.token_no
== b
->val
.token_no
);
3667 return (a
->type
!= CPP_MACRO_ARG
3668 || (a
->val
.macro_arg
.arg_no
== b
->val
.macro_arg
.arg_no
3669 && a
->val
.macro_arg
.spelling
== b
->val
.macro_arg
.spelling
));
/* Identifiers are compared by pointer, not by text.  */
3671 return (a
->val
.node
.node
== b
->val
.node
.node
3672 && a
->val
.node
.spelling
== b
->val
.node
.spelling
);
3674 return (a
->val
.str
.len
== b
->val
.str
.len
3675 && !memcmp (a
->val
.str
.text
, b
->val
.str
.text
,
3682 /* Returns nonzero if a space should be inserted to avoid an
   accidental token paste for output.  For simplicity, it is
   conservative, and occasionally advises a space where one is not
   needed, e.g. "." and ".2".
   PFILE supplies language options.  TOKEN1 is the token already
   written and TOKEN2 the one about to follow it.  A is the type of
   TOKEN1 and B the type of TOKEN2; C is the first character of the
   spelling of TOKEN2 when that token is a digraph or an operator.
   NOTE(review): the assignments made for NAMED_OP tokens, the
   initial value of C, the switch header and some case labels are not
   visible in this excerpt.  */
3687 cpp_avoid_paste (cpp_reader
*pfile
, const cpp_token
*token1
,
3688 const cpp_token
*token2
)
3690 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
3693 if (token1
->flags
& NAMED_OP
)
3695 if (token2
->flags
& NAMED_OP
)
/* Pick up the first character of the spelling of TOKEN2.  */
3699 if (token2
->flags
& DIGRAPH
)
3700 c
= digraph_spellings
[(int) b
- (int) CPP_FIRST_DIGRAPH
][0];
3701 else if (token_spellings
[b
].category
== SPELL_OPERATOR
)
3702 c
= token_spellings
[b
].name
[0];
3704 /* Quickly get everything that can paste with an '='. */
3705 if ((int) a
<= (int) CPP_LAST_EQ
&& c
== '=')
/* Each case lists the first characters that would combine with the
   spelling of TOKEN1 into a longer token.  */
3710 case CPP_GREATER
: return c
== '>';
3711 case CPP_LESS
: return c
== '<' || c
== '%' || c
== ':';
3712 case CPP_PLUS
: return c
== '+';
3713 case CPP_MINUS
: return c
== '-' || c
== '>';
3714 case CPP_DIV
: return c
== '/' || c
== '*'; /* Comments. */
/* A ':' or '>' after '%' would spell one of the digraphs listed in
   digraph_spellings.  */
3715 case CPP_MOD
: return c
== ':' || c
== '>';
3716 case CPP_AND
: return c
== '&';
3717 case CPP_OR
: return c
== '|';
3718 case CPP_COLON
: return c
== ':' || c
== '>';
3719 case CPP_DEREF
: return c
== '*';
3720 case CPP_DOT
: return c
== '.' || c
== '%' || b
== CPP_NUMBER
;
3721 case CPP_HASH
: return c
== '#' || c
== '%'; /* Digraph form. */
3723 case CPP_NAME
: return ((b
== CPP_NUMBER
3724 && name_p (pfile
, &token2
->val
.str
))
3726 || b
== CPP_CHAR
|| b
== CPP_STRING
); /* L */
3727 case CPP_NUMBER
: return (b
== CPP_NUMBER
|| b
== CPP_NAME
3729 || c
== '.' || c
== '+' || c
== '-');
3731 case CPP_OTHER
: return ((token1
->val
.str
.text
[0] == '\\'
3733 || (CPP_OPTION (pfile
, objc
)
3734 && token1
->val
.str
.text
[0] == '@'
3735 && (b
== CPP_NAME
|| b
== CPP_STRING
)));
3736 case CPP_LESS_EQ
: return c
== '>';
3739 case CPP_UTF8STRING
:
3741 case CPP_STRING32
: return (CPP_OPTION (pfile
, user_literals
)
3743 || (TOKEN_SPELL (token2
) == SPELL_LITERAL
3744 && ISIDST (token2
->val
.str
.text
[0]))));
3752 /* Output all the remaining tokens on the current line, and a newline
3753 character, to FP. Leading whitespace is removed. If there are
3754 macros, special token padding is not performed. */
3756 cpp_output_line (cpp_reader
*pfile
, FILE *fp
)
3758 const cpp_token
*token
;
3760 token
= cpp_get_token (pfile
);
3761 while (token
->type
!= CPP_EOF
)
3763 cpp_output_token (token
, fp
);
3764 token
= cpp_get_token (pfile
);
3765 if (token
->flags
& PREV_WHITE
)
3772 /* Return a string representation of all the remaining tokens on the
3773 current line. The result is allocated using xmalloc and must be
3774 freed by the caller. */
3776 cpp_output_line_to_string (cpp_reader
*pfile
, const unsigned char *dir_name
)
3778 const cpp_token
*token
;
3779 unsigned int out
= dir_name
? ustrlen (dir_name
) : 0;
3780 unsigned int alloced
= 120 + out
;
3781 unsigned char *result
= (unsigned char *) xmalloc (alloced
);
3783 /* If DIR_NAME is empty, there are no initial contents. */
3786 sprintf ((char *) result
, "#%s ", dir_name
);
3790 token
= cpp_get_token (pfile
);
3791 while (token
->type
!= CPP_EOF
)
3793 unsigned char *last
;
3794 /* Include room for a possible space and the terminating nul. */
3795 unsigned int len
= cpp_token_len (token
) + 2;
3797 if (out
+ len
> alloced
)
3800 if (out
+ len
> alloced
)
3801 alloced
= out
+ len
;
3802 result
= (unsigned char *) xrealloc (result
, alloced
);
3805 last
= cpp_spell_token (pfile
, token
, &result
[out
], 0);
3806 out
= last
- result
;
3808 token
= cpp_get_token (pfile
);
3809 if (token
->flags
& PREV_WHITE
)
3810 result
[out
++] = ' ';
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).  */

/* Smallest buffer that is ever allocated.  */
#define MIN_BUFF_SIZE 8000

/* Largest free buffer that is still considered a reasonable match
   for a request of MIN_SIZE bytes.  */
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)

/* Size to request when extending BUFF: MIN_EXTRA plus twice the room
   still uncommitted in BUFF.  MIN_EXTRA is parenthesized so that any
   expression may be passed as the argument.  */
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
3832 /* Create a new allocation buffer. Place the control block at the end
3833 of the buffer, so that buffer overflows will cause immediate chaos. */
3835 new_buff (size_t len
)
3838 unsigned char *base
;
3840 if (len
< MIN_BUFF_SIZE
)
3841 len
= MIN_BUFF_SIZE
;
3842 len
= CPP_ALIGN (len
);
3844 #ifdef ENABLE_VALGRIND_ANNOTATIONS
3845 /* Valgrind warns about uses of interior pointers, so put _cpp_buff
3847 size_t slen
= CPP_ALIGN2 (sizeof (_cpp_buff
), 2 * DEFAULT_ALIGNMENT
);
3848 base
= XNEWVEC (unsigned char, len
+ slen
);
3849 result
= (_cpp_buff
*) base
;
3852 base
= XNEWVEC (unsigned char, len
+ sizeof (_cpp_buff
));
3853 result
= (_cpp_buff
*) (base
+ len
);
3855 result
->base
= base
;
3857 result
->limit
= base
+ len
;
3858 result
->next
= NULL
;
3862 /* Place a chain of unwanted allocation buffers on the free list. */
3864 _cpp_release_buff (cpp_reader
*pfile
, _cpp_buff
*buff
)
3866 _cpp_buff
*end
= buff
;
3870 end
->next
= pfile
->free_buffs
;
3871 pfile
->free_buffs
= buff
;
3874 /* Return a free buffer of size at least MIN_SIZE. */
3876 _cpp_get_buff (cpp_reader
*pfile
, size_t min_size
)
3878 _cpp_buff
*result
, **p
;
3880 for (p
= &pfile
->free_buffs
;; p
= &(*p
)->next
)
3885 return new_buff (min_size
);
3887 size
= result
->limit
- result
->base
;
3888 /* Return a buffer that's big enough, but don't waste one that's
3890 if (size
>= min_size
&& size
<= BUFF_SIZE_UPPER_BOUND (min_size
))
3895 result
->next
= NULL
;
3896 result
->cur
= result
->base
;
3900 /* Creates a new buffer with enough space to hold the uncommitted
3901 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
3902 the excess bytes to the new buffer. Chains the new buffer after
3903 BUFF, and returns the new buffer. */
3905 _cpp_append_extend_buff (cpp_reader
*pfile
, _cpp_buff
*buff
, size_t min_extra
)
3907 size_t size
= EXTENDED_BUFF_SIZE (buff
, min_extra
);
3908 _cpp_buff
*new_buff
= _cpp_get_buff (pfile
, size
);
3910 buff
->next
= new_buff
;
3911 memcpy (new_buff
->base
, buff
->cur
, BUFF_ROOM (buff
));
3915 /* Creates a new buffer with enough space to hold the uncommitted
3916 remaining bytes of the buffer pointed to by BUFF, and at least
3917 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
3918 Chains the new buffer before the buffer pointed to by BUFF, and
3919 updates the pointer to point to the new buffer. */
3921 _cpp_extend_buff (cpp_reader
*pfile
, _cpp_buff
**pbuff
, size_t min_extra
)
3923 _cpp_buff
*new_buff
, *old_buff
= *pbuff
;
3924 size_t size
= EXTENDED_BUFF_SIZE (old_buff
, min_extra
);
3926 new_buff
= _cpp_get_buff (pfile
, size
);
3927 memcpy (new_buff
->base
, old_buff
->cur
, BUFF_ROOM (old_buff
));
3928 new_buff
->next
= old_buff
;
3932 /* Free a chain of buffers starting at BUFF. */
3934 _cpp_free_buff (_cpp_buff
*buff
)
3938 for (; buff
; buff
= next
)
3941 #ifdef ENABLE_VALGRIND_ANNOTATIONS
3949 /* Allocate permanent, unaligned storage of length LEN. */
3951 _cpp_unaligned_alloc (cpp_reader
*pfile
, size_t len
)
3953 _cpp_buff
*buff
= pfile
->u_buff
;
3954 unsigned char *result
= buff
->cur
;
3956 if (len
> (size_t) (buff
->limit
- result
))
3958 buff
= _cpp_get_buff (pfile
, len
);
3959 buff
->next
= pfile
->u_buff
;
3960 pfile
->u_buff
= buff
;
3964 buff
->cur
= result
+ len
;
3968 /* Allocate permanent, unaligned storage of length LEN from a_buff.
3969 That buffer is used for growing allocations when saving macro
3970 replacement lists in a #define, and when parsing an answer to an
3971 assertion in #assert, #unassert or #if (and therefore possibly
3972 whilst expanding macros). It therefore must not be used by any
3973 code that they might call: specifically the lexer and the guts of
3976 All existing other uses clearly fit this restriction: storing
3977 registered pragmas during initialization. */
3979 _cpp_aligned_alloc (cpp_reader
*pfile
, size_t len
)
3981 _cpp_buff
*buff
= pfile
->a_buff
;
3982 unsigned char *result
= buff
->cur
;
3984 if (len
> (size_t) (buff
->limit
- result
))
3986 buff
= _cpp_get_buff (pfile
, len
);
3987 buff
->next
= pfile
->a_buff
;
3988 pfile
->a_buff
= buff
;
3992 buff
->cur
= result
+ len
;
3996 /* Commit or allocate storage from a buffer. */
3999 _cpp_commit_buff (cpp_reader
*pfile
, size_t size
)
4001 void *ptr
= BUFF_FRONT (pfile
->a_buff
);
4003 if (pfile
->hash_table
->alloc_subobject
)
4005 void *copy
= pfile
->hash_table
->alloc_subobject (size
);
4006 memcpy (copy
, ptr
, size
);
4010 BUFF_FRONT (pfile
->a_buff
) += size
;
4015 /* Say which field of TOK is in use. */
4017 enum cpp_token_fld_kind
4018 cpp_token_val_index (const cpp_token
*tok
)
4020 switch (TOKEN_SPELL (tok
))
4023 return CPP_TOKEN_FLD_NODE
;
4025 return CPP_TOKEN_FLD_STR
;
4026 case SPELL_OPERATOR
:
4027 /* Operands which were originally spelled as ident keep around
4028 the node for the exact spelling. */
4029 if (tok
->flags
& NAMED_OP
)
4030 return CPP_TOKEN_FLD_NODE
;
4031 else if (tok
->type
== CPP_PASTE
)
4032 return CPP_TOKEN_FLD_TOKEN_NO
;
4034 return CPP_TOKEN_FLD_NONE
;
4036 if (tok
->type
== CPP_MACRO_ARG
)
4037 return CPP_TOKEN_FLD_ARG_NO
;
4038 else if (tok
->type
== CPP_PADDING
)
4039 return CPP_TOKEN_FLD_SOURCE
;
4040 else if (tok
->type
== CPP_PRAGMA
)
4041 return CPP_TOKEN_FLD_PRAGMA
;
4044 return CPP_TOKEN_FLD_NONE
;
4048 /* All tokens lexed in R after calling this function will be forced to
4049 have their location_t to be P, until
4050 cpp_stop_forcing_token_locations is called for R. */
4053 cpp_force_token_locations (cpp_reader
*r
, location_t loc
)
4055 r
->forced_token_location
= loc
;
4058 /* Go back to assigning locations naturally for lexed tokens. */
4061 cpp_stop_forcing_token_locations (cpp_reader
*r
)
4063 r
->forced_token_location
= 0;
/* We're looking at \, if it's escaping EOL, look past it.  If at
   LIMIT, don't advance.

   PEEK points at a backslash.  A following "\n", "\r" or "\r\n" is
   treated as a line splice.  When the character after the splice is
   still below LIMIT, a pointer to it is returned, and if that
   character is itself a backslash the test is repeated.  In every
   other case PEEK is returned unchanged.  */

static const unsigned char *
do_peek_backslash (const unsigned char *peek, const unsigned char *limit)
{
  const unsigned char *probe = peek;

  if (__builtin_expect (peek[1] == '\n', true))
    {
    eol:
      /* Step over the backslash and the final end-of-line character.  */
      probe += 2;
      if (__builtin_expect (probe < limit, true))
        {
          peek = probe;
          if (*peek == '\\')
            /* The user might be perverse.  */
            return do_peek_backslash (peek, limit);
        }
    }
  else if (__builtin_expect (peek[1] == '\r', false))
    {
      /* For "\r\n" one extra character has to be skipped.  */
      if (probe[2] == '\n')
        probe++;
      goto eol;
    }

  return peek;
}
/* Return a pointer to the next character to examine at PEEK.  If PEEK
   points at a backslash, any line splice it introduces is skipped via
   do_peek_backslash (bounded by LIMIT); otherwise PEEK is returned
   unchanged.  */

static const unsigned char *
do_peek_next (const unsigned char *peek, const unsigned char *limit)
{
  if (__builtin_expect (*peek == '\\', false))
    peek = do_peek_backslash (peek, limit);
  return peek;
}
/* Step one character backwards from PEEK, never going below BOUND.

   Returns NULL when PEEK is already at BOUND.  Otherwise returns a
   pointer to the preceding character, except that a line splice
   (backslash followed by "\n", "\r" or "\r\n") is skipped and the
   search continues from the backslash.  An end-of-line character
   that is not preceded by a backslash is returned as is.  */

static const unsigned char *
do_peek_prev (const unsigned char *peek, const unsigned char *bound)
{
  if (peek == bound)
    return NULL;

  unsigned char c = *--peek;
  /* The second test must be the carriage-return character; comparing
     against the letter 'r' would treat a backslash followed by 'r' as
     a line splice and would miss a bare carriage return.  */
  if (__builtin_expect (c == '\n', false)
      || __builtin_expect (c == '\r', false))
    {
      if (peek == bound)
        return peek;

      /* IX indexes the character expected to be the backslash.  */
      int ix = -1;
      if (c == '\n' && peek[ix] == '\r')
        {
          /* "\r\n": look one character further back, if there is one.  */
          if (peek + ix == bound)
            return peek;
          ix--;
        }

      if (peek[ix] == '\\')
        return do_peek_prev (peek + ix, bound);
    }

  return peek;
}
/* If PEEK[-1] is identifier MATCH, scan past it and trailing white
   space.  Otherwise return NULL.

   The first character of MATCH is not compared here: the loop starts
   at MATCH[1] against PEEK[0].  Line splices inside the identifier
   and in the following blank space are skipped, bounded by LIMIT.
   On success the result points at the first character that is
   neither a space nor a tab.  */

static const unsigned char *
do_peek_ident (const char *match, const unsigned char *peek,
               const unsigned char *limit)
{
  for (; *++match; peek++)
    if (*peek != *match)
      {
        /* A mismatch may only be a line splice in the middle of the
           identifier; retry once after skipping it.  */
        peek = do_peek_next (peek, limit);
        if (*peek != *match)
          return NULL;
      }

  /* Must now not be looking at an identifier char.  */
  peek = do_peek_next (peek, limit);
  if (ISIDNUM (*peek))
    return NULL;

  /* Skip control-line whitespace.  */
 ws:
  while (*peek == ' ' || *peek == '\t')
    peek++;
  if (__builtin_expect (*peek == '\\', false))
    {
      peek = do_peek_backslash (peek, limit);
      /* If the splice was consumed there may be more blanks after it.  */
      if (*peek != '\\')
        goto ws;
    }

  return peek;
}
4167 /* Are we looking at a module control line starting as PEEK - 1? */
4170 do_peek_module (cpp_reader
*pfile
, unsigned char c
,
4171 const unsigned char *peek
, const unsigned char *limit
)
4173 bool import
= false;
4175 if (__builtin_expect (c
== 'e', false))
4177 if (!((peek
[0] == 'x' || peek
[0] == '\\')
4178 && (peek
= do_peek_ident ("export", peek
, limit
))))
4181 /* export, peek for import or module. No need to peek __import
4185 if (!((peek
[1] == 'm' || peek
[1] == '\\')
4186 && (peek
= do_peek_ident ("import", peek
+ 1, limit
))))
4190 else if (peek
[0] == 'm')
4192 if (!((peek
[1] == 'o' || peek
[1] == '\\')
4193 && (peek
= do_peek_ident ("module", peek
+ 1, limit
))))
4199 else if (__builtin_expect (c
== 'i', false))
4201 if (!((peek
[0] == 'm' || peek
[0] == '\\')
4202 && (peek
= do_peek_ident ("import", peek
, limit
))))
4206 else if (__builtin_expect (c
== '_', false))
4208 /* Needed for translated includes. */
4209 if (!((peek
[0] == '_' || peek
[0] == '\\')
4210 && (peek
= do_peek_ident ("__import", peek
, limit
))))
4214 else if (__builtin_expect (c
== 'm', false))
4216 if (!((peek
[0] == 'o' || peek
[0] == '\\')
4217 && (peek
= do_peek_ident ("module", peek
, limit
))))
4223 /* Peek the next character to see if it's good enough. We'll be at
4224 the first non-whitespace char, including skipping an escaped
4226 /* ... import followed by identifier, ':', '<' or header-name
4227 preprocessing tokens, or module followed by identifier, ':' or
4228 ';' preprocessing tokens. */
4229 unsigned char p
= *peek
++;
4231 /* A character literal is ... single quotes, ... optionally preceded
4232 by u8, u, U, or L */
4233 /* A string-literal is a ... double quotes, optionally prefixed by
4234 R, u8, u8R, u, uR, U, UR, L, or LR */
4237 peek
= do_peek_next (peek
, limit
);
4245 else if (p
== 'U' || p
== 'L')
4248 peek
= do_peek_next (peek
, limit
);
4250 if (*peek
== '\"' || *peek
== '\'')
4255 /* Identifier. Ok. */
4260 if (CPP_OPTION (pfile
, rliterals
))
4262 peek
= do_peek_next (peek
, limit
);
4266 /* Identifier. Ok. */
4268 else if ('Z' - 'A' == 25
4269 ? ((p
>= 'A' && p
<= 'Z') || (p
>= 'a' && p
<= 'z') || p
== '_')
4272 /* Identifier. Ok. */
4276 /* Maybe angle header, ok for import. Reject
4277 '<=', '<<' digraph:'<:'. */
4280 peek
= do_peek_next (peek
, limit
);
4281 if (*peek
== '=' || *peek
== '<'
4282 || (*peek
== ':' && CPP_OPTION (pfile
, digraphs
)))
4287 /* SEMICOLON, ok for module. */
4293 /* STRING, ok for import. */
4299 /* Maybe COLON, ok. Reject '::', digraph:':>'. */
4300 peek
= do_peek_next (peek
, limit
);
4301 if (*peek
== ':' || (*peek
== '>' && CPP_OPTION (pfile
, digraphs
)))
4305 /* FIXME: Detect a unicode character, excluding those not
4306 permitted as the initial character. [lex.name]/1. I presume
4307 we need to check the \[uU] spellings, and directly using
4308 Unicode in say UTF8 form? Or perhaps we do the phase-1
4309 conversion of UTF8 to universal-character-names? */
4315 /* Directives-only scanning. Somewhat more relaxed than correct
4316 parsing -- some ill-formed programs will not be rejected. */
4319 cpp_directive_only_process (cpp_reader
*pfile
,
4321 void (*cb
) (cpp_reader
*, CPP_DO_task
, void *, ...))
4323 bool module_p
= CPP_OPTION (pfile
, module_directives
);
4328 /* Buffer initialization, but no line cleaning. */
4329 cpp_buffer
*buffer
= pfile
->buffer
;
4330 buffer
->cur_note
= buffer
->notes_used
= 0;
4331 buffer
->cur
= buffer
->line_base
= buffer
->next_line
;
4332 buffer
->need_line
= false;
4333 /* Files always end in a newline or carriage return. We rely on this for
4334 character peeking safety. */
4335 gcc_assert (buffer
->rlimit
[0] == '\n' || buffer
->rlimit
[0] == '\r');
4337 const unsigned char *base
= buffer
->cur
;
4338 unsigned line_count
= 0;
4339 const unsigned char *line_start
= base
;
4344 const unsigned char *lwm
= base
;
4345 for (const unsigned char *pos
= base
, *limit
= buffer
->rlimit
;
4348 unsigned char c
= *pos
++;
4349 /* This matches the switch in _cpp_lex_direct. */
4352 case ' ': case '\t': case '\f': case '\v':
4353 /* Whitespace, do nothing. */
4356 case '\r': /* MAC line ending, or Windows \r\n */
4365 CPP_INCREMENT_LINE (pfile
, 0);
4371 /* <backslash><newline> is removed, and doesn't undo any
4372 preceding escape or whatnot. */
4378 else if (*pos
== '\r')
4390 /* Line directive. */
4391 if (pos
- 1 > base
&& !pfile
->state
.skipping
)
4392 cb (pfile
, CPP_DO_print
, data
,
4393 line_count
, base
, pos
- 1 - base
);
4395 /* Prep things for directive handling. */
4396 buffer
->next_line
= pos
;
4397 buffer
->need_line
= true;
4398 bool ok
= _cpp_get_fresh_line (pfile
);
4399 gcc_checking_assert (ok
);
4401 /* Ensure proper column numbering for generated
4403 buffer
->line_base
-= pos
- line_start
;
4405 _cpp_handle_directive (pfile
, line_start
+ 1 != pos
);
4407 /* Sanitize the line settings. Duplicate #include's can
4409 // FIXME: Necessary?
4410 pfile
->line_table
->highest_location
4411 = pfile
->line_table
->highest_line
;
4413 if (!pfile
->state
.skipping
4414 && pfile
->buffer
->next_line
< pfile
->buffer
->rlimit
)
4415 cb (pfile
, CPP_DO_location
, data
,
4416 pfile
->line_table
->highest_line
);
4424 const unsigned char *peek
= do_peek_next (pos
, limit
);
4425 if (!(*peek
== '/' || *peek
== '*'))
4428 /* Line or block comment */
4429 bool is_block
= *peek
== '*';
4433 = linemap_position_for_column (pfile
->line_table
,
4452 CPP_INCREMENT_LINE (pfile
, 0);
4455 if (!esc
&& !is_block
)
4467 if (pos
> peek
&& !esc
)
4483 if (pos
< limit
|| is_block
)
4484 cpp_error_with_line (pfile
, CPP_DL_ERROR
, sloc
, 0,
4485 "unterminated comment");
4492 if (!CPP_OPTION (pfile
, digit_separators
))
4493 goto delimited_string
;
4495 /* Possibly a number punctuator. */
4496 if (!ISIDNUM (*do_peek_next (pos
, limit
)))
4497 goto delimited_string
;
4502 if (!CPP_OPTION (pfile
, rliterals
))
4503 goto delimited_string
;
4507 /* For ' see if it's a number punctuator
4508 \.?<digit>(<digit>|<identifier-nondigit>
4509 |'<digit>|'<nondigit>|[eEpP]<sign>|\.)* */
4510 /* For " see if it's a raw string
4511 {U,L,u,u8}R. This includes CPP_NUMBER detection,
4512 because that could be 0e+R. */
4513 const unsigned char *peek
= pos
- 1;
4514 bool quote_first
= c
== '"';
4515 bool quote_eight
= false;
4516 bool maybe_number_start
= false;
4517 bool want_number
= false;
4519 while ((peek
= do_peek_prev (peek
, lwm
)))
4521 unsigned char p
= *peek
;
4532 quote_first
= false;
4533 if (p
== 'L' || p
== 'U' || p
== 'u')
4540 else if (quote_eight
)
4547 quote_eight
= false;
4552 if (!want_number
&& ISIDNUM (p
))
4560 maybe_number_start
= true;
4563 else if (ISIDNUM (p
))
4564 maybe_number_start
= false;
4565 else if (p
== '+' || p
== '-')
4567 if (const unsigned char *peek_prev
4568 = do_peek_prev (peek
, lwm
))
4571 if (p
== 'e' || p
== 'E'
4572 || p
== 'p' || p
== 'P')
4575 maybe_number_start
= false;
4583 else if (p
== '\'' || p
== '\"')
4585 /* If this is lwm, this must be the end of a
4586 previous string. So this is a trailing
4587 literal type, (a) if those are allowed,
4588 and (b) maybe_start is false. Otherwise
4589 this must be a CPP_NUMBER because we've
4590 met another ', and we'd have checked that
4591 in its own right. */
4592 if (peek
== lwm
&& CPP_OPTION (pfile
, uliterals
))
4594 if (!maybe_number_start
&& !want_number
)
4595 /* Must be a literal type. */
4599 && CPP_OPTION (pfile
, digit_separators
))
4600 maybe_number_start
= true;
4605 else if (!quote_first
&& !quote_eight
)
4609 if (maybe_number_start
)
4617 goto delimited_string
;
4622 /* (Possibly raw) string or char literal. */
4623 unsigned char end
= c
;
4625 const unsigned char *delim
= NULL
;
4626 location_t sloc
= linemap_position_for_column (pfile
->line_table
,
4632 /* There can be no line breaks in the delimiter. */
4634 for (delim_len
= 0; (c
= *pos
++) != '('; delim_len
++)
4636 if (delim_len
== 16)
4638 cpp_error_with_line (pfile
, CPP_DL_ERROR
,
4640 "raw string delimiter"
4648 if (strchr (") \\\t\v\f\n", c
))
4650 cpp_error_with_line (pfile
, CPP_DL_ERROR
,
4652 "invalid character '%c'"
4681 CPP_INCREMENT_LINE (pfile
, 0);
4691 && pos
+ delim_len
+ 1 < limit
4692 && pos
[delim_len
] == end
4693 && !memcmp (delim
, pos
, delim_len
))
4695 pos
+= delim_len
+ 1;
4702 if (!raw
&& !(esc
& 1) && c
== end
)
4709 cpp_error_with_line (pfile
, CPP_DL_ERROR
, sloc
, 0,
4710 "unterminated literal");
4722 if (bol
&& module_p
&& !pfile
->state
.skipping
4723 && do_peek_module (pfile
, c
, pos
, limit
))
4725 /* We've seen the start of a module control line.
4726 Start up the tokenizer. */
4727 pos
--; /* Backup over the first character. */
4729 /* Backup over whitespace to start of line. */
4730 while (pos
> line_start
4731 && (pos
[-1] == ' ' || pos
[-1] == '\t'))
4735 cb (pfile
, CPP_DO_print
, data
, line_count
, base
, pos
- base
);
4737 /* Prep things for directive handling. */
4738 buffer
->next_line
= pos
;
4739 buffer
->need_line
= true;
4741 /* Now get tokens until the PRAGMA_EOL. */
4744 location_t spelling
;
4745 const cpp_token
*tok
4746 = cpp_get_token_with_location (pfile
, &spelling
);
4748 gcc_assert (pfile
->state
.in_deferred_pragma
4749 || tok
->type
== CPP_PRAGMA_EOL
);
4750 cb (pfile
, CPP_DO_token
, data
, tok
, spelling
);
4752 while (pfile
->state
.in_deferred_pragma
);
4754 if (pfile
->buffer
->next_line
< pfile
->buffer
->rlimit
)
4755 cb (pfile
, CPP_DO_location
, data
,
4756 pfile
->line_table
->highest_line
);
4758 pfile
->mi_valid
= false;
4766 pfile
->mi_valid
= false;
4771 if (buffer
->rlimit
> base
&& !pfile
->state
.skipping
)
4773 const unsigned char *limit
= buffer
->rlimit
;
4774 /* If the file was not newline terminated, add rlimit, which is
4775 guaranteed to point to a newline, to the end of our range. */
4776 if (limit
[-1] != '\n')
4779 CPP_INCREMENT_LINE (pfile
, 0);
4782 cb (pfile
, CPP_DO_print
, data
, line_count
, base
, limit
- base
);
4785 _cpp_pop_buffer (pfile
);
4787 while (pfile
->buffer
);