]> gcc.gnu.org Git - gcc.git/blame - libcpp/lex.c
Imported GNU Classpath 0.92
[gcc.git] / libcpp / lex.c
CommitLineData
/* CPP Library - lexical analysis.
   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   Contributed by Per Bothner, 1994-95.
   Based on CCCP program by Paul Rubin, June 1986
   Adapted to ANSI C, Richard Stallman, Jan 1987
   Broken out to separate file, Zack Weinberg, Mar 2000

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
45b966db
ZW
21
22#include "config.h"
23#include "system.h"
45b966db 24#include "cpplib.h"
4f4e53dd 25#include "internal.h"
45b966db 26
/* Spelling category stored for each token type in the token
   spelling table.  SPELL_OPERATOR is pinned to zero; the remaining
   enumerators follow in order.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
34
93c80368 35struct token_spelling
f9a0e96c 36{
93c80368
NB
37 enum spell_type category;
38 const unsigned char *name;
f9a0e96c
ZW
39};
40
8206c799
ZW
41static const unsigned char *const digraph_spellings[] =
42{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
93c80368 43
21b11495
ZW
44#define OP(e, s) { SPELL_OPERATOR, U s },
45#define TK(e, s) { SPELL_ ## s, U #e },
8206c799 46static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
93c80368
NB
47#undef OP
48#undef TK
49
50#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
f2d5f0cc 52
6cf87ca4
ZW
53static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54static int skip_line_comment (cpp_reader *);
55static void skip_whitespace (cpp_reader *, cppchar_t);
6cf87ca4
ZW
56static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
58static void create_literal (cpp_reader *, cpp_token *, const uchar *,
59 unsigned int, enum cpp_ttype);
60static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
61static int name_p (cpp_reader *, const cpp_string *);
6cf87ca4
ZW
62static tokenrun *next_tokenrun (tokenrun *);
63
6cf87ca4 64static _cpp_buff *new_buff (size_t);
15dad1d9 65
9d10c9a9 66
041c3194 67/* Utility routine:
9e62c811 68
bfb9dc7f
ZW
69 Compares, the token TOKEN to the NUL-terminated string STRING.
70 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
041c3194 71int
6cf87ca4 72cpp_ideq (const cpp_token *token, const char *string)
041c3194 73{
bfb9dc7f 74 if (token->type != CPP_NAME)
041c3194 75 return 0;
bfb9dc7f 76
562a5c27 77 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
15dad1d9 78}
1368ee70 79
26aea073
NB
80/* Record a note TYPE at byte POS into the current cleaned logical
81 line. */
87062813 82static void
6cf87ca4 83add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
0d9f234d 84{
26aea073
NB
85 if (buffer->notes_used == buffer->notes_cap)
86 {
87 buffer->notes_cap = buffer->notes_cap * 2 + 200;
c3f829c1
GDR
88 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
89 buffer->notes_cap);
26aea073 90 }
0d9f234d 91
26aea073
NB
92 buffer->notes[buffer->notes_used].pos = pos;
93 buffer->notes[buffer->notes_used].type = type;
94 buffer->notes_used++;
0d9f234d
NB
95}
96
26aea073
NB
97/* Returns with a logical line that contains no escaped newlines or
98 trigraphs. This is a time-critical inner loop. */
99void
6cf87ca4 100_cpp_clean_line (cpp_reader *pfile)
45b966db 101{
26aea073
NB
102 cpp_buffer *buffer;
103 const uchar *s;
104 uchar c, *d, *p;
87062813 105
26aea073
NB
106 buffer = pfile->buffer;
107 buffer->cur_note = buffer->notes_used = 0;
108 buffer->cur = buffer->line_base = buffer->next_line;
109 buffer->need_line = false;
110 s = buffer->next_line - 1;
87062813 111
26aea073 112 if (!buffer->from_stage3)
45b966db 113 {
d08dcf87
ZW
114 /* Short circuit for the common case of an un-escaped line with
115 no trigraphs. The primary win here is by not writing any
116 data back to memory until we have to. */
117 for (;;)
118 {
119 c = *++s;
120 if (c == '\n' || c == '\r')
121 {
122 d = (uchar *) s;
123
124 if (s == buffer->rlimit)
125 goto done;
126
127 /* DOS line ending? */
128 if (c == '\r' && s[1] == '\n')
129 s++;
130
131 if (s == buffer->rlimit)
132 goto done;
133
134 /* check for escaped newline */
135 p = d;
136 while (p != buffer->next_line && is_nvspace (p[-1]))
137 p--;
138 if (p == buffer->next_line || p[-1] != '\\')
139 goto done;
140
141 /* Have an escaped newline; process it and proceed to
142 the slow path. */
143 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
144 d = p - 2;
145 buffer->next_line = p - 1;
146 break;
147 }
148 if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
149 {
150 /* Have a trigraph. We may or may not have to convert
151 it. Add a line note regardless, for -Wtrigraphs. */
152 add_line_note (buffer, s, s[2]);
153 if (CPP_OPTION (pfile, trigraphs))
154 {
155 /* We do, and that means we have to switch to the
156 slow path. */
157 d = (uchar *) s;
158 *d = _cpp_trigraph_map[s[2]];
159 s += 2;
160 break;
161 }
162 }
163 }
164
26aea073
NB
165
166 for (;;)
4a5b68a2 167 {
26aea073
NB
168 c = *++s;
169 *++d = c;
170
171 if (c == '\n' || c == '\r')
172 {
173 /* Handle DOS line endings. */
174 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
175 s++;
176 if (s == buffer->rlimit)
177 break;
178
179 /* Escaped? */
180 p = d;
181 while (p != buffer->next_line && is_nvspace (p[-1]))
182 p--;
183 if (p == buffer->next_line || p[-1] != '\\')
184 break;
185
41c32c98 186 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
26aea073
NB
187 d = p - 2;
188 buffer->next_line = p - 1;
189 }
190 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
191 {
192 /* Add a note regardless, for the benefit of -Wtrigraphs. */
41c32c98 193 add_line_note (buffer, d, s[2]);
26aea073
NB
194 if (CPP_OPTION (pfile, trigraphs))
195 {
196 *d = _cpp_trigraph_map[s[2]];
197 s += 2;
198 }
199 }
4a5b68a2 200 }
45b966db 201 }
26aea073
NB
202 else
203 {
204 do
205 s++;
206 while (*s != '\n' && *s != '\r');
207 d = (uchar *) s;
208
209 /* Handle DOS line endings. */
210 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
211 s++;
212 }
0d9f234d 213
d08dcf87 214 done:
26aea073 215 *d = '\n';
41c32c98
NB
216 /* A sentinel note that should never be processed. */
217 add_line_note (buffer, d + 1, '\n');
26aea073 218 buffer->next_line = s + 1;
45b966db
ZW
219}
220
a8eb6044
NB
221/* Return true if the trigraph indicated by NOTE should be warned
222 about in a comment. */
223static bool
6cf87ca4 224warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
a8eb6044
NB
225{
226 const uchar *p;
227
228 /* Within comments we don't warn about trigraphs, unless the
229 trigraph forms an escaped newline, as that may change
6356f892 230 behavior. */
a8eb6044
NB
231 if (note->type != '/')
232 return false;
233
234 /* If -trigraphs, then this was an escaped newline iff the next note
235 is coincident. */
236 if (CPP_OPTION (pfile, trigraphs))
237 return note[1].pos == note->pos;
238
239 /* Otherwise, see if this forms an escaped newline. */
240 p = note->pos + 3;
241 while (is_nvspace (*p))
242 p++;
243
244 /* There might have been escaped newlines between the trigraph and the
245 newline we found. Hence the position test. */
246 return (*p == '\n' && p < note[1].pos);
247}
248
26aea073
NB
249/* Process the notes created by add_line_note as far as the current
250 location. */
251void
6cf87ca4 252_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
45b966db 253{
29401c30
NB
254 cpp_buffer *buffer = pfile->buffer;
255
26aea073 256 for (;;)
041c3194 257 {
26aea073
NB
258 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
259 unsigned int col;
a5c3cccd 260
26aea073
NB
261 if (note->pos > buffer->cur)
262 break;
a5c3cccd 263
26aea073
NB
264 buffer->cur_note++;
265 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
4d6baafa 266
41c32c98 267 if (note->type == '\\' || note->type == ' ')
26aea073 268 {
41c32c98 269 if (note->type == ' ' && !in_comment)
500bee0a 270 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
26aea073 271 "backslash and newline separated by space");
41c32c98 272
26aea073 273 if (buffer->next_line > buffer->rlimit)
87062813 274 {
500bee0a 275 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
26aea073
NB
276 "backslash-newline at end of file");
277 /* Prevent "no newline at end of file" warning. */
278 buffer->next_line = buffer->rlimit;
87062813 279 }
26aea073
NB
280
281 buffer->line_base = note->pos;
12f9df4e 282 CPP_INCREMENT_LINE (pfile, 0);
0d9f234d 283 }
41c32c98
NB
284 else if (_cpp_trigraph_map[note->type])
285 {
a8eb6044
NB
286 if (CPP_OPTION (pfile, warn_trigraphs)
287 && (!in_comment || warn_in_comment (pfile, note)))
41c32c98
NB
288 {
289 if (CPP_OPTION (pfile, trigraphs))
500bee0a 290 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
41c32c98
NB
291 "trigraph ??%c converted to %c",
292 note->type,
293 (int) _cpp_trigraph_map[note->type]);
294 else
905bd7b5
GK
295 {
296 cpp_error_with_line
500bee0a 297 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
905bd7b5
GK
298 "trigraph ??%c ignored, use -trigraphs to enable",
299 note->type);
300 }
41c32c98
NB
301 }
302 }
303 else
304 abort ();
041c3194 305 }
45b966db
ZW
306}
307
0d9f234d
NB
308/* Skip a C-style block comment. We find the end of the comment by
309 seeing if an asterisk is before every '/' we encounter. Returns
6f572ac2
NB
310 nonzero if comment terminated by EOF, zero otherwise.
311
312 Buffer->cur points to the initial asterisk of the comment. */
26aea073 313bool
6cf87ca4 314_cpp_skip_block_comment (cpp_reader *pfile)
45b966db 315{
041c3194 316 cpp_buffer *buffer = pfile->buffer;
d08dcf87
ZW
317 const uchar *cur = buffer->cur;
318 uchar c;
0d9f234d 319
d08dcf87
ZW
320 cur++;
321 if (*cur == '/')
322 cur++;
0d9f234d 323
26aea073
NB
324 for (;;)
325 {
0d9f234d
NB
326 /* People like decorating comments with '*', so check for '/'
327 instead for efficiency. */
d08dcf87
ZW
328 c = *cur++;
329
041c3194 330 if (c == '/')
45b966db 331 {
d08dcf87 332 if (cur[-2] == '*')
0d9f234d 333 break;
041c3194 334
0d9f234d 335 /* Warn about potential nested comments, but not if the '/'
a1f300c0 336 comes immediately before the true comment delimiter.
041c3194 337 Don't bother to get it right across escaped newlines. */
0d9f234d 338 if (CPP_OPTION (pfile, warn_comments)
d08dcf87
ZW
339 && cur[0] == '*' && cur[1] != '/')
340 {
341 buffer->cur = cur;
0527bc4e 342 cpp_error_with_line (pfile, CPP_DL_WARNING,
500bee0a 343 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
d08dcf87
ZW
344 "\"/*\" within comment");
345 }
45b966db 346 }
26aea073
NB
347 else if (c == '\n')
348 {
12f9df4e 349 unsigned int cols;
d08dcf87 350 buffer->cur = cur - 1;
26aea073
NB
351 _cpp_process_line_notes (pfile, true);
352 if (buffer->next_line >= buffer->rlimit)
353 return true;
354 _cpp_clean_line (pfile);
12f9df4e
PB
355
356 cols = buffer->next_line - buffer->line_base;
357 CPP_INCREMENT_LINE (pfile, cols);
358
d08dcf87 359 cur = buffer->cur;
26aea073 360 }
45b966db 361 }
041c3194 362
d08dcf87 363 buffer->cur = cur;
a8eb6044 364 _cpp_process_line_notes (pfile, true);
26aea073 365 return false;
45b966db
ZW
366}
367
480709cc 368/* Skip a C++ line comment, leaving buffer->cur pointing to the
da7d8304 369 terminating newline. Handles escaped newlines. Returns nonzero
480709cc 370 if a multiline comment. */
041c3194 371static int
6cf87ca4 372skip_line_comment (cpp_reader *pfile)
45b966db 373{
cbcff6df 374 cpp_buffer *buffer = pfile->buffer;
500bee0a 375 unsigned int orig_line = pfile->line_table->highest_line;
041c3194 376
26aea073
NB
377 while (*buffer->cur != '\n')
378 buffer->cur++;
480709cc 379
26aea073 380 _cpp_process_line_notes (pfile, true);
500bee0a 381 return orig_line != pfile->line_table->highest_line;
041c3194 382}
45b966db 383
26aea073 384/* Skips whitespace, saving the next non-whitespace character. */
52fadca8 385static void
6cf87ca4 386skip_whitespace (cpp_reader *pfile, cppchar_t c)
041c3194
ZW
387{
388 cpp_buffer *buffer = pfile->buffer;
f7d151fb 389 bool saw_NUL = false;
45b966db 390
0d9f234d 391 do
041c3194 392 {
91fcd158 393 /* Horizontal space always OK. */
26aea073 394 if (c == ' ' || c == '\t')
0d9f234d 395 ;
0d9f234d 396 /* Just \f \v or \0 left. */
91fcd158 397 else if (c == '\0')
f7d151fb 398 saw_NUL = true;
93c80368 399 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
500bee0a 400 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
ebef4e8c
NB
401 CPP_BUF_COL (buffer),
402 "%s in preprocessing directive",
403 c == '\f' ? "form feed" : "vertical tab");
0d9f234d 404
0d9f234d 405 c = *buffer->cur++;
45b966db 406 }
ec5c56db 407 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
0d9f234d
NB
408 while (is_nvspace (c));
409
f7d151fb 410 if (saw_NUL)
0527bc4e 411 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
f7d151fb 412
480709cc 413 buffer->cur--;
041c3194 414}
45b966db 415
93c80368
NB
416/* See if the characters of a number token are valid in a name (no
417 '.', '+' or '-'). */
418static int
6cf87ca4 419name_p (cpp_reader *pfile, const cpp_string *string)
93c80368
NB
420{
421 unsigned int i;
422
423 for (i = 0; i < string->len; i++)
424 if (!is_idchar (string->text[i]))
425 return 0;
426
df383483 427 return 1;
93c80368
NB
428}
429
50668cf6
GK
430/* After parsing an identifier or other sequence, produce a warning about
431 sequences not in NFC/NFKC. */
432static void
433warn_about_normalization (cpp_reader *pfile,
434 const cpp_token *token,
435 const struct normalize_state *s)
436{
437 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
438 && !pfile->state.skipping)
439 {
440 /* Make sure that the token is printed using UCNs, even
441 if we'd otherwise happily print UTF-8. */
c3f829c1 442 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
50668cf6
GK
443 size_t sz;
444
445 sz = cpp_spell_token (pfile, token, buf, false) - buf;
446 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
447 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
cbada204 448 "`%.*s' is not in NFKC", (int) sz, buf);
50668cf6
GK
449 else
450 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
cbada204 451 "`%.*s' is not in NFC", (int) sz, buf);
50668cf6
GK
452 }
453}
454
bced6edf 455/* Returns TRUE if the sequence starting at buffer->cur is invalid in
1613e52b 456 an identifier. FIRST is TRUE if this starts an identifier. */
bced6edf 457static bool
50668cf6
GK
458forms_identifier_p (cpp_reader *pfile, int first,
459 struct normalize_state *state)
bced6edf 460{
1613e52b
NB
461 cpp_buffer *buffer = pfile->buffer;
462
463 if (*buffer->cur == '$')
464 {
465 if (!CPP_OPTION (pfile, dollars_in_ident))
466 return false;
467
468 buffer->cur++;
78b8811a 469 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
1613e52b 470 {
78b8811a 471 CPP_OPTION (pfile, warn_dollars) = 0;
0527bc4e 472 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
1613e52b
NB
473 }
474
475 return true;
476 }
bced6edf 477
1613e52b 478 /* Is this a syntactically valid UCN? */
af15a2fe 479 if (CPP_OPTION (pfile, extended_identifiers)
6baba9bb 480 && *buffer->cur == '\\'
1613e52b 481 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
bced6edf 482 {
1613e52b 483 buffer->cur += 2;
50668cf6
GK
484 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
485 state))
1613e52b
NB
486 return true;
487 buffer->cur -= 2;
bced6edf 488 }
bced6edf 489
1613e52b 490 return false;
bced6edf
NB
491}
492
493/* Lex an identifier starting at BUFFER->CUR - 1. */
0d9f234d 494static cpp_hashnode *
50668cf6
GK
495lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
496 struct normalize_state *nst)
45b966db 497{
93c80368 498 cpp_hashnode *result;
47e20491 499 const uchar *cur;
c6e83800
ZW
500 unsigned int len;
501 unsigned int hash = HT_HASHSTEP (0, *base);
2c3fcba6 502
c6e83800 503 cur = pfile->buffer->cur;
47e20491
GK
504 if (! starts_ucn)
505 while (ISIDNUM (*cur))
506 {
507 hash = HT_HASHSTEP (hash, *cur);
508 cur++;
509 }
510 pfile->buffer->cur = cur;
50668cf6 511 if (starts_ucn || forms_identifier_p (pfile, false, nst))
10cf9bde 512 {
47e20491
GK
513 /* Slower version for identifiers containing UCNs (or $). */
514 do {
515 while (ISIDNUM (*pfile->buffer->cur))
50668cf6
GK
516 {
517 pfile->buffer->cur++;
518 NORMALIZE_STATE_UPDATE_IDNUM (nst);
519 }
520 } while (forms_identifier_p (pfile, false, nst));
47e20491
GK
521 result = _cpp_interpret_identifier (pfile, base,
522 pfile->buffer->cur - base);
2c3fcba6 523 }
47e20491
GK
524 else
525 {
526 len = cur - base;
527 hash = HT_HASHFINISH (hash, len);
bced6edf 528
47e20491
GK
529 result = (cpp_hashnode *)
530 ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
531 }
2c3fcba6 532
bced6edf 533 /* Rarely, identifiers require diagnostics when lexed. */
2c3fcba6
ZW
534 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
535 && !pfile->state.skipping, 0))
536 {
537 /* It is allowed to poison the same identifier twice. */
538 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
0527bc4e 539 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
2c3fcba6
ZW
540 NODE_NAME (result));
541
542 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
543 replacement list of a variadic macro. */
544 if (result == pfile->spec_nodes.n__VA_ARGS__
545 && !pfile->state.va_args_ok)
0527bc4e 546 cpp_error (pfile, CPP_DL_PEDWARN,
6cf87ca4
ZW
547 "__VA_ARGS__ can only appear in the expansion"
548 " of a C99 variadic macro");
2c3fcba6
ZW
549 }
550
551 return result;
552}
553
bced6edf 554/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
45b966db 555static void
50668cf6
GK
556lex_number (cpp_reader *pfile, cpp_string *number,
557 struct normalize_state *nst)
45b966db 558{
562a5c27 559 const uchar *cur;
bced6edf
NB
560 const uchar *base;
561 uchar *dest;
45b966db 562
bced6edf
NB
563 base = pfile->buffer->cur - 1;
564 do
041c3194 565 {
bced6edf 566 cur = pfile->buffer->cur;
0d9f234d 567
bced6edf
NB
568 /* N.B. ISIDNUM does not include $. */
569 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
50668cf6
GK
570 {
571 cur++;
572 NORMALIZE_STATE_UPDATE_IDNUM (nst);
573 }
45b966db 574
10cf9bde 575 pfile->buffer->cur = cur;
45b966db 576 }
50668cf6 577 while (forms_identifier_p (pfile, false, nst));
93c80368 578
bced6edf
NB
579 number->len = cur - base;
580 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
581 memcpy (dest, base, number->len);
582 dest[number->len] = '\0';
583 number->text = dest;
93c80368
NB
584}
585
6338b358
NB
586/* Create a token of type TYPE with a literal spelling. */
587static void
6cf87ca4
ZW
588create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
589 unsigned int len, enum cpp_ttype type)
6338b358
NB
590{
591 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
592
593 memcpy (dest, base, len);
594 dest[len] = '\0';
595 token->type = type;
596 token->val.str.len = len;
597 token->val.str.text = dest;
598}
599
bced6edf 600/* Lexes a string, character constant, or angle-bracketed header file
6338b358
NB
601 name. The stored string contains the spelling, including opening
602 quote and leading any leading 'L'. It returns the type of the
603 literal, or CPP_OTHER if it was not properly terminated.
604
605 The spelling is NUL-terminated, but it is not guaranteed that this
606 is the first NUL since embedded NULs are preserved. */
041c3194 607static void
6cf87ca4 608lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
45b966db 609{
6338b358
NB
610 bool saw_NUL = false;
611 const uchar *cur;
bced6edf 612 cppchar_t terminator;
6338b358
NB
613 enum cpp_ttype type;
614
615 cur = base;
616 terminator = *cur++;
617 if (terminator == 'L')
618 terminator = *cur++;
619 if (terminator == '\"')
620 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
621 else if (terminator == '\'')
622 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
623 else
624 terminator = '>', type = CPP_HEADER_NAME;
93c80368 625
0d9f234d 626 for (;;)
45b966db 627 {
6338b358 628 cppchar_t c = *cur++;
7868b4a2 629
6f572ac2 630 /* In #include-style directives, terminators are not escapable. */
6338b358
NB
631 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
632 cur++;
633 else if (c == terminator)
bced6edf 634 break;
6338b358 635 else if (c == '\n')
0d9f234d 636 {
6338b358
NB
637 cur--;
638 type = CPP_OTHER;
639 break;
45b966db 640 }
6338b358
NB
641 else if (c == '\0')
642 saw_NUL = true;
45b966db
ZW
643 }
644
6338b358 645 if (saw_NUL && !pfile->state.skipping)
0527bc4e
JDA
646 cpp_error (pfile, CPP_DL_WARNING,
647 "null character(s) preserved in literal");
45b966db 648
6338b358
NB
649 pfile->buffer->cur = cur;
650 create_literal (pfile, token, base, cur - base, type);
0d9f234d 651}
041c3194 652
93c80368 653/* The stored comment includes the comment start and any terminator. */
9e62c811 654static void
6cf87ca4
ZW
655save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
656 cppchar_t type)
9e62c811 657{
041c3194 658 unsigned char *buffer;
477cdac7 659 unsigned int len, clen;
df383483 660
1c6d33ef 661 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
480709cc 662
3542203b
NB
663 /* C++ comments probably (not definitely) have moved past a new
664 line, which we don't want to save in the comment. */
480709cc 665 if (is_vspace (pfile->buffer->cur[-1]))
3542203b 666 len--;
477cdac7
JT
667
668 /* If we are currently in a directive, then we need to store all
669 C++ comments as C comments internally, and so we need to
670 allocate a little extra space in that case.
671
672 Note that the only time we encounter a directive here is
673 when we are saving comments in a "#define". */
674 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
675
676 buffer = _cpp_unaligned_alloc (pfile, clen);
df383483 677
041c3194 678 token->type = CPP_COMMENT;
477cdac7 679 token->val.str.len = clen;
0d9f234d 680 token->val.str.text = buffer;
45b966db 681
1c6d33ef
NB
682 buffer[0] = '/';
683 memcpy (buffer + 1, from, len - 1);
477cdac7 684
1eeeb6a4 685 /* Finish conversion to a C comment, if necessary. */
477cdac7
JT
686 if (pfile->state.in_directive && type == '/')
687 {
688 buffer[1] = '*';
689 buffer[clen - 2] = '*';
690 buffer[clen - 1] = '/';
691 }
0d9f234d 692}
45b966db 693
5fddcffc
NB
694/* Allocate COUNT tokens for RUN. */
695void
6cf87ca4 696_cpp_init_tokenrun (tokenrun *run, unsigned int count)
5fddcffc 697{
72bb2c39 698 run->base = XNEWVEC (cpp_token, count);
5fddcffc
NB
699 run->limit = run->base + count;
700 run->next = NULL;
701}
702
703/* Returns the next tokenrun, or creates one if there is none. */
704static tokenrun *
6cf87ca4 705next_tokenrun (tokenrun *run)
5fddcffc
NB
706{
707 if (run->next == NULL)
708 {
72bb2c39 709 run->next = XNEW (tokenrun);
bdcbe496 710 run->next->prev = run;
5fddcffc
NB
711 _cpp_init_tokenrun (run->next, 250);
712 }
713
714 return run->next;
715}
716
4ed5bcfb
NB
717/* Allocate a single token that is invalidated at the same time as the
718 rest of the tokens on the line. Has its line and col set to the
719 same as the last lexed token, so that diagnostics appear in the
720 right place. */
721cpp_token *
6cf87ca4 722_cpp_temp_token (cpp_reader *pfile)
4ed5bcfb
NB
723{
724 cpp_token *old, *result;
725
726 old = pfile->cur_token - 1;
727 if (pfile->cur_token == pfile->cur_run->limit)
728 {
729 pfile->cur_run = next_tokenrun (pfile->cur_run);
730 pfile->cur_token = pfile->cur_run->base;
731 }
732
733 result = pfile->cur_token++;
12f9df4e 734 result->src_loc = old->src_loc;
4ed5bcfb
NB
735 return result;
736}
737
14baae01
NB
738/* Lex a token into RESULT (external interface). Takes care of issues
739 like directive handling, token lookahead, multiple include
a1f300c0 740 optimization and skipping. */
345894b4 741const cpp_token *
6cf87ca4 742_cpp_lex_token (cpp_reader *pfile)
5fddcffc 743{
bdcbe496 744 cpp_token *result;
5fddcffc 745
bdcbe496 746 for (;;)
5fddcffc 747 {
bdcbe496 748 if (pfile->cur_token == pfile->cur_run->limit)
5fddcffc 749 {
bdcbe496
NB
750 pfile->cur_run = next_tokenrun (pfile->cur_run);
751 pfile->cur_token = pfile->cur_run->base;
5fddcffc
NB
752 }
753
bdcbe496 754 if (pfile->lookaheads)
14baae01
NB
755 {
756 pfile->lookaheads--;
757 result = pfile->cur_token++;
758 }
bdcbe496 759 else
14baae01 760 result = _cpp_lex_direct (pfile);
bdcbe496
NB
761
762 if (result->flags & BOL)
5fddcffc 763 {
bdcbe496
NB
764 /* Is this a directive. If _cpp_handle_directive returns
765 false, it is an assembler #. */
766 if (result->type == CPP_HASH
e808ec9c
NB
767 /* 6.10.3 p 11: Directives in a list of macro arguments
768 gives undefined behavior. This implementation
769 handles the directive as normal. */
bc4071dd 770 && pfile->state.parsing_args != 1)
21b11495 771 {
bc4071dd 772 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
21b11495 773 {
bc4071dd
RH
774 if (pfile->directive_result.type == CPP_PADDING)
775 continue;
21b11495 776 result = &pfile->directive_result;
21b11495
ZW
777 }
778 }
bc4071dd
RH
779 else if (pfile->state.in_deferred_pragma)
780 result = &pfile->directive_result;
21b11495 781
97293897 782 if (pfile->cb.line_change && !pfile->state.skipping)
6cf87ca4 783 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
5fddcffc 784 }
5fddcffc 785
bdcbe496 786 /* We don't skip tokens in directives. */
bc4071dd 787 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
bdcbe496 788 break;
5fddcffc 789
bdcbe496 790 /* Outside a directive, invalidate controlling macros. At file
14baae01 791 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
6356f892 792 get here and MI optimization works. */
5fddcffc 793 pfile->mi_valid = false;
bdcbe496
NB
794
795 if (!pfile->state.skipping || result->type == CPP_EOF)
796 break;
5fddcffc
NB
797 }
798
345894b4 799 return result;
5fddcffc
NB
800}
801
26aea073
NB
802/* Returns true if a fresh line has been loaded. */
803bool
6cf87ca4 804_cpp_get_fresh_line (cpp_reader *pfile)
004cb263 805{
22234f56
PB
806 int return_at_eof;
807
26aea073
NB
808 /* We can't get a new line until we leave the current directive. */
809 if (pfile->state.in_directive)
810 return false;
df383483 811
26aea073 812 for (;;)
1a76916c 813 {
26aea073 814 cpp_buffer *buffer = pfile->buffer;
1a76916c 815
26aea073
NB
816 if (!buffer->need_line)
817 return true;
818
819 if (buffer->next_line < buffer->rlimit)
004cb263 820 {
26aea073
NB
821 _cpp_clean_line (pfile);
822 return true;
823 }
004cb263 824
26aea073
NB
825 /* First, get out of parsing arguments state. */
826 if (pfile->state.parsing_args)
827 return false;
828
829 /* End of buffer. Non-empty files should end in a newline. */
830 if (buffer->buf != buffer->rlimit
831 && buffer->next_line > buffer->rlimit
832 && !buffer->from_stage3)
833 {
834 /* Only warn once. */
835 buffer->next_line = buffer->rlimit;
500bee0a 836 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
26aea073
NB
837 CPP_BUF_COLUMN (buffer, buffer->cur),
838 "no newline at end of file");
839 }
22234f56
PB
840
841 return_at_eof = buffer->return_at_eof;
26aea073 842 _cpp_pop_buffer (pfile);
22234f56 843 if (pfile->buffer == NULL || return_at_eof)
a506c55c 844 return false;
26aea073 845 }
004cb263
NB
846}
847
/* Set result->type to THEN_TYPE and consume one character if the
   next character at buffer->cur is CHAR; otherwise set result->type
   to ELSE_TYPE and consume nothing.  Relies on variables named
   `result' and `buffer' being in scope at the point of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      result->type = ELSE_TYPE;                         \
      if (*buffer->cur == CHAR)                         \
        buffer->cur++, result->type = THEN_TYPE;        \
    }                                                   \
  while (0)
480709cc 856
14baae01
NB
857/* Lex a token into pfile->cur_token, which is also incremented, to
858 get diagnostics pointing to the correct location.
859
860 Does not handle issues such as token lookahead, multiple-include
f1ba665b 861 optimization, directives, skipping etc. This function is only
14baae01
NB
862 suitable for use by _cpp_lex_token, and in special cases like
863 lex_expansion_token which doesn't care for any of these issues.
864
865 When meeting a newline, returns CPP_EOF if parsing a directive,
866 otherwise returns to the start of the token buffer if permissible.
867 Returns the location of the lexed token. */
868cpp_token *
6cf87ca4 869_cpp_lex_direct (cpp_reader *pfile)
45b966db 870{
0d9f234d 871 cppchar_t c;
adb84b42 872 cpp_buffer *buffer;
0d9f234d 873 const unsigned char *comment_start;
14baae01 874 cpp_token *result = pfile->cur_token++;
9ec7291f 875
5fddcffc 876 fresh_line:
26aea073 877 result->flags = 0;
2be570f9 878 buffer = pfile->buffer;
a506c55c 879 if (buffer->need_line)
26aea073 880 {
bc4071dd
RH
881 if (pfile->state.in_deferred_pragma)
882 {
883 result->type = CPP_PRAGMA_EOL;
884 pfile->state.in_deferred_pragma = false;
885 if (!pfile->state.pragma_allow_expansion)
886 pfile->state.prevent_expansion--;
887 return result;
888 }
26aea073
NB
889 if (!_cpp_get_fresh_line (pfile))
890 {
891 result->type = CPP_EOF;
9ff7868d
NB
892 if (!pfile->state.in_directive)
893 {
894 /* Tell the compiler the line number of the EOF token. */
500bee0a 895 result->src_loc = pfile->line_table->highest_line;
9ff7868d
NB
896 result->flags = BOL;
897 }
26aea073
NB
898 return result;
899 }
900 if (!pfile->keep_tokens)
901 {
902 pfile->cur_run = &pfile->base_run;
903 result = pfile->base_run.base;
904 pfile->cur_token = result + 1;
905 }
906 result->flags = BOL;
907 if (pfile->state.parsing_args == 2)
908 result->flags |= PREV_WHITE;
909 }
a506c55c 910 buffer = pfile->buffer;
5fddcffc 911 update_tokens_line:
500bee0a 912 result->src_loc = pfile->line_table->highest_line;
041c3194 913
5fddcffc 914 skipped_white:
26aea073
NB
915 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
916 && !pfile->overlaid_buffer)
917 {
918 _cpp_process_line_notes (pfile, false);
500bee0a 919 result->src_loc = pfile->line_table->highest_line;
26aea073 920 }
480709cc 921 c = *buffer->cur++;
12f9df4e 922
500bee0a
PB
923 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
924 CPP_BUF_COLUMN (buffer, buffer->cur));
5fddcffc 925
0d9f234d 926 switch (c)
45b966db 927 {
4d6baafa
NB
928 case ' ': case '\t': case '\f': case '\v': case '\0':
929 result->flags |= PREV_WHITE;
26aea073
NB
930 skip_whitespace (pfile, c);
931 goto skipped_white;
0d9f234d 932
26aea073 933 case '\n':
12f9df4e
PB
934 if (buffer->cur < buffer->rlimit)
935 CPP_INCREMENT_LINE (pfile, 0);
26aea073
NB
936 buffer->need_line = true;
937 goto fresh_line;
46d07497 938
0d9f234d
NB
939 case '0': case '1': case '2': case '3': case '4':
940 case '5': case '6': case '7': case '8': case '9':
50668cf6
GK
941 {
942 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
943 result->type = CPP_NUMBER;
944 lex_number (pfile, &result->val.str, &nst);
945 warn_about_normalization (pfile, result, &nst);
946 break;
947 }
46d07497 948
0abc6a6a
NB
949 case 'L':
950 /* 'L' may introduce wide characters or strings. */
bced6edf
NB
951 if (*buffer->cur == '\'' || *buffer->cur == '"')
952 {
6338b358 953 lex_string (pfile, result, buffer->cur - 1);
bced6edf
NB
954 break;
955 }
df383483 956 /* Fall through. */
0abc6a6a 957
0d9f234d
NB
958 case '_':
959 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
960 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
961 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
962 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
963 case 'y': case 'z':
964 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
0abc6a6a 965 case 'G': case 'H': case 'I': case 'J': case 'K':
0d9f234d
NB
966 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
967 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
968 case 'Y': case 'Z':
969 result->type = CPP_NAME;
50668cf6
GK
970 {
971 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
972 result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
973 &nst);
974 warn_about_normalization (pfile, result, &nst);
975 }
0d9f234d 976
0d9f234d 977 /* Convert named operators to their proper types. */
0abc6a6a 978 if (result->val.node->flags & NODE_OPERATOR)
0d9f234d
NB
979 {
980 result->flags |= NAMED_OP;
c3f829c1 981 result->type = (enum cpp_ttype) result->val.node->directive_index;
0d9f234d
NB
982 }
983 break;
984
985 case '\'':
986 case '"':
6338b358 987 lex_string (pfile, result, buffer->cur - 1);
0d9f234d 988 break;
041c3194 989
0d9f234d 990 case '/':
1c6d33ef
NB
991 /* A potential block or line comment. */
992 comment_start = buffer->cur;
6f572ac2
NB
993 c = *buffer->cur;
994
1c6d33ef
NB
995 if (c == '*')
996 {
26aea073 997 if (_cpp_skip_block_comment (pfile))
0527bc4e 998 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
0d9f234d 999 }
480709cc 1000 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
12f9df4e 1001 || cpp_in_system_header (pfile)))
0d9f234d 1002 {
bdb05a7b
NB
1003 /* Warn about comments only if pedantically GNUC89, and not
1004 in system headers. */
1005 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
a94c1199 1006 && ! buffer->warned_cplusplus_comments)
041c3194 1007 {
0527bc4e 1008 cpp_error (pfile, CPP_DL_PEDWARN,
56508306 1009 "C++ style comments are not allowed in ISO C90");
0527bc4e 1010 cpp_error (pfile, CPP_DL_PEDWARN,
ebef4e8c 1011 "(this will be reported only once per input file)");
1c6d33ef
NB
1012 buffer->warned_cplusplus_comments = 1;
1013 }
0d9f234d 1014
01ef6563 1015 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
0527bc4e 1016 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1c6d33ef 1017 }
480709cc
NB
1018 else if (c == '=')
1019 {
6f572ac2 1020 buffer->cur++;
480709cc
NB
1021 result->type = CPP_DIV_EQ;
1022 break;
1023 }
1024 else
1025 {
480709cc
NB
1026 result->type = CPP_DIV;
1027 break;
1028 }
0d9f234d 1029
1c6d33ef
NB
1030 if (!pfile->state.save_comments)
1031 {
1032 result->flags |= PREV_WHITE;
5fddcffc 1033 goto update_tokens_line;
0d9f234d 1034 }
1c6d33ef
NB
1035
1036 /* Save the comment as a token in its own right. */
477cdac7 1037 save_comment (pfile, result, comment_start, c);
bdcbe496 1038 break;
0d9f234d
NB
1039
1040 case '<':
1041 if (pfile->state.angled_headers)
1042 {
6338b358 1043 lex_string (pfile, result, buffer->cur - 1);
480709cc 1044 break;
0d9f234d 1045 }
45b966db 1046
6f572ac2
NB
1047 result->type = CPP_LESS;
1048 if (*buffer->cur == '=')
1049 buffer->cur++, result->type = CPP_LESS_EQ;
1050 else if (*buffer->cur == '<')
0d9f234d 1051 {
6f572ac2
NB
1052 buffer->cur++;
1053 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
0d9f234d 1054 }
6f572ac2 1055 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
0d9f234d 1056 {
6f572ac2
NB
1057 buffer->cur++;
1058 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
0d9f234d 1059 }
6f572ac2 1060 else if (CPP_OPTION (pfile, digraphs))
480709cc 1061 {
6f572ac2
NB
1062 if (*buffer->cur == ':')
1063 {
1064 buffer->cur++;
1065 result->flags |= DIGRAPH;
1066 result->type = CPP_OPEN_SQUARE;
1067 }
1068 else if (*buffer->cur == '%')
1069 {
1070 buffer->cur++;
1071 result->flags |= DIGRAPH;
1072 result->type = CPP_OPEN_BRACE;
1073 }
480709cc 1074 }
0d9f234d
NB
1075 break;
1076
1077 case '>':
6f572ac2
NB
1078 result->type = CPP_GREATER;
1079 if (*buffer->cur == '=')
1080 buffer->cur++, result->type = CPP_GREATER_EQ;
1081 else if (*buffer->cur == '>')
0d9f234d 1082 {
6f572ac2
NB
1083 buffer->cur++;
1084 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1085 }
1086 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1087 {
1088 buffer->cur++;
1089 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
0d9f234d
NB
1090 }
1091 break;
1092
cbcff6df 1093 case '%':
6f572ac2
NB
1094 result->type = CPP_MOD;
1095 if (*buffer->cur == '=')
1096 buffer->cur++, result->type = CPP_MOD_EQ;
1097 else if (CPP_OPTION (pfile, digraphs))
480709cc 1098 {
6f572ac2 1099 if (*buffer->cur == ':')
480709cc 1100 {
6f572ac2
NB
1101 buffer->cur++;
1102 result->flags |= DIGRAPH;
1103 result->type = CPP_HASH;
1104 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1105 buffer->cur += 2, result->type = CPP_PASTE;
1106 }
1107 else if (*buffer->cur == '>')
1108 {
1109 buffer->cur++;
1110 result->flags |= DIGRAPH;
1111 result->type = CPP_CLOSE_BRACE;
480709cc 1112 }
480709cc 1113 }
0d9f234d
NB
1114 break;
1115
cbcff6df 1116 case '.':
480709cc 1117 result->type = CPP_DOT;
6f572ac2 1118 if (ISDIGIT (*buffer->cur))
480709cc 1119 {
50668cf6 1120 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
480709cc 1121 result->type = CPP_NUMBER;
50668cf6
GK
1122 lex_number (pfile, &result->val.str, &nst);
1123 warn_about_normalization (pfile, result, &nst);
480709cc 1124 }
6f572ac2
NB
1125 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1126 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1127 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1128 buffer->cur++, result->type = CPP_DOT_STAR;
0d9f234d 1129 break;
45b966db 1130
0d9f234d 1131 case '+':
6f572ac2
NB
1132 result->type = CPP_PLUS;
1133 if (*buffer->cur == '+')
1134 buffer->cur++, result->type = CPP_PLUS_PLUS;
1135 else if (*buffer->cur == '=')
1136 buffer->cur++, result->type = CPP_PLUS_EQ;
0d9f234d 1137 break;
04e3ec78 1138
0d9f234d 1139 case '-':
6f572ac2
NB
1140 result->type = CPP_MINUS;
1141 if (*buffer->cur == '>')
0d9f234d 1142 {
6f572ac2 1143 buffer->cur++;
480709cc 1144 result->type = CPP_DEREF;
6f572ac2
NB
1145 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1146 buffer->cur++, result->type = CPP_DEREF_STAR;
480709cc 1147 }
6f572ac2
NB
1148 else if (*buffer->cur == '-')
1149 buffer->cur++, result->type = CPP_MINUS_MINUS;
1150 else if (*buffer->cur == '=')
1151 buffer->cur++, result->type = CPP_MINUS_EQ;
0d9f234d 1152 break;
45b966db 1153
0d9f234d 1154 case '&':
6f572ac2
NB
1155 result->type = CPP_AND;
1156 if (*buffer->cur == '&')
1157 buffer->cur++, result->type = CPP_AND_AND;
1158 else if (*buffer->cur == '=')
1159 buffer->cur++, result->type = CPP_AND_EQ;
0d9f234d 1160 break;
df383483 1161
0d9f234d 1162 case '|':
6f572ac2
NB
1163 result->type = CPP_OR;
1164 if (*buffer->cur == '|')
1165 buffer->cur++, result->type = CPP_OR_OR;
1166 else if (*buffer->cur == '=')
1167 buffer->cur++, result->type = CPP_OR_EQ;
0d9f234d 1168 break;
45b966db 1169
0d9f234d 1170 case ':':
6f572ac2
NB
1171 result->type = CPP_COLON;
1172 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1173 buffer->cur++, result->type = CPP_SCOPE;
1174 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
0d9f234d 1175 {
6f572ac2 1176 buffer->cur++;
0d9f234d 1177 result->flags |= DIGRAPH;
480709cc
NB
1178 result->type = CPP_CLOSE_SQUARE;
1179 }
0d9f234d 1180 break;
45b966db 1181
480709cc
NB
1182 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1183 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1184 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1185 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1186 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1187
26aea073 1188 case '?': result->type = CPP_QUERY; break;
0d9f234d
NB
1189 case '~': result->type = CPP_COMPL; break;
1190 case ',': result->type = CPP_COMMA; break;
1191 case '(': result->type = CPP_OPEN_PAREN; break;
1192 case ')': result->type = CPP_CLOSE_PAREN; break;
1193 case '[': result->type = CPP_OPEN_SQUARE; break;
1194 case ']': result->type = CPP_CLOSE_SQUARE; break;
1195 case '{': result->type = CPP_OPEN_BRACE; break;
1196 case '}': result->type = CPP_CLOSE_BRACE; break;
1197 case ';': result->type = CPP_SEMICOLON; break;
1198
40f03658 1199 /* @ is a punctuator in Objective-C. */
cc937581 1200 case '@': result->type = CPP_ATSIGN; break;
0d9f234d 1201
0abc6a6a 1202 case '$':
1613e52b
NB
1203 case '\\':
1204 {
1205 const uchar *base = --buffer->cur;
50668cf6 1206 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
0abc6a6a 1207
50668cf6 1208 if (forms_identifier_p (pfile, true, &nst))
1613e52b
NB
1209 {
1210 result->type = CPP_NAME;
50668cf6
GK
1211 result->val.node = lex_identifier (pfile, base, true, &nst);
1212 warn_about_normalization (pfile, result, &nst);
1613e52b
NB
1213 break;
1214 }
1215 buffer->cur++;
1067694a 1216 }
1613e52b 1217
1067694a 1218 default:
6338b358
NB
1219 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1220 break;
0d9f234d 1221 }
bdcbe496
NB
1222
1223 return result;
0d9f234d
NB
1224}
1225
59325650
NB
1226/* An upper bound on the number of bytes needed to spell TOKEN.
1227 Does not include preceding whitespace. */
93c80368 1228unsigned int
6cf87ca4 1229cpp_token_len (const cpp_token *token)
0d9f234d 1230{
93c80368 1231 unsigned int len;
6d2c2047 1232
93c80368 1233 switch (TOKEN_SPELL (token))
041c3194 1234 {
59325650 1235 default: len = 4; break;
6338b358 1236 case SPELL_LITERAL: len = token->val.str.len; break;
47e20491 1237 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;
041c3194 1238 }
59325650
NB
1239
1240 return len;
6d2c2047
ZW
1241}
1242
/* Decode the single UTF-8 sequence that starts at NAME and store it in
   BUFFER as a backslash, a capital U and eight lower-case hexadecimal
   digits.  Exactly 10 bytes are written to BUFFER; no terminating NUL
   is added.  Returns the number of bytes of NAME that formed the
   sequence.  Calls abort if a trailing byte is not of the form
   10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned long code_point;
  unsigned lead;
  int seq_len = 0;
  int remaining;
  int digit;

  /* The count of leading one bits in the first byte is the length of
     the whole sequence.  */
  lead = *name;
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* The payload bits of the first byte are whatever follows the
     length marker.  */
  code_point = *name & (0x7F >> seq_len);

  /* Each trailing byte contributes six more bits.  */
  for (remaining = seq_len - 1; remaining > 0; remaining--)
    {
      name++;
      code_point = (code_point << 6) | (*name & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((*name & ~0x3F) != 0x80)
        abort ();
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  /* Most significant nibble first.  */
  for (digit = 0; digit < 8; digit++)
    buffer[2 + digit] = hex_digits[(code_point >> (28 - 4 * digit)) & 0xF];

  return seq_len;
}
1276
1277
041c3194 1278/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885 1279 already contain the enough space to hold the token's spelling.
6cf87ca4 1280 Returns a pointer to the character after the last character written.
47e20491
GK
1281 FORSTRING is true if this is to be the spelling after translation
1282 phase 1 (this is different for UCNs).
6cf87ca4 1283 FIXME: Would be nice if we didn't need the PFILE argument. */
93c80368 1284unsigned char *
6cf87ca4 1285cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
47e20491 1286 unsigned char *buffer, bool forstring)
041c3194 1287{
96be6998 1288 switch (TOKEN_SPELL (token))
041c3194
ZW
1289 {
1290 case SPELL_OPERATOR:
1291 {
1292 const unsigned char *spelling;
1293 unsigned char c;
d6d5f795 1294
041c3194 1295 if (token->flags & DIGRAPH)
37b8524c
JDA
1296 spelling
1297 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
92936ecf
ZW
1298 else if (token->flags & NAMED_OP)
1299 goto spell_ident;
041c3194 1300 else
96be6998 1301 spelling = TOKEN_NAME (token);
df383483 1302
041c3194
ZW
1303 while ((c = *spelling++) != '\0')
1304 *buffer++ = c;
1305 }
1306 break;
d6d5f795 1307
47ad4138 1308 spell_ident:
041c3194 1309 case SPELL_IDENT:
47e20491
GK
1310 if (forstring)
1311 {
1312 memcpy (buffer, NODE_NAME (token->val.node),
1313 NODE_LEN (token->val.node));
1314 buffer += NODE_LEN (token->val.node);
1315 }
1316 else
1317 {
1318 size_t i;
1319 const unsigned char * name = NODE_NAME (token->val.node);
1320
1321 for (i = 0; i < NODE_LEN (token->val.node); i++)
1322 if (name[i] & ~0x7F)
1323 {
1324 i += utf8_to_ucn (buffer, name + i) - 1;
1325 buffer += 10;
1326 }
1327 else
1328 *buffer++ = NODE_NAME (token->val.node)[i];
1329 }
041c3194 1330 break;
d6d5f795 1331
6338b358 1332 case SPELL_LITERAL:
47ad4138
ZW
1333 memcpy (buffer, token->val.str.text, token->val.str.len);
1334 buffer += token->val.str.len;
1335 break;
1336
041c3194 1337 case SPELL_NONE:
0527bc4e
JDA
1338 cpp_error (pfile, CPP_DL_ICE,
1339 "unspellable token %s", TOKEN_NAME (token));
041c3194
ZW
1340 break;
1341 }
d6d5f795 1342
041c3194
ZW
1343 return buffer;
1344}
d6d5f795 1345
5d8ebbd8
NB
1346/* Returns TOKEN spelt as a null-terminated string. The string is
1347 freed when the reader is destroyed. Useful for diagnostics. */
93c80368 1348unsigned char *
6cf87ca4 1349cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
59325650
NB
1350{
1351 unsigned int len = cpp_token_len (token) + 1;
ece54d54 1352 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
c5a04734 1353
47e20491 1354 end = cpp_spell_token (pfile, token, start, false);
93c80368 1355 end[0] = '\0';
c5a04734 1356
93c80368
NB
1357 return start;
1358}
c5a04734 1359
5d8ebbd8
NB
1360/* Used by C front ends, which really should move to using
1361 cpp_token_as_text. */
93c80368 1362const char *
6cf87ca4 1363cpp_type2name (enum cpp_ttype type)
93c80368
NB
1364{
1365 return (const char *) token_spellings[type].name;
1366}
c5a04734 1367
4ed5bcfb
NB
1368/* Writes the spelling of token to FP, without any preceding space.
1369 Separated from cpp_spell_token for efficiency - to avoid stdio
1370 double-buffering. */
93c80368 1371void
6cf87ca4 1372cpp_output_token (const cpp_token *token, FILE *fp)
93c80368 1373{
93c80368 1374 switch (TOKEN_SPELL (token))
c5a04734 1375 {
93c80368
NB
1376 case SPELL_OPERATOR:
1377 {
1378 const unsigned char *spelling;
3b681e9d 1379 int c;
c5a04734 1380
93c80368 1381 if (token->flags & DIGRAPH)
37b8524c
JDA
1382 spelling
1383 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
93c80368
NB
1384 else if (token->flags & NAMED_OP)
1385 goto spell_ident;
1386 else
1387 spelling = TOKEN_NAME (token);
041c3194 1388
3b681e9d
ZW
1389 c = *spelling;
1390 do
1391 putc (c, fp);
1392 while ((c = *++spelling) != '\0');
93c80368
NB
1393 }
1394 break;
041c3194 1395
93c80368
NB
1396 spell_ident:
1397 case SPELL_IDENT:
47e20491
GK
1398 {
1399 size_t i;
1400 const unsigned char * name = NODE_NAME (token->val.node);
1401
1402 for (i = 0; i < NODE_LEN (token->val.node); i++)
1403 if (name[i] & ~0x7F)
1404 {
1405 unsigned char buffer[10];
1406 i += utf8_to_ucn (buffer, name + i) - 1;
1407 fwrite (buffer, 1, 10, fp);
1408 }
1409 else
1410 fputc (NODE_NAME (token->val.node)[i], fp);
1411 }
1412 break;
041c3194 1413
6338b358 1414 case SPELL_LITERAL:
47ad4138
ZW
1415 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1416 break;
1417
93c80368
NB
1418 case SPELL_NONE:
1419 /* An error, most probably. */
1420 break;
041c3194 1421 }
c5a04734
ZW
1422}
1423
93c80368
NB
1424/* Compare two tokens. */
1425int
6cf87ca4 1426_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
c5a04734 1427{
93c80368
NB
1428 if (a->type == b->type && a->flags == b->flags)
1429 switch (TOKEN_SPELL (a))
1430 {
1431 default: /* Keep compiler happy. */
1432 case SPELL_OPERATOR:
1433 return 1;
93c80368 1434 case SPELL_NONE:
56051c0a 1435 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
93c80368
NB
1436 case SPELL_IDENT:
1437 return a->val.node == b->val.node;
6338b358 1438 case SPELL_LITERAL:
93c80368
NB
1439 return (a->val.str.len == b->val.str.len
1440 && !memcmp (a->val.str.text, b->val.str.text,
1441 a->val.str.len));
1442 }
c5a04734 1443
041c3194
ZW
1444 return 0;
1445}
1446
93c80368
NB
1447/* Returns nonzero if a space should be inserted to avoid an
1448 accidental token paste for output. For simplicity, it is
1449 conservative, and occasionally advises a space where one is not
1450 needed, e.g. "." and ".2". */
93c80368 1451int
6cf87ca4
ZW
1452cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1453 const cpp_token *token2)
c5a04734 1454{
93c80368
NB
1455 enum cpp_ttype a = token1->type, b = token2->type;
1456 cppchar_t c;
c5a04734 1457
93c80368
NB
1458 if (token1->flags & NAMED_OP)
1459 a = CPP_NAME;
1460 if (token2->flags & NAMED_OP)
1461 b = CPP_NAME;
c5a04734 1462
93c80368
NB
1463 c = EOF;
1464 if (token2->flags & DIGRAPH)
37b8524c 1465 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
93c80368
NB
1466 else if (token_spellings[b].category == SPELL_OPERATOR)
1467 c = token_spellings[b].name[0];
c5a04734 1468
93c80368 1469 /* Quickly get everything that can paste with an '='. */
37b8524c 1470 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
93c80368 1471 return 1;
c5a04734 1472
93c80368 1473 switch (a)
c5a04734 1474 {
93c80368
NB
1475 case CPP_GREATER: return c == '>' || c == '?';
1476 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1477 case CPP_PLUS: return c == '+';
1478 case CPP_MINUS: return c == '-' || c == '>';
1479 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1480 case CPP_MOD: return c == ':' || c == '>';
1481 case CPP_AND: return c == '&';
1482 case CPP_OR: return c == '|';
1483 case CPP_COLON: return c == ':' || c == '>';
1484 case CPP_DEREF: return c == '*';
26ec42ee 1485 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
93c80368
NB
1486 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1487 case CPP_NAME: return ((b == CPP_NUMBER
1488 && name_p (pfile, &token2->val.str))
1489 || b == CPP_NAME
1490 || b == CPP_CHAR || b == CPP_STRING); /* L */
1491 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1492 || c == '.' || c == '+' || c == '-');
1613e52b 1493 /* UCNs */
1067694a
NB
1494 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1495 && b == CPP_NAME)
1613e52b 1496 || (CPP_OPTION (pfile, objc)
1067694a 1497 && token1->val.str.text[0] == '@'
1613e52b 1498 && (b == CPP_NAME || b == CPP_STRING)));
93c80368 1499 default: break;
c5a04734 1500 }
c5a04734 1501
417f3e3a 1502 return 0;
c5a04734
ZW
1503}
1504
93c80368 1505/* Output all the remaining tokens on the current line, and a newline
4ed5bcfb
NB
1506 character, to FP. Leading whitespace is removed. If there are
1507 macros, special token padding is not performed. */
c5a04734 1508void
6cf87ca4 1509cpp_output_line (cpp_reader *pfile, FILE *fp)
c5a04734 1510{
4ed5bcfb 1511 const cpp_token *token;
96be6998 1512
4ed5bcfb
NB
1513 token = cpp_get_token (pfile);
1514 while (token->type != CPP_EOF)
96be6998 1515 {
4ed5bcfb
NB
1516 cpp_output_token (token, fp);
1517 token = cpp_get_token (pfile);
1518 if (token->flags & PREV_WHITE)
1519 putc (' ', fp);
96be6998
ZW
1520 }
1521
93c80368 1522 putc ('\n', fp);
041c3194 1523}
c5a04734 1524
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the distance from BUFF's cur
   to its limit.  Every macro argument is parenthesized so that any
   expression may be passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1539
c9e7a609
NB
1540/* Create a new allocation buffer. Place the control block at the end
1541 of the buffer, so that buffer overflows will cause immediate chaos. */
b8af0ca5 1542static _cpp_buff *
6cf87ca4 1543new_buff (size_t len)
b8af0ca5
NB
1544{
1545 _cpp_buff *result;
ece54d54 1546 unsigned char *base;
b8af0ca5 1547
1e013d2e
NB
1548 if (len < MIN_BUFF_SIZE)
1549 len = MIN_BUFF_SIZE;
c70f6ed3 1550 len = CPP_ALIGN (len);
b8af0ca5 1551
c3f829c1 1552 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
b8af0ca5
NB
1553 result = (_cpp_buff *) (base + len);
1554 result->base = base;
1555 result->cur = base;
1556 result->limit = base + len;
1557 result->next = NULL;
1558 return result;
1559}
1560
1561/* Place a chain of unwanted allocation buffers on the free list. */
1562void
6cf87ca4 1563_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
b8af0ca5
NB
1564{
1565 _cpp_buff *end = buff;
1566
1567 while (end->next)
1568 end = end->next;
1569 end->next = pfile->free_buffs;
1570 pfile->free_buffs = buff;
1571}
1572
1573/* Return a free buffer of size at least MIN_SIZE. */
1574_cpp_buff *
6cf87ca4 1575_cpp_get_buff (cpp_reader *pfile, size_t min_size)
b8af0ca5
NB
1576{
1577 _cpp_buff *result, **p;
1578
1579 for (p = &pfile->free_buffs;; p = &(*p)->next)
1580 {
6142088c 1581 size_t size;
1e013d2e
NB
1582
1583 if (*p == NULL)
b8af0ca5 1584 return new_buff (min_size);
1e013d2e
NB
1585 result = *p;
1586 size = result->limit - result->base;
1587 /* Return a buffer that's big enough, but don't waste one that's
1588 way too big. */
34f5271d 1589 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
b8af0ca5
NB
1590 break;
1591 }
1592
1593 *p = result->next;
1594 result->next = NULL;
1595 result->cur = result->base;
1596 return result;
1597}
1598
4fe9b91c 1599/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
1600 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1601 the excess bytes to the new buffer. Chains the new buffer after
1602 BUFF, and returns the new buffer. */
b8af0ca5 1603_cpp_buff *
6cf87ca4 1604_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
b8af0ca5 1605{
6142088c 1606 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
8c3b2693 1607 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
b8af0ca5 1608
8c3b2693
NB
1609 buff->next = new_buff;
1610 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1611 return new_buff;
1612}
1613
4fe9b91c 1614/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
1615 remaining bytes of the buffer pointed to by BUFF, and at least
1616 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1617 Chains the new buffer before the buffer pointed to by BUFF, and
1618 updates the pointer to point to the new buffer. */
1619void
6cf87ca4 1620_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
8c3b2693
NB
1621{
1622 _cpp_buff *new_buff, *old_buff = *pbuff;
1623 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1624
1625 new_buff = _cpp_get_buff (pfile, size);
1626 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1627 new_buff->next = old_buff;
1628 *pbuff = new_buff;
b8af0ca5
NB
1629}
1630
1631/* Free a chain of buffers starting at BUFF. */
1632void
5671bf27 1633_cpp_free_buff (_cpp_buff *buff)
b8af0ca5
NB
1634{
1635 _cpp_buff *next;
1636
1637 for (; buff; buff = next)
1638 {
1639 next = buff->next;
1640 free (buff->base);
1641 }
1642}
417f3e3a 1643
ece54d54
NB
1644/* Allocate permanent, unaligned storage of length LEN. */
1645unsigned char *
6cf87ca4 1646_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
ece54d54
NB
1647{
1648 _cpp_buff *buff = pfile->u_buff;
1649 unsigned char *result = buff->cur;
1650
1651 if (len > (size_t) (buff->limit - result))
1652 {
1653 buff = _cpp_get_buff (pfile, len);
1654 buff->next = pfile->u_buff;
1655 pfile->u_buff = buff;
1656 result = buff->cur;
1657 }
1658
1659 buff->cur = result + len;
1660 return result;
1661}
1662
87062813
NB
1663/* Allocate permanent, unaligned storage of length LEN from a_buff.
1664 That buffer is used for growing allocations when saving macro
1665 replacement lists in a #define, and when parsing an answer to an
1666 assertion in #assert, #unassert or #if (and therefore possibly
1667 whilst expanding macros). It therefore must not be used by any
1668 code that they might call: specifically the lexer and the guts of
1669 the macro expander.
1670
1671 All existing other uses clearly fit this restriction: storing
1672 registered pragmas during initialization. */
93c80368 1673unsigned char *
6cf87ca4 1674_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
3fef5b2b 1675{
8c3b2693
NB
1676 _cpp_buff *buff = pfile->a_buff;
1677 unsigned char *result = buff->cur;
3fef5b2b 1678
8c3b2693 1679 if (len > (size_t) (buff->limit - result))
3fef5b2b 1680 {
8c3b2693
NB
1681 buff = _cpp_get_buff (pfile, len);
1682 buff->next = pfile->a_buff;
1683 pfile->a_buff = buff;
1684 result = buff->cur;
3fef5b2b 1685 }
041c3194 1686
8c3b2693 1687 buff->cur = result + len;
93c80368 1688 return result;
041c3194 1689}
d8044160
GK
1690
1691/* Say which field of TOK is in use. */
1692
1693enum cpp_token_fld_kind
1694cpp_token_val_index (cpp_token *tok)
1695{
1696 switch (TOKEN_SPELL (tok))
1697 {
1698 case SPELL_IDENT:
1699 return CPP_TOKEN_FLD_NODE;
1700 case SPELL_LITERAL:
1701 return CPP_TOKEN_FLD_STR;
1702 case SPELL_NONE:
1703 if (tok->type == CPP_MACRO_ARG)
1704 return CPP_TOKEN_FLD_ARG_NO;
1705 else if (tok->type == CPP_PADDING)
1706 return CPP_TOKEN_FLD_SOURCE;
21b11495 1707 else if (tok->type == CPP_PRAGMA)
bc4071dd 1708 return CPP_TOKEN_FLD_PRAGMA;
d8044160
GK
1709 /* else fall through */
1710 default:
1711 return CPP_TOKEN_FLD_NONE;
1712 }
1713}
This page took 1.302375 seconds and 5 git commands to generate.