gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7
   8 This program is free software; you can redistribute it and/or modify it
   9 under the terms of the GNU General Public License as published by the
  10 Free Software Foundation; either version 2, or (at your option) any
  11 later version.
  12
  13 This program is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with this program; if not, write to the Free Software
  20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "cpplib.h"
  25 #include "cpphash.h"
  26
  27 enum spell_type
  28 {
  29   SPELL_OPERATOR = 0,
  30   SPELL_IDENT,
  31   SPELL_LITERAL,
  32   SPELL_NONE
  33 };
  34
  35 struct token_spelling
  36 {
  37   enum spell_type category;
  38   const unsigned char *name;
  39 };
  40
  41 static const unsigned char *const digraph_spellings[] =
  42 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  43
  44 #define OP(e, s) { SPELL_OPERATOR, U s           },
  45 #define TK(e, s) { s,              U #e },
  46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  47 #undef OP
  48 #undef TK
  49
  50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  52
  53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
  54 static int skip_line_comment (cpp_reader *);
  55 static void skip_whitespace (cpp_reader *, cppchar_t);
  56 static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
  57 static void lex_number (cpp_reader *, cpp_string *);
  58 static bool forms_identifier_p (cpp_reader *, int);
  59 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
  60 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
  61 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
  62                             unsigned int, enum cpp_ttype);
  63 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
  64 static int name_p (cpp_reader *, const cpp_string *);
  65 static tokenrun *next_tokenrun (tokenrun *);
  66
  67 static _cpp_buff *new_buff (size_t);
  68
  69
  70 /* Utility routine:
  71
  72    Compares, the token TOKEN to the NUL-terminated string STRING.
  73    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  74 int
  75 cpp_ideq (const cpp_token *token, const char *string)
  76 {
  77   if (token->type != CPP_NAME)
  78     return 0;
  79
  80   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
  81 }
  82
  83 /* Record a note TYPE at byte POS into the current cleaned logical
  84    line.  */
  85 static void
  86 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
  87 {
  88   if (buffer->notes_used == buffer->notes_cap)
  89     {
  90       buffer->notes_cap = buffer->notes_cap * 2 + 200;
  91       buffer->notes = xrealloc (buffer->notes,
  92                                 buffer->notes_cap * sizeof (_cpp_line_note));
  93     }
  94
  95   buffer->notes[buffer->notes_used].pos = pos;
  96   buffer->notes[buffer->notes_used].type = type;
  97   buffer->notes_used++;
  98 }
  99
 100 /* Returns with a logical line that contains no escaped newlines or
 101    trigraphs.  This is a time-critical inner loop.  */
 102 void
 103 _cpp_clean_line (cpp_reader *pfile)
 104 {
 105   cpp_buffer *buffer;
 106   const uchar *s;
 107   uchar c, *d, *p;
 108
 109   buffer = pfile->buffer;
 110   buffer->cur_note = buffer->notes_used = 0;
 111   buffer->cur = buffer->line_base = buffer->next_line;
 112   buffer->need_line = false;
 113   s = buffer->next_line - 1;
 114
 115   if (!buffer->from_stage3)
 116     {
 117       /* Short circuit for the common case of an un-escaped line with
 118          no trigraphs.  The primary win here is by not writing any
 119          data back to memory until we have to.  */
 120       for (;;)
 121         {
 122           c = *++s;
 123           if (c == '\n' || c == '\r')
 124             {
 125               d = (uchar *) s;
 126
 127               if (s == buffer->rlimit)
 128                 goto done;
 129
 130               /* DOS line ending? */
 131               if (c == '\r' && s[1] == '\n')
 132                 s++;
 133
 134               if (s == buffer->rlimit)
 135                 goto done;
 136
 137               /* check for escaped newline */
 138               p = d;
 139               while (p != buffer->next_line && is_nvspace (p[-1]))
 140                 p--;
 141               if (p == buffer->next_line || p[-1] != '\\')
 142                 goto done;
 143
 144               /* Have an escaped newline; process it and proceed to
 145                  the slow path.  */
 146               add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
 147               d = p - 2;
 148               buffer->next_line = p - 1;
 149               break;
 150             }
 151           if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 152             {
 153               /* Have a trigraph.  We may or may not have to convert
 154                  it.  Add a line note regardless, for -Wtrigraphs.  */
 155               add_line_note (buffer, s, s[2]);
 156               if (CPP_OPTION (pfile, trigraphs))
 157                 {
 158                   /* We do, and that means we have to switch to the
 159                      slow path.  */
 160                   d = (uchar *) s;
 161                   *d = _cpp_trigraph_map[s[2]];
 162                   s += 2;
 163                   break;
 164                 }
 165             }
 166         }
 167
 168
 169       for (;;)
 170         {
 171           c = *++s;
 172           *++d = c;
 173
 174           if (c == '\n' || c == '\r')
 175             {
 176                   /* Handle DOS line endings.  */
 177               if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
 178                 s++;
 179               if (s == buffer->rlimit)
 180                 break;
 181
 182               /* Escaped?  */
 183               p = d;
 184               while (p != buffer->next_line && is_nvspace (p[-1]))
 185                 p--;
 186               if (p == buffer->next_line || p[-1] != '\\')
 187                 break;
 188
 189               add_line_note (buffer, p - 1, p != d ? ' ': '\\');
 190               d = p - 2;
 191               buffer->next_line = p - 1;
 192             }
 193           else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 194             {
 195               /* Add a note regardless, for the benefit of -Wtrigraphs.  */
 196               add_line_note (buffer, d, s[2]);
 197               if (CPP_OPTION (pfile, trigraphs))
 198                 {
 199                   *d = _cpp_trigraph_map[s[2]];
 200                   s += 2;
 201                 }
 202             }
 203         }
 204     }
 205   else
 206     {
 207       do
 208         s++;
 209       while (*s != '\n' && *s != '\r');
 210       d = (uchar *) s;
 211
 212       /* Handle DOS line endings.  */
 213       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
 214         s++;
 215     }
 216
 217  done:
 218   *d = '\n';
 219   /* A sentinel note that should never be processed.  */
 220   add_line_note (buffer, d + 1, '\n');
 221   buffer->next_line = s + 1;
 222 }
 223
 224 /* Return true if the trigraph indicated by NOTE should be warned
 225    about in a comment.  */
 226 static bool
 227 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
 228 {
 229   const uchar *p;
 230
 231   /* Within comments we don't warn about trigraphs, unless the
 232      trigraph forms an escaped newline, as that may change
 233      behavior.  */
 234   if (note->type != '/')
 235     return false;
 236
 237   /* If -trigraphs, then this was an escaped newline iff the next note
 238      is coincident.  */
 239   if (CPP_OPTION (pfile, trigraphs))
 240     return note[1].pos == note->pos;
 241
 242   /* Otherwise, see if this forms an escaped newline.  */
 243   p = note->pos + 3;
 244   while (is_nvspace (*p))
 245     p++;
 246
 247   /* There might have been escaped newlines between the trigraph and the
 248      newline we found.  Hence the position test.  */
 249   return (*p == '\n' && p < note[1].pos);
 250 }
 251
 252 /* Process the notes created by add_line_note as far as the current
 253    location.  */
 254 void
 255 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
 256 {
 257   cpp_buffer *buffer = pfile->buffer;
 258
 259   for (;;)
 260     {
 261       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
 262       unsigned int col;
 263
 264       if (note->pos > buffer->cur)
 265         break;
 266
 267       buffer->cur_note++;
 268       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
 269
 270       if (note->type == '\\' || note->type == ' ')
 271         {
 272           if (note->type == ' ' && !in_comment)
 273             cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 274                                  "backslash and newline separated by space");
 275
 276           if (buffer->next_line > buffer->rlimit)
 277             {
 278               cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
 279                                    "backslash-newline at end of file");
 280               /* Prevent "no newline at end of file" warning.  */
 281               buffer->next_line = buffer->rlimit;
 282             }
 283
 284           buffer->line_base = note->pos;
 285           CPP_INCREMENT_LINE (pfile, 0);
 286         }
 287       else if (_cpp_trigraph_map[note->type])
 288         {
 289           if (CPP_OPTION (pfile, warn_trigraphs)
 290               && (!in_comment || warn_in_comment (pfile, note)))
 291             {
 292               if (CPP_OPTION (pfile, trigraphs))
 293                 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 294                                      "trigraph ??%c converted to %c",
 295                                      note->type,
 296                                      (int) _cpp_trigraph_map[note->type]);
 297               else
 298                 {
 299                   cpp_error_with_line
 300                     (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 301                      "trigraph ??%c ignored, use -trigraphs to enable",
 302                      note->type);
 303                 }
 304             }
 305         }
 306       else
 307         abort ();
 308     }
 309 }
 310
 311 /* Skip a C-style block comment.  We find the end of the comment by
 312    seeing if an asterisk is before every '/' we encounter.  Returns
 313    nonzero if comment terminated by EOF, zero otherwise.
 314
 315    Buffer->cur points to the initial asterisk of the comment.  */
 316 bool
 317 _cpp_skip_block_comment (cpp_reader *pfile)
 318 {
 319   cpp_buffer *buffer = pfile->buffer;
 320   const uchar *cur = buffer->cur;
 321   uchar c;
 322
 323   cur++;
 324   if (*cur == '/')
 325     cur++;
 326
 327   for (;;)
 328     {
 329       /* People like decorating comments with '*', so check for '/'
 330          instead for efficiency.  */
 331       c = *cur++;
 332
 333       if (c == '/')
 334         {
 335           if (cur[-2] == '*')
 336             break;
 337
 338           /* Warn about potential nested comments, but not if the '/'
 339              comes immediately before the true comment delimiter.
 340              Don't bother to get it right across escaped newlines.  */
 341           if (CPP_OPTION (pfile, warn_comments)
 342               && cur[0] == '*' && cur[1] != '/')
 343             {
 344               buffer->cur = cur;
 345               cpp_error_with_line (pfile, CPP_DL_WARNING,
 346                                    pfile->line_table->highest_line, CPP_BUF_COL (buffer),
 347                                    "\"/*\" within comment");
 348             }
 349         }
 350       else if (c == '\n')
 351         {
 352           unsigned int cols;
 353           buffer->cur = cur - 1;
 354           _cpp_process_line_notes (pfile, true);
 355           if (buffer->next_line >= buffer->rlimit)
 356             return true;
 357           _cpp_clean_line (pfile);
 358
 359           cols = buffer->next_line - buffer->line_base;
 360           CPP_INCREMENT_LINE (pfile, cols);
 361
 362           cur = buffer->cur;
 363         }
 364     }
 365
 366   buffer->cur = cur;
 367   _cpp_process_line_notes (pfile, true);
 368   return false;
 369 }
 370
 371 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 372    terminating newline.  Handles escaped newlines.  Returns nonzero
 373    if a multiline comment.  */
 374 static int
 375 skip_line_comment (cpp_reader *pfile)
 376 {
 377   cpp_buffer *buffer = pfile->buffer;
 378   unsigned int orig_line = pfile->line_table->highest_line;
 379
 380   while (*buffer->cur != '\n')
 381     buffer->cur++;
 382
 383   _cpp_process_line_notes (pfile, true);
 384   return orig_line != pfile->line_table->highest_line;
 385 }
 386
 387 /* Skips whitespace, saving the next non-whitespace character.  */
 388 static void
 389 skip_whitespace (cpp_reader *pfile, cppchar_t c)
 390 {
 391   cpp_buffer *buffer = pfile->buffer;
 392   bool saw_NUL = false;
 393
 394   do
 395     {
 396       /* Horizontal space always OK.  */
 397       if (c == ' ' || c == '\t')
 398         ;
 399       /* Just \f \v or \0 left.  */
 400       else if (c == '\0')
 401         saw_NUL = true;
 402       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 403         cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
 404                              CPP_BUF_COL (buffer),
 405                              "%s in preprocessing directive",
 406                              c == '\f' ? "form feed" : "vertical tab");
 407
 408       c = *buffer->cur++;
 409     }
 410   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 411   while (is_nvspace (c));
 412
 413   if (saw_NUL)
 414     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
 415
 416   buffer->cur--;
 417 }
 418
 419 /* See if the characters of a number token are valid in a name (no
 420    '.', '+' or '-').  */
 421 static int
 422 name_p (cpp_reader *pfile, const cpp_string *string)
 423 {
 424   unsigned int i;
 425
 426   for (i = 0; i < string->len; i++)
 427     if (!is_idchar (string->text[i]))
 428       return 0;
 429
 430   return 1;
 431 }
 432
 433 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
 434    an identifier.  FIRST is TRUE if this starts an identifier.  */
 435 static bool
 436 forms_identifier_p (cpp_reader *pfile, int first)
 437 {
 438   cpp_buffer *buffer = pfile->buffer;
 439
 440   if (*buffer->cur == '$')
 441     {
 442       if (!CPP_OPTION (pfile, dollars_in_ident))
 443         return false;
 444
 445       buffer->cur++;
 446       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
 447         {
 448           CPP_OPTION (pfile, warn_dollars) = 0;
 449           cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
 450         }
 451
 452       return true;
 453     }
 454
 455   /* Is this a syntactically valid UCN?  */
 456   if (0 && *buffer->cur == '\\'
 457       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
 458     {
 459       buffer->cur += 2;
 460       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
 461         return true;
 462       buffer->cur -= 2;
 463     }
 464
 465   return false;
 466 }
 467
 468 /* Lex an identifier starting at BUFFER->CUR - 1.  */
 469 static cpp_hashnode *
 470 lex_identifier (cpp_reader *pfile, const uchar *base)
 471 {
 472   cpp_hashnode *result;
 473   const uchar *cur;
 474
 475   do
 476     {
 477       cur = pfile->buffer->cur;
 478
 479       /* N.B. ISIDNUM does not include $.  */
 480       while (ISIDNUM (*cur))
 481         cur++;
 482
 483       pfile->buffer->cur = cur;
 484     }
 485   while (forms_identifier_p (pfile, false));
 486
 487   result = (cpp_hashnode *)
 488     ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 489
 490   /* Rarely, identifiers require diagnostics when lexed.  */
 491   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 492                         && !pfile->state.skipping, 0))
 493     {
 494       /* It is allowed to poison the same identifier twice.  */
 495       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 496         cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
 497                    NODE_NAME (result));
 498
 499       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 500          replacement list of a variadic macro.  */
 501       if (result == pfile->spec_nodes.n__VA_ARGS__
 502           && !pfile->state.va_args_ok)
 503         cpp_error (pfile, CPP_DL_PEDWARN,
 504                    "__VA_ARGS__ can only appear in the expansion"
 505                    " of a C99 variadic macro");
 506     }
 507
 508   return result;
 509 }
 510
 511 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
 512 static void
 513 lex_number (cpp_reader *pfile, cpp_string *number)
 514 {
 515   const uchar *cur;
 516   const uchar *base;
 517   uchar *dest;
 518
 519   base = pfile->buffer->cur - 1;
 520   do
 521     {
 522       cur = pfile->buffer->cur;
 523
 524       /* N.B. ISIDNUM does not include $.  */
 525       while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 526         cur++;
 527
 528       pfile->buffer->cur = cur;
 529     }
 530   while (forms_identifier_p (pfile, false));
 531
 532   number->len = cur - base;
 533   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 534   memcpy (dest, base, number->len);
 535   dest[number->len] = '\0';
 536   number->text = dest;
 537 }
 538
 539 /* Create a token of type TYPE with a literal spelling.  */
 540 static void
 541 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
 542                 unsigned int len, enum cpp_ttype type)
 543 {
 544   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
 545
 546   memcpy (dest, base, len);
 547   dest[len] = '\0';
 548   token->type = type;
 549   token->val.str.len = len;
 550   token->val.str.text = dest;
 551 }
 552
 553 /* Lexes a string, character constant, or angle-bracketed header file
 554    name.  The stored string contains the spelling, including opening
 555    quote and leading any leading 'L'.  It returns the type of the
 556    literal, or CPP_OTHER if it was not properly terminated.
 557
 558    The spelling is NUL-terminated, but it is not guaranteed that this
 559    is the first NUL since embedded NULs are preserved.  */
 560 static void
 561 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
 562 {
 563   bool saw_NUL = false;
 564   const uchar *cur;
 565   cppchar_t terminator;
 566   enum cpp_ttype type;
 567
 568   cur = base;
 569   terminator = *cur++;
 570   if (terminator == 'L')
 571     terminator = *cur++;
 572   if (terminator == '\"')
 573     type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
 574   else if (terminator == '\'')
 575     type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
 576   else
 577     terminator = '>', type = CPP_HEADER_NAME;
 578
 579   for (;;)
 580     {
 581       cppchar_t c = *cur++;
 582
 583       /* In #include-style directives, terminators are not escapable.  */
 584       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
 585         cur++;
 586       else if (c == terminator)
 587         break;
 588       else if (c == '\n')
 589         {
 590           cur--;
 591           type = CPP_OTHER;
 592           break;
 593         }
 594       else if (c == '\0')
 595         saw_NUL = true;
 596     }
 597
 598   if (saw_NUL && !pfile->state.skipping)
 599     cpp_error (pfile, CPP_DL_WARNING,
 600                "null character(s) preserved in literal");
 601
 602   pfile->buffer->cur = cur;
 603   create_literal (pfile, token, base, cur - base, type);
 604 }
 605
 606 /* The stored comment includes the comment start and any terminator.  */
 607 static void
 608 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
 609               cppchar_t type)
 610 {
 611   unsigned char *buffer;
 612   unsigned int len, clen;
 613
 614   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 615
 616   /* C++ comments probably (not definitely) have moved past a new
 617      line, which we don't want to save in the comment.  */
 618   if (is_vspace (pfile->buffer->cur[-1]))
 619     len--;
 620
 621   /* If we are currently in a directive, then we need to store all
 622      C++ comments as C comments internally, and so we need to
 623      allocate a little extra space in that case.
 624
 625      Note that the only time we encounter a directive here is
 626      when we are saving comments in a "#define".  */
 627   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 628
 629   buffer = _cpp_unaligned_alloc (pfile, clen);
 630
 631   token->type = CPP_COMMENT;
 632   token->val.str.len = clen;
 633   token->val.str.text = buffer;
 634
 635   buffer[0] = '/';
 636   memcpy (buffer + 1, from, len - 1);
 637
 638   /* Finish conversion to a C comment, if necessary.  */
 639   if (pfile->state.in_directive && type == '/')
 640     {
 641       buffer[1] = '*';
 642       buffer[clen - 2] = '*';
 643       buffer[clen - 1] = '/';
 644     }
 645 }
 646
 647 /* Allocate COUNT tokens for RUN.  */
 648 void
 649 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
 650 {
 651   run->base = xnewvec (cpp_token, count);
 652   run->limit = run->base + count;
 653   run->next = NULL;
 654 }
 655
 656 /* Returns the next tokenrun, or creates one if there is none.  */
 657 static tokenrun *
 658 next_tokenrun (tokenrun *run)
 659 {
 660   if (run->next == NULL)
 661     {
 662       run->next = xnew (tokenrun);
 663       run->next->prev = run;
 664       _cpp_init_tokenrun (run->next, 250);
 665     }
 666
 667   return run->next;
 668 }
 669
 670 /* Allocate a single token that is invalidated at the same time as the
 671    rest of the tokens on the line.  Has its line and col set to the
 672    same as the last lexed token, so that diagnostics appear in the
 673    right place.  */
 674 cpp_token *
 675 _cpp_temp_token (cpp_reader *pfile)
 676 {
 677   cpp_token *old, *result;
 678
 679   old = pfile->cur_token - 1;
 680   if (pfile->cur_token == pfile->cur_run->limit)
 681     {
 682       pfile->cur_run = next_tokenrun (pfile->cur_run);
 683       pfile->cur_token = pfile->cur_run->base;
 684     }
 685
 686   result = pfile->cur_token++;
 687   result->src_loc = old->src_loc;
 688   return result;
 689 }
 690
 691 /* Lex a token into RESULT (external interface).  Takes care of issues
 692    like directive handling, token lookahead, multiple include
 693    optimization and skipping.  */
 694 const cpp_token *
 695 _cpp_lex_token (cpp_reader *pfile)
 696 {
 697   cpp_token *result;
 698
 699   for (;;)
 700     {
 701       if (pfile->cur_token == pfile->cur_run->limit)
 702         {
 703           pfile->cur_run = next_tokenrun (pfile->cur_run);
 704           pfile->cur_token = pfile->cur_run->base;
 705         }
 706
 707       if (pfile->lookaheads)
 708         {
 709           pfile->lookaheads--;
 710           result = pfile->cur_token++;
 711         }
 712       else
 713         result = _cpp_lex_direct (pfile);
 714
 715       if (result->flags & BOL)
 716         {
 717           /* Is this a directive.  If _cpp_handle_directive returns
 718              false, it is an assembler #.  */
 719           if (result->type == CPP_HASH
 720               /* 6.10.3 p 11: Directives in a list of macro arguments
 721                  gives undefined behavior.  This implementation
 722                  handles the directive as normal.  */
 723               && pfile->state.parsing_args != 1
 724               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 725             continue;
 726           if (pfile->cb.line_change && !pfile->state.skipping)
 727             pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
 728         }
 729
 730       /* We don't skip tokens in directives.  */
 731       if (pfile->state.in_directive)
 732         break;
 733
 734       /* Outside a directive, invalidate controlling macros.  At file
 735          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 736          get here and MI optimization works.  */
 737       pfile->mi_valid = false;
 738
 739       if (!pfile->state.skipping || result->type == CPP_EOF)
 740         break;
 741     }
 742
 743   return result;
 744 }
 745
 746 /* Returns true if a fresh line has been loaded.  */
 747 bool
 748 _cpp_get_fresh_line (cpp_reader *pfile)
 749 {
 750   int return_at_eof;
 751
 752   /* We can't get a new line until we leave the current directive.  */
 753   if (pfile->state.in_directive)
 754     return false;
 755
 756   for (;;)
 757     {
 758       cpp_buffer *buffer = pfile->buffer;
 759
 760       if (!buffer->need_line)
 761         return true;
 762
 763       if (buffer->next_line < buffer->rlimit)
 764         {
 765           _cpp_clean_line (pfile);
 766           return true;
 767         }
 768
 769       /* First, get out of parsing arguments state.  */
 770       if (pfile->state.parsing_args)
 771         return false;
 772
 773       /* End of buffer.  Non-empty files should end in a newline.  */
 774       if (buffer->buf != buffer->rlimit
 775           && buffer->next_line > buffer->rlimit
 776           && !buffer->from_stage3)
 777         {
 778           /* Only warn once.  */
 779           buffer->next_line = buffer->rlimit;
 780           cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
 781                                CPP_BUF_COLUMN (buffer, buffer->cur),
 782                                "no newline at end of file");
 783         }
 784
 785       return_at_eof = buffer->return_at_eof;
 786       _cpp_pop_buffer (pfile);
 787       if (pfile->buffer == NULL || return_at_eof)
 788         return false;
 789     }
 790 }
 791
 792 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
 793   do                                                    \
 794     {                                                   \
 795       result->type = ELSE_TYPE;                         \
 796       if (*buffer->cur == CHAR)                         \
 797         buffer->cur++, result->type = THEN_TYPE;        \
 798     }                                                   \
 799   while (0)
 800
 801 /* Lex a token into pfile->cur_token, which is also incremented, to
 802    get diagnostics pointing to the correct location.
 803
 804    Does not handle issues such as token lookahead, multiple-include
 805    optimization, directives, skipping etc.  This function is only
 806    suitable for use by _cpp_lex_token, and in special cases like
 807    lex_expansion_token which doesn't care for any of these issues.
 808
 809    When meeting a newline, returns CPP_EOF if parsing a directive,
 810    otherwise returns to the start of the token buffer if permissible.
 811    Returns the location of the lexed token.  */
 812 cpp_token *
 813 _cpp_lex_direct (cpp_reader *pfile)
 814 {
 815   cppchar_t c;
 816   cpp_buffer *buffer;
 817   const unsigned char *comment_start;
 818   cpp_token *result = pfile->cur_token++;
 819
 820  fresh_line:
 821   result->flags = 0;
 822   buffer = pfile->buffer;
 823   if (buffer->need_line)
 824     {
 825       if (!_cpp_get_fresh_line (pfile))
 826         {
 827           result->type = CPP_EOF;
 828           if (!pfile->state.in_directive)
 829             {
 830               /* Tell the compiler the line number of the EOF token.  */
 831               result->src_loc = pfile->line_table->highest_line;
 832               result->flags = BOL;
 833             }
 834           return result;
 835         }
 836       if (!pfile->keep_tokens)
 837         {
 838           pfile->cur_run = &pfile->base_run;
 839           result = pfile->base_run.base;
 840           pfile->cur_token = result + 1;
 841         }
 842       result->flags = BOL;
 843       if (pfile->state.parsing_args == 2)
 844         result->flags |= PREV_WHITE;
 845     }
 846   buffer = pfile->buffer;
 847  update_tokens_line:
 848   result->src_loc = pfile->line_table->highest_line;
 849
 850  skipped_white:
 851   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
 852       && !pfile->overlaid_buffer)
 853     {
 854       _cpp_process_line_notes (pfile, false);
 855       result->src_loc = pfile->line_table->highest_line;
 856     }
 857   c = *buffer->cur++;
 858
 859   LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
 860                                CPP_BUF_COLUMN (buffer, buffer->cur));
 861
 862   switch (c)
 863     {
 864     case ' ': case '\t': case '\f': case '\v': case '\0':
 865       result->flags |= PREV_WHITE;
 866       skip_whitespace (pfile, c);
 867       goto skipped_white;
 868
 869     case '\n':
 870       if (buffer->cur < buffer->rlimit)
 871         CPP_INCREMENT_LINE (pfile, 0);
 872       buffer->need_line = true;
 873       goto fresh_line;
 874
 875     case '0': case '1': case '2': case '3': case '4':
 876     case '5': case '6': case '7': case '8': case '9':
 877       result->type = CPP_NUMBER;
 878       lex_number (pfile, &result->val.str);
 879       break;
 880
 881     case 'L':
 882       /* 'L' may introduce wide characters or strings.  */
 883       if (*buffer->cur == '\'' || *buffer->cur == '"')
 884         {
 885           lex_string (pfile, result, buffer->cur - 1);
 886           break;
 887         }
 888       /* Fall through.  */
 889
 890     case '_':
 891     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 892     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 893     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 894     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 895     case 'y': case 'z':
 896     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 897     case 'G': case 'H': case 'I': case 'J': case 'K':
 898     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 899     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 900     case 'Y': case 'Z':
 901       result->type = CPP_NAME;
 902       result->val.node = lex_identifier (pfile, buffer->cur - 1);
 903
 904       /* Convert named operators to their proper types.  */
 905       if (result->val.node->flags & NODE_OPERATOR)
 906         {
 907           result->flags |= NAMED_OP;
 908           result->type = result->val.node->directive_index;
 909         }
 910       break;
 911
 912     case '\'':
 913     case '"':
 914       lex_string (pfile, result, buffer->cur - 1);
 915       break;
 916
 917     case '/':
 918       /* A potential block or line comment.  */
 919       comment_start = buffer->cur;
 920       c = *buffer->cur;
 921
 922       if (c == '*')
 923         {
 924           if (_cpp_skip_block_comment (pfile))
 925             cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
 926         }
 927       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
 928                             || cpp_in_system_header (pfile)))
 929         {
 930           /* Warn about comments only if pedantically GNUC89, and not
 931              in system headers.  */
 932           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
 933               && ! buffer->warned_cplusplus_comments)
 934             {
 935               cpp_error (pfile, CPP_DL_PEDWARN,
 936                          "C++ style comments are not allowed in ISO C90");
 937               cpp_error (pfile, CPP_DL_PEDWARN,
 938                          "(this will be reported only once per input file)");
 939               buffer->warned_cplusplus_comments = 1;
 940             }
 941
 942           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
 943             cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
 944         }
 945       else if (c == '=')
 946         {
 947           buffer->cur++;
 948           result->type = CPP_DIV_EQ;
 949           break;
 950         }
 951       else
 952         {
 953           result->type = CPP_DIV;
 954           break;
 955         }
 956
 957       if (!pfile->state.save_comments)
 958         {
 959           result->flags |= PREV_WHITE;
 960           goto update_tokens_line;
 961         }
 962
 963       /* Save the comment as a token in its own right.  */
 964       save_comment (pfile, result, comment_start, c);
 965       break;
 966
 967     case '<':
 968       if (pfile->state.angled_headers)
 969         {
 970           lex_string (pfile, result, buffer->cur - 1);
 971           break;
 972         }
 973
 974       result->type = CPP_LESS;
 975       if (*buffer->cur == '=')
 976         buffer->cur++, result->type = CPP_LESS_EQ;
 977       else if (*buffer->cur == '<')
 978         {
 979           buffer->cur++;
 980           IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
 981         }
 982       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
 983         {
 984           buffer->cur++;
 985           IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
 986         }
 987       else if (CPP_OPTION (pfile, digraphs))
 988         {
 989           if (*buffer->cur == ':')
 990             {
 991               buffer->cur++;
 992               result->flags |= DIGRAPH;
 993               result->type = CPP_OPEN_SQUARE;
 994             }
 995           else if (*buffer->cur == '%')
 996             {
 997               buffer->cur++;
 998               result->flags |= DIGRAPH;
 999               result->type = CPP_OPEN_BRACE;
1000             }
1001         }
1002       break;
1003
1004     case '>':
1005       result->type = CPP_GREATER;
1006       if (*buffer->cur == '=')
1007         buffer->cur++, result->type = CPP_GREATER_EQ;
1008       else if (*buffer->cur == '>')
1009         {
1010           buffer->cur++;
1011           IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1012         }
1013       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1014         {
1015           buffer->cur++;
1016           IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1017         }
1018       break;
1019
1020     case '%':
1021       result->type = CPP_MOD;
1022       if (*buffer->cur == '=')
1023         buffer->cur++, result->type = CPP_MOD_EQ;
1024       else if (CPP_OPTION (pfile, digraphs))
1025         {
1026           if (*buffer->cur == ':')
1027             {
1028               buffer->cur++;
1029               result->flags |= DIGRAPH;
1030               result->type = CPP_HASH;
1031               if (*buffer->cur == '%' && buffer->cur[1] == ':')
1032                 buffer->cur += 2, result->type = CPP_PASTE;
1033             }
1034           else if (*buffer->cur == '>')
1035             {
1036               buffer->cur++;
1037               result->flags |= DIGRAPH;
1038               result->type = CPP_CLOSE_BRACE;
1039             }
1040         }
1041       break;
1042
1043     case '.':
1044       result->type = CPP_DOT;
1045       if (ISDIGIT (*buffer->cur))
1046         {
1047           result->type = CPP_NUMBER;
1048           lex_number (pfile, &result->val.str);
1049         }
1050       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1051         buffer->cur += 2, result->type = CPP_ELLIPSIS;
1052       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1053         buffer->cur++, result->type = CPP_DOT_STAR;
1054       break;
1055
1056     case '+':
1057       result->type = CPP_PLUS;
1058       if (*buffer->cur == '+')
1059         buffer->cur++, result->type = CPP_PLUS_PLUS;
1060       else if (*buffer->cur == '=')
1061         buffer->cur++, result->type = CPP_PLUS_EQ;
1062       break;
1063
1064     case '-':
1065       result->type = CPP_MINUS;
1066       if (*buffer->cur == '>')
1067         {
1068           buffer->cur++;
1069           result->type = CPP_DEREF;
1070           if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1071             buffer->cur++, result->type = CPP_DEREF_STAR;
1072         }
1073       else if (*buffer->cur == '-')
1074         buffer->cur++, result->type = CPP_MINUS_MINUS;
1075       else if (*buffer->cur == '=')
1076         buffer->cur++, result->type = CPP_MINUS_EQ;
1077       break;
1078
1079     case '&':
1080       result->type = CPP_AND;
1081       if (*buffer->cur == '&')
1082         buffer->cur++, result->type = CPP_AND_AND;
1083       else if (*buffer->cur == '=')
1084         buffer->cur++, result->type = CPP_AND_EQ;
1085       break;
1086
1087     case '|':
1088       result->type = CPP_OR;
1089       if (*buffer->cur == '|')
1090         buffer->cur++, result->type = CPP_OR_OR;
1091       else if (*buffer->cur == '=')
1092         buffer->cur++, result->type = CPP_OR_EQ;
1093       break;
1094
1095     case ':':
1096       result->type = CPP_COLON;
1097       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1098         buffer->cur++, result->type = CPP_SCOPE;
1099       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1100         {
1101           buffer->cur++;
1102           result->flags |= DIGRAPH;
1103           result->type = CPP_CLOSE_SQUARE;
1104         }
1105       break;
1106
1107     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1108     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1109     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1110     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1111     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1112
1113     case '?': result->type = CPP_QUERY; break;
1114     case '~': result->type = CPP_COMPL; break;
1115     case ',': result->type = CPP_COMMA; break;
1116     case '(': result->type = CPP_OPEN_PAREN; break;
1117     case ')': result->type = CPP_CLOSE_PAREN; break;
1118     case '[': result->type = CPP_OPEN_SQUARE; break;
1119     case ']': result->type = CPP_CLOSE_SQUARE; break;
1120     case '{': result->type = CPP_OPEN_BRACE; break;
1121     case '}': result->type = CPP_CLOSE_BRACE; break;
1122     case ';': result->type = CPP_SEMICOLON; break;
1123
1124       /* @ is a punctuator in Objective-C.  */
1125     case '@': result->type = CPP_ATSIGN; break;
1126
1127     case '$':
1128     case '\\':
1129       {
1130         const uchar *base = --buffer->cur;
1131
1132         if (forms_identifier_p (pfile, true))
1133           {
1134             result->type = CPP_NAME;
1135             result->val.node = lex_identifier (pfile, base);
1136             break;
1137           }
1138         buffer->cur++;
1139       }
1140
1141     default:
1142       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1143       break;
1144     }
1145
1146   return result;
1147 }
1148
1149 /* An upper bound on the number of bytes needed to spell TOKEN.
1150    Does not include preceding whitespace.  */
1151 unsigned int
1152 cpp_token_len (const cpp_token *token)
1153 {
1154   unsigned int len;
1155
1156   switch (TOKEN_SPELL (token))
1157     {
1158     default:            len = 4;                                break;
1159     case SPELL_LITERAL: len = token->val.str.len;               break;
1160     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1161     }
1162
1163   return len;
1164 }
1165
1166 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1167    already contain the enough space to hold the token's spelling.
1168    Returns a pointer to the character after the last character written.
1169    FIXME: Would be nice if we didn't need the PFILE argument.  */
1170 unsigned char *
1171 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1172                  unsigned char *buffer)
1173 {
1174   switch (TOKEN_SPELL (token))
1175     {
1176     case SPELL_OPERATOR:
1177       {
1178         const unsigned char *spelling;
1179         unsigned char c;
1180
1181         if (token->flags & DIGRAPH)
1182           spelling
1183             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1184         else if (token->flags & NAMED_OP)
1185           goto spell_ident;
1186         else
1187           spelling = TOKEN_NAME (token);
1188
1189         while ((c = *spelling++) != '\0')
1190           *buffer++ = c;
1191       }
1192       break;
1193
1194     spell_ident:
1195     case SPELL_IDENT:
1196       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1197       buffer += NODE_LEN (token->val.node);
1198       break;
1199
1200     case SPELL_LITERAL:
1201       memcpy (buffer, token->val.str.text, token->val.str.len);
1202       buffer += token->val.str.len;
1203       break;
1204
1205     case SPELL_NONE:
1206       cpp_error (pfile, CPP_DL_ICE,
1207                  "unspellable token %s", TOKEN_NAME (token));
1208       break;
1209     }
1210
1211   return buffer;
1212 }
1213
1214 /* Returns TOKEN spelt as a null-terminated string.  The string is
1215    freed when the reader is destroyed.  Useful for diagnostics.  */
1216 unsigned char *
1217 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1218 {
1219   unsigned int len = cpp_token_len (token) + 1;
1220   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1221
1222   end = cpp_spell_token (pfile, token, start);
1223   end[0] = '\0';
1224
1225   return start;
1226 }
1227
1228 /* Used by C front ends, which really should move to using
1229    cpp_token_as_text.  */
1230 const char *
1231 cpp_type2name (enum cpp_ttype type)
1232 {
1233   return (const char *) token_spellings[type].name;
1234 }
1235
1236 /* Writes the spelling of token to FP, without any preceding space.
1237    Separated from cpp_spell_token for efficiency - to avoid stdio
1238    double-buffering.  */
1239 void
1240 cpp_output_token (const cpp_token *token, FILE *fp)
1241 {
1242   switch (TOKEN_SPELL (token))
1243     {
1244     case SPELL_OPERATOR:
1245       {
1246         const unsigned char *spelling;
1247         int c;
1248
1249         if (token->flags & DIGRAPH)
1250           spelling
1251             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1252         else if (token->flags & NAMED_OP)
1253           goto spell_ident;
1254         else
1255           spelling = TOKEN_NAME (token);
1256
1257         c = *spelling;
1258         do
1259           putc (c, fp);
1260         while ((c = *++spelling) != '\0');
1261       }
1262       break;
1263
1264     spell_ident:
1265     case SPELL_IDENT:
1266       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1267     break;
1268
1269     case SPELL_LITERAL:
1270       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1271       break;
1272
1273     case SPELL_NONE:
1274       /* An error, most probably.  */
1275       break;
1276     }
1277 }
1278
1279 /* Compare two tokens.  */
1280 int
1281 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1282 {
1283   if (a->type == b->type && a->flags == b->flags)
1284     switch (TOKEN_SPELL (a))
1285       {
1286       default:                  /* Keep compiler happy.  */
1287       case SPELL_OPERATOR:
1288         return 1;
1289       case SPELL_NONE:
1290         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1291       case SPELL_IDENT:
1292         return a->val.node == b->val.node;
1293       case SPELL_LITERAL:
1294         return (a->val.str.len == b->val.str.len
1295                 && !memcmp (a->val.str.text, b->val.str.text,
1296                             a->val.str.len));
1297       }
1298
1299   return 0;
1300 }
1301
1302 /* Returns nonzero if a space should be inserted to avoid an
1303    accidental token paste for output.  For simplicity, it is
1304    conservative, and occasionally advises a space where one is not
1305    needed, e.g. "." and ".2".  */
1306 int
1307 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1308                  const cpp_token *token2)
1309 {
1310   enum cpp_ttype a = token1->type, b = token2->type;
1311   cppchar_t c;
1312
1313   if (token1->flags & NAMED_OP)
1314     a = CPP_NAME;
1315   if (token2->flags & NAMED_OP)
1316     b = CPP_NAME;
1317
1318   c = EOF;
1319   if (token2->flags & DIGRAPH)
1320     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1321   else if (token_spellings[b].category == SPELL_OPERATOR)
1322     c = token_spellings[b].name[0];
1323
1324   /* Quickly get everything that can paste with an '='.  */
1325   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1326     return 1;
1327
1328   switch (a)
1329     {
1330     case CPP_GREATER:   return c == '>' || c == '?';
1331     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1332     case CPP_PLUS:      return c == '+';
1333     case CPP_MINUS:     return c == '-' || c == '>';
1334     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1335     case CPP_MOD:       return c == ':' || c == '>';
1336     case CPP_AND:       return c == '&';
1337     case CPP_OR:        return c == '|';
1338     case CPP_COLON:     return c == ':' || c == '>';
1339     case CPP_DEREF:     return c == '*';
1340     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1341     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1342     case CPP_NAME:      return ((b == CPP_NUMBER
1343                                  && name_p (pfile, &token2->val.str))
1344                                 || b == CPP_NAME
1345                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1346     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1347                                 || c == '.' || c == '+' || c == '-');
1348                                       /* UCNs */
1349     case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
1350                                  && b == CPP_NAME)
1351                                 || (CPP_OPTION (pfile, objc)
1352                                     && token1->val.str.text[0] == '@'
1353                                     && (b == CPP_NAME || b == CPP_STRING)));
1354     default:            break;
1355     }
1356
1357   return 0;
1358 }
1359
1360 /* Output all the remaining tokens on the current line, and a newline
1361    character, to FP.  Leading whitespace is removed.  If there are
1362    macros, special token padding is not performed.  */
1363 void
1364 cpp_output_line (cpp_reader *pfile, FILE *fp)
1365 {
1366   const cpp_token *token;
1367
1368   token = cpp_get_token (pfile);
1369   while (token->type != CPP_EOF)
1370     {
1371       cpp_output_token (token, fp);
1372       token = cpp_get_token (pfile);
1373       if (token->flags & PREV_WHITE)
1374         putc (' ', fp);
1375     }
1376
1377   putc ('\n', fp);
1378 }
1379
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room left in BUFF.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
/* MIN_EXTRA is parenthesized so that an argument containing a
   low-precedence operator (e.g. a conditional expression) is added
   as a whole.  */
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1394
1395 /* Create a new allocation buffer.  Place the control block at the end
1396    of the buffer, so that buffer overflows will cause immediate chaos.  */
1397 static _cpp_buff *
1398 new_buff (size_t len)
1399 {
1400   _cpp_buff *result;
1401   unsigned char *base;
1402
1403   if (len < MIN_BUFF_SIZE)
1404     len = MIN_BUFF_SIZE;
1405   len = CPP_ALIGN (len);
1406
1407   base = xmalloc (len + sizeof (_cpp_buff));
1408   result = (_cpp_buff *) (base + len);
1409   result->base = base;
1410   result->cur = base;
1411   result->limit = base + len;
1412   result->next = NULL;
1413   return result;
1414 }
1415
1416 /* Place a chain of unwanted allocation buffers on the free list.  */
1417 void
1418 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1419 {
1420   _cpp_buff *end = buff;
1421
1422   while (end->next)
1423     end = end->next;
1424   end->next = pfile->free_buffs;
1425   pfile->free_buffs = buff;
1426 }
1427
1428 /* Return a free buffer of size at least MIN_SIZE.  */
1429 _cpp_buff *
1430 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1431 {
1432   _cpp_buff *result, **p;
1433
1434   for (p = &pfile->free_buffs;; p = &(*p)->next)
1435     {
1436       size_t size;
1437
1438       if (*p == NULL)
1439         return new_buff (min_size);
1440       result = *p;
1441       size = result->limit - result->base;
1442       /* Return a buffer that's big enough, but don't waste one that's
1443          way too big.  */
1444       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1445         break;
1446     }
1447
1448   *p = result->next;
1449   result->next = NULL;
1450   result->cur = result->base;
1451   return result;
1452 }
1453
1454 /* Creates a new buffer with enough space to hold the uncommitted
1455    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1456    the excess bytes to the new buffer.  Chains the new buffer after
1457    BUFF, and returns the new buffer.  */
1458 _cpp_buff *
1459 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1460 {
1461   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1462   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1463
1464   buff->next = new_buff;
1465   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1466   return new_buff;
1467 }
1468
1469 /* Creates a new buffer with enough space to hold the uncommitted
1470    remaining bytes of the buffer pointed to by BUFF, and at least
1471    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1472    Chains the new buffer before the buffer pointed to by BUFF, and
1473    updates the pointer to point to the new buffer.  */
1474 void
1475 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1476 {
1477   _cpp_buff *new_buff, *old_buff = *pbuff;
1478   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1479
1480   new_buff = _cpp_get_buff (pfile, size);
1481   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1482   new_buff->next = old_buff;
1483   *pbuff = new_buff;
1484 }
1485
1486 /* Free a chain of buffers starting at BUFF.  */
1487 void
1488 _cpp_free_buff (_cpp_buff *buff)
1489 {
1490   _cpp_buff *next;
1491
1492   for (; buff; buff = next)
1493     {
1494       next = buff->next;
1495       free (buff->base);
1496     }
1497 }
1498
1499 /* Allocate permanent, unaligned storage of length LEN.  */
1500 unsigned char *
1501 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1502 {
1503   _cpp_buff *buff = pfile->u_buff;
1504   unsigned char *result = buff->cur;
1505
1506   if (len > (size_t) (buff->limit - result))
1507     {
1508       buff = _cpp_get_buff (pfile, len);
1509       buff->next = pfile->u_buff;
1510       pfile->u_buff = buff;
1511       result = buff->cur;
1512     }
1513
1514   buff->cur = result + len;
1515   return result;
1516 }
1517
1518 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1519    That buffer is used for growing allocations when saving macro
1520    replacement lists in a #define, and when parsing an answer to an
1521    assertion in #assert, #unassert or #if (and therefore possibly
1522    whilst expanding macros).  It therefore must not be used by any
1523    code that they might call: specifically the lexer and the guts of
1524    the macro expander.
1525
1526    All existing other uses clearly fit this restriction: storing
1527    registered pragmas during initialization.  */
1528 unsigned char *
1529 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1530 {
1531   _cpp_buff *buff = pfile->a_buff;
1532   unsigned char *result = buff->cur;
1533
1534   if (len > (size_t) (buff->limit - result))
1535     {
1536       buff = _cpp_get_buff (pfile, len);
1537       buff->next = pfile->a_buff;
1538       pfile->a_buff = buff;
1539       result = buff->cur;
1540     }
1541
1542   buff->cur = result + len;
1543   return result;
1544 }