libcpp/traditional.c

   1 /* CPP Library - traditional lexical analysis and macro expansion.
   2    Copyright (C) 2002, 2004 Free Software Foundation, Inc.
   3    Contributed by Neil Booth, May 2002
   4
   5 This program is free software; you can redistribute it and/or modify it
   6 under the terms of the GNU General Public License as published by the
   7 Free Software Foundation; either version 2, or (at your option) any
   8 later version.
   9
  10 This program is distributed in the hope that it will be useful,
  11 but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 GNU General Public License for more details.
  14
  15 You should have received a copy of the GNU General Public License
  16 along with this program; if not, write to the Free Software
  17 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  18
  19 #include "config.h"
  20 #include "system.h"
  21 #include "cpplib.h"
  22 #include "internal.h"
  23
  24 /* The replacement text of a function-like macro is stored as a
  25    contiguous sequence of aligned blocks, each representing the text
  26    between subsequent parameters.
  27
  28    Each block comprises the text between its surrounding parameters,
  29    the length of that text, and the one-based index of the following
  30    parameter.  The final block in the replacement text is easily
  31    recognizable as it has an argument index of zero.  Readers step from one block to the next by adding BLOCK_LEN (text_len) to the block's address.  */
  32
  33 struct block
  34 {
  35   unsigned int text_len;	/* Number of bytes of literal text held in TEXT.  */
  36   unsigned short arg_index;	/* One-based index of the parameter following the text; zero in the final block.  */
  37   uchar text[1];		/* The literal text: TEXT_LEN bytes are copied from here, not just one.  */
  38 };
  39
  40 #define BLOCK_HEADER_LEN offsetof (struct block, text)	/* Bytes of a block that precede its text.  */
  41 #define BLOCK_LEN(TEXT_LEN) CPP_ALIGN (BLOCK_HEADER_LEN + (TEXT_LEN))	/* Aligned size of a block carrying TEXT_LEN bytes of text.  */
  42
  43 /* Structure holding information about a function-like macro
  44    invocation while its arguments are being collected.  */
  45 struct fun_macro
  46 {
  47   /* Memory buffer holding the ARGS array below.  */
  48   _cpp_buff *buff;
  49
  50   /* An array of size the number of macro parameters + 1, containing
  51      the offsets of the start of each macro argument in the output
  52      buffer.  The argument continues until the character before the
  53      start of the next one.  */
  54   size_t *args;
  55
  56   /* The hashnode of the macro.  */
  57   cpp_hashnode *node;
  58
  59   /* The offset of the macro name in the output buffer.  */
  60   size_t offset;
  61
  62   /* The line the macro name appeared on; reported if the argument list is unterminated.  */
  63   unsigned int line;
  64
  65   /* Zero-based index of argument being currently lexed; bumped by save_argument().  */
  66   unsigned int argc;
  67 };
  68
  69 /* Lexing state.  It is mostly used to prevent macro expansion.  */
  70 enum ls {ls_none = 0,           /* Normal state.  */
  71          ls_fun_open,           /* When looking for '('.  */
  72          ls_fun_close,          /* When looking for ')'.  */
  73          ls_defined,            /* After defined.  */
  74          ls_defined_close,      /* Looking for ')' of defined().  */
  75          ls_hash,               /* After # in preprocessor conditional.  */
  76          ls_predicate,          /* After the predicate, maybe paren?  */
  77          ls_answer};            /* In answer to predicate.  */
  78
  79 /* Lexing TODO: Maybe handle space in escaped newlines.  Stop lex.c
  80    from recognizing comments and directives during its lexing pass.  */
  81
  82 static const uchar *skip_whitespace (cpp_reader *, const uchar *, int);
  83 static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
  84 static const uchar *copy_comment (cpp_reader *, const uchar *, int);
  85 static void check_output_buffer (cpp_reader *, size_t);
  86 static void push_replacement_text (cpp_reader *, cpp_hashnode *);
  87 static bool scan_parameters (cpp_reader *, cpp_macro *);
  88 static bool recursive_macro (cpp_reader *, cpp_hashnode *);
  89 static void save_replacement_text (cpp_reader *, cpp_macro *, unsigned int);
  90 static void maybe_start_funlike (cpp_reader *, cpp_hashnode *, const uchar *,
  91                                  struct fun_macro *);
  92 static void save_argument (struct fun_macro *, size_t);
  93 static void replace_args_and_push (cpp_reader *, struct fun_macro *);
  94 static size_t canonicalize_text (uchar *, const uchar *, size_t, uchar *);
  95
  96 /* Ensures we have N bytes' space in the output buffer, and
  97    reallocates it if not.  */
  98 static void
  99 check_output_buffer (cpp_reader *pfile, size_t n)
 100 {
 101   /* We might need two bytes to terminate an unterminated comment, and
 102      one more to terminate the line with a NUL.  */
 103   n += 2 + 1;
 104
 105   if (n > (size_t) (pfile->out.limit - pfile->out.cur))
 106     {
 107       size_t size = pfile->out.cur - pfile->out.base;
 108       size_t new_size = (size + n) * 3 / 2;
 109
 110       pfile->out.base = xrealloc (pfile->out.base, new_size);
 111       pfile->out.limit = pfile->out.base + new_size;
 112       pfile->out.cur = pfile->out.base + size;
 113     }
 114 }
 115
 116 /* Skip a C-style block comment in a macro as a result of -CC.
 117    Buffer->cur points to the initial asterisk of the comment.  */
 118 static void
 119 skip_macro_block_comment (cpp_reader *pfile)
 120 {
 121   const uchar *cur = pfile->buffer->cur;
 122
 123   cur++;
 124   if (*cur == '/')
 125     cur++;
 126
 127   /* People like decorating comments with '*', so check for '/'
 128      instead for efficiency.  */
 129   while(! (*cur++ == '/' && cur[-2] == '*') )
 130     ;
 131
 132   pfile->buffer->cur = cur;
 133 }
 134
 135 /* CUR points to the asterisk introducing a comment in the current
 136    context.  IN_DEFINE is true if we are in the replacement text of a
 137    macro.
 138
 139    The asterisk and following comment is copied to the buffer pointed
 140    to by pfile->out.cur, which must be of sufficient size.
 141    Unterminated comments are diagnosed, and correctly terminated in
 142    the output.  pfile->out.cur is updated depending upon IN_DEFINE,
 143    -C, -CC and pfile->state.in_directive.
 144
 145    Returns a pointer to the first character after the comment in the
 146    input buffer.  */
 147 static const uchar *
 148 copy_comment (cpp_reader *pfile, const uchar *cur, int in_define)
 149 {
 150   bool unterminated, copy = false;
 151   source_location src_loc = pfile->line_table->highest_line;
 152   cpp_buffer *buffer = pfile->buffer;
 153
 154   buffer->cur = cur;
 155   if (pfile->context->prev)
 156     unterminated = false, skip_macro_block_comment (pfile);
 157   else
 158     unterminated = _cpp_skip_block_comment (pfile);
 159
 160   if (unterminated)
 161     cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
 162                          "unterminated comment");
 163
 164   /* Comments in directives become spaces so that tokens are properly
 165      separated when the ISO preprocessor re-lexes the line.  The
 166      exception is #define.  */
 167   if (pfile->state.in_directive)
 168     {
 169       if (in_define)
 170         {
 171           if (CPP_OPTION (pfile, discard_comments_in_macro_exp))
 172             pfile->out.cur--;
 173           else
 174             copy = true;
 175         }
 176       else
 177         pfile->out.cur[-1] = ' ';
 178     }
 179   else if (CPP_OPTION (pfile, discard_comments))
 180     pfile->out.cur--;
 181   else
 182     copy = true;
 183
 184   if (copy)
 185     {
 186       size_t len = (size_t) (buffer->cur - cur);
 187       memcpy (pfile->out.cur, cur, len);
 188       pfile->out.cur += len;
 189       if (unterminated)
 190         {
 191           *pfile->out.cur++ = '*';
 192           *pfile->out.cur++ = '/';
 193         }
 194     }
 195
 196   return buffer->cur;
 197 }
 198
 199 /* CUR points to any character in the input buffer.  Skips over all
 200    contiguous horizontal white space and NULs, including comments if
 201    SKIP_COMMENTS, until reaching the first non-horizontal-whitespace
 202    character or the end of the current context.  Escaped newlines are
 203    removed.
 204
 205    The whitespace is copied verbatim to the output buffer, except that
 206    comments are handled as described in copy_comment().
 207    pfile->out.cur is updated.
 208
 209    Returns a pointer to the first character after the whitespace in
 210    the input buffer.  */
 211 static const uchar *
 212 skip_whitespace (cpp_reader *pfile, const uchar *cur, int skip_comments)
 213 {
 214   uchar *out = pfile->out.cur;
 215
 216   for (;;)
 217     {
 218       unsigned int c = *cur++;
 219       *out++ = c;
 220
 221       if (is_nvspace (c))
 222         continue;
 223
 224       if (c == '/' && *cur == '*' && skip_comments)
 225         {
 226           pfile->out.cur = out;
 227           cur = copy_comment (pfile, cur, false /* in_define */);
 228           out = pfile->out.cur;
 229           continue;
 230         }
 231
 232       out--;
 233       break;
 234     }
 235
 236   pfile->out.cur = out;
 237   return cur - 1;
 238 }
 239
 240 /* Lexes and outputs an identifier starting at CUR, which is assumed
 241    to point to a valid first character of an identifier.  Returns
 242    the hashnode, and updates out.cur.  */
 243 static cpp_hashnode *
 244 lex_identifier (cpp_reader *pfile, const uchar *cur)
 245 {
 246   size_t len;
 247   uchar *out = pfile->out.cur;
 248   cpp_hashnode *result;
 249
 250   do
 251     *out++ = *cur++;
 252   while (is_numchar (*cur));
 253
 254   CUR (pfile->context) = cur;
 255   len = out - pfile->out.cur;
 256   result = (cpp_hashnode *) ht_lookup (pfile->hash_table, pfile->out.cur,
 257                                        len, HT_ALLOC);
 258   pfile->out.cur = out;
 259   return result;
 260 }
 261
 262 /* Overlays the true file buffer temporarily with text of length LEN
 263    starting at START.  The true buffer is restored upon calling
 264    restore_buff().  */
 265 void
 266 _cpp_overlay_buffer (cpp_reader *pfile, const uchar *start, size_t len)
 267 {
 268   cpp_buffer *buffer = pfile->buffer;
 269
 270   pfile->overlaid_buffer = buffer;
 271   pfile->saved_cur = buffer->cur;
 272   pfile->saved_rlimit = buffer->rlimit;
 273   pfile->saved_line_base = buffer->next_line;
 274   buffer->need_line = false;
 275
 276   buffer->cur = start;
 277   buffer->line_base = start;
 278   buffer->rlimit = start + len;
 279 }
 280
 281 /* Restores a buffer overlaid by _cpp_overlay_buffer().  */
 282 void
 283 _cpp_remove_overlay (cpp_reader *pfile)
 284 {
 285   cpp_buffer *buffer = pfile->overlaid_buffer;
 286
 287   buffer->cur = pfile->saved_cur;
 288   buffer->rlimit = pfile->saved_rlimit;
 289   buffer->line_base = pfile->saved_line_base;
 290   buffer->need_line = true;
 291
 292   pfile->overlaid_buffer = NULL;
 293 }
 294
 295 /* Reads a logical line into the output buffer.  Returns TRUE if there
 296    is more text left in the buffer.  */
 297 bool
 298 _cpp_read_logical_line_trad (cpp_reader *pfile)
 299 {
 300   do
 301     {
 302       if (pfile->buffer->need_line && !_cpp_get_fresh_line (pfile))
 303         return false;
 304     }
 305   while (!_cpp_scan_out_logical_line (pfile, NULL) || pfile->state.skipping);
 306
 307   return pfile->buffer != NULL;
 308 }
 309
 310 /* Set up state for finding the opening '(' of a function-like
 311    macro.  */
 312 static void
 313 maybe_start_funlike (cpp_reader *pfile, cpp_hashnode *node, const uchar *start, struct fun_macro *macro)
 314 {
 315   unsigned int n = node->value.macro->paramc + 1;
 316
 317   if (macro->buff)
 318     _cpp_release_buff (pfile, macro->buff);
 319   macro->buff = _cpp_get_buff (pfile, n * sizeof (size_t));
 320   macro->args = (size_t *) BUFF_FRONT (macro->buff);
 321   macro->node = node;
 322   macro->offset = start - pfile->out.base;
 323   macro->argc = 0;
 324 }
 325
 326 /* Save the OFFSET of the start of the next argument to MACRO.  */
 327 static void
 328 save_argument (struct fun_macro *macro, size_t offset)
 329 {
 330   macro->argc++;
 331   if (macro->argc <= macro->node->value.macro->paramc)
 332     macro->args[macro->argc] = offset;
 333 }
 334
 335 /* Copies the next logical line in the current buffer (starting at
 336    buffer->cur) to the output buffer.  The output is guaranteed to
 337    terminate with a NUL character.  buffer->cur is updated.
 338    Returns false if the line was a directive (a null directive, or one handed to _cpp_handle_directive()), and true otherwise.
 339    If MACRO is non-NULL, then we are scanning the replacement list of
 340    MACRO, and we call save_replacement_text() every time we meet one
 341    of its parameters.  */
 342 bool
 343 _cpp_scan_out_logical_line (cpp_reader *pfile, cpp_macro *macro)
 344 {
 345   bool result = true;
 346   cpp_context *context;
 347   const uchar *cur;
 348   uchar *out;
 349   struct fun_macro fmacro;	/* The function-like invocation being collected, if any.  */
 350   unsigned int c, paren_depth = 0, quote;	/* QUOTE is the character that closes the literal we are in, or 0.  */
 351   enum ls lex_state = ls_none;
 352   bool header_ok;	/* True while a '<' would still open a header name.  */
 353   const uchar *start_of_input_line;
 354
 355   fmacro.buff = NULL;	/* Nothing to release at "done" unless maybe_start_funlike() runs.  */
 356
 357   quote = 0;
 358   header_ok = pfile->state.angled_headers;
 359   CUR (pfile->context) = pfile->buffer->cur;
 360   RLIMIT (pfile->context) = pfile->buffer->rlimit;
 361   pfile->out.cur = pfile->out.base;	/* Every logical line starts the output buffer afresh.  */
 362   pfile->out.first_line = pfile->line_table->highest_line;
 363   /* start_of_input_line is needed to make sure that directives really,
 364      really start at the first character of the line.  */
 365   start_of_input_line = pfile->buffer->cur;
 366  new_context:	/* Re-entered each time a macro context has been pushed or popped.  */
 367   context = pfile->context;
 368   cur = CUR (context);
 369   check_output_buffer (pfile, RLIMIT (context) - cur);	/* May reallocate the output buffer, hence OUT is reloaded next.  */
 370   out = pfile->out.cur;
 371
 372   for (;;)
 373     {
 374       if (!context->prev
 375           && cur >= pfile->buffer->notes[pfile->buffer->cur_note].pos)
 376         {
 377           pfile->buffer->cur = cur;
 378           _cpp_process_line_notes (pfile, false);
 379         }
 380       c = *cur++;
 381       *out++ = c;	/* Stored unconditionally; the cases below back the store out or overwrite it where needed.  */
 382
 383       /* Whitespace should "continue" out of the switch,
 384          non-whitespace should "break" out of it.  */
 385       switch (c)
 386         {
 387         case ' ':
 388         case '\t':
 389         case '\f':
 390         case '\v':
 391         case '\0':
 392           continue;
 393
 394         case '\n':
 395           /* If this is a macro's expansion, pop it.  */
 396           if (context->prev)
 397             {
 398               pfile->out.cur = out - 1;
 399               _cpp_pop_context (pfile);
 400               goto new_context;
 401             }
 402
 403           /* Omit the newline from the output buffer.  */
 404           pfile->out.cur = out - 1;
 405           pfile->buffer->cur = cur;
 406           pfile->buffer->need_line = true;
 407           CPP_INCREMENT_LINE (pfile, 0);
 408
 409           if ((lex_state == ls_fun_open || lex_state == ls_fun_close)
 410               && !pfile->state.in_directive
 411               && _cpp_get_fresh_line (pfile))
 412             {
 413               /* Newlines in arguments become a space, but we don't
 414                  clear any in-progress quote.  */
 415               if (lex_state == ls_fun_close)
 416                 out[-1] = ' ';
 417               cur = pfile->buffer->cur;
 418               continue;
 419             }
 420           goto done;
 421
 422         case '<':
 423           if (header_ok)
 424             quote = '>';
 425           break;
 426         case '>':
 427           if (c == quote)
 428             quote = 0;
 429           break;
 430
 431         case '"':
 432         case '\'':
 433           if (c == quote)
 434             quote = 0;
 435           else if (!quote)
 436             quote = c;
 437           break;
 438
 439         case '\\':
 440           /* Skip escaped quotes here, it's easier than above.  */
 441           if (*cur == '\\' || *cur == '"' || *cur == '\'')
 442             *out++ = *cur++;
 443           break;
 444
 445         case '/':
 446           /* Traditional CPP does not recognize comments within
 447              literals.  */
 448           if (!quote && *cur == '*')
 449             {
 450               pfile->out.cur = out;
 451               cur = copy_comment (pfile, cur, macro != 0);
 452               out = pfile->out.cur;
 453               continue;
 454             }
 455           break;
 456
 457         case '_':
 458         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 459         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 460         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 461         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 462         case 'y': case 'z':
 463         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 464         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 465         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 466         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 467         case 'Y': case 'Z':
 468           if (!pfile->state.skipping && (quote == 0 || macro))	/* Inside a literal, identifiers are only lexed when scanning a replacement list.  */
 469             {
 470               cpp_hashnode *node;
 471               uchar *out_start = out - 1;
 472
 473               pfile->out.cur = out_start;
 474               node = lex_identifier (pfile, cur - 1);
 475               out = pfile->out.cur;
 476               cur = CUR (context);
 477
 478               if (node->type == NT_MACRO
 479                   /* Should we expand for ls_answer?  */
 480                   && (lex_state == ls_none || lex_state == ls_fun_open)
 481                   && !pfile->state.prevent_expansion)
 482                 {
 483                   /* Macros invalidate MI optimization.  */
 484                   pfile->mi_valid = false;
 485                   if (! (node->flags & NODE_BUILTIN)
 486                       && node->value.macro->fun_like)
 487                     {
 488                       maybe_start_funlike (pfile, node, out_start, &fmacro);
 489                       lex_state = ls_fun_open;
 490                       fmacro.line = pfile->line_table->highest_line;
 491                       continue;
 492                     }
 493                   else if (!recursive_macro (pfile, node))
 494                     {
 495                       /* Remove the object-like macro's name from the
 496                          output, and push its replacement text.  */
 497                       pfile->out.cur = out_start;
 498                       push_replacement_text (pfile, node);
 499                       lex_state = ls_none;
 500                       goto new_context;
 501                     }
 502                 }
 503               else if (macro && (node->flags & NODE_MACRO_ARG) != 0)
 504                 {
 505                   /* Found a parameter in the replacement text of a
 506                      #define.  Remove its name from the output.  */
 507                   pfile->out.cur = out_start;
 508                   save_replacement_text (pfile, macro, node->value.arg_index);
 509                   out = pfile->out.base;	/* The text before the parameter has been saved, so output restarts at the base.  */
 510                 }
 511               else if (lex_state == ls_hash)
 512                 {
 513                   lex_state = ls_predicate;
 514                   continue;
 515                 }
 516               else if (pfile->state.in_expression
 517                        && node == pfile->spec_nodes.n_defined)
 518                 {
 519                   lex_state = ls_defined;
 520                   continue;
 521                 }
 522             }
 523           break;
 524
 525         case '(':
 526           if (quote == 0)
 527             {
 528               paren_depth++;
 529               if (lex_state == ls_fun_open)
 530                 {
 531                   if (recursive_macro (pfile, fmacro.node))
 532                     lex_state = ls_none;
 533                   else
 534                     {
 535                       lex_state = ls_fun_close;
 536                       paren_depth = 1;
 537                       out = pfile->out.base + fmacro.offset;	/* Arguments are collected on top of the macro name.  */
 538                       fmacro.args[0] = fmacro.offset;	/* So the first argument starts where the name did.  */
 539                     }
 540                 }
 541               else if (lex_state == ls_predicate)
 542                 lex_state = ls_answer;
 543               else if (lex_state == ls_defined)
 544                 lex_state = ls_defined_close;
 545             }
 546           break;
 547
 548         case ',':
 549           if (quote == 0 && lex_state == ls_fun_close && paren_depth == 1)
 550             save_argument (&fmacro, out - pfile->out.base);
 551           break;
 552
 553         case ')':
 554           if (quote == 0)
 555             {
 556               paren_depth--;
 557               if (lex_state == ls_fun_close && paren_depth == 0)
 558                 {
 559                   cpp_macro *m = fmacro.node->value.macro;
 560
 561                   m->used = 1;
 562                   lex_state = ls_none;
 563                   save_argument (&fmacro, out - pfile->out.base);
 564
 565                   /* A single zero-length argument is no argument.  */
 566                   if (fmacro.argc == 1
 567                       && m->paramc == 0
 568                       && out == pfile->out.base + fmacro.offset + 1)
 569                     fmacro.argc = 0;
 570
 571                   if (_cpp_arguments_ok (pfile, m, fmacro.node, fmacro.argc))
 572                     {
 573                       /* Remove the macro's invocation from the
 574                          output, and push its replacement text.  */
 575                       pfile->out.cur = (pfile->out.base
 576                                              + fmacro.offset);
 577                       CUR (context) = cur;	/* Where to resume in this context once the expansion has been popped.  */
 578                       replace_args_and_push (pfile, &fmacro);
 579                       goto new_context;
 580                     }
 581                 }
 582               else if (lex_state == ls_answer || lex_state == ls_defined_close)
 583                 lex_state = ls_none;
 584             }
 585           break;
 586
 587         case '#':
 588           if (cur - 1 == start_of_input_line
 589               /* A '#' from a macro doesn't start a directive.  */
 590               && !pfile->context->prev
 591               && !pfile->state.in_directive)
 592             {
 593               /* A directive.  With the way _cpp_handle_directive
 594                  currently works, we only want to call it if either we
 595                  know the directive is OK, or we want it to fail and
 596                  be removed from the output.  If we want it to be
 597                  passed through (the assembler case) then we must not
 598                  call _cpp_handle_directive.  */
 599               pfile->out.cur = out;
 600               cur = skip_whitespace (pfile, cur, true /* skip_comments */);
 601               out = pfile->out.cur;
 602
 603               if (*cur == '\n')
 604                 {
 605                   /* Null directive.  Ignore it and don't invalidate
 606                      the MI optimization.  */
 607                   pfile->buffer->need_line = true;
 608                   CPP_INCREMENT_LINE (pfile, 0);
 609                   result = false;
 610                   goto done;
 611                 }
 612               else
 613                 {
 614                   bool do_it = false;
 615
 616                   if (is_numstart (*cur)
 617                       && CPP_OPTION (pfile, lang) != CLK_ASM)
 618                     do_it = true;
 619                   else if (is_idstart (*cur))
 620                     /* Check whether we know this directive, but don't
 621                        advance.  */
 622                     do_it = lex_identifier (pfile, cur)->is_directive;
 623
 624                   if (do_it || CPP_OPTION (pfile, lang) != CLK_ASM)
 625                     {
 626                       /* This is a kludge.  We want to have the ISO
 627                          preprocessor lex the next token.  */
 628                       pfile->buffer->cur = cur;
 629                       _cpp_handle_directive (pfile, false /* indented */);
 630                       result = false;
 631                       goto done;
 632                     }
 633                 }
 634             }
 635
 636           if (pfile->state.in_expression)
 637             {
 638               lex_state = ls_hash;
 639               continue;
 640             }
 641           break;
 642
 643         default:
 644           break;
 645         }
 646
 647       /* Non-whitespace disables MI optimization and stops treating
 648          '<' as a quote in #include.  */
 649       header_ok = false;
 650       if (!pfile->state.in_directive)
 651         pfile->mi_valid = false;
 652
 653       if (lex_state == ls_none)
 654         continue;
 655
 656       /* Some of these transitions of state are syntax errors.  The
 657          ISO preprocessor will issue errors later.  */
 658       if (lex_state == ls_fun_open)
 659         /* Missing '('.  */
 660         lex_state = ls_none;
 661       else if (lex_state == ls_hash
 662                || lex_state == ls_predicate
 663                || lex_state == ls_defined)
 664         lex_state = ls_none;
 665
 666       /* ls_answer and ls_defined_close keep going until ')'.  */
 667     }
 668
 669  done:
 670   if (fmacro.buff)
 671     _cpp_release_buff (pfile, fmacro.buff);
 672
 673   if (lex_state == ls_fun_close)	/* The line ended while arguments were still being collected.  */
 674     cpp_error_with_line (pfile, CPP_DL_ERROR, fmacro.line, 0,
 675                          "unterminated argument list invoking macro \"%s\"",
 676                          NODE_NAME (fmacro.node));
 677   return result;
 678 }
 679
 680 /* Push a context holding the replacement text of the macro NODE on
 681    the context stack.  NODE is either object-like, or a function-like
 682    macro with no arguments.  */
 683 static void
 684 push_replacement_text (cpp_reader *pfile, cpp_hashnode *node)
 685 {
 686   size_t len;
 687   const uchar *text;
 688   uchar *buf;
 689
 690   if (node->flags & NODE_BUILTIN)
 691     {
 692       text = _cpp_builtin_macro_text (pfile, node);
 693       len = ustrlen (text);
 694       buf = _cpp_unaligned_alloc (pfile, len + 1);
 695       memcpy (buf, text, len);
 696       buf[len]='\n';
 697       text = buf;
 698     }
 699   else
 700     {
 701       cpp_macro *macro = node->value.macro;
 702       macro->used = 1;
 703       text = macro->exp.text;
 704       macro->traditional = 1;
 705       len = macro->count;
 706     }
 707
 708   _cpp_push_text_context (pfile, node, text, len);
 709 }
 710
 711 /* Returns TRUE if traditional macro recursion is detected.  */
 712 static bool
 713 recursive_macro (cpp_reader *pfile, cpp_hashnode *node)
 714 {
 715   bool recursing = !!(node->flags & NODE_DISABLED);
 716
 717   /* Object-like macros that are already expanding are necessarily
 718      recursive.
 719
 720      However, it is possible to have traditional function-like macros
 721      that are not infinitely recursive but recurse to any given depth.
 722      Further, it is easy to construct examples that get ever longer
 723      until the point they stop recursing.  So there is no easy way to
 724      detect true recursion; instead we assume any expansion more than
 725      20 deep since the first invocation of this macro must be
 726      recursing.  */
 727   if (recursing && node->value.macro->fun_like)
 728     {
 729       size_t depth = 0;
 730       cpp_context *context = pfile->context;
 731
 732       do
 733         {
 734           depth++;
 735           if (context->macro == node && depth > 20)
 736             break;
 737           context = context->prev;
 738         }
 739       while (context);
 740       recursing = context != NULL;
 741     }
 742
 743   if (recursing)
 744     cpp_error (pfile, CPP_DL_ERROR,
 745                "detected recursion whilst expanding macro \"%s\"",
 746                NODE_NAME (node));
 747
 748   return recursing;
 749 }
 750
 751 /* Return the length of the replacement text of a function-like or
 752    object-like non-builtin macro.  */
 753 size_t
 754 _cpp_replacement_text_len (const cpp_macro *macro)
 755 {
 756   size_t len;
 757
 758   if (macro->fun_like && (macro->paramc != 0))
 759     {
 760       const uchar *exp;
 761
 762       len = 0;
 763       for (exp = macro->exp.text;;)
 764         {
 765           struct block *b = (struct block *) exp;
 766
 767           len += b->text_len;
 768           if (b->arg_index == 0)
 769             break;
 770           len += NODE_LEN (macro->params[b->arg_index - 1]);
 771           exp += BLOCK_LEN (b->text_len);
 772         }
 773     }
 774   else
 775     len = macro->count;
 776
 777   return len;
 778 }
 779
 780 /* Copy the replacement text of MACRO to DEST, which must be of
 781    sufficient size.  It is not NUL-terminated.  The next character is
 782    returned.  */
 783 uchar *
 784 _cpp_copy_replacement_text (const cpp_macro *macro, uchar *dest)
 785 {
 786   if (macro->fun_like && (macro->paramc != 0))
 787     {
 788       const uchar *exp;
 789
 790       for (exp = macro->exp.text;;)
 791         {
 792           struct block *b = (struct block *) exp;
 793           cpp_hashnode *param;
 794
 795           memcpy (dest, b->text, b->text_len);
 796           dest += b->text_len;
 797           if (b->arg_index == 0)
 798             break;
 799           param = macro->params[b->arg_index - 1];
 800           memcpy (dest, NODE_NAME (param), NODE_LEN (param));
 801           dest += NODE_LEN (param);
 802           exp += BLOCK_LEN (b->text_len);
 803         }
 804     }
 805   else
 806     {
 807       memcpy (dest, macro->exp.text, macro->count);
 808       dest += macro->count;
 809     }
 810
 811   return dest;
 812 }
 813
 814 /* Push a context holding the replacement text of the macro NODE on
 815    the context stack.  NODE is either object-like, or a function-like
 816    macro with no arguments.  */
 817 static void
 818 replace_args_and_push (cpp_reader *pfile, struct fun_macro *fmacro)
 819 {
 820   cpp_macro *macro = fmacro->node->value.macro;
 821
 822   if (macro->paramc == 0)
 823     push_replacement_text (pfile, fmacro->node);
 824   else
 825     {
 826       const uchar *exp;
 827       uchar *p;
 828       _cpp_buff *buff;
 829       size_t len = 0;
 830
 831       /* Calculate the length of the argument-replaced text.  */
 832       for (exp = macro->exp.text;;)
 833         {
 834           struct block *b = (struct block *) exp;
 835
 836           len += b->text_len;
 837           if (b->arg_index == 0)
 838             break;
 839           len += (fmacro->args[b->arg_index]
 840                   - fmacro->args[b->arg_index - 1] - 1);
 841           exp += BLOCK_LEN (b->text_len);
 842         }
 843
 844       /* Allocate room for the expansion plus \n.  */
 845       buff = _cpp_get_buff (pfile, len + 1);
 846
 847       /* Copy the expansion and replace arguments.  */
 848       p = BUFF_FRONT (buff);
 849       for (exp = macro->exp.text;;)
 850         {
 851           struct block *b = (struct block *) exp;
 852           size_t arglen;
 853
 854           memcpy (p, b->text, b->text_len);
 855           p += b->text_len;
 856           if (b->arg_index == 0)
 857             break;
 858           arglen = (fmacro->args[b->arg_index]
 859                     - fmacro->args[b->arg_index - 1] - 1);
 860           memcpy (p, pfile->out.base + fmacro->args[b->arg_index - 1],
 861                   arglen);
 862           p += arglen;
 863           exp += BLOCK_LEN (b->text_len);
 864         }
 865
 866       /* \n-terminate.  */
 867       *p = '\n';
 868       _cpp_push_text_context (pfile, fmacro->node, BUFF_FRONT (buff), len);
 869
 870       /* So we free buffer allocation when macro is left.  */
 871       pfile->context->buff = buff;
 872     }
 873 }
 874
 875 /* Read and record the parameters, if any, of a function-like macro
 876    definition.  Destroys pfile->out.cur.
 877
 878    Returns true on success, false on failure (syntax error or a
 879    duplicate parameter).  On success, CUR (pfile->context) is just
 880    past the closing parenthesis.  */
 881 static bool
 882 scan_parameters (cpp_reader *pfile, cpp_macro *macro)
 883 {
 884   const uchar *cur = CUR (pfile->context) + 1;
 885   bool ok;
 886
 887   for (;;)
 888     {
 889       cur = skip_whitespace (pfile, cur, true /* skip_comments */);
 890
 891       if (is_idstart (*cur))
 892         {
 893           ok = false;
 894           if (_cpp_save_parameter (pfile, macro, lex_identifier (pfile, cur)))
 895             break;
 896           cur = skip_whitespace (pfile, CUR (pfile->context),
 897                                  true /* skip_comments */);
 898           if (*cur == ',')
 899             {
 900               cur++;
 901               continue;
 902             }
 903           ok = (*cur == ')');
 904           break;
 905         }
 906
 907       ok = (*cur == ')' && macro->paramc == 0);
 908       break;
 909     }
 910
 911   if (!ok)
 912     cpp_error (pfile, CPP_DL_ERROR, "syntax error in macro parameter list");
 913
 914   CUR (pfile->context) = cur + (*cur == ')');
 915
 916   return ok;
 917 }
 918
 919 /* Save the text from pfile->out.base to pfile->out.cur as
 920    the replacement text for the current macro, followed by argument
 921    ARG_INDEX, with zero indicating the end of the replacement
 922    text.  */
 923 static void
 924 save_replacement_text (cpp_reader *pfile, cpp_macro *macro,
 925                        unsigned int arg_index)
 926 {
 927   size_t len = pfile->out.cur - pfile->out.base;
 928   uchar *exp;
 929
 930   if (macro->paramc == 0)
 931     {
 932       /* Object-like and function-like macros without parameters
 933          simply store their \n-terminated replacement text.  */
 934       exp = _cpp_unaligned_alloc (pfile, len + 1);
 935       memcpy (exp, pfile->out.base, len);
 936       exp[len] = '\n';
 937       macro->exp.text = exp;
 938       macro->traditional = 1;
 939       macro->count = len;
 940     }
 941   else
 942     {
 943       /* Store the text's length (unsigned int), the argument index
 944          (unsigned short, base 1) and then the text.  */
 945       size_t blen = BLOCK_LEN (len);
 946       struct block *block;
 947
 948       if (macro->count + blen > BUFF_ROOM (pfile->a_buff))
 949         _cpp_extend_buff (pfile, &pfile->a_buff, macro->count + blen);
 950
 951       exp = BUFF_FRONT (pfile->a_buff);
 952       block = (struct block *) (exp + macro->count);
 953       macro->exp.text = exp;
 954       macro->traditional = 1;
 955
 956       /* Write out the block information.  */
 957       block->text_len = len;
 958       block->arg_index = arg_index;
 959       memcpy (block->text, pfile->out.base, len);
 960
 961       /* Lex the rest into the start of the output buffer.  */
 962       pfile->out.cur = pfile->out.base;
 963
 964       macro->count += blen;
 965
 966       /* If we've finished, commit the memory.  */
 967       if (arg_index == 0)
 968         BUFF_FRONT (pfile->a_buff) += macro->count;
 969     }
 970 }
 971
 972 /* Analyze and save the replacement text of a macro.  Returns true on
 973    success.  */
 974 bool
 975 _cpp_create_trad_definition (cpp_reader *pfile, cpp_macro *macro)
 976 {
 977   const uchar *cur;
 978   uchar *limit;
 979   cpp_context *context = pfile->context;
 980
 981   /* The context has not been set up for command line defines, and CUR
 982      has not been updated for the macro name for in-file defines.  */
 983   pfile->out.cur = pfile->out.base;
 984   CUR (context) = pfile->buffer->cur;
 985   RLIMIT (context) = pfile->buffer->rlimit;
 986   check_output_buffer (pfile, RLIMIT (context) - CUR (context));
 987
 988   /* Is this a function-like macro?  */
 989   if (* CUR (context) == '(')
 990     {
 991       bool ok = scan_parameters (pfile, macro);
 992
 993       /* Remember the params so we can clear NODE_MACRO_ARG flags.  */
 994       macro->params = (cpp_hashnode **) BUFF_FRONT (pfile->a_buff);
 995
 996       /* Setting macro to NULL indicates an error occurred, and
 997          prevents unnecessary work in _cpp_scan_out_logical_line.  */
 998       if (!ok)
 999         macro = NULL;
1000       else
1001         {
1002           BUFF_FRONT (pfile->a_buff) = (uchar *) &macro->params[macro->paramc];
1003           macro->fun_like = 1;
1004         }
1005     }
1006
1007   /* Skip leading whitespace in the replacement text.  */
1008   pfile->buffer->cur
1009     = skip_whitespace (pfile, CUR (context),
1010                        CPP_OPTION (pfile, discard_comments_in_macro_exp));
1011
1012   pfile->state.prevent_expansion++;
1013   _cpp_scan_out_logical_line (pfile, macro);
1014   pfile->state.prevent_expansion--;
1015
1016   if (!macro)
1017     return false;
1018
1019   /* Skip trailing white space.  */
1020   cur = pfile->out.base;
1021   limit = pfile->out.cur;
1022   while (limit > cur && is_space (limit[-1]))
1023     limit--;
1024   pfile->out.cur = limit;
1025   save_replacement_text (pfile, macro, 0);
1026
1027   return true;
1028 }
1029
1030 /* Copy SRC of length LEN to DEST, but convert all contiguous
1031    whitespace to a single space, provided it is not in quotes.  The
1032    quote currently in effect is pointed to by PQUOTE, and is updated
1033    by the function.  Returns the number of bytes copied.  */
1034 static size_t
1035 canonicalize_text (uchar *dest, const uchar *src, size_t len, uchar *pquote)
1036 {
1037   uchar *orig_dest = dest;
1038   uchar quote = *pquote;
1039
1040   while (len)
1041     {
1042       if (is_space (*src) && !quote)
1043         {
1044           do
1045             src++, len--;
1046           while (len && is_space (*src));
1047           *dest++ = ' ';
1048         }
1049       else
1050         {
1051           if (*src == '\'' || *src == '"')
1052             {
1053               if (!quote)
1054                 quote = *src;
1055               else if (quote == *src)
1056                 quote = 0;
1057             }
1058           *dest++ = *src++, len--;
1059         }
1060     }
1061
1062   *pquote = quote;
1063   return dest - orig_dest;
1064 }
1065
1066 /* Returns true if MACRO1 and MACRO2 have expansions different other
1067    than in the form of their whitespace.  */
1068 bool
1069 _cpp_expansions_different_trad (const cpp_macro *macro1,
1070                                 const cpp_macro *macro2)
1071 {
1072   uchar *p1 = xmalloc (macro1->count + macro2->count);
1073   uchar *p2 = p1 + macro1->count;
1074   uchar quote1 = 0, quote2 = 0;
1075   bool mismatch;
1076   size_t len1, len2;
1077
1078   if (macro1->paramc > 0)
1079     {
1080       const uchar *exp1 = macro1->exp.text, *exp2 = macro2->exp.text;
1081
1082       mismatch = true;
1083       for (;;)
1084         {
1085           struct block *b1 = (struct block *) exp1;
1086           struct block *b2 = (struct block *) exp2;
1087
1088           if (b1->arg_index != b2->arg_index)
1089             break;
1090
1091           len1 = canonicalize_text (p1, b1->text, b1->text_len, &quote1);
1092           len2 = canonicalize_text (p2, b2->text, b2->text_len, &quote2);
1093           if (len1 != len2 || memcmp (p1, p2, len1))
1094             break;
1095           if (b1->arg_index == 0)
1096             {
1097               mismatch = false;
1098               break;
1099             }
1100           exp1 += BLOCK_LEN (b1->text_len);
1101           exp2 += BLOCK_LEN (b2->text_len);
1102         }
1103     }
1104   else
1105     {
1106       len1 = canonicalize_text (p1, macro1->exp.text, macro1->count, &quote1);
1107       len2 = canonicalize_text (p2, macro2->exp.text, macro2->count, &quote2);
1108       mismatch = (len1 != len2 || memcmp (p1, p2, len1));
1109     }
1110
1111   free (p1);
1112   return mismatch;
1113 }