gcc/c-lex.c

   1 /* Lexical analyzer for C and Objective C.
   2    Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
   3    1998, 1999, 2000 Free Software Foundation, Inc.
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24
  25 #include "rtl.h"
  26 #include "tree.h"
  27 #include "input.h"
  28 #include "output.h"
  29 #include "c-lex.h"
  30 #include "c-tree.h"
  31 #include "flags.h"
  32 #include "timevar.h"
  33 #include "cpplib.h"
  34 #include "c-pragma.h"
  35 #include "toplev.h"
  36 #include "intl.h"
  37 #include "ggc.h"
  38 #include "tm_p.h"
  39 #include "splay-tree.h"
  40
  41 /* MULTIBYTE_CHARS support only works for native compilers.
  42    ??? Ideally what we want is to model widechar support after
  43    the current floating point support.  */
  44 #ifdef CROSS_COMPILE
  45 #undef MULTIBYTE_CHARS
  46 #endif
  47
  48 #ifdef MULTIBYTE_CHARS
  49 #include "mbchar.h"
  50 #include <locale.h>
  51 #endif /* MULTIBYTE_CHARS */
  52 #ifndef GET_ENVIRONMENT
  53 #define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME))
  54 #endif
  55
  56 #if USE_CPPLIB
  57 extern cpp_reader  parse_in;
  58 #else
  59 /* Stream for reading from the input file.  */
  60 FILE *finput;
  61 #endif
  62
  63 /* Private idea of the line number.  See discussion in c_lex().  */
  64 static int lex_lineno;
  65
  66 /* We may keep statistics about how long which files took to compile.  */
  67 static int header_time, body_time;
  68 static splay_tree file_info_tree;
  69
  70 /* Cause the `yydebug' variable to be defined.  */
  71 #define YYDEBUG 1
  72
  73 #if !USE_CPPLIB
  74
  75 struct putback_buffer
  76 {
  77   unsigned char *buffer;
  78   int   buffer_size;
  79   int   index;
  80 };
  81
  82 static struct putback_buffer putback = {NULL, 0, -1};
  83
  84 static inline int getch PARAMS ((void));
  85
  86 static inline int
  87 getch ()
  88 {
  89   if (putback.index != -1)
  90     {
  91       int ch = putback.buffer[putback.index];
  92       --putback.index;
  93       return ch;
  94     }
  95   return getc (finput);
  96 }
  97
  98 static inline void put_back PARAMS ((int));
  99
 100 static inline void
 101 put_back (ch)
 102      int ch;
 103 {
 104   if (ch != EOF)
 105     {
 106       if (putback.index == putback.buffer_size - 1)
 107         {
 108           putback.buffer_size += 16;
 109           putback.buffer = xrealloc (putback.buffer, putback.buffer_size);
 110         }
 111       putback.buffer[++putback.index] = ch;
 112     }
 113 }
 114
 115 int linemode;
 116
 117 #endif
 118
 119 /* File used for outputting assembler code.  */
 120 extern FILE *asm_out_file;
 121
 122 #undef WCHAR_TYPE_SIZE
 123 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
 124
 125 /* Number of bytes in a wide character.  */
 126 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
 127
 128 #if !USE_CPPLIB
 129 static int maxtoken;            /* Current nominal length of token buffer.  */
 130 static char *token_buffer;      /* Pointer to token buffer.
 131                                    Actual allocated length is maxtoken + 2. */
 132 #endif
 133
 134 int indent_level;        /* Number of { minus number of }. */
 135 int pending_lang_change; /* If we need to switch languages - C++ only */
 136 int c_header_level;      /* depth in C headers - C++ only */
 137
 138 /* Nonzero tells yylex to ignore \ in string constants.  */
 139 static int ignore_escape_flag;
 140
 141 static const char *readescape   PARAMS ((const char *, const char *,
 142                                          unsigned int *));
 143 static const char *read_ucs     PARAMS ((const char *, const char *,
 144                                          unsigned int *, int));
 145 static void parse_float         PARAMS ((PTR));
 146 static tree lex_number          PARAMS ((const char *, unsigned int));
 147 static tree lex_string          PARAMS ((const char *, unsigned int, int));
 148 static tree lex_charconst       PARAMS ((const char *, unsigned int, int));
 149 static void update_header_times PARAMS ((const char *));
 150 static int dump_one_header      PARAMS ((splay_tree_node, void *));
 151
 152 #if !USE_CPPLIB
 153 static int skip_white_space             PARAMS ((int));
 154 static char *extend_token_buffer        PARAMS ((const char *));
 155 static void extend_token_buffer_to      PARAMS ((int));
 156 static int read_line_number             PARAMS ((int *));
 157 static void process_directive           PARAMS ((void));
 158 #else
 159 static void cb_ident            PARAMS ((cpp_reader *, const unsigned char *,
 160                                          unsigned int));
 161 static void cb_enter_file       PARAMS ((cpp_reader *));
 162 static void cb_leave_file       PARAMS ((cpp_reader *));
 163 static void cb_rename_file      PARAMS ((cpp_reader *));
 164 static void cb_def_pragma       PARAMS ((cpp_reader *));
 165 #endif
 166
 167 \f
 168 const char *
 169 init_c_lex (filename)
 170      const char *filename;
 171 {
 172   struct c_fileinfo *toplevel;
 173
 174   /* Set up filename timing.  Must happen before cpp_start_read.  */
 175   file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
 176                                    0,
 177                                    (splay_tree_delete_value_fn)free);
 178   toplevel = get_fileinfo ("<top level>");
 179   if (flag_detailed_statistics)
 180     {
 181       header_time = 0;
 182       body_time = get_run_time ();
 183       toplevel->time = body_time;
 184     }
 185
 186 #ifdef MULTIBYTE_CHARS
 187   /* Change to the native locale for multibyte conversions.  */
 188   setlocale (LC_CTYPE, "");
 189   GET_ENVIRONMENT (literal_codeset, "LANG");
 190 #endif
 191
 192 #if !USE_CPPLIB
 193   /* Open input file.  */
 194   if (filename == 0 || !strcmp (filename, "-"))
 195     {
 196       finput = stdin;
 197       filename = "stdin";
 198     }
 199   else
 200     finput = fopen (filename, "r");
 201   if (finput == 0)
 202     pfatal_with_name (filename);
 203
 204 #ifdef IO_BUFFER_SIZE
 205   setvbuf (finput, (char *) xmalloc (IO_BUFFER_SIZE), _IOFBF, IO_BUFFER_SIZE);
 206 #endif
 207 #else /* !USE_CPPLIB */
 208
 209   parse_in.cb.ident = cb_ident;
 210   parse_in.cb.enter_file = cb_enter_file;
 211   parse_in.cb.leave_file = cb_leave_file;
 212   parse_in.cb.rename_file = cb_rename_file;
 213   parse_in.cb.def_pragma = cb_def_pragma;
 214
 215   /* Make sure parse_in.digraphs matches flag_digraphs.  */
 216   CPP_OPTION (&parse_in, digraphs) = flag_digraphs;
 217
 218   if (! cpp_start_read (&parse_in, 0 /* no printer */, filename))
 219     abort ();
 220
 221   if (filename == 0 || !strcmp (filename, "-"))
 222     filename = "stdin";
 223 #endif
 224
 225 #if !USE_CPPLIB
 226   maxtoken = 40;
 227   token_buffer = (char *) xmalloc (maxtoken + 2);
 228 #endif
 229   /* Start it at 0, because check_newline is called at the very beginning
 230      and will increment it to 1.  */
 231   lineno = lex_lineno = 0;
 232
 233   return filename;
 234 }
 235
 236 struct c_fileinfo *
 237 get_fileinfo (name)
 238      const char *name;
 239 {
 240   splay_tree_node n;
 241   struct c_fileinfo *fi;
 242
 243   n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
 244   if (n)
 245     return (struct c_fileinfo *) n->value;
 246
 247   fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
 248   fi->time = 0;
 249   fi->interface_only = 0;
 250   fi->interface_unknown = 1;
 251   splay_tree_insert (file_info_tree, (splay_tree_key) name,
 252                      (splay_tree_value) fi);
 253   return fi;
 254 }
 255
 256 static void
 257 update_header_times (name)
 258      const char *name;
 259 {
 260   /* Changing files again.  This means currently collected time
 261      is charged against header time, and body time starts back at 0.  */
 262   if (flag_detailed_statistics)
 263     {
 264       int this_time = get_run_time ();
 265       struct c_fileinfo *file = get_fileinfo (name);
 266       header_time += this_time - body_time;
 267       file->time += this_time - body_time;
 268       body_time = this_time;
 269     }
 270 }
 271
 272 static int
 273 dump_one_header (n, dummy)
 274      splay_tree_node n;
 275      void *dummy ATTRIBUTE_UNUSED;
 276 {
 277   print_time ((const char *) n->key,
 278               ((struct c_fileinfo *) n->value)->time);
 279   return 0;
 280 }
 281
 282 void
 283 dump_time_statistics ()
 284 {
 285   struct c_fileinfo *file = get_fileinfo (input_filename);
 286   int this_time = get_run_time ();
 287   file->time += this_time - body_time;
 288
 289   fprintf (stderr, "\n******\n");
 290   print_time ("header files (total)", header_time);
 291   print_time ("main file (total)", this_time - body_time);
 292   fprintf (stderr, "ratio = %g : 1\n",
 293            (double)header_time / (double)(this_time - body_time));
 294   fprintf (stderr, "\n******\n");
 295
 296   splay_tree_foreach (file_info_tree, dump_one_header, 0);
 297 }
 298
 299 #if !USE_CPPLIB
 300
 301 /* If C is not whitespace, return C.
 302    Otherwise skip whitespace and return first nonwhite char read.  */
 303
 304 static int
 305 skip_white_space (c)
 306      register int c;
 307 {
 308   for (;;)
 309     {
 310       switch (c)
 311         {
 312           /* There is no need to process comments, backslash-newline,
 313              or \r here.  None can occur in the output of cpp.  */
 314
 315         case '\n':
 316           if (linemode)
 317             {
 318               put_back (c);
 319               return EOF;
 320             }
 321           c = check_newline ();
 322           break;
 323
 324           /* Per C99, horizontal whitespace is just these four characters.  */
 325         case ' ':
 326         case '\t':
 327         case '\f':
 328         case '\v':
 329             c = getch ();
 330           break;
 331
 332         case '\\':
 333           error ("stray '\\' in program");
 334           c = getch ();
 335           break;
 336
 337         default:
 338           return (c);
 339         }
 340     }
 341 }
 342
 343 /* Skips all of the white space at the current location in the input file.  */
 344
 345 void
 346 position_after_white_space ()
 347 {
 348   register int c;
 349
 350   c = getch ();
 351
 352   put_back (skip_white_space (c));
 353 }
 354
 355 /* Make the token buffer longer, preserving the data in it.
 356    P should point to just beyond the last valid character in the old buffer.
 357    The value we return is a pointer to the new buffer
 358    at a place corresponding to P.  */
 359
 360 static void
 361 extend_token_buffer_to (size)
 362      int size;
 363 {
 364   do
 365     maxtoken = maxtoken * 2 + 10;
 366   while (maxtoken < size);
 367   token_buffer = (char *) xrealloc (token_buffer, maxtoken + 2);
 368 }
 369
 370 static char *
 371 extend_token_buffer (p)
 372      const char *p;
 373 {
 374   int offset = p - token_buffer;
 375   extend_token_buffer_to (offset);
 376   return token_buffer + offset;
 377 }
 378 \f
 379
 380 static int
 381 read_line_number (num)
 382      int *num;
 383 {
 384   tree value;
 385   enum cpp_ttype token = c_lex (&value);
 386
 387   if (token == CPP_NUMBER && TREE_CODE (value) == INTEGER_CST)
 388     {
 389       *num = TREE_INT_CST_LOW (value);
 390       return 1;
 391     }
 392   else
 393     {
 394       if (token != CPP_EOF)
 395         error ("invalid #-line");
 396       return 0;
 397     }
 398 }
 399
 400 /* At the beginning of a line, increment the line number
 401    and process any #-directive on this line.
 402    If the line is a #-directive, read the entire line and return a newline.
 403    Otherwise, return the line's first non-whitespace character.  */
 404
 405 int
 406 check_newline ()
 407 {
 408   register int c;
 409
 410   /* Loop till we get a nonblank, non-directive line.  */
 411   for (;;)
 412     {
 413       /* Read first nonwhite char on the line.  */
 414       do
 415         c = getch ();
 416       while (c == ' ' || c == '\t');
 417
 418       lex_lineno++;
 419       if (c == '#')
 420         {
 421           process_directive ();
 422           return '\n';
 423         }
 424
 425       else if (c != '\n')
 426         break;
 427     }
 428   return c;
 429 }
 430
 431 static void
 432 process_directive ()
 433 {
 434   enum cpp_ttype token;
 435   tree value;
 436   int saw_line;
 437   enum { act_none, act_push, act_pop } action;
 438   int action_number, l;
 439   char *new_file;
 440 #ifndef NO_IMPLICIT_EXTERN_C
 441   int entering_c_header;
 442 #endif
 443
 444   /* Don't read beyond this line.  */
 445   saw_line = 0;
 446   linemode = 1;
 447
 448   token = c_lex (&value);
 449
 450   if (token == CPP_NAME)
 451     {
 452       /* If a letter follows, then if the word here is `line', skip
 453          it and ignore it; otherwise, ignore the line, with an error
 454          if the word isn't `pragma'.  */
 455
 456       const char *name = IDENTIFIER_POINTER (value);
 457
 458       if (!strcmp (name, "pragma"))
 459         {
 460 #ifdef HANDLE_GENERIC_PRAGMAS
 461           dispatch_pragma ();
 462 #endif
 463           goto skipline;
 464         }
 465       else if (!strcmp (name, "define"))
 466         {
 467           debug_define (lex_lineno, GET_DIRECTIVE_LINE ());
 468           goto skipline;
 469         }
 470       else if (!strcmp (name, "undef"))
 471         {
 472           debug_undef (lex_lineno, GET_DIRECTIVE_LINE ());
 473           goto skipline;
 474         }
 475       else if (!strcmp (name, "line"))
 476         {
 477           saw_line = 1;
 478           token = c_lex (&value);
 479           goto linenum;
 480         }
 481       else if (!strcmp (name, "ident"))
 482         {
 483           /* #ident.  We expect a string constant here.
 484              The pedantic warning and syntax error are now in cpp.  */
 485
 486           token = c_lex (&value);
 487           if (token != CPP_STRING || TREE_CODE (value) != STRING_CST)
 488             goto skipline;
 489
 490 #ifdef ASM_OUTPUT_IDENT
 491           if (! flag_no_ident)
 492             {
 493               ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
 494             }
 495 #endif
 496
 497           /* Skip the rest of this line.  */
 498           goto skipline;
 499         }
 500
 501       error ("undefined or invalid # directive `%s'", name);
 502       goto skipline;
 503     }
 504
 505   /* If the # is the only nonwhite char on the line,
 506      just ignore it.  Check the new newline.  */
 507   if (token == CPP_EOF)
 508     goto skipline;
 509
 510 linenum:
 511   /* Here we have either `#line' or `# <nonletter>'.
 512      In either case, it should be a line number; a digit should follow.  */
 513
 514   if (token != CPP_NUMBER || TREE_CODE (value) != INTEGER_CST)
 515     {
 516       error ("invalid #-line");
 517       goto skipline;
 518     }
 519
 520   /* subtract one, because it is the following line that
 521      gets the specified number */
 522
 523   l = TREE_INT_CST_LOW (value) - 1;
 524
 525   /* More follows: it must be a string constant (filename).
 526      It would be neat to use cpplib to quickly process the string, but
 527      (1) we don't have a handy tokenization of the string, and
 528      (2) I don't know how well that would work in the presense
 529      of filenames that contain wide characters.  */
 530
 531   if (saw_line)
 532     {
 533       /* Don't treat \ as special if we are processing #line 1 "...".
 534          If you want it to be treated specially, use # 1 "...".  */
 535       ignore_escape_flag = 1;
 536     }
 537
 538   /* Read the string constant.  */
 539   token = c_lex (&value);
 540
 541   ignore_escape_flag = 0;
 542
 543   if (token == CPP_EOF)
 544     {
 545       /* No more: store the line number and check following line.  */
 546       lex_lineno = l;
 547       goto skipline;
 548     }
 549
 550   if (token != CPP_STRING || TREE_CODE (value) != STRING_CST)
 551     {
 552       error ("invalid #line");
 553       goto skipline;
 554     }
 555
 556   new_file = TREE_STRING_POINTER (value);
 557
 558   if (main_input_filename == 0)
 559     main_input_filename = new_file;
 560
 561   action = act_none;
 562   action_number = 0;
 563
 564   /* Each change of file name
 565      reinitializes whether we are now in a system header.  */
 566   in_system_header = 0;
 567
 568   if (!read_line_number (&action_number))
 569     {
 570       /* Update the name in the top element of input_file_stack.  */
 571       if (input_file_stack)
 572         input_file_stack->name = input_filename;
 573     }
 574
 575   /* `1' after file name means entering new file.
 576      `2' after file name means just left a file.  */
 577
 578   if (action_number == 1)
 579     {
 580       action = act_push;
 581       read_line_number (&action_number);
 582     }
 583   else if (action_number == 2)
 584     {
 585       action = act_pop;
 586       read_line_number (&action_number);
 587     }
 588   if (action_number == 3)
 589     {
 590       /* `3' after file name means this is a system header file.  */
 591       in_system_header = 1;
 592       read_line_number (&action_number);
 593     }
 594 #ifndef NO_IMPLICIT_EXTERN_C
 595   if (action_number == 4)
 596     {
 597       /* `4' after file name means this is a C header file.  */
 598       entering_c_header = 1;
 599       read_line_number (&action_number);
 600     }
 601 #endif
 602
 603   /* Do the actions implied by the preceding numbers.  */
 604   if (action == act_push)
 605     {
 606       lineno = lex_lineno;
 607       push_srcloc (input_filename, 1);
 608       input_file_stack->indent_level = indent_level;
 609       debug_start_source_file (input_filename);
 610 #ifndef NO_IMPLICIT_EXTERN_C
 611       if (c_header_level)
 612         ++c_header_level;
 613       else if (entering_c_header)
 614         {
 615           c_header_level = 1;
 616           ++pending_lang_change;
 617         }
 618 #endif
 619     }
 620   else if (action == act_pop)
 621     {
 622       /* Popping out of a file.  */
 623       if (input_file_stack->next)
 624         {
 625 #ifndef NO_IMPLICIT_EXTERN_C
 626           if (c_header_level && --c_header_level == 0)
 627             {
 628               if (entering_c_header)
 629                 warning ("badly nested C headers from preprocessor");
 630               --pending_lang_change;
 631             }
 632 #endif
 633 #if 0
 634           if (indent_level != input_file_stack->indent_level)
 635             {
 636               warning_with_file_and_line
 637                 (input_filename, lex_lineno,
 638                  "This file contains more '%c's than '%c's.",
 639                  indent_level > input_file_stack->indent_level ? '{' : '}',
 640                  indent_level > input_file_stack->indent_level ? '}' : '{');
 641             }
 642 #endif
 643           pop_srcloc ();
 644           debug_end_source_file (input_file_stack->line);
 645         }
 646       else
 647         error ("#-lines for entering and leaving files don't match");
 648     }
 649
 650   update_header_times (new_file);
 651
 652   input_filename = new_file;
 653   lex_lineno = l;
 654
 655   /* Hook for C++.  */
 656   extract_interface_info ();
 657
 658   /* skip the rest of this line.  */
 659  skipline:
 660   linemode = 0;
 661
 662   while (getch () != '\n');
 663 }
 664 #else /* USE_CPPLIB */
 665
 666 /* Not yet handled: #pragma, #define, #undef.
 667    No need to deal with linemarkers under normal conditions.  */
 668
 669 static void
 670 cb_ident (pfile, str, len)
 671      cpp_reader *pfile ATTRIBUTE_UNUSED;
 672      const unsigned char *str;
 673      unsigned int len;
 674 {
 675 #ifdef ASM_OUTPUT_IDENT
 676   if (! flag_no_ident)
 677     {
 678       /* Convert escapes in the string.  */
 679       tree value = lex_string ((const char *)str, len, 0);
 680       ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
 681     }
 682 #endif
 683 }
 684
 685 static void
 686 cb_enter_file (pfile)
 687      cpp_reader *pfile;
 688 {
 689   cpp_buffer *ip = CPP_BUFFER (pfile);
 690   /* Bleah, need a better interface to this.  */
 691   const char *flags = cpp_syshdr_flags (pfile, ip);
 692
 693   /* Mustn't stack the main buffer on the input stack.  (Ick.)  */
 694   if (ip->prev)
 695     {
 696       lex_lineno = lineno = ip->prev->lineno - 1;
 697       push_srcloc (ggc_alloc_string (ip->nominal_fname, -1), 1);
 698       input_file_stack->indent_level = indent_level;
 699       debug_start_source_file (ip->nominal_fname);
 700     }
 701   else
 702     lex_lineno = 1;
 703
 704   update_header_times (ip->nominal_fname);
 705
 706   /* Hook for C++.  */
 707   extract_interface_info ();
 708
 709   in_system_header = (flags[0] != 0);
 710 #ifndef NO_IMPLICIT_EXTERN_C
 711   if (c_header_level)
 712     ++c_header_level;
 713   else if (flags[2] != 0)
 714     {
 715       c_header_level = 1;
 716       ++pending_lang_change;
 717     }
 718 #endif
 719 }
 720
 721 static void
 722 cb_leave_file (pfile)
 723      cpp_reader *pfile;
 724 {
 725   /* Bleah, need a better interface to this.  */
 726   const char *flags = cpp_syshdr_flags (pfile, CPP_BUFFER (pfile));
 727 #if 0
 728   if (indent_level != input_file_stack->indent_level)
 729     {
 730       warning_with_file_and_line
 731         (input_filename, lex_lineno,
 732          "This file contains more '%c's than '%c's.",
 733          indent_level > input_file_stack->indent_level ? '{' : '}',
 734          indent_level > input_file_stack->indent_level ? '}' : '{');
 735     }
 736 #endif
 737   /* We get called for the main buffer, but we mustn't pop it.  */
 738   if (input_file_stack->next)
 739     pop_srcloc ();
 740   in_system_header = (flags[0] != 0);
 741 #ifndef NO_IMPLICIT_EXTERN_C
 742   if (c_header_level && --c_header_level == 0)
 743     {
 744       if (flags[2] != 0)
 745         warning ("badly nested C headers from preprocessor");
 746       --pending_lang_change;
 747     }
 748 #endif
 749   lex_lineno = CPP_BUFFER (pfile)->lineno;
 750   debug_end_source_file (input_file_stack->line);
 751
 752   update_header_times (input_file_stack->name);
 753   /* Hook for C++.  */
 754   extract_interface_info ();
 755 }
 756
 757 static void
 758 cb_rename_file (pfile)
 759      cpp_reader *pfile;
 760 {
 761   cpp_buffer *ip = CPP_BUFFER (pfile);
 762   /* Bleah, need a better interface to this.  */
 763   const char *flags = cpp_syshdr_flags (pfile, ip);
 764   input_filename = ggc_alloc_string (ip->nominal_fname, -1);
 765   lex_lineno = ip->lineno;
 766   in_system_header = (flags[0] != 0);
 767
 768   update_header_times (ip->nominal_fname);
 769   /* Hook for C++.  */
 770   extract_interface_info ();
 771 }
 772
 773 static void
 774 cb_def_pragma (pfile)
 775      cpp_reader *pfile;
 776 {
 777   /* Issue a warning message if we have been asked to do so.  Ignore
 778      unknown pragmas in system headers unless an explicit
 779      -Wunknown-pragmas has been given. */
 780   if (warn_unknown_pragmas > in_system_header)
 781     {
 782       const unsigned char *space, *name;
 783       const cpp_token *t = pfile->first_directive_token + 2;
 784
 785       space = t[0].val.node->name;
 786       name  = t[1].type == CPP_NAME ? t[1].val.node->name : 0;
 787       if (name)
 788         warning ("ignoring #pragma %s %s", space, name);
 789       else
 790         warning ("ignoring #pragma %s", space);
 791     }
 792 }
 793 #endif /* USE_CPPLIB */
 794
 795 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.
 796
 797    [lex.charset]: The character designated by the universal-character-name
 798    \UNNNNNNNN is that character whose character short name in ISO/IEC 10646
 799    is NNNNNNNN; the character designated by the universal-character-name
 800    \uNNNN is that character whose character short name in ISO/IEC 10646 is
 801    0000NNNN. If the hexadecimal value for a universal character name is
 802    less than 0x20 or in the range 0x7F-0x9F (inclusive), or if the
 803    universal character name designates a character in the basic source
 804    character set, then the program is ill-formed.
 805
 806    We assume that wchar_t is Unicode, so we don't need to do any
 807    mapping.  Is this ever wrong?  */
 808
 809 static const char *
 810 read_ucs (p, limit, cptr, length)
 811      const char *p;
 812      const char *limit;
 813      unsigned int *cptr;
 814      int length;
 815 {
 816   unsigned int code = 0;
 817   int c;
 818
 819   for (; length; --length)
 820     {
 821       if (p >= limit)
 822         {
 823           error ("incomplete universal-character-name");
 824           break;
 825         }
 826
 827       c = *p++;
 828       if (! ISXDIGIT (c))
 829         {
 830           error ("non hex digit '%c' in universal-character-name", c);
 831           p--;
 832           break;
 833         }
 834
 835       code <<= 4;
 836       if (c >= 'a' && c <= 'f')
 837         code += c - 'a' + 10;
 838       if (c >= 'A' && c <= 'F')
 839         code += c - 'A' + 10;
 840       if (c >= '0' && c <= '9')
 841         code += c - '0';
 842     }
 843
 844 #ifdef TARGET_EBCDIC
 845   sorry ("universal-character-name on EBCDIC target");
 846   *cptr = 0x3f;  /* EBCDIC invalid character */
 847   return p;
 848 #endif
 849
 850   if (code > 0x9f && !(code & 0x80000000))
 851     /* True extended character, OK.  */;
 852   else if (code >= 0x20 && code < 0x7f)
 853     {
 854       /* ASCII printable character.  The C character set consists of all of
 855          these except $, @ and `.  We use hex escapes so that this also
 856          works with EBCDIC hosts.  */
 857       if (code != 0x24 && code != 0x40 && code != 0x60)
 858         error ("universal-character-name used for '%c'", code);
 859     }
 860   else
 861     error ("invalid universal-character-name");
 862
 863   *cptr = code;
 864   return p;
 865 }
 866
 867 /* Read an escape sequence and write its character equivalent into *CPTR.
 868    P is the input pointer, which is just after the backslash.  LIMIT
 869    is how much text we have.
 870    Returns the updated input pointer.  */
 871
 872 static const char *
 873 readescape (p, limit, cptr)
 874      const char *p;
 875      const char *limit;
 876      unsigned int *cptr;
 877 {
 878   unsigned int c, code, count;
 879   unsigned firstdig = 0;
 880   int nonnull;
 881
 882   if (p == limit)
 883     {
 884       /* cpp has already issued an error for this.  */
 885       *cptr = 0;
 886       return p;
 887     }
 888
 889   c = *p++;
 890
 891   switch (c)
 892     {
 893     case 'x':
 894       if (warn_traditional && !in_system_header)
 895         warning ("the meaning of `\\x' varies with -traditional");
 896
 897       if (flag_traditional)
 898         {
 899           *cptr = 'x';
 900           return p;
 901         }
 902
 903       code = 0;
 904       count = 0;
 905       nonnull = 0;
 906       while (p < limit)
 907         {
 908           c = *p++;
 909           if (! ISXDIGIT (c))
 910             {
 911               p--;
 912               break;
 913             }
 914           code *= 16;
 915           if (c >= 'a' && c <= 'f')
 916             code += c - 'a' + 10;
 917           if (c >= 'A' && c <= 'F')
 918             code += c - 'A' + 10;
 919           if (c >= '0' && c <= '9')
 920             code += c - '0';
 921           if (code != 0 || count != 0)
 922             {
 923               if (count == 0)
 924                 firstdig = code;
 925               count++;
 926             }
 927           nonnull = 1;
 928         }
 929       if (! nonnull)
 930         {
 931           warning ("\\x used with no following hex digits");
 932           *cptr = 'x';
 933           return p;
 934         }
 935       else if (count == 0)
 936         /* Digits are all 0's.  Ok.  */
 937         ;
 938       else if ((count - 1) * 4 >= TYPE_PRECISION (integer_type_node)
 939                || (count > 1
 940                    && (((unsigned)1
 941                         << (TYPE_PRECISION (integer_type_node)
 942                             - (count - 1) * 4))
 943                        <= firstdig)))
 944         pedwarn ("hex escape out of range");
 945       *cptr = code;
 946       return p;
 947
 948     case '0':  case '1':  case '2':  case '3':  case '4':
 949     case '5':  case '6':  case '7':
 950       code = 0;
 951       for (count = 0; count < 3; count++)
 952         {
 953           if (c < '0' || c > '7')
 954             {
 955               p--;
 956               break;
 957             }
 958           code = (code * 8) + (c - '0');
 959           if (p == limit)
 960             break;
 961           c = *p++;
 962         }
 963
 964       if (count == 3)
 965         p--;
 966
 967       *cptr = code;
 968       return p;
 969
 970     case '\\': case '\'': case '"': case '?':
 971       *cptr = c;
 972       return p;
 973
 974     case 'n': *cptr = TARGET_NEWLINE;   return p;
 975     case 't': *cptr = TARGET_TAB;       return p;
 976     case 'r': *cptr = TARGET_CR;        return p;
 977     case 'f': *cptr = TARGET_FF;        return p;
 978     case 'b': *cptr = TARGET_BS;        return p;
 979     case 'v': *cptr = TARGET_VT;        return p;
 980     case 'a':
 981       if (warn_traditional && !in_system_header)
 982         warning ("the meaning of '\\a' varies with -traditional");
 983       *cptr = flag_traditional ? c : TARGET_BELL;
 984       return p;
 985
 986       /* Warnings and support checks handled by read_ucs().  */
 987     case 'u': case 'U':
 988       if (c_language != clk_cplusplus && !flag_isoc99)
 989         break;
 990
 991       if (warn_traditional && !in_system_header)
 992         warning ("the meaning of '\\%c' varies with -traditional", c);
 993
 994       return read_ucs (p, limit, cptr, c == 'u' ? 4 : 8);
 995
 996     case 'e': case 'E':
 997       if (pedantic)
 998         pedwarn ("non-ISO-standard escape sequence, '\\%c'", c);
 999       *cptr = TARGET_ESC; return p;
1000
1001       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1002          '\%' is used to prevent SCCS from getting confused.  */
1003     case '(': case '{': case '[': case '%':
1004       if (pedantic)
1005         pedwarn ("unknown escape sequence '\\%c'", c);
1006       *cptr = c;
1007       return p;
1008     }
1009
1010   if (ISGRAPH (c))
1011     pedwarn ("unknown escape sequence '\\%c'", c);
1012   else
1013     pedwarn ("unknown escape sequence: '\\' followed by char 0x%.2x", c);
1014
1015   *cptr = c;
1016   return p;
1017 }
1018
1019 #if 0 /* not yet */
1020 /* Returns 0 if C is below 0x7f (always, if TARGET_EBCDIC is defined) and
1021    1 otherwise, i.e. when C is treated as a universal-character-name.
1022    Gives an error, but still returns 1, if C is above 0xffff or is not in
1023    one of the ranges below, which list the characters that may appear in
1024    an identifier as per [extendid].  Extended character support in
1025    identifiers has not yet been implemented; portable code cannot count
1026    on support for more than the basic identifier character set.  */
1027
1028 static inline int
1029 is_extended_char (c)
1030      int c;
1031 {
1032 #ifdef TARGET_EBCDIC
1033   return 0;
1034 #else
1035   /* ASCII.  */
1036   if (c < 0x7f)
1037     return 0;
1038
1039   /* None of the valid chars are outside the Basic Multilingual Plane (the
1040      low 16 bits).  */
1041   if (c > 0xffff)
1042     {
1043       error ("universal-character-name '\\U%08x' not valid in identifier", c);
1044       return 1;
1045     }
1046
1047   /* Latin */
1048   if ((c >= 0x00c0 && c <= 0x00d6)
1049       || (c >= 0x00d8 && c <= 0x00f6)
1050       || (c >= 0x00f8 && c <= 0x01f5)
1051       || (c >= 0x01fa && c <= 0x0217)
1052       || (c >= 0x0250 && c <= 0x02a8)
1053       || (c >= 0x1e00 && c <= 0x1e9a)
1054       || (c >= 0x1ea0 && c <= 0x1ef9))
1055     return 1;
1056
1057   /* Greek */
1058   if ((c == 0x0384)
1059       || (c >= 0x0388 && c <= 0x038a)
1060       || (c == 0x038c)
1061       || (c >= 0x038e && c <= 0x03a1)
1062       || (c >= 0x03a3 && c <= 0x03ce)
1063       || (c >= 0x03d0 && c <= 0x03d6)
1064       || (c == 0x03da)
1065       || (c == 0x03dc)
1066       || (c == 0x03de)
1067       || (c == 0x03e0)
1068       || (c >= 0x03e2 && c <= 0x03f3)
1069       || (c >= 0x1f00 && c <= 0x1f15)
1070       || (c >= 0x1f18 && c <= 0x1f1d)
1071       || (c >= 0x1f20 && c <= 0x1f45)
1072       || (c >= 0x1f48 && c <= 0x1f4d)
1073       || (c >= 0x1f50 && c <= 0x1f57)
1074       || (c == 0x1f59)
1075       || (c == 0x1f5b)
1076       || (c == 0x1f5d)
1077       || (c >= 0x1f5f && c <= 0x1f7d)
1078       || (c >= 0x1f80 && c <= 0x1fb4)
1079       || (c >= 0x1fb6 && c <= 0x1fbc)
1080       || (c >= 0x1fc2 && c <= 0x1fc4)
1081       || (c >= 0x1fc6 && c <= 0x1fcc)
1082       || (c >= 0x1fd0 && c <= 0x1fd3)
1083       || (c >= 0x1fd6 && c <= 0x1fdb)
1084       || (c >= 0x1fe0 && c <= 0x1fec)
1085       || (c >= 0x1ff2 && c <= 0x1ff4)
1086       || (c >= 0x1ff6 && c <= 0x1ffc))
1087     return 1;
1088
1089   /* Cyrillic */
1090   if ((c >= 0x0401 && c <= 0x040d)
1091       || (c >= 0x040f && c <= 0x044f)
1092       || (c >= 0x0451 && c <= 0x045c)
1093       || (c >= 0x045e && c <= 0x0481)
1094       || (c >= 0x0490 && c <= 0x04c4)
1095       || (c >= 0x04c7 && c <= 0x04c8)
1096       || (c >= 0x04cb && c <= 0x04cc)
1097       || (c >= 0x04d0 && c <= 0x04eb)
1098       || (c >= 0x04ee && c <= 0x04f5)
1099       || (c >= 0x04f8 && c <= 0x04f9))
1100     return 1;
1101
1102   /* Armenian */
1103   if ((c >= 0x0531 && c <= 0x0556)
1104       || (c >= 0x0561 && c <= 0x0587))
1105     return 1;
1106
1107   /* Hebrew */
1108   if ((c >= 0x05d0 && c <= 0x05ea)
1109       || (c >= 0x05f0 && c <= 0x05f4))
1110     return 1;
1111
1112   /* Arabic */
1113   if ((c >= 0x0621 && c <= 0x063a)
1114       || (c >= 0x0640 && c <= 0x0652)
1115       || (c >= 0x0670 && c <= 0x06b7)
1116       || (c >= 0x06ba && c <= 0x06be)
1117       || (c >= 0x06c0 && c <= 0x06ce)
1118       || (c >= 0x06e5 && c <= 0x06e7))
1119     return 1;
1120
1121   /* Devanagari */
1122   if ((c >= 0x0905 && c <= 0x0939)
1123       || (c >= 0x0958 && c <= 0x0962))
1124     return 1;
1125
1126   /* Bengali */
1127   if ((c >= 0x0985 && c <= 0x098c)
1128       || (c >= 0x098f && c <= 0x0990)
1129       || (c >= 0x0993 && c <= 0x09a8)
1130       || (c >= 0x09aa && c <= 0x09b0)
1131       || (c == 0x09b2)
1132       || (c >= 0x09b6 && c <= 0x09b9)
1133       || (c >= 0x09dc && c <= 0x09dd)
1134       || (c >= 0x09df && c <= 0x09e1)
1135       || (c >= 0x09f0 && c <= 0x09f1))
1136     return 1;
1137
1138   /* Gurmukhi */
1139   if ((c >= 0x0a05 && c <= 0x0a0a)
1140       || (c >= 0x0a0f && c <= 0x0a10)
1141       || (c >= 0x0a13 && c <= 0x0a28)
1142       || (c >= 0x0a2a && c <= 0x0a30)
1143       || (c >= 0x0a32 && c <= 0x0a33)
1144       || (c >= 0x0a35 && c <= 0x0a36)
1145       || (c >= 0x0a38 && c <= 0x0a39)
1146       || (c >= 0x0a59 && c <= 0x0a5c)
1147       || (c == 0x0a5e))
1148     return 1;
1149
1150   /* Gujarati */
1151   if ((c >= 0x0a85 && c <= 0x0a8b)
1152       || (c == 0x0a8d)
1153       || (c >= 0x0a8f && c <= 0x0a91)
1154       || (c >= 0x0a93 && c <= 0x0aa8)
1155       || (c >= 0x0aaa && c <= 0x0ab0)
1156       || (c >= 0x0ab2 && c <= 0x0ab3)
1157       || (c >= 0x0ab5 && c <= 0x0ab9)
1158       || (c == 0x0ae0))
1159     return 1;
1160
1161   /* Oriya */
1162   if ((c >= 0x0b05 && c <= 0x0b0c)
1163       || (c >= 0x0b0f && c <= 0x0b10)
1164       || (c >= 0x0b13 && c <= 0x0b28)
1165       || (c >= 0x0b2a && c <= 0x0b30)
1166       || (c >= 0x0b32 && c <= 0x0b33)
1167       || (c >= 0x0b36 && c <= 0x0b39)
1168       || (c >= 0x0b5c && c <= 0x0b5d)
1169       || (c >= 0x0b5f && c <= 0x0b61))
1170     return 1;
1171
1172   /* Tamil */
1173   if ((c >= 0x0b85 && c <= 0x0b8a)
1174       || (c >= 0x0b8e && c <= 0x0b90)
1175       || (c >= 0x0b92 && c <= 0x0b95)
1176       || (c >= 0x0b99 && c <= 0x0b9a)
1177       || (c == 0x0b9c)
1178       || (c >= 0x0b9e && c <= 0x0b9f)
1179       || (c >= 0x0ba3 && c <= 0x0ba4)
1180       || (c >= 0x0ba8 && c <= 0x0baa)
1181       || (c >= 0x0bae && c <= 0x0bb5)
1182       || (c >= 0x0bb7 && c <= 0x0bb9))
1183     return 1;
1184
1185   /* Telugu */
1186   if ((c >= 0x0c05 && c <= 0x0c0c)
1187       || (c >= 0x0c0e && c <= 0x0c10)
1188       || (c >= 0x0c12 && c <= 0x0c28)
1189       || (c >= 0x0c2a && c <= 0x0c33)
1190       || (c >= 0x0c35 && c <= 0x0c39)
1191       || (c >= 0x0c60 && c <= 0x0c61))
1192     return 1;
1193
1194   /* Kannada */
1195   if ((c >= 0x0c85 && c <= 0x0c8c)
1196       || (c >= 0x0c8e && c <= 0x0c90)
1197       || (c >= 0x0c92 && c <= 0x0ca8)
1198       || (c >= 0x0caa && c <= 0x0cb3)
1199       || (c >= 0x0cb5 && c <= 0x0cb9)
1200       || (c >= 0x0ce0 && c <= 0x0ce1))
1201     return 1;
1202
1203   /* Malayalam */
1204   if ((c >= 0x0d05 && c <= 0x0d0c)
1205       || (c >= 0x0d0e && c <= 0x0d10)
1206       || (c >= 0x0d12 && c <= 0x0d28)
1207       || (c >= 0x0d2a && c <= 0x0d39)
1208       || (c >= 0x0d60 && c <= 0x0d61))
1209     return 1;
1210
1211   /* Thai */
1212   if ((c >= 0x0e01 && c <= 0x0e30)
1213       || (c >= 0x0e32 && c <= 0x0e33)
1214       || (c >= 0x0e40 && c <= 0x0e46)
1215       || (c >= 0x0e4f && c <= 0x0e5b))
1216     return 1;
1217
1218   /* Lao */
1219   if ((c >= 0x0e81 && c <= 0x0e82)
1220       || (c == 0x0e84)
1221       || (c == 0x0e87)
1222       || (c == 0x0e88)
1223       || (c == 0x0e8a)
1224       || (c == 0x0e8d)  /* Lao U+0E8D; U+0E0D is Thai, accepted above.  */
1225       || (c >= 0x0e94 && c <= 0x0e97)
1226       || (c >= 0x0e99 && c <= 0x0e9f)
1227       || (c >= 0x0ea1 && c <= 0x0ea3)
1228       || (c == 0x0ea5)
1229       || (c == 0x0ea7)
1230       || (c == 0x0eaa)
1231       || (c == 0x0eab)
1232       || (c >= 0x0ead && c <= 0x0eb0)
1233       || (c == 0x0eb2)
1234       || (c == 0x0eb3)
1235       || (c == 0x0ebd)
1236       || (c >= 0x0ec0 && c <= 0x0ec4)
1237       || (c == 0x0ec6))
1238     return 1;
1239
1240   /* Georgian */
1241   if ((c >= 0x10a0 && c <= 0x10c5)
1242       || (c >= 0x10d0 && c <= 0x10f6))
1243     return 1;
1244
1245   /* Hiragana */
1246   if ((c >= 0x3041 && c <= 0x3094)
1247       || (c >= 0x309b && c <= 0x309e))
1248     return 1;
1249
1250   /* Katakana */
1251   if ((c >= 0x30a1 && c <= 0x30fe))
1252     return 1;
1253
1254   /* Bopomofo */
1255   if ((c >= 0x3105 && c <= 0x312c))
1256     return 1;
1257
1258   /* Hangul */
1259   if ((c >= 0x1100 && c <= 0x1159)
1260       || (c >= 0x1161 && c <= 0x11a2)
1261       || (c >= 0x11a8 && c <= 0x11f9))
1262     return 1;
1263
1264   /* CJK Unified Ideographs */
1265   if ((c >= 0xf900 && c <= 0xfa2d)
1266       || (c >= 0xfb1f && c <= 0xfb36)
1267       || (c >= 0xfb38 && c <= 0xfb3c)
1268       || (c == 0xfb3e)
1269       || (c >= 0xfb40 && c <= 0xfb41)
1270       || (c >= 0xfb42 && c <= 0xfb44)
1271       || (c >= 0xfb46 && c <= 0xfbb1)
1272       || (c >= 0xfbd3 && c <= 0xfd3f)
1273       || (c >= 0xfd50 && c <= 0xfd8f)
1274       || (c >= 0xfd92 && c <= 0xfdc7)
1275       || (c >= 0xfdf0 && c <= 0xfdfb)
1276       || (c >= 0xfe70 && c <= 0xfe72)
1277       || (c == 0xfe74)
1278       || (c >= 0xfe76 && c <= 0xfefc)
1279       || (c >= 0xff21 && c <= 0xff3a)
1280       || (c >= 0xff41 && c <= 0xff5a)
1281       || (c >= 0xff66 && c <= 0xffbe)
1282       || (c >= 0xffc2 && c <= 0xffc7)
1283       || (c >= 0xffca && c <= 0xffcf)
1284       || (c >= 0xffd2 && c <= 0xffd7)
1285       || (c >= 0xffda && c <= 0xffdc)
1286       || (c >= 0x4e00 && c <= 0x9fa5))
1287     return 1;
1288
1289   error ("universal-character-name '\\u%04x' not valid in identifier", c);
1290   return 1;
1291 #endif
1292 }
1293
1294 /* Add the UTF-8 representation of C to the token_buffer, passing one
        byte at a time to extend_token.  Values up to 0x7f are emitted
        unchanged.  Larger values get a lead byte made of MASK (the length
        marker) plus the top bits of C, followed by continuation bytes of
        the form 10xxxxxx that each carry the next six bits of C.  */
1295
1296 static void
1297 utf8_extend_token (c)
1298      int c;
1299 {
1300   int shift, mask;
1301
1302   if      (c <= 0x0000007f)
1303     {
1304       extend_token (c);
1305       return;
1306     }
1307   else if (c <= 0x000007ff)
1308     shift = 6, mask = 0xc0;
1309   else if (c <= 0x0000ffff)
1310     shift = 12, mask = 0xe0;
1311   else if (c <= 0x001fffff)
1312     shift = 18, mask = 0xf0;
1313   else if (c <= 0x03ffffff)
1314     shift = 24, mask = 0xf8;
1315   else
1316     shift = 30, mask = 0xfc;
1317
       /* SHIFT is the bit position of the payload carried by the lead byte;
          it drops by six for every continuation byte until it reaches 0.  */
1318   extend_token (mask | (c >> shift));
1319   do
1320     {
1321       shift -= 6;
           /* Keep only the six payload bits belonging to this byte.  Without
              the 0x3f mask the higher bits of C would be OR-ed into bits 6
              and 7 and the byte would lose its 10xxxxxx form.  */
1322       extend_token ((unsigned char) (0x80 | ((c >> shift) & 0x3f)));
1323     }
1324   while (shift);
1325 }
1326 #endif
1327
1328 #if 0
1329 struct try_type
1330 {
       /* One row of the type_sequence table.  */

       /* Address of the variable that holds the type node for this row.  */
1331   tree *node_var;
       /* Each flag is initialized to 0 or 1 in type_sequence.
          NOTE(review): presumably they say whether the type is unsigned,
          long, and long long respectively -- confirm against the code that
          reads this table, which is not in view.  */
1332   char unsigned_flag;
1333   char long_flag;
1334   char long_long_flag;
1335 };
1336
     /* Table of struct try_type rows, one per integer type node, listed as
        signed/unsigned pairs for the plain, long and long long nodes.  The
        initializers are positional: node_var, unsigned_flag, long_flag,
        long_long_flag.  Both long long rows set long_flag as well as
        long_long_flag.
        NOTE(review): presumably the order in which candidate types are
        tried for an integer constant -- nothing in view uses the table, so
        confirm before re-enabling this code.  */
1337 struct try_type type_sequence[] =
1338 {
1339   { &integer_type_node, 0, 0, 0},
1340   { &unsigned_type_node, 1, 0, 0},
1341   { &long_integer_type_node, 0, 1, 0},
1342   { &long_unsigned_type_node, 1, 1, 0},
1343   { &long_long_integer_type_node, 0, 1, 1},
1344   { &long_long_unsigned_type_node, 1, 1, 1}
1345 };
1346 #endif /* 0 */
1347 \f
1348 struct pf_args
1349 {
       /* Argument block for parse_float, which receives a pointer to it
          through its generic PTR parameter.  */

1350   /* Input: STR is the text handed to the conversion macro.  FFLAG and
          LFLAG are nonzero when the constant carries an 'f' or an 'l'
          suffix.  BASE equal to 16 selects REAL_VALUE_HTOF; any other
          value selects REAL_VALUE_ATOF.  */
1351   const char *str;
1352   int fflag;
1353   int lflag;
1354   int base;
1355   /* Output: CONVERSION_ERRNO is errno as left by the conversion (errno
          is cleared just before it).  VALUE is the converted number.  TYPE
          is the type node chosen for the constant; double_type_node unless
          a suffix or flag_single_precision_constant picks another.  */
1356   int conversion_errno;
1357   REAL_VALUE_TYPE value;
1358   tree type;
1359 };
1360
     /* Convert the floating constant described by DATA, which must point to
        a struct pf_args.  Reads the str, fflag, lflag and base members and
        fills in type, value and conversion_errno.  Diagnoses a constant
        with both suffixes and, when pedantic, a result that is infinite.
        NOTE(review): the generic PTR parameter suggests this is invoked
        through a callback interface -- the caller is not in view.  */
1361 static void
1362 parse_float (data)
1363   PTR data;
1364 {
1365   struct pf_args *const pf = (struct pf_args *) data;
1366   const char *type_name = "double";
1367   tree type = double_type_node;
1368
1369   pf->conversion_errno = 0;
1370
1371   /* Choose the type of the constant: an 'f' suffix takes precedence over
1372      an 'l' suffix (having both is an error), and an unsuffixed constant
1373      is double unless flag_single_precision_constant makes it float.  */
1374   if (pf->fflag)
1375     {
1376       if (pf->lflag)
1377         error ("both 'f' and 'l' suffixes on floating constant");
1378       type = float_type_node;
1379       type_name = "float";
1380     }
1381   else if (pf->lflag)
1382     {
1383       type = long_double_type_node;
1384       type_name = "long double";
1385     }
1386   else if (flag_single_precision_constant)
1387     {
1388       type = float_type_node;
1389       type_name = "float";
1390     }
1391   pf->type = type;
1392
1393   /* Clear errno first so that the value recorded afterwards comes from
1394      the conversion alone.  The machine mode of the chosen type gives the
1395      precision wanted for the binary result of the conversion.  */
1396   errno = 0;
1397   if (pf->base != 16)
1398     pf->value = REAL_VALUE_ATOF (pf->str, TYPE_MODE (type));
1399   else
1400     pf->value = REAL_VALUE_HTOF (pf->str, TYPE_MODE (type));
1401   pf->conversion_errno = errno;
1402
1403   /* Some ISO C testsuites require a diagnostic here.  It is a warning and
1404      not a pedwarn because some people don't want an error for this.  */
1405   if (REAL_VALUE_ISINF (pf->value) && pedantic)
1406     warning ("floating point number exceeds range of '%s'", type_name);
1407 }
1408
     /* Fetch the next token.  Returns its cpp_ttype code and stores in
        *VALUE the tree built for it: an identifier node for names, or the
        result of lex_number, lex_charconst or lex_string for numbers,
        character constants and strings.  *VALUE is NULL_TREE for every
        other token.  Also sets lineno from lex_lineno and adjusts
        indent_level on braces.  Stray characters are reported with error
        and then skipped.

        With USE_CPPLIB the token is obtained from cpp_get_token and only
        translated here; otherwise the input is read with getch and
        tokenized by hand.  */
1409 int
1410 c_lex (value)
1411      tree *value;
1412 {
1413 #if USE_CPPLIB
1414   const cpp_token *tok;
1415   enum cpp_ttype type;
1416
1417   retry:
1418   timevar_push (TV_CPP);
1419   tok = cpp_get_token (&parse_in);
1420   timevar_pop (TV_CPP);
1421
1422   /* The C++ front end does horrible things with the current line
1423      number.  To ensure an accurate line number, we must reset it
1424      every time we return a token.  If we reset it from tok->line
1425      every time, we'll get line numbers inside macros referring to the
1426      macro definition; this is nice, but we don't want to change the
1427      behavior until integrated mode is the only option.  So we keep our
1428      own idea of the line number, and reset it from tok->line at each
1429      new line (which never happens inside a macro).  */
1430   if (tok->flags & BOL)
1431     lex_lineno = tok->line;
1432
1433   *value = NULL_TREE;
1434   lineno = lex_lineno;
1435   type = tok->type;
1436   switch (type)
1437     {
1438     case CPP_OPEN_BRACE:  indent_level++;  break;
1439     case CPP_CLOSE_BRACE: indent_level--;  break;
1440
1441     /* Issue this error here, where we can get at tok->val.aux.  */
1442     case CPP_OTHER:
1443       if (ISGRAPH (tok->val.aux))
1444         error ("stray '%c' in program", tok->val.aux);
1445       else
1446         error ("stray '\\%#o' in program", tok->val.aux);
1447       goto retry;
1448
1449     case CPP_DEFINED:
1450       type = CPP_NAME;
           /* Fall through: reported to the caller as an ordinary name.  */
1451     case CPP_NAME:
1452       *value = get_identifier ((const char *)tok->val.node->name);
1453       break;
1454
1455     case CPP_INT:
1456     case CPP_FLOAT:
1457     case CPP_NUMBER:
1458       *value = lex_number ((const char *)tok->val.str.text, tok->val.str.len);
1459       break;
1460
1461     case CPP_CHAR:
1462     case CPP_WCHAR:
1463       *value = lex_charconst ((const char *)tok->val.str.text,
1464                               tok->val.str.len, tok->type == CPP_WCHAR);
1465       break;
1466
1467     case CPP_STRING:
1468     case CPP_WSTRING:
1469     case CPP_OSTRING:
1470       *value = lex_string ((const char *)tok->val.str.text,
1471                            tok->val.str.len, tok->type == CPP_WSTRING);
1472       break;
1473
1474       /* These tokens should not be visible outside cpplib.  */
1475     case CPP_HEADER_NAME:
1476     case CPP_COMMENT:
1477     case CPP_MACRO_ARG:
1478     case CPP_PLACEMARKER:
1479       abort ();
1480
1481     default: break;
1482     }
1483
1484   return type;
1485
1486 #else
1487   int c;
1488   char *p;
       /* Set while scanning a constant: WIDE_FLAG after an 'L' prefix,
          OBJC_FLAG after an '@' prefix, CHARCONST for a '-delimited
          constant as opposed to a "-delimited string.  */
1489   int wide_flag = 0;
1490   int objc_flag = 0;
1491   int charconst = 0;
1492
1493   *value = NULL_TREE;
1494
1495  retry:
1496   c = getch ();
1497
1498   /* Effectively do c = skip_white_space (c)
1499      but do it faster in the usual cases.  */
1500   while (1)
1501     switch (c)
1502       {
1503       case ' ':
1504       case '\t':
1505       case '\f':
1506       case '\v':
1507           c = getch ();
1508         break;
1509
1510       case '\n':
1511         c = skip_white_space (c);
             /* Fall through.  */
1512       default:
1513         goto found_nonwhite;
1514       }
1515  found_nonwhite:
1516
1517   lineno = lex_lineno;
1518
1519   switch (c)
1520     {
1521     case EOF:
1522       return CPP_EOF;
1523
1524     case 'L':
1525       /* Capital L may start a wide-string or wide-character constant.  */
1526       {
1527         register int c1 = getch();
1528         if (c1 == '\'')
1529           {
1530             wide_flag = 1;
1531             goto char_constant;
1532           }
1533         if (c1 == '"')
1534           {
1535             wide_flag = 1;
1536             goto string_constant;
1537           }
1538         put_back (c1);
1539       }
1540       goto letter;
1541
1542     case '@':
1543       if (!doing_objc_thang)
1544         goto straychar;
1545       else
1546         {
1547           /* '@' may start a constant string object.  */
1548           register int c1 = getch ();
1549           if (c1 == '"')
1550             {
1551               objc_flag = 1;
1552               goto string_constant;
1553             }
1554           put_back (c1);
1555           /* Fall through to treat '@' as the start of an identifier.  */
1556         }
1557
1558     case 'A':  case 'B':  case 'C':  case 'D':  case 'E':
1559     case 'F':  case 'G':  case 'H':  case 'I':  case 'J':
1560     case 'K':             case 'M':  case 'N':  case 'O':
1561     case 'P':  case 'Q':  case 'R':  case 'S':  case 'T':
1562     case 'U':  case 'V':  case 'W':  case 'X':  case 'Y':
1563     case 'Z':
1564     case 'a':  case 'b':  case 'c':  case 'd':  case 'e':
1565     case 'f':  case 'g':  case 'h':  case 'i':  case 'j':
1566     case 'k':  case 'l':  case 'm':  case 'n':  case 'o':
1567     case 'p':  case 'q':  case 'r':  case 's':  case 't':
1568     case 'u':  case 'v':  case 'w':  case 'x':  case 'y':
1569     case 'z':
1570     case '_':
1571     case '$':
1572     letter:
           /* Collect the identifier into token_buffer, growing the buffer
              as needed, and hand the NUL-terminated text to
              get_identifier.  */
1573       p = token_buffer;
1574       while (ISALNUM (c) || c == '_' || c == '$' || c == '@')
1575         {
1576           /* Make sure this char really belongs in an identifier.  */
1577           if (c == '$')
1578             {
1579               if (! dollars_in_ident)
1580                 error ("'$' in identifier");
1581               else if (pedantic)
1582                 pedwarn ("'$' in identifier");
1583             }
1584
1585           if (p >= token_buffer + maxtoken)
1586             p = extend_token_buffer (p);
1587
1588           *p++ = c;
1589           c = getch();
1590         }
1591
1592       put_back (c);
1593
1594       if (p >= token_buffer + maxtoken)
1595         p = extend_token_buffer (p);
1596       *p = 0;
1597
1598       *value = get_identifier (token_buffer);
1599       return CPP_NAME;
1600
1601     case '.':
1602         {
1603           /* It's hard to preserve tokenization on '.' because
1604              it could be a symbol by itself, or it could be the
1605              start of a floating point number and cpp won't tell us.  */
1606           int c1 = getch ();
1607           if (c1 == '.')
1608             {
1609               int c2 = getch ();
1610               if (c2 == '.')
1611                 return CPP_ELLIPSIS;
1612
1613               put_back (c2);
1614               error ("parse error at '..'");
1615             }
1616           else if (c1 == '*' && c_language == clk_cplusplus)
1617             return CPP_DOT_STAR;
1618
1619           put_back (c1);
               /* C still holds the '.', so the number scan below stores it
                  as the first character of the constant.  */
1620           if (ISDIGIT (c1))
1621             goto number;
1622         }
1623         return CPP_DOT;
1624
1625     case '0':  case '1':  case '2':  case '3':  case '4':
1626     case '5':  case '6':  case '7':  case '8':  case '9':
1627     number:
1628       p = token_buffer;
1629       /* Scan the next preprocessing number.  All C numeric constants
1630          are preprocessing numbers, but not all preprocessing numbers
1631          are valid numeric constants.  Preprocessing numbers fit the
1632          regular expression \.?[0-9]([0-9a-zA-Z_.]|[eEpP][+-])*
1633          See C99 section 6.4.8. */
1634       for (;;)
1635         {
1636           if (p >= token_buffer + maxtoken)
1637             p = extend_token_buffer (p);
1638
1639           *p++ = c;
1640           c = getch();
1641
               /* A sign continues the number only directly after an
                  exponent letter, which is the character just stored.  */
1642           if (c == '+' || c == '-')
1643             {
1644               int d = p[-1];
1645               if (d == 'e' || d == 'E' || d == 'p' || d == 'P')
1646                 continue;
1647             }
1648           if (ISALNUM (c) || c == '_' || c == '.')
1649             continue;
1650           break;
1651         }
1652       put_back (c);
1653
1654       *value = lex_number (token_buffer, p - token_buffer);
1655       return CPP_NUMBER;
1656
1657     case '\'':
1658     char_constant:
1659     charconst = 1;
         /* Fall through: character constants share the scanning loop used
            for strings and differ only in the closing delimiter.  */
1660
1661     case '"':
1662     string_constant:
1663       {
1664         int delimiter = charconst ? '\'' : '"';
1665 #ifdef MULTIBYTE_CHARS
1666         int longest_char = local_mb_cur_max ();
1667         (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
1668 #endif
1669         c = getch ();
             /* The text is collected starting at token_buffer + 1, and that
                is also the address passed to lex_charconst and lex_string
                below.  */
1670         p = token_buffer + 1;
1671
1672         while (c != delimiter && c != EOF)
1673           {
                 /* Make room for two bytes, the number stored by the escape
                    branch in one iteration.  */
1674             if (p + 2 > token_buffer + maxtoken)
1675               p = extend_token_buffer (p);
1676
1677             /* ignore_escape_flag is set for reading the filename in #line.  */
1678             if (!ignore_escape_flag && c == '\\')
1679               {
                     /* Copy the backslash and the following character
                        verbatim, so an escaped delimiter does not end the
                        constant.  */
1680                 *p++ = c;
1681                 *p++ = getch ();  /* escaped character */
1682                 c = getch ();
1683                 continue;
1684               }
1685             else
1686               {
1687 #ifdef MULTIBYTE_CHARS
1688                 int i;
1689                 int char_len = -1;
                     /* Read up to longest_char bytes, asking local_mblen
                        after each one whether the bytes read so far are
                        accepted (a result other than -1).  */
1690                 for (i = 0; i < longest_char; ++i)
1691                   {
1692                     if (p + i >= token_buffer + maxtoken)
1693                       p = extend_token_buffer (p);
1694                     p[i] = c;
1695
1696                     char_len = local_mblen (p, i + 1);
1697                     if (char_len != -1)
1698                       break;
1699                     c = getch ();
1700                   }
1701                 if (char_len == -1)
1702                   {
1703                     /* Replace all except the first byte.  */
1704                     put_back (c);
1705                     for (--i; i > 0; --i)
1706                       put_back (p[i]);
1707                     char_len = 1;
1708                   }
1709                 /* mbtowc sometimes needs an extra char before accepting */
1710                 else if (char_len <= i)
1711                   put_back (c);
1712
1713                 p += char_len;
1714 #else
1715                 *p++ = c;
1716 #endif
1717                 c = getch ();
1718               }
1719           }
1720       }
1721
1722       if (charconst)
1723         {
1724           *value =  lex_charconst (token_buffer + 1, p - (token_buffer + 1),
1725                                    wide_flag);
1726           return wide_flag ? CPP_WCHAR : CPP_CHAR;
1727         }
1728       else
1729         {
1730           *value = lex_string (token_buffer + 1, p - (token_buffer + 1),
1731                                wide_flag);
1732           return wide_flag ? CPP_WSTRING : objc_flag ? CPP_OSTRING : CPP_STRING;
1733         }
1734
1735     case '+':
1736     case '-':
1737     case '&':
1738     case '|':
1739     case ':':
1740     case '<':
1741     case '>':
1742     case '*':
1743     case '/':
1744     case '%':
1745     case '^':
1746     case '!':
1747     case '=':
1748       {
             /* C is the operator character and C1 the character after it.
                TYPE starts out as the single-character token for C.  */
1749         int c1;
1750         enum cpp_ttype type = CPP_EOF;
1751
1752         switch (c)
1753           {
1754           case '+': type = CPP_PLUS;    break;
1755           case '-': type = CPP_MINUS;   break;
1756           case '&': type = CPP_AND;     break;
1757           case '|': type = CPP_OR;      break;
1758           case ':': type = CPP_COLON;   break;
1759           case '<': type = CPP_LESS;    break;
1760           case '>': type = CPP_GREATER; break;
1761           case '*': type = CPP_MULT;    break;
1762           case '/': type = CPP_DIV;     break;
1763           case '%': type = CPP_MOD;     break;
1764           case '^': type = CPP_XOR;     break;
1765           case '!': type = CPP_NOT;     break;
1766           case '=': type = CPP_EQ;      break;
1767           }
1768
1769         c1 = getch ();
1770
             /* An operator followed by '=': the combined token is found by
                adding a fixed offset to TYPE.  This arithmetic relies on
                the layout of enum cpp_ttype, which is declared elsewhere.  */
1771         if (c1 == '=' && type < CPP_LAST_EQ)
1772           return type + (CPP_EQ_EQ - CPP_EQ);
             /* A doubled operator character.  Cases that break, and
                characters with no case here, end up at the put_back below
                and yield the single-character token.  */
1773         else if (c == c1)
1774           switch (c)
1775             {
1776             case '+':   return CPP_PLUS_PLUS;
1777             case '-':   return CPP_MINUS_MINUS;
1778             case '&':   return CPP_AND_AND;
1779             case '|':   return CPP_OR_OR;
1780             case ':':
1781               if (c_language == clk_cplusplus)
1782                 return CPP_SCOPE;
1783               break;
1784
1785             case '<':   type = CPP_LSHIFT;      goto do_triad;
1786             case '>':   type = CPP_RSHIFT;      goto do_triad;
1787             }
             /* Remaining two-character tokens: ->, the C++ ->*, <? and >?
                operators, and the digraphs enabled by flag_digraphs.  */
1788         else
1789           switch (c)
1790             {
1791             case '-':
1792               if (c1 == '>')
1793                 {
1794                   if (c_language == clk_cplusplus)
1795                     {
1796                       c1 = getch ();
1797                       if (c1 == '*')
1798                         return CPP_DEREF_STAR;
1799                       put_back (c1);
1800                     }
1801                   return CPP_DEREF;
1802                 }
1803               break;
1804
1805             case '>':
1806               if (c1 == '?' && c_language == clk_cplusplus)
1807                 { type = CPP_MAX; goto do_triad; }
1808               break;
1809
1810             case '<':
1811               if (c1 == ':' && flag_digraphs)
1812                 return CPP_OPEN_SQUARE;
1813               if (c1 == '%' && flag_digraphs)
1814                 { indent_level++; return CPP_OPEN_BRACE; }
1815               if (c1 == '?' && c_language == clk_cplusplus)
1816                 { type = CPP_MIN; goto do_triad; }
1817               break;
1818
1819             case ':':
1820               if (c1 == '>' && flag_digraphs)
1821                 return CPP_CLOSE_SQUARE;
1822               break;
1823             case '%':
1824               if (c1 == '>' && flag_digraphs)
1825                 { indent_level--; return CPP_CLOSE_BRACE; }
1826               break;
1827             }
1828
             /* No two-character token matched: give back C1 and return the
                single-character token.  */
1829         put_back (c1);
1830         return type;
1831
             /* TYPE is a two-character operator that may be followed by
                '='; if so, move to its assignment form.  */
1832       do_triad:
1833         c1 = getch ();
1834         if (c1 == '=')
1835           type += (CPP_EQ_EQ - CPP_EQ);
1836         else
1837           put_back (c1);
1838         return type;
1839       }
1840
1841     case '~':                   return CPP_COMPL;
1842     case '?':                   return CPP_QUERY;
1843     case ',':                   return CPP_COMMA;
1844     case '(':                   return CPP_OPEN_PAREN;
1845     case ')':                   return CPP_CLOSE_PAREN;
1846     case '[':                   return CPP_OPEN_SQUARE;
1847     case ']':                   return CPP_CLOSE_SQUARE;
1848     case '{': indent_level++;   return CPP_OPEN_BRACE;
1849     case '}': indent_level--;   return CPP_CLOSE_BRACE;
1850     case ';':                   return CPP_SEMICOLON;
1851
         /* Anything else is not part of a token: report it, printable
            characters as themselves and others in octal, then read on.  */
1852     straychar:
1853     default:
1854       if (ISGRAPH (c))
1855         error ("stray '%c' in program", c);
1856       else
1857         error ("stray '\\%#o' in program", c);
1858       goto retry;
1859     }
1860   /* NOTREACHED */
1861 #endif
1862 }
1863
1864
     /* Report MSGID with error and jump to the syntax_error label, which
        the function using this macro must provide.  Wrapped in do/while(0)
        so that it behaves as a single statement.  */
1865 #define ERROR(msgid) do { error(msgid); goto syntax_error; } while(0)
1866
1867 static tree
1868 lex_number (str, len)
1869      const char *str;
1870      unsigned int len;
1871 {
1872   int base = 10;
1873   int count = 0;
1874   int largest_digit = 0;
1875   int numdigits = 0;
1876   int overflow = 0;
1877   int c;
1878   tree value;
1879   const char *p;
1880   enum anon1 { NOT_FLOAT = 0, AFTER_POINT, AFTER_EXPON } floatflag = NOT_FLOAT;
1881
1882   /* We actually store only HOST_BITS_PER_CHAR bits in each part.
1883      The code below which fills the parts array assumes that a host
1884      int is at least twice as wide as a host char, and that
1885      HOST_BITS_PER_WIDE_INT is an even multiple of HOST_BITS_PER_CHAR.
1886      Two HOST_WIDE_INTs is the largest int literal we can store.
1887      In order to detect overflow below, the number of parts (TOTAL_PARTS)
1888      must be exactly the number of parts needed to hold the bits
1889      of two HOST_WIDE_INTs. */
1890 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2)
1891   unsigned int parts[TOTAL_PARTS];
1892
1893   /* Optimize for most frequent case.  */
1894   if (len == 1)
1895     {
1896       if (*str == '0')
1897         return integer_zero_node;
1898       else if (*str == '1')
1899         return integer_one_node;
1900       else
1901         return build_int_2 (*str - '0', 0);
1902     }
1903
1904   for (count = 0; count < TOTAL_PARTS; count++)
1905     parts[count] = 0;
1906
1907   /* len is known to be >1 at this point.  */
1908   p = str;
1909
1910   if (len > 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
1911     {
1912       base = 16;
1913       p = str + 2;
1914     }
1915   /* The ISDIGIT check is so we are not confused by a suffix on 0.  */
1916   else if (str[0] == '0' && ISDIGIT (str[1]))
1917     {
1918       base = 8;
1919       p = str + 1;
1920     }
1921
1922   do
1923     {
1924       c = *p++;
1925
1926       if (c == '.')
1927         {
1928           if (base == 16 && pedantic && !flag_isoc99)
1929             pedwarn ("floating constant may not be in radix 16");
1930           else if (floatflag == AFTER_POINT)
1931             ERROR ("too many decimal points in floating constant");
1932           else if (floatflag == AFTER_EXPON)
1933             ERROR ("decimal point in exponent - impossible!");
1934           else
1935             floatflag = AFTER_POINT;
1936
1937           if (base == 8)
1938             base = 10;
1939         }
1940       else if (c == '_')
1941         /* Possible future extension: silently ignore _ in numbers,
1942            permitting cosmetic grouping - e.g. 0x8000_0000 == 0x80000000
1943            but somewhat easier to read.  Ada has this?  */
1944         ERROR ("underscore in number");
1945       else
1946         {
1947           int n;
1948           /* It is not a decimal point.
1949              It should be a digit (perhaps a hex digit).  */
1950
1951           if (ISDIGIT (c))
1952             {
1953               n = c - '0';
1954             }
1955           else if (base <= 10 && (c == 'e' || c == 'E'))
1956             {
1957               base = 10;
1958               floatflag = AFTER_EXPON;
1959               break;
1960             }
1961           else if (base == 16 && (c == 'p' || c == 'P'))
1962             {
1963               floatflag = AFTER_EXPON;
1964               break;   /* start of exponent */
1965             }
1966           else if (base == 16 && c >= 'a' && c <= 'f')
1967             {
1968               n = c - 'a' + 10;
1969             }
1970           else if (base == 16 && c >= 'A' && c <= 'F')
1971             {
1972               n = c - 'A' + 10;
1973             }
1974           else
1975             {
1976               p--;
1977               break;  /* start of suffix */
1978             }
1979
1980           if (n >= largest_digit)
1981             largest_digit = n;
1982           numdigits++;
1983
1984           for (count = 0; count < TOTAL_PARTS; count++)
1985             {
1986               parts[count] *= base;
1987               if (count)
1988                 {
1989                   parts[count]
1990                     += (parts[count-1] >> HOST_BITS_PER_CHAR);
1991                   parts[count-1]
1992                     &= (1 << HOST_BITS_PER_CHAR) - 1;
1993                 }
1994               else
1995                 parts[0] += n;
1996             }
1997
1998           /* If the highest-order part overflows (gets larger than
1999              a host char will hold) then the whole number has
2000              overflowed.  Record this and truncate the highest-order
2001              part. */
2002           if (parts[TOTAL_PARTS - 1] >> HOST_BITS_PER_CHAR)
2003             {
2004               overflow = 1;
2005               parts[TOTAL_PARTS - 1] &= (1 << HOST_BITS_PER_CHAR) - 1;
2006             }
2007         }
2008     }
2009   while (p < str + len);
2010
2011   /* This can happen on input like `int i = 0x;' */
2012   if (numdigits == 0)
2013     ERROR ("numeric constant with no digits");
2014
2015   if (largest_digit >= base)
2016     ERROR ("numeric constant contains digits beyond the radix");
2017
2018   if (floatflag != NOT_FLOAT)
2019     {
2020       tree type;
2021       int imag, fflag, lflag, conversion_errno;
2022       REAL_VALUE_TYPE real;
2023       struct pf_args args;
2024       char *copy;
2025
2026       if (base == 16 && floatflag != AFTER_EXPON)
2027         ERROR ("hexadecimal floating constant has no exponent");
2028
2029       /* Read explicit exponent if any, and put it in tokenbuf.  */
2030       if ((base == 10 && ((c == 'e') || (c == 'E')))
2031           || (base == 16 && (c == 'p' || c == 'P')))
2032         {
2033           if (p < str + len)
2034             c = *p++;
2035           if (p < str + len && (c == '+' || c == '-'))
2036             c = *p++;
2037           /* Exponent is decimal, even if string is a hex float.  */
2038           if (! ISDIGIT (c))
2039             ERROR ("floating constant exponent has no digits");
2040           while (p < str + len && ISDIGIT (c))
2041             c = *p++;
2042           if (! ISDIGIT (c))
2043             p--;
2044         }
2045
2046       /* Copy the float constant now; we don't want any suffixes in the
2047          string passed to parse_float.  */
2048       copy = alloca (p - str + 1);
2049       memcpy (copy, str, p - str);
2050       copy[p - str] = '\0';
2051
2052       /* Now parse suffixes.  */
2053       fflag = lflag = imag = 0;
2054       while (p < str + len)
2055         switch (*p++)
2056           {
2057           case 'f': case 'F':
2058             if (fflag)
2059               ERROR ("more than one 'f' suffix on floating constant");
2060             else if (warn_traditional && !in_system_header)
2061               warning ("traditional C rejects the 'f' suffix");
2062
2063             fflag = 1;
2064             break;
2065
2066           case 'l': case 'L':
2067             if (lflag)
2068               ERROR ("more than one 'l' suffix on floating constant");
2069             else if (warn_traditional && !in_system_header)
2070               warning ("traditional C rejects the 'l' suffix");
2071
2072             lflag = 1;
2073             break;
2074
2075           case 'i': case 'I':
2076           case 'j': case 'J':
2077             if (imag)
2078               ERROR ("more than one 'i' or 'j' suffix on floating constant");
2079             else if (pedantic)
2080               pedwarn ("ISO C forbids imaginary numeric constants");
2081             imag = 1;
2082             break;
2083
2084           default:
2085             ERROR ("invalid suffix on floating constant");
2086           }
2087
2088       /* Setup input for parse_float() */
2089       args.str = copy;
2090       args.fflag = fflag;
2091       args.lflag = lflag;
2092       args.base = base;
2093
2094       /* Convert string to a double, checking for overflow.  */
2095       if (do_float_handler (parse_float, (PTR) &args))
2096         {
2097           /* Receive output from parse_float() */
2098           real = args.value;
2099         }
2100       else
2101           /* We got an exception from parse_float() */
2102           ERROR ("floating constant out of range");
2103
2104       /* Receive output from parse_float() */
2105       conversion_errno = args.conversion_errno;
2106       type = args.type;
2107
2108 #ifdef ERANGE
2109       /* ERANGE is also reported for underflow,
2110          so test the value to distinguish overflow from that.  */
2111       if (conversion_errno == ERANGE && !flag_traditional && pedantic
2112           && (REAL_VALUES_LESS (dconst1, real)
2113               || REAL_VALUES_LESS (real, dconstm1)))
2114         warning ("floating point number exceeds range of 'double'");
2115 #endif
2116
2117       /* Create a node with determined type and value.  */
2118       if (imag)
2119         value = build_complex (NULL_TREE, convert (type, integer_zero_node),
2120                                build_real (type, real));
2121       else
2122         value = build_real (type, real);
2123     }
2124   else
2125     {
2126       tree trad_type, ansi_type, type;
2127       HOST_WIDE_INT high, low;
2128       int spec_unsigned = 0;
2129       int spec_long = 0;
2130       int spec_long_long = 0;
2131       int spec_imag = 0;
2132       int suffix_lu = 0;
2133       int warn = 0, i;
2134
2135       trad_type = ansi_type = type = NULL_TREE;
2136       while (p < str + len)
2137         {
2138           c = *p++;
2139           switch (c)
2140             {
2141             case 'u': case 'U':
2142               if (spec_unsigned)
2143                 error ("two 'u' suffixes on integer constant");
2144               else if (warn_traditional && !in_system_header)
2145                 warning ("traditional C rejects the 'u' suffix");
2146
2147               spec_unsigned = 1;
2148               if (spec_long)
2149                 suffix_lu = 1;
2150               break;
2151
2152             case 'l': case 'L':
2153               if (spec_long)
2154                 {
2155                   if (spec_long_long)
2156                     error ("three 'l' suffixes on integer constant");
2157                   else if (suffix_lu)
2158                     error ("'lul' is not a valid integer suffix");
2159                   else if (c != spec_long)
2160                     error ("'Ll' and 'lL' are not valid integer suffixes");
2161                   else if (pedantic && ! flag_isoc99
2162                            && ! in_system_header && warn_long_long)
2163                     pedwarn ("ISO C89 forbids long long integer constants");
2164                   spec_long_long = 1;
2165                 }
2166               spec_long = c;
2167               break;
2168
2169             case 'i': case 'I': case 'j': case 'J':
2170               if (spec_imag)
2171                 error ("more than one 'i' or 'j' suffix on integer constant");
2172               else if (pedantic)
2173                 pedwarn ("ISO C forbids imaginary numeric constants");
2174               spec_imag = 1;
2175               break;
2176
2177             default:
2178               ERROR ("invalid suffix on integer constant");
2179             }
2180         }
2181
2182       /* If the literal overflowed, pedwarn about it now. */
2183       if (overflow)
2184         {
2185           warn = 1;
2186           pedwarn ("integer constant is too large for this configuration of the compiler - truncated to %d bits", HOST_BITS_PER_WIDE_INT * 2);
2187         }
2188
2189       /* This is simplified by the fact that our constant
2190          is always positive.  */
2191
2192       high = low = 0;
2193
2194       for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
2195         {
2196           high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
2197                                               / HOST_BITS_PER_CHAR)]
2198                    << (i * HOST_BITS_PER_CHAR));
2199           low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
2200         }
2201
2202       value = build_int_2 (low, high);
2203       TREE_TYPE (value) = long_long_unsigned_type_node;
2204
2205       /* If warn_traditional, calculate both the ISO type and the
2206          traditional type, then see if they disagree.
2207          Otherwise, calculate only the type for the dialect in use.  */
2208       if (warn_traditional || flag_traditional)
2209         {
2210           /* Calculate the traditional type.  */
2211           /* Traditionally, any constant is signed; but if unsigned is
2212              specified explicitly, obey that.  Use the smallest size
2213              with the right number of bits, except for one special
2214              case with decimal constants.  */
2215           if (! spec_long && base != 10
2216               && int_fits_type_p (value, unsigned_type_node))
2217             trad_type = spec_unsigned ? unsigned_type_node : integer_type_node;
2218           /* A decimal constant must be long if it does not fit in
2219              type int.  I think this is independent of whether the
2220              constant is signed.  */
2221           else if (! spec_long && base == 10
2222                    && int_fits_type_p (value, integer_type_node))
2223             trad_type = spec_unsigned ? unsigned_type_node : integer_type_node;
2224           else if (! spec_long_long)
2225             trad_type = (spec_unsigned
2226                          ? long_unsigned_type_node
2227                          : long_integer_type_node);
2228           else if (int_fits_type_p (value,
2229                                     spec_unsigned
2230                                     ? long_long_unsigned_type_node
2231                                     : long_long_integer_type_node))
2232             trad_type = (spec_unsigned
2233                          ? long_long_unsigned_type_node
2234                          : long_long_integer_type_node);
2235           else
2236             trad_type = (spec_unsigned
2237                          ? widest_unsigned_literal_type_node
2238                          : widest_integer_literal_type_node);
2239         }
2240       if (warn_traditional || ! flag_traditional)
2241         {
2242           /* Calculate the ISO type.  */
2243           if (! spec_long && ! spec_unsigned
2244               && int_fits_type_p (value, integer_type_node))
2245             ansi_type = integer_type_node;
2246           else if (! spec_long && (base != 10 || spec_unsigned)
2247                    && int_fits_type_p (value, unsigned_type_node))
2248             ansi_type = unsigned_type_node;
2249           else if (! spec_unsigned && !spec_long_long
2250                    && int_fits_type_p (value, long_integer_type_node))
2251             ansi_type = long_integer_type_node;
2252           else if (! spec_long_long
2253                    && int_fits_type_p (value, long_unsigned_type_node))
2254             ansi_type = long_unsigned_type_node;
2255           else if (! spec_unsigned
2256                    && int_fits_type_p (value, long_long_integer_type_node))
2257             ansi_type = long_long_integer_type_node;
2258           else if (int_fits_type_p (value, long_long_unsigned_type_node))
2259             ansi_type = long_long_unsigned_type_node;
2260           else if (! spec_unsigned
2261                    && int_fits_type_p (value, widest_integer_literal_type_node))
2262             ansi_type = widest_integer_literal_type_node;
2263           else
2264             ansi_type = widest_unsigned_literal_type_node;
2265         }
2266
2267       type = flag_traditional ? trad_type : ansi_type;
2268
2269       /* We assume that constants specified in a non-decimal
2270          base are bit patterns, and that the programmer really
2271          meant what they wrote.  */
2272       if (warn_traditional && !in_system_header
2273           && base == 10 && trad_type != ansi_type)
2274         {
2275           if (TYPE_PRECISION (trad_type) != TYPE_PRECISION (ansi_type))
2276             warning ("width of integer constant changes with -traditional");
2277           else if (TREE_UNSIGNED (trad_type) != TREE_UNSIGNED (ansi_type))
2278             warning ("integer constant is unsigned in ISO C, signed with -traditional");
2279           else
2280             warning ("width of integer constant may change on other systems with -traditional");
2281         }
2282
2283       if (pedantic && !flag_traditional && !spec_long_long && !warn
2284           && (TYPE_PRECISION (long_integer_type_node) < TYPE_PRECISION (type)))
2285         {
2286           warn = 1;
2287           pedwarn ("integer constant larger than the maximum value of an unsigned long int");
2288         }
2289
2290       if (base == 10 && ! spec_unsigned && TREE_UNSIGNED (type))
2291         warning ("decimal constant is so large that it is unsigned");
2292
2293       if (spec_imag)
2294         {
2295           if (TYPE_PRECISION (type)
2296               <= TYPE_PRECISION (integer_type_node))
2297             value = build_complex (NULL_TREE, integer_zero_node,
2298                                    convert (integer_type_node, value));
2299           else
2300             ERROR ("complex integer constant is too wide for 'complex int'");
2301         }
2302       else if (flag_traditional && !int_fits_type_p (value, type))
2303         /* The traditional constant 0x80000000 is signed
2304            but doesn't fit in the range of int.
2305            This will change it to -0x80000000, which does fit.  */
2306         {
2307           TREE_TYPE (value) = unsigned_type (type);
2308           value = convert (type, value);
2309           TREE_OVERFLOW (value) = TREE_CONSTANT_OVERFLOW (value) = 0;
2310         }
2311       else
2312         TREE_TYPE (value) = type;
2313
2314       /* If it's still an integer (not a complex), and it doesn't
2315          fit in the type we choose for it, then pedwarn. */
2316
2317       if (! warn
2318           && TREE_CODE (TREE_TYPE (value)) == INTEGER_TYPE
2319           && ! int_fits_type_p (value, TREE_TYPE (value)))
2320         pedwarn ("integer constant is larger than the maximum value for its type");
2321     }
2322
2323   if (p < str + len)
2324     error ("missing white space after number '%.*s'", (int) (p - str), str);
2325
2326   return value;
2327
2328  syntax_error:
2329   return integer_zero_node;
2330 }
2331
2332 static tree
2333 lex_string (str, len, wide)
2334      const char *str;
2335      unsigned int len;
2336      int wide;
2337 {
2338   tree value;
2339   char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
2340   char *q = buf;
2341   const char *p = str, *limit = str + len;
2342   unsigned int c;
2343   unsigned width = wide ? WCHAR_TYPE_SIZE
2344                         : TYPE_PRECISION (char_type_node);
2345
2346 #ifdef MULTIBYTE_CHARS
2347   /* Reset multibyte conversion state.  */
2348   (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
2349 #endif
2350
2351   while (p < limit)
2352     {
2353 #ifdef MULTIBYTE_CHARS
2354       wchar_t wc;
2355       int char_len;
2356
2357       char_len = local_mbtowc (&wc, p, limit - p);
2358       if (char_len == -1)
2359         {
2360           warning ("Ignoring invalid multibyte character");
2361           char_len = 1;
2362           c = *p++;
2363         }
2364       else
2365         {
2366           p += char_len;
2367           c = wc;
2368         }
2369 #else
2370       c = *p++;
2371 #endif
2372
2373       if (c == '\\' && !ignore_escape_flag)
2374         {
2375           p = readescape (p, limit, &c);
2376           if (width < HOST_BITS_PER_INT
2377               && (unsigned) c >= ((unsigned)1 << width))
2378             pedwarn ("escape sequence out of range for character");
2379         }
2380
2381       /* Add this single character into the buffer either as a wchar_t
2382          or as a single byte.  */
2383       if (wide)
2384         {
2385           unsigned charwidth = TYPE_PRECISION (char_type_node);
2386           unsigned bytemask = (1 << width) - 1;
2387           int byte;
2388
2389           for (byte = 0; byte < WCHAR_BYTES; ++byte)
2390             {
2391               int n;
2392               if (byte >= (int) sizeof (c))
2393                 n = 0;
2394               else
2395                 n = (c >> (byte * charwidth)) & bytemask;
2396               if (BYTES_BIG_ENDIAN)
2397                 q[WCHAR_BYTES - byte - 1] = n;
2398               else
2399                 q[byte] = n;
2400             }
2401           q += WCHAR_BYTES;
2402         }
2403       else
2404         {
2405           *q++ = c;
2406         }
2407     }
2408
2409   /* Terminate the string value, either with a single byte zero
2410      or with a wide zero.  */
2411
2412   if (wide)
2413     {
2414       memset (q, 0, WCHAR_BYTES);
2415       q += WCHAR_BYTES;
2416     }
2417   else
2418     {
2419       *q++ = '\0';
2420     }
2421
2422   value = build_string (q - buf, buf);
2423
2424   if (wide)
2425     TREE_TYPE (value) = wchar_array_type_node;
2426   else
2427     TREE_TYPE (value) = char_array_type_node;
2428   return value;
2429 }
2430
2431 static tree
2432 lex_charconst (str, len, wide)
2433      const char *str;
2434      unsigned int len;
2435      int wide;
2436 {
2437   const char *limit = str + len;
2438   int result = 0;
2439   int num_chars = 0;
2440   int chars_seen = 0;
2441   unsigned width = TYPE_PRECISION (char_type_node);
2442   int max_chars;
2443   unsigned int c;
2444   tree value;
2445
2446 #ifdef MULTIBYTE_CHARS
2447   int longest_char = local_mb_cur_max ();
2448   (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
2449 #endif
2450
2451   max_chars = TYPE_PRECISION (integer_type_node) / width;
2452   if (wide)
2453     width = WCHAR_TYPE_SIZE;
2454
2455   while (str < limit)
2456     {
2457 #ifdef MULTIBYTE_CHARS
2458       wchar_t wc;
2459       int char_len;
2460
2461       char_len = local_mbtowc (&wc, str, limit - str);
2462       if (char_len == -1)
2463         {
2464           warning ("Ignoring invalid multibyte character");
2465           char_len = 1;
2466           c = *str++;
2467         }
2468       else
2469         {
2470           p += char_len;
2471           c = wc;
2472         }
2473 #else
2474       c = *str++;
2475 #endif
2476
2477       ++chars_seen;
2478       if (c == '\\')
2479         {
2480           str = readescape (str, limit, &c);
2481           if (width < HOST_BITS_PER_INT
2482               && (unsigned) c >= ((unsigned)1 << width))
2483             pedwarn ("escape sequence out of range for character");
2484         }
2485 #ifdef MAP_CHARACTER
2486       if (ISPRINT (c))
2487         c = MAP_CHARACTER (c);
2488 #endif
2489
2490       /* Merge character into result; ignore excess chars.  */
2491       num_chars += (width / TYPE_PRECISION (char_type_node));
2492       if (num_chars < max_chars + 1)
2493         {
2494           if (width < HOST_BITS_PER_INT)
2495             result = (result << width) | (c & ((1 << width) - 1));
2496           else
2497             result = c;
2498         }
2499     }
2500
2501   if (chars_seen == 0)
2502     error ("empty character constant");
2503   else if (num_chars > max_chars)
2504     {
2505       num_chars = max_chars;
2506       error ("character constant too long");
2507     }
2508   else if (chars_seen != 1 && ! flag_traditional && warn_multichar)
2509     warning ("multi-character character constant");
2510
2511   /* If char type is signed, sign-extend the constant.  */
2512   if (! wide)
2513     {
2514       int num_bits = num_chars * width;
2515       if (num_bits == 0)
2516         /* We already got an error; avoid invalid shift.  */
2517         value = build_int_2 (0, 0);
2518       else if (TREE_UNSIGNED (char_type_node)
2519                || ((result >> (num_bits - 1)) & 1) == 0)
2520         value = build_int_2 (result & (~(unsigned HOST_WIDE_INT) 0
2521                                        >> (HOST_BITS_PER_WIDE_INT - num_bits)),
2522                              0);
2523       else
2524         value = build_int_2 (result | ~(~(unsigned HOST_WIDE_INT) 0
2525                                         >> (HOST_BITS_PER_WIDE_INT - num_bits)),
2526                              -1);
2527       /* In C, a character constant has type 'int'; in C++, 'char'.  */
2528       if (chars_seen <= 1 && c_language == clk_cplusplus)
2529         TREE_TYPE (value) = char_type_node;
2530       else
2531         TREE_TYPE (value) = integer_type_node;
2532     }
2533   else
2534     {
2535       value = build_int_2 (result, 0);
2536       TREE_TYPE (value) = wchar_type_node;
2537     }
2538
2539   return value;
2540 }