]> gcc.gnu.org Git - gcc.git/blame - gcc/c-lex.c
simplify-rtx.c (simplify_unary_operation): Add cases FLOAT_EXTEND and FLOAT_TRUNCATE...
[gcc.git] / gcc / c-lex.c
CommitLineData
d45cf215 1/* Lexical analyzer for C and Objective C.
517cbe13
JL
2 Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
3 1998, 1999, 2000 Free Software Foundation, Inc.
e8bbfc4e
RK
4
5This file is part of GNU CC.
6
7GNU CC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GNU CC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU CC; see the file COPYING. If not, write to
940d9d63
RK
19the Free Software Foundation, 59 Temple Place - Suite 330,
20Boston, MA 02111-1307, USA. */
e8bbfc4e 21
e9a25f70 22#include "config.h"
670ee920 23#include "system.h"
e8bbfc4e 24
e8bbfc4e 25#include "rtl.h"
8f17b5c5 26#include "expr.h"
e8bbfc4e
RK
27#include "tree.h"
28#include "input.h"
d6f4ec51 29#include "output.h"
e8bbfc4e
RK
30#include "c-lex.h"
31#include "c-tree.h"
32#include "flags.h"
0e5921e8 33#include "timevar.h"
8b97c5f8 34#include "cpplib.h"
3d6f7931 35#include "c-pragma.h"
5f6da302 36#include "toplev.h"
ab87f8c8 37#include "intl.h"
1526a060 38#include "ggc.h"
7bdb32b9 39#include "tm_p.h"
0e5921e8 40#include "splay-tree.h"
ab87f8c8 41
ab87f8c8
JL
42/* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
45#ifdef CROSS_COMPILE
46#undef MULTIBYTE_CHARS
47#endif
e8bbfc4e
RK
48
49#ifdef MULTIBYTE_CHARS
56f48ce9 50#include "mbchar.h"
e8bbfc4e 51#include <locale.h>
56f48ce9 52#endif /* MULTIBYTE_CHARS */
c5c76735
JL
53#ifndef GET_ENVIRONMENT
54#define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME))
55#endif
e8bbfc4e 56
0e5921e8 57#if USE_CPPLIB
c8724862 58extern cpp_reader parse_in;
e56e519d
DB
59#else
60/* Stream for reading from the input file. */
61FILE *finput;
e3d1fd32
PB
62#endif
63
0e5921e8
ZW
64/* Private idea of the line number. See discussion in c_lex(). */
65static int lex_lineno;
66
67/* We may keep statistics about how long which files took to compile. */
68static int header_time, body_time;
69static splay_tree file_info_tree;
3ab6dd7c 70
e8bbfc4e
RK
71/* Cause the `yydebug' variable to be defined. */
72#define YYDEBUG 1
73
0e5921e8 74#if !USE_CPPLIB
505e0385 75
0e5921e8
ZW
76struct putback_buffer
77{
69f21756 78 unsigned char *buffer;
505e0385
DB
79 int buffer_size;
80 int index;
81};
82
83static struct putback_buffer putback = {NULL, 0, -1};
84
6e090c76 85static inline int getch PARAMS ((void));
93868d11 86
505e0385
DB
87static inline int
88getch ()
89{
90 if (putback.index != -1)
91 {
92 int ch = putback.buffer[putback.index];
93 --putback.index;
94 return ch;
95 }
96 return getc (finput);
97}
98
6e090c76 99static inline void put_back PARAMS ((int));
93868d11 100
505e0385
DB
101static inline void
102put_back (ch)
103 int ch;
104{
105 if (ch != EOF)
106 {
107 if (putback.index == putback.buffer_size - 1)
108 {
109 putback.buffer_size += 16;
110 putback.buffer = xrealloc (putback.buffer, putback.buffer_size);
111 }
112 putback.buffer[++putback.index] = ch;
113 }
114}
e3d1fd32 115
fbb18613
JM
116int linemode;
117
0e5921e8 118#endif
e8bbfc4e
RK
119
120/* File used for outputting assembler code. */
121extern FILE *asm_out_file;
122
12a39b12
JM
123#undef WCHAR_TYPE_SIZE
124#define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
e8bbfc4e
RK
125
126/* Number of bytes in a wide character. */
127#define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
128
0e5921e8 129#if !USE_CPPLIB
e8bbfc4e 130static int maxtoken; /* Current nominal length of token buffer. */
0e5921e8
ZW
131static char *token_buffer; /* Pointer to token buffer.
132 Actual allocated length is maxtoken + 2. */
133#endif
e8bbfc4e 134
0e5921e8
ZW
135int indent_level; /* Number of { minus number of }. */
136int pending_lang_change; /* If we need to switch languages - C++ only */
137int c_header_level; /* depth in C headers - C++ only */
fbb18613
JM
138
139/* Nonzero tells yylex to ignore \ in string constants. */
140static int ignore_escape_flag;
e9a25f70 141
0e5921e8
ZW
142static const char *readescape PARAMS ((const char *, const char *,
143 unsigned int *));
144static const char *read_ucs PARAMS ((const char *, const char *,
145 unsigned int *, int));
146static void parse_float PARAMS ((PTR));
147static tree lex_number PARAMS ((const char *, unsigned int));
148static tree lex_string PARAMS ((const char *, unsigned int, int));
149static tree lex_charconst PARAMS ((const char *, unsigned int, int));
150static void update_header_times PARAMS ((const char *));
151static int dump_one_header PARAMS ((splay_tree_node, void *));
e8bbfc4e 152
0e5921e8 153#if !USE_CPPLIB
6e090c76
KG
154static int skip_white_space PARAMS ((int));
155static char *extend_token_buffer PARAMS ((const char *));
6e090c76
KG
156static void extend_token_buffer_to PARAMS ((int));
157static int read_line_number PARAMS ((int *));
0e5921e8
ZW
158static void process_directive PARAMS ((void));
159#else
160static void cb_ident PARAMS ((cpp_reader *, const unsigned char *,
161 unsigned int));
162static void cb_enter_file PARAMS ((cpp_reader *));
163static void cb_leave_file PARAMS ((cpp_reader *));
164static void cb_rename_file PARAMS ((cpp_reader *));
8b97c5f8 165static void cb_def_pragma PARAMS ((cpp_reader *));
0e5921e8 166#endif
e31c7eec 167
e31c7eec 168\f
3b304f5b 169const char *
0e5921e8 170init_c_lex (filename)
3b304f5b 171 const char *filename;
e3d1fd32 172{
0e5921e8
ZW
173 struct c_fileinfo *toplevel;
174
175 /* Set up filename timing. Must happen before cpp_start_read. */
176 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
177 0,
178 (splay_tree_delete_value_fn)free);
179 toplevel = get_fileinfo ("<top level>");
180 if (flag_detailed_statistics)
181 {
182 header_time = 0;
183 body_time = get_run_time ();
184 toplevel->time = body_time;
185 }
186
187#ifdef MULTIBYTE_CHARS
188 /* Change to the native locale for multibyte conversions. */
189 setlocale (LC_CTYPE, "");
190 GET_ENVIRONMENT (literal_codeset, "LANG");
191#endif
192
e56e519d
DB
193#if !USE_CPPLIB
194 /* Open input file. */
195 if (filename == 0 || !strcmp (filename, "-"))
196 {
197 finput = stdin;
198 filename = "stdin";
199 }
200 else
201 finput = fopen (filename, "r");
202 if (finput == 0)
203 pfatal_with_name (filename);
204
205#ifdef IO_BUFFER_SIZE
206 setvbuf (finput, (char *) xmalloc (IO_BUFFER_SIZE), _IOFBF, IO_BUFFER_SIZE);
207#endif
b12da25e 208#else /* !USE_CPPLIB */
0e5921e8
ZW
209
210 parse_in.cb.ident = cb_ident;
211 parse_in.cb.enter_file = cb_enter_file;
212 parse_in.cb.leave_file = cb_leave_file;
213 parse_in.cb.rename_file = cb_rename_file;
8b97c5f8 214 parse_in.cb.def_pragma = cb_def_pragma;
0e5921e8
ZW
215
216 /* Make sure parse_in.digraphs matches flag_digraphs. */
217 CPP_OPTION (&parse_in, digraphs) = flag_digraphs;
218
219 if (! cpp_start_read (&parse_in, 0 /* no printer */, filename))
e3d1fd32 220 abort ();
add7091b 221
b12da25e
ZW
222 if (filename == 0 || !strcmp (filename, "-"))
223 filename = "stdin";
e56e519d 224#endif
5c60e5c0 225
0e5921e8
ZW
226#if !USE_CPPLIB
227 maxtoken = 40;
228 token_buffer = (char *) xmalloc (maxtoken + 2);
229#endif
230 /* Start it at 0, because check_newline is called at the very beginning
231 and will increment it to 1. */
232 lineno = lex_lineno = 0;
b12da25e 233
5c60e5c0 234 return filename;
e3d1fd32
PB
235}
236
0e5921e8
ZW
237struct c_fileinfo *
238get_fileinfo (name)
239 const char *name;
e3d1fd32 240{
0e5921e8
ZW
241 splay_tree_node n;
242 struct c_fileinfo *fi;
243
244 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
245 if (n)
246 return (struct c_fileinfo *) n->value;
247
248 fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
249 fi->time = 0;
250 fi->interface_only = 0;
251 fi->interface_unknown = 1;
252 splay_tree_insert (file_info_tree, (splay_tree_key) name,
253 (splay_tree_value) fi);
254 return fi;
e56e519d 255}
e3d1fd32 256
0e5921e8
ZW
257static void
258update_header_times (name)
259 const char *name;
e8bbfc4e 260{
0e5921e8
ZW
261 /* Changing files again. This means currently collected time
262 is charged against header time, and body time starts back at 0. */
263 if (flag_detailed_statistics)
e8bbfc4e 264 {
0e5921e8
ZW
265 int this_time = get_run_time ();
266 struct c_fileinfo *file = get_fileinfo (name);
267 header_time += this_time - body_time;
268 file->time += this_time - body_time;
269 body_time = this_time;
e8bbfc4e
RK
270 }
271}
272
0e5921e8
ZW
273static int
274dump_one_header (n, dummy)
275 splay_tree_node n;
276 void *dummy ATTRIBUTE_UNUSED;
e8bbfc4e 277{
0e5921e8
ZW
278 print_time ((const char *) n->key,
279 ((struct c_fileinfo *) n->value)->time);
280 return 0;
e8bbfc4e 281}
e8bbfc4e
RK
282
283void
0e5921e8 284dump_time_statistics ()
e8bbfc4e 285{
0e5921e8
ZW
286 struct c_fileinfo *file = get_fileinfo (input_filename);
287 int this_time = get_run_time ();
288 file->time += this_time - body_time;
289
290 fprintf (stderr, "\n******\n");
291 print_time ("header files (total)", header_time);
292 print_time ("main file (total)", this_time - body_time);
293 fprintf (stderr, "ratio = %g : 1\n",
294 (double)header_time / (double)(this_time - body_time));
295 fprintf (stderr, "\n******\n");
296
297 splay_tree_foreach (file_info_tree, dump_one_header, 0);
e8bbfc4e 298}
a6124a42 299
0e5921e8 300#if !USE_CPPLIB
a6124a42 301
e8bbfc4e
RK
302/* If C is not whitespace, return C.
303 Otherwise skip whitespace and return first nonwhite char read. */
304
305static int
306skip_white_space (c)
307 register int c;
308{
e8bbfc4e
RK
309 for (;;)
310 {
311 switch (c)
312 {
0e5921e8
ZW
313 /* There is no need to process comments, backslash-newline,
314 or \r here. None can occur in the output of cpp. */
e8bbfc4e
RK
315
316 case '\n':
fbb18613
JM
317 if (linemode)
318 {
0e5921e8 319 put_back (c);
fbb18613
JM
320 return EOF;
321 }
e8bbfc4e
RK
322 c = check_newline ();
323 break;
324
0e5921e8 325 /* Per C99, horizontal whitespace is just these four characters. */
e8bbfc4e
RK
326 case ' ':
327 case '\t':
328 case '\f':
e8bbfc4e 329 case '\v':
0e5921e8 330 c = getch ();
0dcd8cee
RS
331 break;
332
e8bbfc4e 333 case '\\':
0e5921e8
ZW
334 error ("stray '\\' in program");
335 c = getch ();
e8bbfc4e
RK
336 break;
337
338 default:
339 return (c);
340 }
341 }
342}
343
/* Skip all of the white space at the current location in the input
   file, leaving the first non-white character to be read next.  */

void
position_after_white_space ()
{
  int first = getch ();
  int next = skip_white_space (first);

  put_back (next);
}
355
356/* Make the token buffer longer, preserving the data in it.
357 P should point to just beyond the last valid character in the old buffer.
358 The value we return is a pointer to the new buffer
359 at a place corresponding to P. */
360
fbb18613
JM
361static void
362extend_token_buffer_to (size)
363 int size;
364{
365 do
366 maxtoken = maxtoken * 2 + 10;
367 while (maxtoken < size);
368 token_buffer = (char *) xrealloc (token_buffer, maxtoken + 2);
369}
370
e8bbfc4e
RK
371static char *
372extend_token_buffer (p)
5d5993dd 373 const char *p;
e8bbfc4e
RK
374{
375 int offset = p - token_buffer;
fbb18613 376 extend_token_buffer_to (offset);
e8bbfc4e
RK
377 return token_buffer + offset;
378}
e3d1fd32 379\f
f09db6e0 380
fbb18613
JM
381static int
382read_line_number (num)
383 int *num;
384{
0e5921e8
ZW
385 tree value;
386 enum cpp_ttype token = c_lex (&value);
fbb18613 387
0e5921e8 388 if (token == CPP_NUMBER && TREE_CODE (value) == INTEGER_CST)
fbb18613 389 {
0e5921e8 390 *num = TREE_INT_CST_LOW (value);
fbb18613
JM
391 return 1;
392 }
393 else
394 {
0e5921e8 395 if (token != CPP_EOF)
fbb18613
JM
396 error ("invalid #-line");
397 return 0;
398 }
399}
0e5921e8 400
e8bbfc4e
RK
401/* At the beginning of a line, increment the line number
402 and process any #-directive on this line.
403 If the line is a #-directive, read the entire line and return a newline.
0e5921e8 404 Otherwise, return the line's first non-whitespace character. */
e8bbfc4e
RK
405
406int
407check_newline ()
408{
409 register int c;
e8bbfc4e 410
0e5921e8
ZW
411 /* Loop till we get a nonblank, non-directive line. */
412 for (;;)
e8bbfc4e 413 {
0e5921e8
ZW
414 /* Read first nonwhite char on the line. */
415 do
416 c = getch ();
417 while (c == ' ' || c == '\t');
418
419 lex_lineno++;
420 if (c == '#')
421 {
422 process_directive ();
423 return '\n';
424 }
fbb18613 425
0e5921e8
ZW
426 else if (c != '\n')
427 break;
e8bbfc4e 428 }
0e5921e8
ZW
429 return c;
430}
e8bbfc4e 431
0e5921e8
ZW
432static void
433process_directive ()
434{
435 enum cpp_ttype token;
436 tree value;
437 int saw_line;
438 enum { act_none, act_push, act_pop } action;
439 int action_number, l;
440 char *new_file;
441#ifndef NO_IMPLICIT_EXTERN_C
ae54392b 442 int entering_c_header = 0;
0e5921e8
ZW
443#endif
444
fbb18613
JM
445 /* Don't read beyond this line. */
446 saw_line = 0;
447 linemode = 1;
448
0e5921e8 449 token = c_lex (&value);
e8bbfc4e 450
0e5921e8 451 if (token == CPP_NAME)
e8bbfc4e 452 {
fbb18613
JM
453 /* If a letter follows, then if the word here is `line', skip
454 it and ignore it; otherwise, ignore the line, with an error
455 if the word isn't `pragma'. */
75cb8865 456
0e5921e8 457 const char *name = IDENTIFIER_POINTER (value);
75cb8865 458
fbb18613
JM
459 if (!strcmp (name, "pragma"))
460 {
0e5921e8 461 dispatch_pragma ();
30acbc3e 462 goto skipline;
e8bbfc4e 463 }
fbb18613 464 else if (!strcmp (name, "define"))
e8bbfc4e 465 {
0e5921e8 466 debug_define (lex_lineno, GET_DIRECTIVE_LINE ());
fbb18613 467 goto skipline;
e8bbfc4e 468 }
fbb18613 469 else if (!strcmp (name, "undef"))
e8bbfc4e 470 {
0e5921e8 471 debug_undef (lex_lineno, GET_DIRECTIVE_LINE ());
fbb18613 472 goto skipline;
e8bbfc4e 473 }
fbb18613 474 else if (!strcmp (name, "line"))
e8bbfc4e 475 {
fbb18613 476 saw_line = 1;
0e5921e8 477 token = c_lex (&value);
fbb18613 478 goto linenum;
e8bbfc4e 479 }
fbb18613 480 else if (!strcmp (name, "ident"))
e8bbfc4e 481 {
0e5921e8
ZW
482 /* #ident. We expect a string constant here.
483 The pedantic warning and syntax error are now in cpp. */
e8bbfc4e 484
0e5921e8
ZW
485 token = c_lex (&value);
486 if (token != CPP_STRING || TREE_CODE (value) != STRING_CST)
fbb18613 487 goto skipline;
e8bbfc4e 488
0e5921e8 489#ifdef ASM_OUTPUT_IDENT
fbb18613
JM
490 if (! flag_no_ident)
491 {
0e5921e8 492 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
e8bbfc4e 493 }
0e5921e8 494#endif
fbb18613
JM
495
496 /* Skip the rest of this line. */
497 goto skipline;
e8bbfc4e
RK
498 }
499
fbb18613 500 error ("undefined or invalid # directive `%s'", name);
e8bbfc4e
RK
501 goto skipline;
502 }
503
fbb18613
JM
504 /* If the # is the only nonwhite char on the line,
505 just ignore it. Check the new newline. */
0e5921e8 506 if (token == CPP_EOF)
fbb18613
JM
507 goto skipline;
508
e8bbfc4e
RK
509linenum:
510 /* Here we have either `#line' or `# <nonletter>'.
511 In either case, it should be a line number; a digit should follow. */
512
0e5921e8 513 if (token != CPP_NUMBER || TREE_CODE (value) != INTEGER_CST)
fbb18613
JM
514 {
515 error ("invalid #-line");
516 goto skipline;
517 }
e8bbfc4e 518
fbb18613
JM
519 /* subtract one, because it is the following line that
520 gets the specified number */
e8bbfc4e 521
0e5921e8 522 l = TREE_INT_CST_LOW (value) - 1;
e8bbfc4e 523
fbb18613
JM
524 /* More follows: it must be a string constant (filename).
525 It would be neat to use cpplib to quickly process the string, but
526 (1) we don't have a handy tokenization of the string, and
527 (2) I don't know how well that would work in the presense
528 of filenames that contain wide characters. */
e8bbfc4e 529
fbb18613 530 if (saw_line)
e8bbfc4e 531 {
fbb18613
JM
532 /* Don't treat \ as special if we are processing #line 1 "...".
533 If you want it to be treated specially, use # 1 "...". */
534 ignore_escape_flag = 1;
535 }
e8bbfc4e 536
fbb18613 537 /* Read the string constant. */
0e5921e8 538 token = c_lex (&value);
e8bbfc4e 539
fbb18613 540 ignore_escape_flag = 0;
e8bbfc4e 541
0e5921e8 542 if (token == CPP_EOF)
fbb18613
JM
543 {
544 /* No more: store the line number and check following line. */
0e5921e8 545 lex_lineno = l;
fbb18613
JM
546 goto skipline;
547 }
e8bbfc4e 548
0e5921e8 549 if (token != CPP_STRING || TREE_CODE (value) != STRING_CST)
fbb18613
JM
550 {
551 error ("invalid #line");
552 goto skipline;
553 }
e8bbfc4e 554
0e5921e8 555 new_file = TREE_STRING_POINTER (value);
e8bbfc4e 556
fbb18613 557 if (main_input_filename == 0)
0e5921e8 558 main_input_filename = new_file;
e8bbfc4e 559
fbb18613
JM
560 action = act_none;
561 action_number = 0;
e8bbfc4e 562
fbb18613
JM
563 /* Each change of file name
564 reinitializes whether we are now in a system header. */
565 in_system_header = 0;
e8bbfc4e 566
fbb18613
JM
567 if (!read_line_number (&action_number))
568 {
569 /* Update the name in the top element of input_file_stack. */
5b450ae5
RS
570 if (input_file_stack)
571 input_file_stack->name = input_filename;
fbb18613 572 }
5b450ae5 573
fbb18613
JM
574 /* `1' after file name means entering new file.
575 `2' after file name means just left a file. */
e8bbfc4e 576
fbb18613
JM
577 if (action_number == 1)
578 {
579 action = act_push;
580 read_line_number (&action_number);
581 }
582 else if (action_number == 2)
583 {
584 action = act_pop;
585 read_line_number (&action_number);
586 }
587 if (action_number == 3)
588 {
e8bbfc4e 589 /* `3' after file name means this is a system header file. */
fbb18613
JM
590 in_system_header = 1;
591 read_line_number (&action_number);
592 }
0e5921e8
ZW
593#ifndef NO_IMPLICIT_EXTERN_C
594 if (action_number == 4)
595 {
596 /* `4' after file name means this is a C header file. */
597 entering_c_header = 1;
598 read_line_number (&action_number);
599 }
600#endif
e8bbfc4e 601
fbb18613 602 /* Do the actions implied by the preceding numbers. */
fbb18613
JM
603 if (action == act_push)
604 {
0e5921e8
ZW
605 lineno = lex_lineno;
606 push_srcloc (input_filename, 1);
607 input_file_stack->indent_level = indent_level;
fbb18613 608 debug_start_source_file (input_filename);
0e5921e8
ZW
609#ifndef NO_IMPLICIT_EXTERN_C
610 if (c_header_level)
611 ++c_header_level;
612 else if (entering_c_header)
613 {
614 c_header_level = 1;
615 ++pending_lang_change;
616 }
617#endif
fbb18613
JM
618 }
619 else if (action == act_pop)
620 {
621 /* Popping out of a file. */
622 if (input_file_stack->next)
0468bc75 623 {
0e5921e8
ZW
624#ifndef NO_IMPLICIT_EXTERN_C
625 if (c_header_level && --c_header_level == 0)
626 {
627 if (entering_c_header)
628 warning ("badly nested C headers from preprocessor");
629 --pending_lang_change;
630 }
631#endif
632#if 0
633 if (indent_level != input_file_stack->indent_level)
fbb18613
JM
634 {
635 warning_with_file_and_line
0e5921e8
ZW
636 (input_filename, lex_lineno,
637 "This file contains more '%c's than '%c's.",
638 indent_level > input_file_stack->indent_level ? '{' : '}',
639 indent_level > input_file_stack->indent_level ? '}' : '{');
fbb18613 640 }
0e5921e8
ZW
641#endif
642 pop_srcloc ();
fbb18613 643 debug_end_source_file (input_file_stack->line);
0468bc75 644 }
fbb18613
JM
645 else
646 error ("#-lines for entering and leaving files don't match");
e8bbfc4e 647 }
fbb18613 648
0e5921e8
ZW
649 update_header_times (new_file);
650
651 input_filename = new_file;
652 lex_lineno = l;
653
654 /* Hook for C++. */
655 extract_interface_info ();
e8bbfc4e
RK
656
657 /* skip the rest of this line. */
658 skipline:
fbb18613 659 linemode = 0;
fbb18613 660
0e5921e8 661 while (getch () != '\n');
e8bbfc4e 662}
0e5921e8 663#else /* USE_CPPLIB */
ca5b800a 664
0e5921e8
ZW
665/* Not yet handled: #pragma, #define, #undef.
666 No need to deal with linemarkers under normal conditions. */
ca5b800a 667
0e5921e8
ZW
668static void
669cb_ident (pfile, str, len)
670 cpp_reader *pfile ATTRIBUTE_UNUSED;
671 const unsigned char *str;
672 unsigned int len;
673{
674#ifdef ASM_OUTPUT_IDENT
675 if (! flag_no_ident)
676 {
677 /* Convert escapes in the string. */
678 tree value = lex_string ((const char *)str, len, 0);
679 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
680 }
681#endif
682}
ca5b800a 683
0e5921e8
ZW
684static void
685cb_enter_file (pfile)
686 cpp_reader *pfile;
ca5b800a 687{
0e5921e8
ZW
688 cpp_buffer *ip = CPP_BUFFER (pfile);
689 /* Bleah, need a better interface to this. */
690 const char *flags = cpp_syshdr_flags (pfile, ip);
691
692 /* Mustn't stack the main buffer on the input stack. (Ick.) */
693 if (ip->prev)
694 {
695 lex_lineno = lineno = ip->prev->lineno - 1;
696 push_srcloc (ggc_alloc_string (ip->nominal_fname, -1), 1);
697 input_file_stack->indent_level = indent_level;
698 debug_start_source_file (ip->nominal_fname);
699 }
700 else
701 lex_lineno = 1;
702
703 update_header_times (ip->nominal_fname);
704
705 /* Hook for C++. */
706 extract_interface_info ();
707
708 in_system_header = (flags[0] != 0);
709#ifndef NO_IMPLICIT_EXTERN_C
710 if (c_header_level)
711 ++c_header_level;
712 else if (flags[2] != 0)
713 {
714 c_header_level = 1;
715 ++pending_lang_change;
716 }
717#endif
718}
719
720static void
721cb_leave_file (pfile)
722 cpp_reader *pfile;
723{
724 /* Bleah, need a better interface to this. */
725 const char *flags = cpp_syshdr_flags (pfile, CPP_BUFFER (pfile));
726#if 0
727 if (indent_level != input_file_stack->indent_level)
728 {
729 warning_with_file_and_line
730 (input_filename, lex_lineno,
731 "This file contains more '%c's than '%c's.",
732 indent_level > input_file_stack->indent_level ? '{' : '}',
733 indent_level > input_file_stack->indent_level ? '}' : '{');
734 }
735#endif
736 /* We get called for the main buffer, but we mustn't pop it. */
737 if (input_file_stack->next)
738 pop_srcloc ();
739 in_system_header = (flags[0] != 0);
740#ifndef NO_IMPLICIT_EXTERN_C
741 if (c_header_level && --c_header_level == 0)
742 {
743 if (flags[2] != 0)
744 warning ("badly nested C headers from preprocessor");
745 --pending_lang_change;
746 }
747#endif
748 lex_lineno = CPP_BUFFER (pfile)->lineno;
749 debug_end_source_file (input_file_stack->line);
750
751 update_header_times (input_file_stack->name);
752 /* Hook for C++. */
753 extract_interface_info ();
754}
755
756static void
757cb_rename_file (pfile)
758 cpp_reader *pfile;
759{
760 cpp_buffer *ip = CPP_BUFFER (pfile);
761 /* Bleah, need a better interface to this. */
762 const char *flags = cpp_syshdr_flags (pfile, ip);
763 input_filename = ggc_alloc_string (ip->nominal_fname, -1);
764 lex_lineno = ip->lineno;
765 in_system_header = (flags[0] != 0);
766
767 update_header_times (ip->nominal_fname);
768 /* Hook for C++. */
769 extract_interface_info ();
770}
8b97c5f8
ZW
771
772static void
773cb_def_pragma (pfile)
774 cpp_reader *pfile;
775{
776 /* Issue a warning message if we have been asked to do so. Ignore
777 unknown pragmas in system headers unless an explicit
778 -Wunknown-pragmas has been given. */
779 if (warn_unknown_pragmas > in_system_header)
780 {
781 const unsigned char *space, *name;
782 const cpp_token *t = pfile->first_directive_token + 2;
783
784 space = t[0].val.node->name;
785 name = t[1].type == CPP_NAME ? t[1].val.node->name : 0;
786 if (name)
787 warning ("ignoring #pragma %s %s", space, name);
788 else
789 warning ("ignoring #pragma %s", space);
790 }
791}
0e5921e8
ZW
792#endif /* USE_CPPLIB */
793
/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.

   [lex.charset]: The character designated by the universal-character-name
   \UNNNNNNNN is that character whose character short name in ISO/IEC 10646
   is NNNNNNNN; the character designated by the universal-character-name
   \uNNNN is that character whose character short name in ISO/IEC 10646 is
   0000NNNN.  If the hexadecimal value for a universal character name is
   less than 0x20 or in the range 0x7F-0x9F (inclusive), or if the
   universal character name designates a character in the basic source
   character set, then the program is ill-formed.

   We assume that wchar_t is Unicode, so we don't need to do any
   mapping.  Is this ever wrong?

   P points just past the `u' or `U', LIMIT is the end of the available
   text and LENGTH is the number of hex digits expected (4 or 8).  The
   value read is stored in *CPTR; the return value is the updated input
   pointer, which is left on the offending character if a non-hex digit
   cuts the sequence short.  Errors are reported for short or malformed
   sequences and for values that are not permitted.  */

static const char *
read_ucs (p, limit, cptr, length)
     const char *p;
     const char *limit;
     unsigned int *cptr;
     int length;
{
  unsigned int code = 0;
  int c;

  for (; length; --length)
    {
      if (p >= limit)
        {
          error ("incomplete universal-character-name");
          break;
        }

      /* Fetch the byte as unsigned char.  Plain char may be signed, and
         a negative value must not reach ISXDIGIT: the <ctype.h>
         classifiers are only defined for unsigned char values and
         EOF.  */
      c = (unsigned char) *p;
      if (! ISXDIGIT (c))
        {
          error ("non hex digit '%c' in universal-character-name", c);
          break;
        }
      p++;

      code <<= 4;
      if (c >= 'a' && c <= 'f')
        code += c - 'a' + 10;
      else if (c >= 'A' && c <= 'F')
        code += c - 'A' + 10;
      else if (c >= '0' && c <= '9')
        code += c - '0';
    }

#ifdef TARGET_EBCDIC
  sorry ("universal-character-name on EBCDIC target");
  *cptr = 0x3f;  /* EBCDIC invalid character */
  return p;
#endif

  if (code > 0x9f && !(code & 0x80000000))
    /* True extended character, OK.  */;
  else if (code >= 0x20 && code < 0x7f)
    {
      /* ASCII printable character.  The C character set consists of all of
         these except $, @ and `.  We use hex escapes so that this also
         works with EBCDIC hosts.  */
      if (code != 0x24 && code != 0x40 && code != 0x60)
        error ("universal-character-name used for '%c'", code);
    }
  else
    error ("invalid universal-character-name");

  *cptr = code;
  return p;
}
e8bbfc4e 865
0e5921e8
ZW
866/* Read an escape sequence and write its character equivalent into *CPTR.
867 P is the input pointer, which is just after the backslash. LIMIT
868 is how much text we have.
869 Returns the updated input pointer. */
e8bbfc4e 870
0e5921e8
ZW
871static const char *
872readescape (p, limit, cptr)
873 const char *p;
874 const char *limit;
875 unsigned int *cptr;
e8bbfc4e 876{
0e5921e8 877 unsigned int c, code, count;
1c7b145e 878 unsigned firstdig = 0;
8696da34 879 int nonnull;
e8bbfc4e 880
0e5921e8
ZW
881 if (p == limit)
882 {
883 /* cpp has already issued an error for this. */
884 *cptr = 0;
885 return p;
886 }
887
888 c = *p++;
889
e8bbfc4e
RK
890 switch (c)
891 {
892 case 'x':
cde6e684 893 if (warn_traditional && !in_system_header)
e8bbfc4e
RK
894 warning ("the meaning of `\\x' varies with -traditional");
895
896 if (flag_traditional)
0e5921e8
ZW
897 {
898 *cptr = 'x';
899 return p;
900 }
e8bbfc4e
RK
901
902 code = 0;
903 count = 0;
8696da34 904 nonnull = 0;
0e5921e8 905 while (p < limit)
e8bbfc4e 906 {
0e5921e8 907 c = *p++;
fbb18613 908 if (! ISXDIGIT (c))
e8bbfc4e 909 {
0e5921e8 910 p--;
e8bbfc4e
RK
911 break;
912 }
913 code *= 16;
914 if (c >= 'a' && c <= 'f')
915 code += c - 'a' + 10;
916 if (c >= 'A' && c <= 'F')
917 code += c - 'A' + 10;
918 if (c >= '0' && c <= '9')
919 code += c - '0';
8696da34
RS
920 if (code != 0 || count != 0)
921 {
922 if (count == 0)
923 firstdig = code;
924 count++;
925 }
926 nonnull = 1;
e8bbfc4e 927 }
8696da34 928 if (! nonnull)
4082292a
AO
929 {
930 warning ("\\x used with no following hex digits");
0e5921e8
ZW
931 *cptr = 'x';
932 return p;
4082292a 933 }
be63d912
RS
934 else if (count == 0)
935 /* Digits are all 0's. Ok. */
936 ;
e8bbfc4e
RK
937 else if ((count - 1) * 4 >= TYPE_PRECISION (integer_type_node)
938 || (count > 1
fbb18613
JM
939 && (((unsigned)1
940 << (TYPE_PRECISION (integer_type_node)
941 - (count - 1) * 4))
e8bbfc4e
RK
942 <= firstdig)))
943 pedwarn ("hex escape out of range");
0e5921e8
ZW
944 *cptr = code;
945 return p;
e8bbfc4e
RK
946
947 case '0': case '1': case '2': case '3': case '4':
948 case '5': case '6': case '7':
949 code = 0;
0e5921e8 950 for (count = 0; count < 3; count++)
e8bbfc4e 951 {
0e5921e8
ZW
952 if (c < '0' || c > '7')
953 {
954 p--;
955 break;
956 }
e8bbfc4e 957 code = (code * 8) + (c - '0');
0e5921e8
ZW
958 if (p == limit)
959 break;
960 c = *p++;
e8bbfc4e 961 }
e8bbfc4e 962
0e5921e8
ZW
963 if (count == 3)
964 p--;
e8bbfc4e 965
0e5921e8
ZW
966 *cptr = code;
967 return p;
e8bbfc4e 968
0e5921e8
ZW
969 case '\\': case '\'': case '"': case '?':
970 *cptr = c;
971 return p;
e8bbfc4e 972
0e5921e8
ZW
973 case 'n': *cptr = TARGET_NEWLINE; return p;
974 case 't': *cptr = TARGET_TAB; return p;
975 case 'r': *cptr = TARGET_CR; return p;
976 case 'f': *cptr = TARGET_FF; return p;
977 case 'b': *cptr = TARGET_BS; return p;
978 case 'v': *cptr = TARGET_VT; return p;
979 case 'a':
980 if (warn_traditional && !in_system_header)
981 warning ("the meaning of '\\a' varies with -traditional");
982 *cptr = flag_traditional ? c : TARGET_BELL;
983 return p;
e8bbfc4e 984
0e5921e8
ZW
985 /* Warnings and support checks handled by read_ucs(). */
986 case 'u': case 'U':
987 if (c_language != clk_cplusplus && !flag_isoc99)
988 break;
e8bbfc4e 989
cde6e684 990 if (warn_traditional && !in_system_header)
0e5921e8 991 warning ("the meaning of '\\%c' varies with -traditional", c);
e8bbfc4e 992
0e5921e8
ZW
993 return read_ucs (p, limit, cptr, c == 'u' ? 4 : 8);
994
995 case 'e': case 'E':
dad112ca 996 if (pedantic)
0e5921e8
ZW
997 pedwarn ("non-ISO-standard escape sequence, '\\%c'", c);
998 *cptr = TARGET_ESC; return p;
e8bbfc4e 999
0e5921e8
ZW
1000 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1001 '\%' is used to prevent SCCS from getting confused. */
1002 case '(': case '{': case '[': case '%':
e8bbfc4e 1003 if (pedantic)
0e5921e8
ZW
1004 pedwarn ("unknown escape sequence '\\%c'", c);
1005 *cptr = c;
1006 return p;
e8bbfc4e 1007 }
0e5921e8 1008
fbb18613 1009 if (ISGRAPH (c))
0e5921e8 1010 pedwarn ("unknown escape sequence '\\%c'", c);
e8bbfc4e 1011 else
0e5921e8
ZW
1012 pedwarn ("unknown escape sequence: '\\' followed by char 0x%.2x", c);
1013
1014 *cptr = c;
1015 return p;
e8bbfc4e 1016}
0e5921e8
ZW
1017
1018#if 0 /* not yet */
/* Returns nonzero if C is a universal-character-name, i.e. anything
   outside the ASCII range; returns zero for ASCII.  Give an error if it
   is not one which may appear in an identifier, as per [extendid].
   Note that the return value is 1 even when the error is issued: the
   result says "this was an extended character", not "this character
   is acceptable".

   On TARGET_EBCDIC hosts nothing is treated as an extended character.

   Note that extended character support in identifiers has not yet been
   implemented.  It is my personal opinion that this is not a desirable
   feature.  Portable code cannot count on support for more than the basic
   identifier character set.  */

static inline int
is_extended_char (c)
     int c;
{
#ifdef TARGET_EBCDIC
  return 0;
#else
  /* ASCII.  The range is inclusive of DEL (0x7f); it is not a
     universal-character-name and must not be diagnosed as one.  */
  if (c <= 0x7f)
    return 0;

  /* None of the valid chars are outside the Basic Multilingual Plane (the
     low 16 bits).  */
  if (c > 0xffff)
    {
      error ("universal-character-name '\\U%08x' not valid in identifier", c);
      return 1;
    }

  /* Latin */
  if ((c >= 0x00c0 && c <= 0x00d6)
      || (c >= 0x00d8 && c <= 0x00f6)
      || (c >= 0x00f8 && c <= 0x01f5)
      || (c >= 0x01fa && c <= 0x0217)
      || (c >= 0x0250 && c <= 0x02a8)
      || (c >= 0x1e00 && c <= 0x1e9a)
      || (c >= 0x1ea0 && c <= 0x1ef9))
    return 1;

  /* Greek */
  if ((c == 0x0384)
      || (c >= 0x0388 && c <= 0x038a)
      || (c == 0x038c)
      || (c >= 0x038e && c <= 0x03a1)
      || (c >= 0x03a3 && c <= 0x03ce)
      || (c >= 0x03d0 && c <= 0x03d6)
      || (c == 0x03da)
      || (c == 0x03dc)
      || (c == 0x03de)
      || (c == 0x03e0)
      || (c >= 0x03e2 && c <= 0x03f3)
      || (c >= 0x1f00 && c <= 0x1f15)
      || (c >= 0x1f18 && c <= 0x1f1d)
      || (c >= 0x1f20 && c <= 0x1f45)
      || (c >= 0x1f48 && c <= 0x1f4d)
      || (c >= 0x1f50 && c <= 0x1f57)
      || (c == 0x1f59)
      || (c == 0x1f5b)
      || (c == 0x1f5d)
      || (c >= 0x1f5f && c <= 0x1f7d)
      || (c >= 0x1f80 && c <= 0x1fb4)
      || (c >= 0x1fb6 && c <= 0x1fbc)
      || (c >= 0x1fc2 && c <= 0x1fc4)
      || (c >= 0x1fc6 && c <= 0x1fcc)
      || (c >= 0x1fd0 && c <= 0x1fd3)
      || (c >= 0x1fd6 && c <= 0x1fdb)
      || (c >= 0x1fe0 && c <= 0x1fec)
      || (c >= 0x1ff2 && c <= 0x1ff4)
      || (c >= 0x1ff6 && c <= 0x1ffc))
    return 1;

  /* Cyrillic */
  if ((c >= 0x0401 && c <= 0x040d)
      || (c >= 0x040f && c <= 0x044f)
      || (c >= 0x0451 && c <= 0x045c)
      || (c >= 0x045e && c <= 0x0481)
      || (c >= 0x0490 && c <= 0x04c4)
      || (c >= 0x04c7 && c <= 0x04c8)
      || (c >= 0x04cb && c <= 0x04cc)
      || (c >= 0x04d0 && c <= 0x04eb)
      || (c >= 0x04ee && c <= 0x04f5)
      || (c >= 0x04f8 && c <= 0x04f9))
    return 1;

  /* Armenian */
  if ((c >= 0x0531 && c <= 0x0556)
      || (c >= 0x0561 && c <= 0x0587))
    return 1;

  /* Hebrew */
  if ((c >= 0x05d0 && c <= 0x05ea)
      || (c >= 0x05f0 && c <= 0x05f4))
    return 1;

  /* Arabic */
  if ((c >= 0x0621 && c <= 0x063a)
      || (c >= 0x0640 && c <= 0x0652)
      || (c >= 0x0670 && c <= 0x06b7)
      || (c >= 0x06ba && c <= 0x06be)
      || (c >= 0x06c0 && c <= 0x06ce)
      || (c >= 0x06e5 && c <= 0x06e7))
    return 1;

  /* Devanagari */
  if ((c >= 0x0905 && c <= 0x0939)
      || (c >= 0x0958 && c <= 0x0962))
    return 1;

  /* Bengali */
  if ((c >= 0x0985 && c <= 0x098c)
      || (c >= 0x098f && c <= 0x0990)
      || (c >= 0x0993 && c <= 0x09a8)
      || (c >= 0x09aa && c <= 0x09b0)
      || (c == 0x09b2)
      || (c >= 0x09b6 && c <= 0x09b9)
      || (c >= 0x09dc && c <= 0x09dd)
      || (c >= 0x09df && c <= 0x09e1)
      || (c >= 0x09f0 && c <= 0x09f1))
    return 1;

  /* Gurmukhi */
  if ((c >= 0x0a05 && c <= 0x0a0a)
      || (c >= 0x0a0f && c <= 0x0a10)
      || (c >= 0x0a13 && c <= 0x0a28)
      || (c >= 0x0a2a && c <= 0x0a30)
      || (c >= 0x0a32 && c <= 0x0a33)
      || (c >= 0x0a35 && c <= 0x0a36)
      || (c >= 0x0a38 && c <= 0x0a39)
      || (c >= 0x0a59 && c <= 0x0a5c)
      || (c == 0x0a5e))
    return 1;

  /* Gujarati */
  if ((c >= 0x0a85 && c <= 0x0a8b)
      || (c == 0x0a8d)
      || (c >= 0x0a8f && c <= 0x0a91)
      || (c >= 0x0a93 && c <= 0x0aa8)
      || (c >= 0x0aaa && c <= 0x0ab0)
      || (c >= 0x0ab2 && c <= 0x0ab3)
      || (c >= 0x0ab5 && c <= 0x0ab9)
      || (c == 0x0ae0))
    return 1;

  /* Oriya */
  if ((c >= 0x0b05 && c <= 0x0b0c)
      || (c >= 0x0b0f && c <= 0x0b10)
      || (c >= 0x0b13 && c <= 0x0b28)
      || (c >= 0x0b2a && c <= 0x0b30)
      || (c >= 0x0b32 && c <= 0x0b33)
      || (c >= 0x0b36 && c <= 0x0b39)
      || (c >= 0x0b5c && c <= 0x0b5d)
      || (c >= 0x0b5f && c <= 0x0b61))
    return 1;

  /* Tamil */
  if ((c >= 0x0b85 && c <= 0x0b8a)
      || (c >= 0x0b8e && c <= 0x0b90)
      || (c >= 0x0b92 && c <= 0x0b95)
      || (c >= 0x0b99 && c <= 0x0b9a)
      || (c == 0x0b9c)
      || (c >= 0x0b9e && c <= 0x0b9f)
      || (c >= 0x0ba3 && c <= 0x0ba4)
      || (c >= 0x0ba8 && c <= 0x0baa)
      || (c >= 0x0bae && c <= 0x0bb5)
      || (c >= 0x0bb7 && c <= 0x0bb9))
    return 1;

  /* Telugu */
  if ((c >= 0x0c05 && c <= 0x0c0c)
      || (c >= 0x0c0e && c <= 0x0c10)
      || (c >= 0x0c12 && c <= 0x0c28)
      || (c >= 0x0c2a && c <= 0x0c33)
      || (c >= 0x0c35 && c <= 0x0c39)
      || (c >= 0x0c60 && c <= 0x0c61))
    return 1;

  /* Kannada */
  if ((c >= 0x0c85 && c <= 0x0c8c)
      || (c >= 0x0c8e && c <= 0x0c90)
      || (c >= 0x0c92 && c <= 0x0ca8)
      || (c >= 0x0caa && c <= 0x0cb3)
      || (c >= 0x0cb5 && c <= 0x0cb9)
      || (c >= 0x0ce0 && c <= 0x0ce1))
    return 1;

  /* Malayalam */
  if ((c >= 0x0d05 && c <= 0x0d0c)
      || (c >= 0x0d0e && c <= 0x0d10)
      || (c >= 0x0d12 && c <= 0x0d28)
      || (c >= 0x0d2a && c <= 0x0d39)
      || (c >= 0x0d60 && c <= 0x0d61))
    return 1;

  /* Thai */
  if ((c >= 0x0e01 && c <= 0x0e30)
      || (c >= 0x0e32 && c <= 0x0e33)
      || (c >= 0x0e40 && c <= 0x0e46)
      || (c >= 0x0e4f && c <= 0x0e5b))
    return 1;

  /* Lao.  The single code point between 0x0e8a and 0x0e94 is 0x0e8d;
     0x0e0d is a Thai character already covered by the Thai ranges
     above.  */
  if ((c >= 0x0e81 && c <= 0x0e82)
      || (c == 0x0e84)
      || (c == 0x0e87)
      || (c == 0x0e88)
      || (c == 0x0e8a)
      || (c == 0x0e8d)
      || (c >= 0x0e94 && c <= 0x0e97)
      || (c >= 0x0e99 && c <= 0x0e9f)
      || (c >= 0x0ea1 && c <= 0x0ea3)
      || (c == 0x0ea5)
      || (c == 0x0ea7)
      || (c == 0x0eaa)
      || (c == 0x0eab)
      || (c >= 0x0ead && c <= 0x0eb0)
      || (c == 0x0eb2)
      || (c == 0x0eb3)
      || (c == 0x0ebd)
      || (c >= 0x0ec0 && c <= 0x0ec4)
      || (c == 0x0ec6))
    return 1;

  /* Georgian */
  if ((c >= 0x10a0 && c <= 0x10c5)
      || (c >= 0x10d0 && c <= 0x10f6))
    return 1;

  /* Hiragana */
  if ((c >= 0x3041 && c <= 0x3094)
      || (c >= 0x309b && c <= 0x309e))
    return 1;

  /* Katakana */
  if ((c >= 0x30a1 && c <= 0x30fe))
    return 1;

  /* Bopomofo */
  if ((c >= 0x3105 && c <= 0x312c))
    return 1;

  /* Hangul */
  if ((c >= 0x1100 && c <= 0x1159)
      || (c >= 0x1161 && c <= 0x11a2)
      || (c >= 0x11a8 && c <= 0x11f9))
    return 1;

  /* CJK Unified Ideographs */
  if ((c >= 0xf900 && c <= 0xfa2d)
      || (c >= 0xfb1f && c <= 0xfb36)
      || (c >= 0xfb38 && c <= 0xfb3c)
      || (c == 0xfb3e)
      || (c >= 0xfb40 && c <= 0xfb41)
      || (c >= 0xfb42 && c <= 0xfb44)
      || (c >= 0xfb46 && c <= 0xfbb1)
      || (c >= 0xfbd3 && c <= 0xfd3f)
      || (c >= 0xfd50 && c <= 0xfd8f)
      || (c >= 0xfd92 && c <= 0xfdc7)
      || (c >= 0xfdf0 && c <= 0xfdfb)
      || (c >= 0xfe70 && c <= 0xfe72)
      || (c == 0xfe74)
      || (c >= 0xfe76 && c <= 0xfefc)
      || (c >= 0xff21 && c <= 0xff3a)
      || (c >= 0xff41 && c <= 0xff5a)
      || (c >= 0xff66 && c <= 0xffbe)
      || (c >= 0xffc2 && c <= 0xffc7)
      || (c >= 0xffca && c <= 0xffcf)
      || (c >= 0xffd2 && c <= 0xffd7)
      || (c >= 0xffda && c <= 0xffdc)
      || (c >= 0x4e00 && c <= 0x9fa5))
    return 1;

  /* Not in any of the permitted ranges: diagnose, but still report it
     as an extended character.  */
  error ("universal-character-name '\\u%04x' not valid in identifier", c);
  return 1;
#endif
}
1292
/* Add the UTF-8 representation of C to the token_buffer, one byte at a
   time via extend_token.  Values up to 0x7f are emitted as a single
   byte; larger values are emitted as a lead byte carrying the length
   marker and the top bits, followed by continuation bytes that each
   carry six payload bits under a 10xxxxxx marker.  */

static void
utf8_extend_token (c)
     int c;
{
  int shift, mask;

  if (c <= 0x0000007f)
    {
      extend_token (c);
      return;
    }
  else if (c <= 0x000007ff)
    shift = 6, mask = 0xc0;
  else if (c <= 0x0000ffff)
    shift = 12, mask = 0xe0;
  else if (c <= 0x001fffff)
    shift = 18, mask = 0xf0;
  else if (c <= 0x03ffffff)
    shift = 24, mask = 0xf8;
  else
    shift = 30, mask = 0xfc;

  /* Lead byte: the range checks above guarantee that c >> shift fits in
     the payload bits left free by MASK.  */
  extend_token (mask | (c >> shift));
  do
    {
      shift -= 6;
      /* Continuation byte: keep only the six bits belonging to this
         position.  Without the 0x3f mask the higher-order bits of C
         would be OR-ed into the byte and corrupt the 10xxxxxx marker
         (e.g. 0x7ff would yield 0xff instead of 0xbf).  */
      extend_token ((unsigned char) (0x80 | ((c >> shift) & 0x3f)));
    }
  while (shift);
}
0e5921e8 1325#endif
e8bbfc4e
RK
1326
1327#if 0
e8bbfc4e
RK
1328struct try_type
1329{
1330 tree *node_var;
1331 char unsigned_flag;
1332 char long_flag;
1333 char long_long_flag;
1334};
1335
75cb8865 1336struct try_type type_sequence[] =
e8bbfc4e
RK
1337{
1338 { &integer_type_node, 0, 0, 0},
1339 { &unsigned_type_node, 1, 0, 0},
1340 { &long_integer_type_node, 0, 1, 0},
1341 { &long_unsigned_type_node, 1, 1, 0},
1342 { &long_long_integer_type_node, 0, 1, 1},
1343 { &long_long_unsigned_type_node, 1, 1, 1}
1344};
1345#endif /* 0 */
1346\f
ed513abf
KG
1347struct pf_args
1348{
1349 /* Input */
0e5921e8
ZW
1350 const char *str;
1351 int fflag;
1352 int lflag;
ed513abf 1353 int base;
fbb18613 1354 /* Output */
ed513abf 1355 int conversion_errno;
ed513abf 1356 REAL_VALUE_TYPE value;
0e5921e8 1357 tree type;
ed513abf
KG
1358};
1359
1360static void
1361parse_float (data)
1362 PTR data;
1363{
1364 struct pf_args * args = (struct pf_args *) data;
0e5921e8
ZW
1365 const char *typename;
1366
fbb18613
JM
1367 args->conversion_errno = 0;
1368 args->type = double_type_node;
0e5921e8 1369 typename = "double";
ed513abf
KG
1370
1371 /* The second argument, machine_mode, of REAL_VALUE_ATOF
1372 tells the desired precision of the binary result
1373 of decimal-to-binary conversion. */
1374
0e5921e8 1375 if (args->fflag)
ed513abf 1376 {
0e5921e8
ZW
1377 if (args->lflag)
1378 error ("both 'f' and 'l' suffixes on floating constant");
ed513abf
KG
1379
1380 args->type = float_type_node;
0e5921e8 1381 typename = "float";
ed513abf 1382 }
0e5921e8 1383 else if (args->lflag)
ed513abf
KG
1384 {
1385 args->type = long_double_type_node;
0e5921e8 1386 typename = "long double";
ed513abf 1387 }
0e5921e8 1388 else if (flag_single_precision_constant)
ed513abf 1389 {
0e5921e8
ZW
1390 args->type = float_type_node;
1391 typename = "float";
ed513abf 1392 }
0e5921e8
ZW
1393
1394 errno = 0;
1395 if (args->base == 16)
1396 args->value = REAL_VALUE_HTOF (args->str, TYPE_MODE (args->type));
1397 else
1398 args->value = REAL_VALUE_ATOF (args->str, TYPE_MODE (args->type));
1399
1400 args->conversion_errno = errno;
1401 /* A diagnostic is required here by some ISO C testsuites.
1402 This is not pedwarn, because some people don't want
1403 an error for this. */
1404 if (REAL_VALUE_ISINF (args->value) && pedantic)
1405 warning ("floating point number exceeds range of '%s'", typename);
ed513abf
KG
1406}
1407
0e5921e8
ZW
/* Read the next token and return its cpp_ttype code.  *VALUE is set to
   NULL_TREE unless the token is an identifier, a number, a character
   constant or a string, in which case it receives the tree built by
   get_identifier, lex_number, lex_charconst or lex_string.

   There are two implementations.  With USE_CPPLIB the token comes from
   cpp_get_token and is merely translated; otherwise characters are
   read with getch and tokenized by hand.  Both update `lineno' and
   keep `indent_level' in step with braces, and both report stray
   characters with error () and then retry.  */
int
c_lex (value)
     tree *value;
{
#if USE_CPPLIB
  const cpp_token *tok;
  enum cpp_ttype type;

 retry:
  timevar_push (TV_CPP);
  tok = cpp_get_token (&parse_in);
  timevar_pop (TV_CPP);

  /* The C++ front end does horrible things with the current line
     number.  To ensure an accurate line number, we must reset it
     every time we return a token.  If we reset it from tok->line
     every time, we'll get line numbers inside macros referring to the
     macro definition; this is nice, but we don't want to change the
     behavior until integrated mode is the only option.  So we keep our
     own idea of the line number, and reset it from tok->line at each
     new line (which never happens inside a macro).  */
  if (tok->flags & BOL)
    lex_lineno = tok->line;

  *value = NULL_TREE;
  lineno = lex_lineno;
  type = tok->type;
  switch (type)
    {
    case CPP_OPEN_BRACE:  indent_level++;  break;
    case CPP_CLOSE_BRACE: indent_level--;  break;

    /* Issue this error here, where we can get at tok->val.aux.  */
    case CPP_OTHER:
      if (ISGRAPH (tok->val.aux))
        error ("stray '%c' in program", tok->val.aux);
      else
        error ("stray '\\%#o' in program", tok->val.aux);
      /* The stray character is dropped; fetch the next token instead.  */
      goto retry;

    case CPP_DEFINED:
      type = CPP_NAME;
      /* Fall through: CPP_DEFINED is returned to the caller as an
         ordinary CPP_NAME.  */
    case CPP_NAME:
      *value = get_identifier ((const char *)tok->val.node->name);
      break;

    case CPP_INT:
    case CPP_FLOAT:
    case CPP_NUMBER:
      *value = lex_number ((const char *)tok->val.str.text, tok->val.str.len);
      break;

    case CPP_CHAR:
    case CPP_WCHAR:
      *value = lex_charconst ((const char *)tok->val.str.text,
                              tok->val.str.len, tok->type == CPP_WCHAR);
      break;

    case CPP_STRING:
    case CPP_WSTRING:
    case CPP_OSTRING:
      *value = lex_string ((const char *)tok->val.str.text,
                           tok->val.str.len, tok->type == CPP_WSTRING);
      break;

      /* These tokens should not be visible outside cpplib.  */
    case CPP_HEADER_NAME:
    case CPP_COMMENT:
    case CPP_MACRO_ARG:
    case CPP_PLACEMARKER:
      abort ();

    default: break;
    }

  return type;

#else
  int c;
  char *p;
  int wide_flag = 0;    /* Set when an 'L' prefix precedes a constant.  */
  int objc_flag = 0;    /* Set when '@' precedes a string.  */
  int charconst = 0;    /* Set for a character (not string) constant.  */

  *value = NULL_TREE;

 retry:
  c = getch ();

  /* Effectively do c = skip_white_space (c)
     but do it faster in the usual cases.  */
  while (1)
    switch (c)
      {
      case ' ':
      case '\t':
      case '\f':
      case '\v':
        c = getch ();
        break;

      case '\n':
        c = skip_white_space (c);
        /* Fall through: whatever skip_white_space returned starts the
           token.  */
      default:
        goto found_nonwhite;
      }
 found_nonwhite:

  lineno = lex_lineno;

  switch (c)
    {
    case EOF:
      return CPP_EOF;

    case 'L':
      /* Capital L may start a wide-string or wide-character constant.  */
      {
        register int c1 = getch();
        if (c1 == '\'')
          {
            wide_flag = 1;
            goto char_constant;
          }
        if (c1 == '"')
          {
            wide_flag = 1;
            goto string_constant;
          }
        put_back (c1);
      }
      /* Not a wide constant after all: 'L' begins an identifier.  */
      goto letter;

    case '@':
      if (!doing_objc_thang)
        goto straychar;
      else
        {
          /* '@' may start a constant string object.  */
          register int c1 = getch ();
          if (c1 == '"')
            {
              objc_flag = 1;
              goto string_constant;
            }
          put_back (c1);
          /* Fall through to treat '@' as the start of an identifier.  */
        }

    case 'A':  case 'B':  case 'C':  case 'D':  case 'E':
    case 'F':  case 'G':  case 'H':  case 'I':  case 'J':
    case 'K':             case 'M':  case 'N':  case 'O':
    case 'P':  case 'Q':  case 'R':  case 'S':  case 'T':
    case 'U':  case 'V':  case 'W':  case 'X':  case 'Y':
    case 'Z':
    case 'a':  case 'b':  case 'c':  case 'd':  case 'e':
    case 'f':  case 'g':  case 'h':  case 'i':  case 'j':
    case 'k':  case 'l':  case 'm':  case 'n':  case 'o':
    case 'p':  case 'q':  case 'r':  case 's':  case 't':
    case 'u':  case 'v':  case 'w':  case 'x':  case 'y':
    case 'z':
    case '_':
    case '$':
    letter:
      /* Collect the identifier into token_buffer, growing the buffer
         as needed.  */
      p = token_buffer;
      while (ISALNUM (c) || c == '_' || c == '$' || c == '@')
        {
          /* Make sure this char really belongs in an identifier.  */
          if (c == '$')
            {
              if (! dollars_in_ident)
                error ("'$' in identifier");
              else if (pedantic)
                pedwarn ("'$' in identifier");
            }

          if (p >= token_buffer + maxtoken)
            p = extend_token_buffer (p);

          *p++ = c;
          c = getch();
        }

      /* C is the first character past the identifier.  */
      put_back (c);

      /* Make room for, and store, the terminating NUL.  */
      if (p >= token_buffer + maxtoken)
        p = extend_token_buffer (p);
      *p = 0;

      *value = get_identifier (token_buffer);
      return CPP_NAME;

    case '.':
      {
        /* It's hard to preserve tokenization on '.' because
           it could be a symbol by itself, or it could be the
           start of a floating point number and cpp won't tell us.  */
        int c1 = getch ();
        if (c1 == '.')
          {
            int c2 = getch ();
            if (c2 == '.')
              return CPP_ELLIPSIS;

            /* Exactly two dots: complain, then carry on below so that
               the first '.' is returned as CPP_DOT and the second is
               pushed back.  */
            put_back (c2);
            error ("parse error at '..'");
          }
        else if (c1 == '*' && c_language == clk_cplusplus)
          return CPP_DOT_STAR;

        put_back (c1);
        /* C is still '.' here, so the number scan below starts with
           the decimal point.  */
        if (ISDIGIT (c1))
          goto number;
      }
      return CPP_DOT;

    case '0':  case '1':  case '2':  case '3':  case '4':
    case '5':  case '6':  case '7':  case '8':  case '9':
    number:
      p = token_buffer;
      /* Scan the next preprocessing number.  All C numeric constants
         are preprocessing numbers, but not all preprocessing numbers
         are valid numeric constants.  Preprocessing numbers fit the
         regular expression \.?[0-9]([0-9a-zA-Z_.]|[eEpP][+-])*
         See C99 section 6.4.8.  */
      for (;;)
        {
          if (p >= token_buffer + maxtoken)
            p = extend_token_buffer (p);

          *p++ = c;
          c = getch();

          /* A sign is part of the number only directly after an
             exponent letter.  */
          if (c == '+' || c == '-')
            {
              int d = p[-1];
              if (d == 'e' || d == 'E' || d == 'p' || d == 'P')
                continue;
            }
          if (ISALNUM (c) || c == '_' || c == '.')
            continue;
          break;
        }
      put_back (c);

      /* lex_number gets an explicit length; the text is not
         NUL-terminated here.  */
      *value = lex_number (token_buffer, p - token_buffer);
      return CPP_NUMBER;

    case '\'':
    char_constant:
      charconst = 1;
      /* Fall through: character and string constants share the scanning
         code below and differ only in the delimiter.  */

    case '"':
    string_constant:
      {
        int delimiter = charconst ? '\'' : '"';
#ifdef MULTIBYTE_CHARS
        int longest_char = local_mb_cur_max ();
        (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
#endif
        c = getch ();
        /* The contents are stored starting at token_buffer + 1.  */
        p = token_buffer + 1;

        /* Copy everything up to the closing delimiter.  Note that
           reaching EOF also ends the loop, and no diagnostic is issued
           here for that case.  */
        while (c != delimiter && c != EOF)
          {
            /* Keep room for two bytes: a backslash and the character it
               escapes.  */
            if (p + 2 > token_buffer + maxtoken)
              p = extend_token_buffer (p);

            /* ignore_escape_flag is set for reading the filename in #line.  */
            if (!ignore_escape_flag && c == '\\')
              {
                /* Copy the backslash and the following character
                   verbatim, so that an escaped delimiter does not end
                   the constant.  */
                *p++ = c;
                *p++ = getch ();  /* escaped character */
                c = getch ();
                continue;
              }
            else
              {
#ifdef MULTIBYTE_CHARS
                int i;
                int char_len = -1;
                /* Accumulate bytes at P until local_mblen accepts them
                   as one character, or longest_char bytes have been
                   tried.  */
                for (i = 0; i < longest_char; ++i)
                  {
                    if (p + i >= token_buffer + maxtoken)
                      p = extend_token_buffer (p);
                    p[i] = c;

                    char_len = local_mblen (p, i + 1);
                    if (char_len != -1)
                      break;
                    c = getch ();
                  }
                if (char_len == -1)
                  {
                    /* Replace all except the first byte.  */
                    put_back (c);
                    for (--i; i > 0; --i)
                      put_back (p[i]);
                    char_len = 1;
                  }
                /* mbtowc sometimes needs an extra char before accepting */
                else if (char_len <= i)
                  put_back (c);

                p += char_len;
#else
                *p++ = c;
#endif
                c = getch ();
              }
          }
      }

      /* Hand the raw text (without delimiters) and its length to the
         constant-specific lexer.  */
      if (charconst)
        {
          *value =  lex_charconst (token_buffer + 1, p - (token_buffer + 1),
                                   wide_flag);
          return wide_flag ? CPP_WCHAR : CPP_CHAR;
        }
      else
        {
          *value = lex_string (token_buffer + 1, p - (token_buffer + 1),
                               wide_flag);
          return wide_flag ? CPP_WSTRING : objc_flag ? CPP_OSTRING : CPP_STRING;
        }

    case '+':
    case '-':
    case '&':
    case '|':
    case ':':
    case '<':
    case '>':
    case '*':
    case '/':
    case '%':
    case '^':
    case '!':
    case '=':
      {
        int c1;
        enum cpp_ttype type = CPP_EOF;

        /* Start from the single-character operator; it may be upgraded
           below after looking at the next character.  */
        switch (c)
          {
          case '+': type = CPP_PLUS;    break;
          case '-': type = CPP_MINUS;   break;
          case '&': type = CPP_AND;     break;
          case '|': type = CPP_OR;      break;
          case ':': type = CPP_COLON;   break;
          case '<': type = CPP_LESS;    break;
          case '>': type = CPP_GREATER; break;
          case '*': type = CPP_MULT;    break;
          case '/': type = CPP_DIV;     break;
          case '%': type = CPP_MOD;     break;
          case '^': type = CPP_XOR;     break;
          case '!': type = CPP_NOT;     break;
          case '=': type = CPP_EQ;      break;
          }

        c1 = getch ();

        /* OP followed by '='.  NOTE(review): this presumably relies on
           the cpp_ttype enumeration placing each OP= code at the same
           offset (CPP_EQ_EQ - CPP_EQ) from its OP code -- confirm
           against cpplib.h.  */
        if (c1 == '=' && type < CPP_LAST_EQ)
          return type + (CPP_EQ_EQ - CPP_EQ);
        else if (c == c1)
          /* Doubled operator character.  */
          switch (c)
            {
            case '+':   return CPP_PLUS_PLUS;
            case '-':   return CPP_MINUS_MINUS;
            case '&':   return CPP_AND_AND;
            case '|':   return CPP_OR_OR;
            case ':':
              if (c_language == clk_cplusplus)
                return CPP_SCOPE;
              break;

            case '<':   type = CPP_LSHIFT;      goto do_triad;
            case '>':   type = CPP_RSHIFT;      goto do_triad;
            }
        else
          /* Remaining two-character operators and digraphs.  */
          switch (c)
            {
            case '-':
              if (c1 == '>')
                {
                  if (c_language == clk_cplusplus)
                    {
                      c1 = getch ();
                      if (c1 == '*')
                        return CPP_DEREF_STAR;
                      put_back (c1);
                    }
                  return CPP_DEREF;
                }
              break;

            case '>':
              if (c1 == '?' && c_language == clk_cplusplus)
                { type = CPP_MAX; goto do_triad; }
              break;

            case '<':
              if (c1 == ':' && flag_digraphs)
                return CPP_OPEN_SQUARE;
              if (c1 == '%' && flag_digraphs)
                { indent_level++; return CPP_OPEN_BRACE; }
              if (c1 == '?' && c_language == clk_cplusplus)
                { type = CPP_MIN; goto do_triad; }
              break;

            case ':':
              if (c1 == '>' && flag_digraphs)
                return CPP_CLOSE_SQUARE;
              break;
            case '%':
              if (c1 == '>' && flag_digraphs)
                { indent_level--; return CPP_CLOSE_BRACE; }
              break;
            }

        /* Nothing longer matched: give back the lookahead character
           and return the single-character operator.  */
        put_back (c1);
        return type;

      do_triad:
        /* A two-character operator has been recognized; see whether an
           '=' follows to make the three-character assignment form.  */
        c1 = getch ();
        if (c1 == '=')
          type += (CPP_EQ_EQ - CPP_EQ);
        else
          put_back (c1);
        return type;
      }

    case '~':                   return CPP_COMPL;
    case '?':                   return CPP_QUERY;
    case ',':                   return CPP_COMMA;
    case '(':                   return CPP_OPEN_PAREN;
    case ')':                   return CPP_CLOSE_PAREN;
    case '[':                   return CPP_OPEN_SQUARE;
    case ']':                   return CPP_CLOSE_SQUARE;
    case '{': indent_level++;   return CPP_OPEN_BRACE;
    case '}': indent_level--;   return CPP_CLOSE_BRACE;
    case ';':                   return CPP_SEMICOLON;

    straychar:
    default:
      if (ISGRAPH (c))
        error ("stray '%c' in program", c);
      else
        error ("stray '\\%#o' in program", c);
      /* The stray character is dropped; lex the next token instead.  */
      goto retry;
    }
  /* NOTREACHED */
#endif
}
8d9bfdc5 1862
8d9bfdc5 1863
/* Report MSGID with error () and jump to the `syntax_error' label.
   Usable only inside a function that defines such a label.  Wrapped in
   do ... while (0) so that it behaves as a single statement.  */
#define ERROR(msgid) do { error(msgid); goto syntax_error; } while(0)
75cb8865 1865
0e5921e8
ZW
1866static tree
1867lex_number (str, len)
1868 const char *str;
1869 unsigned int len;
1870{
1871 int base = 10;
1872 int count = 0;
1873 int largest_digit = 0;
1874 int numdigits = 0;
1875 int overflow = 0;
1876 int c;
1877 tree value;
1878 const char *p;
1879 enum anon1 { NOT_FLOAT = 0, AFTER_POINT, AFTER_EXPON } floatflag = NOT_FLOAT;
1880
1881 /* We actually store only HOST_BITS_PER_CHAR bits in each part.
1882 The code below which fills the parts array assumes that a host
1883 int is at least twice as wide as a host char, and that
1884 HOST_BITS_PER_WIDE_INT is an even multiple of HOST_BITS_PER_CHAR.
1885 Two HOST_WIDE_INTs is the largest int literal we can store.
1886 In order to detect overflow below, the number of parts (TOTAL_PARTS)
1887 must be exactly the number of parts needed to hold the bits
1888 of two HOST_WIDE_INTs. */
1889#define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2)
1890 unsigned int parts[TOTAL_PARTS];
1891
1892 /* Optimize for most frequent case. */
1893 if (len == 1)
1894 {
1895 if (*str == '0')
1896 return integer_zero_node;
1897 else if (*str == '1')
1898 return integer_one_node;
1899 else
1900 return build_int_2 (*str - '0', 0);
1901 }
e8bbfc4e 1902
0e5921e8
ZW
1903 for (count = 0; count < TOTAL_PARTS; count++)
1904 parts[count] = 0;
e8bbfc4e 1905
0e5921e8
ZW
1906 /* len is known to be >1 at this point. */
1907 p = str;
e8bbfc4e 1908
0e5921e8
ZW
1909 if (len > 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
1910 {
1911 base = 16;
1912 p = str + 2;
1913 }
1914 /* The ISDIGIT check is so we are not confused by a suffix on 0. */
1915 else if (str[0] == '0' && ISDIGIT (str[1]))
1916 {
1917 base = 8;
1918 p = str + 1;
1919 }
e8bbfc4e 1920
0e5921e8
ZW
1921 do
1922 {
1923 c = *p++;
88d92ca5 1924
0e5921e8
ZW
1925 if (c == '.')
1926 {
1927 if (base == 16 && pedantic && !flag_isoc99)
1928 pedwarn ("floating constant may not be in radix 16");
1929 else if (floatflag == AFTER_POINT)
1930 ERROR ("too many decimal points in floating constant");
1931 else if (floatflag == AFTER_EXPON)
1932 ERROR ("decimal point in exponent - impossible!");
1933 else
1934 floatflag = AFTER_POINT;
c832a30e 1935
0e5921e8
ZW
1936 if (base == 8)
1937 base = 10;
1938 }
1939 else if (c == '_')
1940 /* Possible future extension: silently ignore _ in numbers,
1941 permitting cosmetic grouping - e.g. 0x8000_0000 == 0x80000000
1942 but somewhat easier to read. Ada has this? */
1943 ERROR ("underscore in number");
1944 else
1945 {
1946 int n;
1947 /* It is not a decimal point.
1948 It should be a digit (perhaps a hex digit). */
0e0fda0d 1949
0e5921e8
ZW
1950 if (ISDIGIT (c))
1951 {
1952 n = c - '0';
1953 }
1954 else if (base <= 10 && (c == 'e' || c == 'E'))
1955 {
1956 base = 10;
1957 floatflag = AFTER_EXPON;
1958 break;
1959 }
1960 else if (base == 16 && (c == 'p' || c == 'P'))
1961 {
1962 floatflag = AFTER_EXPON;
1963 break; /* start of exponent */
1964 }
1965 else if (base == 16 && c >= 'a' && c <= 'f')
1966 {
1967 n = c - 'a' + 10;
1968 }
1969 else if (base == 16 && c >= 'A' && c <= 'F')
1970 {
1971 n = c - 'A' + 10;
1972 }
1973 else
1974 {
1975 p--;
1976 break; /* start of suffix */
1977 }
0e0fda0d 1978
0e5921e8
ZW
1979 if (n >= largest_digit)
1980 largest_digit = n;
1981 numdigits++;
0e0fda0d 1982
0e5921e8
ZW
1983 for (count = 0; count < TOTAL_PARTS; count++)
1984 {
1985 parts[count] *= base;
1986 if (count)
1987 {
1988 parts[count]
1989 += (parts[count-1] >> HOST_BITS_PER_CHAR);
1990 parts[count-1]
1991 &= (1 << HOST_BITS_PER_CHAR) - 1;
1992 }
1993 else
1994 parts[0] += n;
1995 }
e8bbfc4e 1996
0e5921e8
ZW
1997 /* If the highest-order part overflows (gets larger than
1998 a host char will hold) then the whole number has
1999 overflowed. Record this and truncate the highest-order
2000 part. */
2001 if (parts[TOTAL_PARTS - 1] >> HOST_BITS_PER_CHAR)
2002 {
2003 overflow = 1;
2004 parts[TOTAL_PARTS - 1] &= (1 << HOST_BITS_PER_CHAR) - 1;
2005 }
2006 }
2007 }
2008 while (p < str + len);
91b0989e 2009
0e5921e8
ZW
2010 /* This can happen on input like `int i = 0x;' */
2011 if (numdigits == 0)
2012 ERROR ("numeric constant with no digits");
91b0989e 2013
0e5921e8
ZW
2014 if (largest_digit >= base)
2015 ERROR ("numeric constant contains digits beyond the radix");
e8bbfc4e 2016
0e5921e8
ZW
2017 if (floatflag != NOT_FLOAT)
2018 {
2019 tree type;
2020 int imag, fflag, lflag, conversion_errno;
2021 REAL_VALUE_TYPE real;
2022 struct pf_args args;
2023 char *copy;
2024
2025 if (base == 16 && floatflag != AFTER_EXPON)
2026 ERROR ("hexadecimal floating constant has no exponent");
2027
2028 /* Read explicit exponent if any, and put it in tokenbuf. */
2029 if ((base == 10 && ((c == 'e') || (c == 'E')))
2030 || (base == 16 && (c == 'p' || c == 'P')))
2031 {
2032 if (p < str + len)
2033 c = *p++;
2034 if (p < str + len && (c == '+' || c == '-'))
2035 c = *p++;
2036 /* Exponent is decimal, even if string is a hex float. */
2037 if (! ISDIGIT (c))
2038 ERROR ("floating constant exponent has no digits");
2039 while (p < str + len && ISDIGIT (c))
2040 c = *p++;
2041 if (! ISDIGIT (c))
2042 p--;
2043 }
56f48ce9 2044
0e5921e8
ZW
2045 /* Copy the float constant now; we don't want any suffixes in the
2046 string passed to parse_float. */
2047 copy = alloca (p - str + 1);
2048 memcpy (copy, str, p - str);
2049 copy[p - str] = '\0';
e8bbfc4e 2050
0e5921e8
ZW
2051 /* Now parse suffixes. */
2052 fflag = lflag = imag = 0;
2053 while (p < str + len)
2054 switch (*p++)
e8bbfc4e 2055 {
0e5921e8
ZW
2056 case 'f': case 'F':
2057 if (fflag)
2058 ERROR ("more than one 'f' suffix on floating constant");
2059 else if (warn_traditional && !in_system_header)
2060 warning ("traditional C rejects the 'f' suffix");
e8bbfc4e 2061
0e5921e8
ZW
2062 fflag = 1;
2063 break;
e8bbfc4e 2064
0e5921e8
ZW
2065 case 'l': case 'L':
2066 if (lflag)
2067 ERROR ("more than one 'l' suffix on floating constant");
2068 else if (warn_traditional && !in_system_header)
2069 warning ("traditional C rejects the 'l' suffix");
56f48ce9 2070
0e5921e8
ZW
2071 lflag = 1;
2072 break;
e8bbfc4e 2073
0e5921e8
ZW
2074 case 'i': case 'I':
2075 case 'j': case 'J':
2076 if (imag)
2077 ERROR ("more than one 'i' or 'j' suffix on floating constant");
2078 else if (pedantic)
2079 pedwarn ("ISO C forbids imaginary numeric constants");
2080 imag = 1;
2081 break;
e8bbfc4e 2082
0e5921e8
ZW
2083 default:
2084 ERROR ("invalid suffix on floating constant");
e8bbfc4e
RK
2085 }
2086
0e5921e8
ZW
2087 /* Setup input for parse_float() */
2088 args.str = copy;
2089 args.fflag = fflag;
2090 args.lflag = lflag;
2091 args.base = base;
e8bbfc4e 2092
0e5921e8
ZW
2093 /* Convert string to a double, checking for overflow. */
2094 if (do_float_handler (parse_float, (PTR) &args))
2095 {
2096 /* Receive output from parse_float() */
2097 real = args.value;
2098 }
2099 else
2100 /* We got an exception from parse_float() */
2101 ERROR ("floating constant out of range");
e8bbfc4e 2102
0e5921e8
ZW
2103 /* Receive output from parse_float() */
2104 conversion_errno = args.conversion_errno;
2105 type = args.type;
2106
2107#ifdef ERANGE
2108 /* ERANGE is also reported for underflow,
2109 so test the value to distinguish overflow from that. */
2110 if (conversion_errno == ERANGE && !flag_traditional && pedantic
2111 && (REAL_VALUES_LESS (dconst1, real)
2112 || REAL_VALUES_LESS (real, dconstm1)))
2113 warning ("floating point number exceeds range of 'double'");
56f48ce9 2114#endif
e8bbfc4e 2115
0e5921e8
ZW
2116 /* Create a node with determined type and value. */
2117 if (imag)
2118 value = build_complex (NULL_TREE, convert (type, integer_zero_node),
2119 build_real (type, real));
2120 else
2121 value = build_real (type, real);
2122 }
2123 else
2124 {
2125 tree trad_type, ansi_type, type;
2126 HOST_WIDE_INT high, low;
2127 int spec_unsigned = 0;
2128 int spec_long = 0;
2129 int spec_long_long = 0;
2130 int spec_imag = 0;
2131 int suffix_lu = 0;
2132 int warn = 0, i;
2133
2134 trad_type = ansi_type = type = NULL_TREE;
2135 while (p < str + len)
2136 {
2137 c = *p++;
2138 switch (c)
2139 {
2140 case 'u': case 'U':
2141 if (spec_unsigned)
2142 error ("two 'u' suffixes on integer constant");
2143 else if (warn_traditional && !in_system_header)
2144 warning ("traditional C rejects the 'u' suffix");
2145
2146 spec_unsigned = 1;
2147 if (spec_long)
2148 suffix_lu = 1;
2149 break;
e8bbfc4e 2150
0e5921e8
ZW
2151 case 'l': case 'L':
2152 if (spec_long)
2153 {
2154 if (spec_long_long)
2155 error ("three 'l' suffixes on integer constant");
2156 else if (suffix_lu)
2157 error ("'lul' is not a valid integer suffix");
2158 else if (c != spec_long)
2159 error ("'Ll' and 'lL' are not valid integer suffixes");
2160 else if (pedantic && ! flag_isoc99
2161 && ! in_system_header && warn_long_long)
2162 pedwarn ("ISO C89 forbids long long integer constants");
2163 spec_long_long = 1;
2164 }
2165 spec_long = c;
2166 break;
56f48ce9 2167
0e5921e8
ZW
2168 case 'i': case 'I': case 'j': case 'J':
2169 if (spec_imag)
2170 error ("more than one 'i' or 'j' suffix on integer constant");
2171 else if (pedantic)
2172 pedwarn ("ISO C forbids imaginary numeric constants");
2173 spec_imag = 1;
2174 break;
56f48ce9 2175
0e5921e8
ZW
2176 default:
2177 ERROR ("invalid suffix on integer constant");
2178 }
2179 }
56f48ce9 2180
0e5921e8
ZW
2181 /* If the literal overflowed, pedwarn about it now. */
2182 if (overflow)
2183 {
2184 warn = 1;
2185 pedwarn ("integer constant is too large for this configuration of the compiler - truncated to %d bits", HOST_BITS_PER_WIDE_INT * 2);
2186 }
e8bbfc4e 2187
0e5921e8
ZW
2188 /* This is simplified by the fact that our constant
2189 is always positive. */
56f48ce9 2190
0e5921e8 2191 high = low = 0;
e8bbfc4e 2192
0e5921e8
ZW
2193 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
2194 {
2195 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
2196 / HOST_BITS_PER_CHAR)]
2197 << (i * HOST_BITS_PER_CHAR));
2198 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
2199 }
0468bc75 2200
0e5921e8
ZW
2201 value = build_int_2 (low, high);
2202 TREE_TYPE (value) = long_long_unsigned_type_node;
e8bbfc4e 2203
0e5921e8
ZW
2204 /* If warn_traditional, calculate both the ISO type and the
2205 traditional type, then see if they disagree.
2206 Otherwise, calculate only the type for the dialect in use. */
2207 if (warn_traditional || flag_traditional)
2208 {
2209 /* Calculate the traditional type. */
2210 /* Traditionally, any constant is signed; but if unsigned is
2211 specified explicitly, obey that. Use the smallest size
2212 with the right number of bits, except for one special
2213 case with decimal constants. */
2214 if (! spec_long && base != 10
2215 && int_fits_type_p (value, unsigned_type_node))
2216 trad_type = spec_unsigned ? unsigned_type_node : integer_type_node;
2217 /* A decimal constant must be long if it does not fit in
2218 type int. I think this is independent of whether the
2219 constant is signed. */
2220 else if (! spec_long && base == 10
2221 && int_fits_type_p (value, integer_type_node))
2222 trad_type = spec_unsigned ? unsigned_type_node : integer_type_node;
2223 else if (! spec_long_long)
2224 trad_type = (spec_unsigned
2225 ? long_unsigned_type_node
2226 : long_integer_type_node);
2227 else if (int_fits_type_p (value,
2228 spec_unsigned
2229 ? long_long_unsigned_type_node
2230 : long_long_integer_type_node))
2231 trad_type = (spec_unsigned
2232 ? long_long_unsigned_type_node
2233 : long_long_integer_type_node);
2234 else
2235 trad_type = (spec_unsigned
2236 ? widest_unsigned_literal_type_node
2237 : widest_integer_literal_type_node);
2238 }
2239 if (warn_traditional || ! flag_traditional)
2240 {
2241 /* Calculate the ISO type. */
2242 if (! spec_long && ! spec_unsigned
2243 && int_fits_type_p (value, integer_type_node))
2244 ansi_type = integer_type_node;
2245 else if (! spec_long && (base != 10 || spec_unsigned)
2246 && int_fits_type_p (value, unsigned_type_node))
2247 ansi_type = unsigned_type_node;
2248 else if (! spec_unsigned && !spec_long_long
2249 && int_fits_type_p (value, long_integer_type_node))
2250 ansi_type = long_integer_type_node;
2251 else if (! spec_long_long
2252 && int_fits_type_p (value, long_unsigned_type_node))
2253 ansi_type = long_unsigned_type_node;
2254 else if (! spec_unsigned
2255 && int_fits_type_p (value, long_long_integer_type_node))
2256 ansi_type = long_long_integer_type_node;
2257 else if (int_fits_type_p (value, long_long_unsigned_type_node))
2258 ansi_type = long_long_unsigned_type_node;
2259 else if (! spec_unsigned
2260 && int_fits_type_p (value, widest_integer_literal_type_node))
2261 ansi_type = widest_integer_literal_type_node;
2262 else
2263 ansi_type = widest_unsigned_literal_type_node;
2264 }
e8bbfc4e 2265
0e5921e8 2266 type = flag_traditional ? trad_type : ansi_type;
e8bbfc4e 2267
0e5921e8
ZW
2268 /* We assume that constants specified in a non-decimal
2269 base are bit patterns, and that the programmer really
2270 meant what they wrote. */
2271 if (warn_traditional && !in_system_header
2272 && base == 10 && trad_type != ansi_type)
2273 {
2274 if (TYPE_PRECISION (trad_type) != TYPE_PRECISION (ansi_type))
2275 warning ("width of integer constant changes with -traditional");
2276 else if (TREE_UNSIGNED (trad_type) != TREE_UNSIGNED (ansi_type))
2277 warning ("integer constant is unsigned in ISO C, signed with -traditional");
2278 else
2279 warning ("width of integer constant may change on other systems with -traditional");
2280 }
e8bbfc4e 2281
0e5921e8
ZW
2282 if (pedantic && !flag_traditional && !spec_long_long && !warn
2283 && (TYPE_PRECISION (long_integer_type_node) < TYPE_PRECISION (type)))
2284 {
2285 warn = 1;
2286 pedwarn ("integer constant larger than the maximum value of an unsigned long int");
2287 }
e8bbfc4e 2288
0e5921e8
ZW
2289 if (base == 10 && ! spec_unsigned && TREE_UNSIGNED (type))
2290 warning ("decimal constant is so large that it is unsigned");
e8bbfc4e 2291
0e5921e8
ZW
2292 if (spec_imag)
2293 {
2294 if (TYPE_PRECISION (type)
2295 <= TYPE_PRECISION (integer_type_node))
2296 value = build_complex (NULL_TREE, integer_zero_node,
2297 convert (integer_type_node, value));
2298 else
2299 ERROR ("complex integer constant is too wide for 'complex int'");
2300 }
2301 else if (flag_traditional && !int_fits_type_p (value, type))
2302 /* The traditional constant 0x80000000 is signed
2303 but doesn't fit in the range of int.
2304 This will change it to -0x80000000, which does fit. */
2305 {
2306 TREE_TYPE (value) = unsigned_type (type);
2307 value = convert (type, value);
2308 TREE_OVERFLOW (value) = TREE_CONSTANT_OVERFLOW (value) = 0;
2309 }
2310 else
2311 TREE_TYPE (value) = type;
e8bbfc4e 2312
0e5921e8
ZW
2313 /* If it's still an integer (not a complex), and it doesn't
2314 fit in the type we choose for it, then pedwarn. */
fbb18613 2315
0e5921e8
ZW
2316 if (! warn
2317 && TREE_CODE (TREE_TYPE (value)) == INTEGER_TYPE
2318 && ! int_fits_type_p (value, TREE_TYPE (value)))
2319 pedwarn ("integer constant is larger than the maximum value for its type");
2320 }
fbb18613 2321
0e5921e8
ZW
2322 if (p < str + len)
2323 error ("missing white space after number '%.*s'", (int) (p - str), str);
e8bbfc4e 2324
0e5921e8 2325 return value;
e8bbfc4e 2326
0e5921e8
ZW
2327 syntax_error:
2328 return integer_zero_node;
2329}
e8bbfc4e 2330
0e5921e8
ZW
2331static tree
2332lex_string (str, len, wide)
2333 const char *str;
2334 unsigned int len;
2335 int wide;
2336{
2337 tree value;
2338 char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
2339 char *q = buf;
2340 const char *p = str, *limit = str + len;
2341 unsigned int c;
2342 unsigned width = wide ? WCHAR_TYPE_SIZE
2343 : TYPE_PRECISION (char_type_node);
e9a25f70 2344
0e5921e8
ZW
2345#ifdef MULTIBYTE_CHARS
2346 /* Reset multibyte conversion state. */
2347 (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
2348#endif
e9a25f70 2349
0e5921e8
ZW
2350 while (p < limit)
2351 {
2352#ifdef MULTIBYTE_CHARS
2353 wchar_t wc;
2354 int char_len;
2355
2356 char_len = local_mbtowc (&wc, p, limit - p);
2357 if (char_len == -1)
2358 {
2359 warning ("Ignoring invalid multibyte character");
2360 char_len = 1;
2361 c = *p++;
2362 }
2363 else
2364 {
2365 p += char_len;
2366 c = wc;
2367 }
2368#else
2369 c = *p++;
2370#endif
2371
2372 if (c == '\\' && !ignore_escape_flag)
2373 {
2374 p = readescape (p, limit, &c);
2375 if (width < HOST_BITS_PER_INT
2376 && (unsigned) c >= ((unsigned)1 << width))
2377 pedwarn ("escape sequence out of range for character");
2378 }
2379
2380 /* Add this single character into the buffer either as a wchar_t
2381 or as a single byte. */
2382 if (wide)
2383 {
2384 unsigned charwidth = TYPE_PRECISION (char_type_node);
5c80f6e6 2385 unsigned bytemask = (1 << charwidth) - 1;
0e5921e8
ZW
2386 int byte;
2387
2388 for (byte = 0; byte < WCHAR_BYTES; ++byte)
2389 {
2390 int n;
2391 if (byte >= (int) sizeof (c))
2392 n = 0;
2393 else
2394 n = (c >> (byte * charwidth)) & bytemask;
2395 if (BYTES_BIG_ENDIAN)
2396 q[WCHAR_BYTES - byte - 1] = n;
2397 else
2398 q[byte] = n;
2399 }
2400 q += WCHAR_BYTES;
2401 }
2402 else
2403 {
2404 *q++ = c;
2405 }
e8bbfc4e
RK
2406 }
2407
0e5921e8
ZW
2408 /* Terminate the string value, either with a single byte zero
2409 or with a wide zero. */
e8bbfc4e 2410
0e5921e8
ZW
2411 if (wide)
2412 {
2413 memset (q, 0, WCHAR_BYTES);
2414 q += WCHAR_BYTES;
2415 }
2416 else
2417 {
2418 *q++ = '\0';
2419 }
2420
2421 value = build_string (q - buf, buf);
2422
2423 if (wide)
2424 TREE_TYPE (value) = wchar_array_type_node;
2425 else
2426 TREE_TYPE (value) = char_array_type_node;
e8bbfc4e
RK
2427 return value;
2428}
2429
0e5921e8
ZW
2430static tree
2431lex_charconst (str, len, wide)
2432 const char *str;
2433 unsigned int len;
2434 int wide;
e8bbfc4e 2435{
0e5921e8
ZW
2436 const char *limit = str + len;
2437 int result = 0;
2438 int num_chars = 0;
2439 int chars_seen = 0;
2440 unsigned width = TYPE_PRECISION (char_type_node);
2441 int max_chars;
2442 unsigned int c;
2443 tree value;
2444
2445#ifdef MULTIBYTE_CHARS
2446 int longest_char = local_mb_cur_max ();
2447 (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
2448#endif
2449
2450 max_chars = TYPE_PRECISION (integer_type_node) / width;
2451 if (wide)
2452 width = WCHAR_TYPE_SIZE;
2453
2454 while (str < limit)
2455 {
2456#ifdef MULTIBYTE_CHARS
2457 wchar_t wc;
2458 int char_len;
2459
2460 char_len = local_mbtowc (&wc, str, limit - str);
2461 if (char_len == -1)
2462 {
2463 warning ("Ignoring invalid multibyte character");
2464 char_len = 1;
2465 c = *str++;
2466 }
2467 else
2468 {
2469 p += char_len;
2470 c = wc;
2471 }
e8bbfc4e 2472#else
0e5921e8
ZW
2473 c = *str++;
2474#endif
2475
2476 ++chars_seen;
2477 if (c == '\\')
2478 {
2479 str = readescape (str, limit, &c);
2480 if (width < HOST_BITS_PER_INT
2481 && (unsigned) c >= ((unsigned)1 << width))
2482 pedwarn ("escape sequence out of range for character");
2483 }
2484#ifdef MAP_CHARACTER
2485 if (ISPRINT (c))
2486 c = MAP_CHARACTER (c);
e8bbfc4e 2487#endif
0e5921e8
ZW
2488
2489 /* Merge character into result; ignore excess chars. */
2490 num_chars += (width / TYPE_PRECISION (char_type_node));
2491 if (num_chars < max_chars + 1)
2492 {
2493 if (width < HOST_BITS_PER_INT)
2494 result = (result << width) | (c & ((1 << width) - 1));
2495 else
2496 result = c;
2497 }
2498 }
2499
2500 if (chars_seen == 0)
2501 error ("empty character constant");
2502 else if (num_chars > max_chars)
2503 {
2504 num_chars = max_chars;
2505 error ("character constant too long");
2506 }
2507 else if (chars_seen != 1 && ! flag_traditional && warn_multichar)
2508 warning ("multi-character character constant");
2509
2510 /* If char type is signed, sign-extend the constant. */
2511 if (! wide)
2512 {
2513 int num_bits = num_chars * width;
2514 if (num_bits == 0)
2515 /* We already got an error; avoid invalid shift. */
2516 value = build_int_2 (0, 0);
2517 else if (TREE_UNSIGNED (char_type_node)
2518 || ((result >> (num_bits - 1)) & 1) == 0)
2519 value = build_int_2 (result & (~(unsigned HOST_WIDE_INT) 0
2520 >> (HOST_BITS_PER_WIDE_INT - num_bits)),
2521 0);
2522 else
2523 value = build_int_2 (result | ~(~(unsigned HOST_WIDE_INT) 0
2524 >> (HOST_BITS_PER_WIDE_INT - num_bits)),
2525 -1);
2526 /* In C, a character constant has type 'int'; in C++, 'char'. */
2527 if (chars_seen <= 1 && c_language == clk_cplusplus)
2528 TREE_TYPE (value) = char_type_node;
2529 else
2530 TREE_TYPE (value) = integer_type_node;
2531 }
2532 else
2533 {
2534 value = build_int_2 (result, 0);
2535 TREE_TYPE (value) = wchar_type_node;
2536 }
2537
2538 return value;
e8bbfc4e 2539}
This page took 1.673295 seconds and 5 git commands to generate.