]> gcc.gnu.org Git - gcc.git/blame - gcc/c-lex.c
Correct last entry.
[gcc.git] / gcc / c-lex.c
CommitLineData
d45cf215 1/* Lexical analyzer for C and Objective C.
517cbe13
JL
2 Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
3 1998, 1999, 2000 Free Software Foundation, Inc.
e8bbfc4e
RK
4
5This file is part of GNU CC.
6
7GNU CC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GNU CC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU CC; see the file COPYING. If not, write to
940d9d63
RK
19the Free Software Foundation, 59 Temple Place - Suite 330,
20Boston, MA 02111-1307, USA. */
e8bbfc4e 21
e9a25f70 22#include "config.h"
670ee920 23#include "system.h"
e8bbfc4e 24
e8bbfc4e
RK
25#include "rtl.h"
26#include "tree.h"
27#include "input.h"
d6f4ec51 28#include "output.h"
e8bbfc4e
RK
29#include "c-lex.h"
30#include "c-tree.h"
31#include "flags.h"
0e5921e8 32#include "timevar.h"
8b97c5f8 33#include "cpplib.h"
3d6f7931 34#include "c-pragma.h"
5f6da302 35#include "toplev.h"
ab87f8c8 36#include "intl.h"
1526a060 37#include "ggc.h"
7bdb32b9 38#include "tm_p.h"
0e5921e8 39#include "splay-tree.h"
ab87f8c8 40
ab87f8c8
JL
41/* MULTIBYTE_CHARS support only works for native compilers.
42 ??? Ideally what we want is to model widechar support after
43 the current floating point support. */
44#ifdef CROSS_COMPILE
45#undef MULTIBYTE_CHARS
46#endif
e8bbfc4e
RK
47
48#ifdef MULTIBYTE_CHARS
56f48ce9 49#include "mbchar.h"
e8bbfc4e 50#include <locale.h>
56f48ce9 51#endif /* MULTIBYTE_CHARS */
c5c76735
JL
52#ifndef GET_ENVIRONMENT
53#define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME))
54#endif
e8bbfc4e 55
0e5921e8 56#if USE_CPPLIB
c8724862 57extern cpp_reader parse_in;
e56e519d
DB
58#else
59/* Stream for reading from the input file. */
60FILE *finput;
e3d1fd32
PB
61#endif
62
0e5921e8
ZW
63/* Private idea of the line number. See discussion in c_lex(). */
64static int lex_lineno;
65
66/* We may keep statistics about how long which files took to compile. */
67static int header_time, body_time;
68static splay_tree file_info_tree;
3ab6dd7c 69
e8bbfc4e
RK
70/* Cause the `yydebug' variable to be defined. */
71#define YYDEBUG 1
72
0e5921e8 73#if !USE_CPPLIB
505e0385 74
0e5921e8
ZW
/* A growable stack of characters that have been pushed back onto the
   input.  getch pops from it before reading the file; put_back pushes
   onto it.  */
struct putback_buffer
{
  unsigned char *buffer;	/* Storage for the pushed-back characters.  */
  int buffer_size;		/* Number of bytes allocated for BUFFER.  */
  int index;			/* Slot of the most recent push; -1 if none.  */
};

/* The one putback stack used by this file; it starts out empty with no
   storage allocated.  */
static struct putback_buffer putback = { NULL, 0, -1 };
83
6e090c76 84static inline int getch PARAMS ((void));
93868d11 85
505e0385
DB
86static inline int
87getch ()
88{
89 if (putback.index != -1)
90 {
91 int ch = putback.buffer[putback.index];
92 --putback.index;
93 return ch;
94 }
95 return getc (finput);
96}
97
6e090c76 98static inline void put_back PARAMS ((int));
93868d11 99
505e0385
DB
100static inline void
101put_back (ch)
102 int ch;
103{
104 if (ch != EOF)
105 {
106 if (putback.index == putback.buffer_size - 1)
107 {
108 putback.buffer_size += 16;
109 putback.buffer = xrealloc (putback.buffer, putback.buffer_size);
110 }
111 putback.buffer[++putback.index] = ch;
112 }
113}
e3d1fd32 114
fbb18613
JM
115int linemode;
116
0e5921e8 117#endif
e8bbfc4e
RK
118
119/* File used for outputting assembler code. */
120extern FILE *asm_out_file;
121
12a39b12
JM
122#undef WCHAR_TYPE_SIZE
123#define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
e8bbfc4e
RK
124
125/* Number of bytes in a wide character. */
126#define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
127
0e5921e8 128#if !USE_CPPLIB
e8bbfc4e 129static int maxtoken; /* Current nominal length of token buffer. */
0e5921e8
ZW
130static char *token_buffer; /* Pointer to token buffer.
131 Actual allocated length is maxtoken + 2. */
132#endif
e8bbfc4e 133
0e5921e8
ZW
134int indent_level; /* Number of { minus number of }. */
135int pending_lang_change; /* If we need to switch languages - C++ only */
136int c_header_level; /* depth in C headers - C++ only */
fbb18613
JM
137
138/* Nonzero tells yylex to ignore \ in string constants. */
139static int ignore_escape_flag;
e9a25f70 140
0e5921e8
ZW
141static const char *readescape PARAMS ((const char *, const char *,
142 unsigned int *));
143static const char *read_ucs PARAMS ((const char *, const char *,
144 unsigned int *, int));
145static void parse_float PARAMS ((PTR));
146static tree lex_number PARAMS ((const char *, unsigned int));
147static tree lex_string PARAMS ((const char *, unsigned int, int));
148static tree lex_charconst PARAMS ((const char *, unsigned int, int));
149static void update_header_times PARAMS ((const char *));
150static int dump_one_header PARAMS ((splay_tree_node, void *));
e8bbfc4e 151
0e5921e8 152#if !USE_CPPLIB
6e090c76
KG
153static int skip_white_space PARAMS ((int));
154static char *extend_token_buffer PARAMS ((const char *));
6e090c76
KG
155static void extend_token_buffer_to PARAMS ((int));
156static int read_line_number PARAMS ((int *));
0e5921e8
ZW
157static void process_directive PARAMS ((void));
158#else
159static void cb_ident PARAMS ((cpp_reader *, const unsigned char *,
160 unsigned int));
161static void cb_enter_file PARAMS ((cpp_reader *));
162static void cb_leave_file PARAMS ((cpp_reader *));
163static void cb_rename_file PARAMS ((cpp_reader *));
8b97c5f8 164static void cb_def_pragma PARAMS ((cpp_reader *));
0e5921e8 165#endif
e31c7eec 166
e31c7eec 167\f
3b304f5b 168const char *
0e5921e8 169init_c_lex (filename)
3b304f5b 170 const char *filename;
e3d1fd32 171{
0e5921e8
ZW
172 struct c_fileinfo *toplevel;
173
174 /* Set up filename timing. Must happen before cpp_start_read. */
175 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
176 0,
177 (splay_tree_delete_value_fn)free);
178 toplevel = get_fileinfo ("<top level>");
179 if (flag_detailed_statistics)
180 {
181 header_time = 0;
182 body_time = get_run_time ();
183 toplevel->time = body_time;
184 }
185
186#ifdef MULTIBYTE_CHARS
187 /* Change to the native locale for multibyte conversions. */
188 setlocale (LC_CTYPE, "");
189 GET_ENVIRONMENT (literal_codeset, "LANG");
190#endif
191
e56e519d
DB
192#if !USE_CPPLIB
193 /* Open input file. */
194 if (filename == 0 || !strcmp (filename, "-"))
195 {
196 finput = stdin;
197 filename = "stdin";
198 }
199 else
200 finput = fopen (filename, "r");
201 if (finput == 0)
202 pfatal_with_name (filename);
203
204#ifdef IO_BUFFER_SIZE
205 setvbuf (finput, (char *) xmalloc (IO_BUFFER_SIZE), _IOFBF, IO_BUFFER_SIZE);
206#endif
b12da25e 207#else /* !USE_CPPLIB */
0e5921e8
ZW
208
209 parse_in.cb.ident = cb_ident;
210 parse_in.cb.enter_file = cb_enter_file;
211 parse_in.cb.leave_file = cb_leave_file;
212 parse_in.cb.rename_file = cb_rename_file;
8b97c5f8 213 parse_in.cb.def_pragma = cb_def_pragma;
0e5921e8
ZW
214
215 /* Make sure parse_in.digraphs matches flag_digraphs. */
216 CPP_OPTION (&parse_in, digraphs) = flag_digraphs;
217
218 if (! cpp_start_read (&parse_in, 0 /* no printer */, filename))
e3d1fd32 219 abort ();
add7091b 220
b12da25e
ZW
221 if (filename == 0 || !strcmp (filename, "-"))
222 filename = "stdin";
e56e519d 223#endif
5c60e5c0 224
0e5921e8
ZW
225#if !USE_CPPLIB
226 maxtoken = 40;
227 token_buffer = (char *) xmalloc (maxtoken + 2);
228#endif
229 /* Start it at 0, because check_newline is called at the very beginning
230 and will increment it to 1. */
231 lineno = lex_lineno = 0;
b12da25e 232
5c60e5c0 233 return filename;
e3d1fd32
PB
234}
235
0e5921e8
ZW
236struct c_fileinfo *
237get_fileinfo (name)
238 const char *name;
e3d1fd32 239{
0e5921e8
ZW
240 splay_tree_node n;
241 struct c_fileinfo *fi;
242
243 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
244 if (n)
245 return (struct c_fileinfo *) n->value;
246
247 fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
248 fi->time = 0;
249 fi->interface_only = 0;
250 fi->interface_unknown = 1;
251 splay_tree_insert (file_info_tree, (splay_tree_key) name,
252 (splay_tree_value) fi);
253 return fi;
e56e519d 254}
e3d1fd32 255
0e5921e8
ZW
256static void
257update_header_times (name)
258 const char *name;
e8bbfc4e 259{
0e5921e8
ZW
260 /* Changing files again. This means currently collected time
261 is charged against header time, and body time starts back at 0. */
262 if (flag_detailed_statistics)
e8bbfc4e 263 {
0e5921e8
ZW
264 int this_time = get_run_time ();
265 struct c_fileinfo *file = get_fileinfo (name);
266 header_time += this_time - body_time;
267 file->time += this_time - body_time;
268 body_time = this_time;
e8bbfc4e
RK
269 }
270}
271
0e5921e8
ZW
272static int
273dump_one_header (n, dummy)
274 splay_tree_node n;
275 void *dummy ATTRIBUTE_UNUSED;
e8bbfc4e 276{
0e5921e8
ZW
277 print_time ((const char *) n->key,
278 ((struct c_fileinfo *) n->value)->time);
279 return 0;
e8bbfc4e 280}
e8bbfc4e
RK
281
282void
0e5921e8 283dump_time_statistics ()
e8bbfc4e 284{
0e5921e8
ZW
285 struct c_fileinfo *file = get_fileinfo (input_filename);
286 int this_time = get_run_time ();
287 file->time += this_time - body_time;
288
289 fprintf (stderr, "\n******\n");
290 print_time ("header files (total)", header_time);
291 print_time ("main file (total)", this_time - body_time);
292 fprintf (stderr, "ratio = %g : 1\n",
293 (double)header_time / (double)(this_time - body_time));
294 fprintf (stderr, "\n******\n");
295
296 splay_tree_foreach (file_info_tree, dump_one_header, 0);
e8bbfc4e 297}
a6124a42 298
0e5921e8 299#if !USE_CPPLIB
a6124a42 300
e8bbfc4e
RK
301/* If C is not whitespace, return C.
302 Otherwise skip whitespace and return first nonwhite char read. */
303
304static int
305skip_white_space (c)
306 register int c;
307{
e8bbfc4e
RK
308 for (;;)
309 {
310 switch (c)
311 {
0e5921e8
ZW
312 /* There is no need to process comments, backslash-newline,
313 or \r here. None can occur in the output of cpp. */
e8bbfc4e
RK
314
315 case '\n':
fbb18613
JM
316 if (linemode)
317 {
0e5921e8 318 put_back (c);
fbb18613
JM
319 return EOF;
320 }
e8bbfc4e
RK
321 c = check_newline ();
322 break;
323
0e5921e8 324 /* Per C99, horizontal whitespace is just these four characters. */
e8bbfc4e
RK
325 case ' ':
326 case '\t':
327 case '\f':
e8bbfc4e 328 case '\v':
0e5921e8 329 c = getch ();
0dcd8cee
RS
330 break;
331
e8bbfc4e 332 case '\\':
0e5921e8
ZW
333 error ("stray '\\' in program");
334 c = getch ();
e8bbfc4e
RK
335 break;
336
337 default:
338 return (c);
339 }
340 }
341}
342
/* Advance the input past any whitespace at the current position, leaving
   the first non-white character pushed back for the next read.  */

void
position_after_white_space ()
{
  register int first_nonwhite;

  first_nonwhite = skip_white_space (getch ());
  put_back (first_nonwhite);
}
354
355/* Make the token buffer longer, preserving the data in it.
356 P should point to just beyond the last valid character in the old buffer.
357 The value we return is a pointer to the new buffer
358 at a place corresponding to P. */
359
fbb18613
JM
360static void
361extend_token_buffer_to (size)
362 int size;
363{
364 do
365 maxtoken = maxtoken * 2 + 10;
366 while (maxtoken < size);
367 token_buffer = (char *) xrealloc (token_buffer, maxtoken + 2);
368}
369
e8bbfc4e
RK
370static char *
371extend_token_buffer (p)
5d5993dd 372 const char *p;
e8bbfc4e
RK
373{
374 int offset = p - token_buffer;
fbb18613 375 extend_token_buffer_to (offset);
e8bbfc4e
RK
376 return token_buffer + offset;
377}
e3d1fd32 378\f
f09db6e0 379
fbb18613
JM
380static int
381read_line_number (num)
382 int *num;
383{
0e5921e8
ZW
384 tree value;
385 enum cpp_ttype token = c_lex (&value);
fbb18613 386
0e5921e8 387 if (token == CPP_NUMBER && TREE_CODE (value) == INTEGER_CST)
fbb18613 388 {
0e5921e8 389 *num = TREE_INT_CST_LOW (value);
fbb18613
JM
390 return 1;
391 }
392 else
393 {
0e5921e8 394 if (token != CPP_EOF)
fbb18613
JM
395 error ("invalid #-line");
396 return 0;
397 }
398}
0e5921e8 399
e8bbfc4e
RK
400/* At the beginning of a line, increment the line number
401 and process any #-directive on this line.
402 If the line is a #-directive, read the entire line and return a newline.
0e5921e8 403 Otherwise, return the line's first non-whitespace character. */
e8bbfc4e
RK
404
405int
406check_newline ()
407{
408 register int c;
e8bbfc4e 409
0e5921e8
ZW
410 /* Loop till we get a nonblank, non-directive line. */
411 for (;;)
e8bbfc4e 412 {
0e5921e8
ZW
413 /* Read first nonwhite char on the line. */
414 do
415 c = getch ();
416 while (c == ' ' || c == '\t');
417
418 lex_lineno++;
419 if (c == '#')
420 {
421 process_directive ();
422 return '\n';
423 }
fbb18613 424
0e5921e8
ZW
425 else if (c != '\n')
426 break;
e8bbfc4e 427 }
0e5921e8
ZW
428 return c;
429}
e8bbfc4e 430
0e5921e8
ZW
431static void
432process_directive ()
433{
434 enum cpp_ttype token;
435 tree value;
436 int saw_line;
437 enum { act_none, act_push, act_pop } action;
438 int action_number, l;
439 char *new_file;
440#ifndef NO_IMPLICIT_EXTERN_C
ae54392b 441 int entering_c_header = 0;
0e5921e8
ZW
442#endif
443
fbb18613
JM
444 /* Don't read beyond this line. */
445 saw_line = 0;
446 linemode = 1;
447
0e5921e8 448 token = c_lex (&value);
e8bbfc4e 449
0e5921e8 450 if (token == CPP_NAME)
e8bbfc4e 451 {
fbb18613
JM
452 /* If a letter follows, then if the word here is `line', skip
453 it and ignore it; otherwise, ignore the line, with an error
454 if the word isn't `pragma'. */
75cb8865 455
0e5921e8 456 const char *name = IDENTIFIER_POINTER (value);
75cb8865 457
fbb18613
JM
458 if (!strcmp (name, "pragma"))
459 {
0e5921e8 460 dispatch_pragma ();
30acbc3e 461 goto skipline;
e8bbfc4e 462 }
fbb18613 463 else if (!strcmp (name, "define"))
e8bbfc4e 464 {
0e5921e8 465 debug_define (lex_lineno, GET_DIRECTIVE_LINE ());
fbb18613 466 goto skipline;
e8bbfc4e 467 }
fbb18613 468 else if (!strcmp (name, "undef"))
e8bbfc4e 469 {
0e5921e8 470 debug_undef (lex_lineno, GET_DIRECTIVE_LINE ());
fbb18613 471 goto skipline;
e8bbfc4e 472 }
fbb18613 473 else if (!strcmp (name, "line"))
e8bbfc4e 474 {
fbb18613 475 saw_line = 1;
0e5921e8 476 token = c_lex (&value);
fbb18613 477 goto linenum;
e8bbfc4e 478 }
fbb18613 479 else if (!strcmp (name, "ident"))
e8bbfc4e 480 {
0e5921e8
ZW
481 /* #ident. We expect a string constant here.
482 The pedantic warning and syntax error are now in cpp. */
e8bbfc4e 483
0e5921e8
ZW
484 token = c_lex (&value);
485 if (token != CPP_STRING || TREE_CODE (value) != STRING_CST)
fbb18613 486 goto skipline;
e8bbfc4e 487
0e5921e8 488#ifdef ASM_OUTPUT_IDENT
fbb18613
JM
489 if (! flag_no_ident)
490 {
0e5921e8 491 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
e8bbfc4e 492 }
0e5921e8 493#endif
fbb18613
JM
494
495 /* Skip the rest of this line. */
496 goto skipline;
e8bbfc4e
RK
497 }
498
fbb18613 499 error ("undefined or invalid # directive `%s'", name);
e8bbfc4e
RK
500 goto skipline;
501 }
502
fbb18613
JM
503 /* If the # is the only nonwhite char on the line,
504 just ignore it. Check the new newline. */
0e5921e8 505 if (token == CPP_EOF)
fbb18613
JM
506 goto skipline;
507
e8bbfc4e
RK
508linenum:
509 /* Here we have either `#line' or `# <nonletter>'.
510 In either case, it should be a line number; a digit should follow. */
511
0e5921e8 512 if (token != CPP_NUMBER || TREE_CODE (value) != INTEGER_CST)
fbb18613
JM
513 {
514 error ("invalid #-line");
515 goto skipline;
516 }
e8bbfc4e 517
fbb18613
JM
518 /* subtract one, because it is the following line that
519 gets the specified number */
e8bbfc4e 520
0e5921e8 521 l = TREE_INT_CST_LOW (value) - 1;
e8bbfc4e 522
fbb18613
JM
523 /* More follows: it must be a string constant (filename).
524 It would be neat to use cpplib to quickly process the string, but
525 (1) we don't have a handy tokenization of the string, and
526 (2) I don't know how well that would work in the presense
527 of filenames that contain wide characters. */
e8bbfc4e 528
fbb18613 529 if (saw_line)
e8bbfc4e 530 {
fbb18613
JM
531 /* Don't treat \ as special if we are processing #line 1 "...".
532 If you want it to be treated specially, use # 1 "...". */
533 ignore_escape_flag = 1;
534 }
e8bbfc4e 535
fbb18613 536 /* Read the string constant. */
0e5921e8 537 token = c_lex (&value);
e8bbfc4e 538
fbb18613 539 ignore_escape_flag = 0;
e8bbfc4e 540
0e5921e8 541 if (token == CPP_EOF)
fbb18613
JM
542 {
543 /* No more: store the line number and check following line. */
0e5921e8 544 lex_lineno = l;
fbb18613
JM
545 goto skipline;
546 }
e8bbfc4e 547
0e5921e8 548 if (token != CPP_STRING || TREE_CODE (value) != STRING_CST)
fbb18613
JM
549 {
550 error ("invalid #line");
551 goto skipline;
552 }
e8bbfc4e 553
0e5921e8 554 new_file = TREE_STRING_POINTER (value);
e8bbfc4e 555
fbb18613 556 if (main_input_filename == 0)
0e5921e8 557 main_input_filename = new_file;
e8bbfc4e 558
fbb18613
JM
559 action = act_none;
560 action_number = 0;
e8bbfc4e 561
fbb18613
JM
562 /* Each change of file name
563 reinitializes whether we are now in a system header. */
564 in_system_header = 0;
e8bbfc4e 565
fbb18613
JM
566 if (!read_line_number (&action_number))
567 {
568 /* Update the name in the top element of input_file_stack. */
5b450ae5
RS
569 if (input_file_stack)
570 input_file_stack->name = input_filename;
fbb18613 571 }
5b450ae5 572
fbb18613
JM
573 /* `1' after file name means entering new file.
574 `2' after file name means just left a file. */
e8bbfc4e 575
fbb18613
JM
576 if (action_number == 1)
577 {
578 action = act_push;
579 read_line_number (&action_number);
580 }
581 else if (action_number == 2)
582 {
583 action = act_pop;
584 read_line_number (&action_number);
585 }
586 if (action_number == 3)
587 {
e8bbfc4e 588 /* `3' after file name means this is a system header file. */
fbb18613
JM
589 in_system_header = 1;
590 read_line_number (&action_number);
591 }
0e5921e8
ZW
592#ifndef NO_IMPLICIT_EXTERN_C
593 if (action_number == 4)
594 {
595 /* `4' after file name means this is a C header file. */
596 entering_c_header = 1;
597 read_line_number (&action_number);
598 }
599#endif
e8bbfc4e 600
fbb18613 601 /* Do the actions implied by the preceding numbers. */
fbb18613
JM
602 if (action == act_push)
603 {
0e5921e8
ZW
604 lineno = lex_lineno;
605 push_srcloc (input_filename, 1);
606 input_file_stack->indent_level = indent_level;
fbb18613 607 debug_start_source_file (input_filename);
0e5921e8
ZW
608#ifndef NO_IMPLICIT_EXTERN_C
609 if (c_header_level)
610 ++c_header_level;
611 else if (entering_c_header)
612 {
613 c_header_level = 1;
614 ++pending_lang_change;
615 }
616#endif
fbb18613
JM
617 }
618 else if (action == act_pop)
619 {
620 /* Popping out of a file. */
621 if (input_file_stack->next)
0468bc75 622 {
0e5921e8
ZW
623#ifndef NO_IMPLICIT_EXTERN_C
624 if (c_header_level && --c_header_level == 0)
625 {
626 if (entering_c_header)
627 warning ("badly nested C headers from preprocessor");
628 --pending_lang_change;
629 }
630#endif
631#if 0
632 if (indent_level != input_file_stack->indent_level)
fbb18613
JM
633 {
634 warning_with_file_and_line
0e5921e8
ZW
635 (input_filename, lex_lineno,
636 "This file contains more '%c's than '%c's.",
637 indent_level > input_file_stack->indent_level ? '{' : '}',
638 indent_level > input_file_stack->indent_level ? '}' : '{');
fbb18613 639 }
0e5921e8
ZW
640#endif
641 pop_srcloc ();
fbb18613 642 debug_end_source_file (input_file_stack->line);
0468bc75 643 }
fbb18613
JM
644 else
645 error ("#-lines for entering and leaving files don't match");
e8bbfc4e 646 }
fbb18613 647
0e5921e8
ZW
648 update_header_times (new_file);
649
650 input_filename = new_file;
651 lex_lineno = l;
652
653 /* Hook for C++. */
654 extract_interface_info ();
e8bbfc4e
RK
655
656 /* skip the rest of this line. */
657 skipline:
fbb18613 658 linemode = 0;
fbb18613 659
0e5921e8 660 while (getch () != '\n');
e8bbfc4e 661}
0e5921e8 662#else /* USE_CPPLIB */
ca5b800a 663
0e5921e8
ZW
664/* Not yet handled: #pragma, #define, #undef.
665 No need to deal with linemarkers under normal conditions. */
ca5b800a 666
0e5921e8
ZW
667static void
668cb_ident (pfile, str, len)
669 cpp_reader *pfile ATTRIBUTE_UNUSED;
670 const unsigned char *str;
671 unsigned int len;
672{
673#ifdef ASM_OUTPUT_IDENT
674 if (! flag_no_ident)
675 {
676 /* Convert escapes in the string. */
677 tree value = lex_string ((const char *)str, len, 0);
678 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
679 }
680#endif
681}
ca5b800a 682
0e5921e8
ZW
683static void
684cb_enter_file (pfile)
685 cpp_reader *pfile;
ca5b800a 686{
0e5921e8
ZW
687 cpp_buffer *ip = CPP_BUFFER (pfile);
688 /* Bleah, need a better interface to this. */
689 const char *flags = cpp_syshdr_flags (pfile, ip);
690
691 /* Mustn't stack the main buffer on the input stack. (Ick.) */
692 if (ip->prev)
693 {
694 lex_lineno = lineno = ip->prev->lineno - 1;
695 push_srcloc (ggc_alloc_string (ip->nominal_fname, -1), 1);
696 input_file_stack->indent_level = indent_level;
697 debug_start_source_file (ip->nominal_fname);
698 }
699 else
700 lex_lineno = 1;
701
702 update_header_times (ip->nominal_fname);
703
704 /* Hook for C++. */
705 extract_interface_info ();
706
707 in_system_header = (flags[0] != 0);
708#ifndef NO_IMPLICIT_EXTERN_C
709 if (c_header_level)
710 ++c_header_level;
711 else if (flags[2] != 0)
712 {
713 c_header_level = 1;
714 ++pending_lang_change;
715 }
716#endif
717}
718
719static void
720cb_leave_file (pfile)
721 cpp_reader *pfile;
722{
723 /* Bleah, need a better interface to this. */
724 const char *flags = cpp_syshdr_flags (pfile, CPP_BUFFER (pfile));
725#if 0
726 if (indent_level != input_file_stack->indent_level)
727 {
728 warning_with_file_and_line
729 (input_filename, lex_lineno,
730 "This file contains more '%c's than '%c's.",
731 indent_level > input_file_stack->indent_level ? '{' : '}',
732 indent_level > input_file_stack->indent_level ? '}' : '{');
733 }
734#endif
735 /* We get called for the main buffer, but we mustn't pop it. */
736 if (input_file_stack->next)
737 pop_srcloc ();
738 in_system_header = (flags[0] != 0);
739#ifndef NO_IMPLICIT_EXTERN_C
740 if (c_header_level && --c_header_level == 0)
741 {
742 if (flags[2] != 0)
743 warning ("badly nested C headers from preprocessor");
744 --pending_lang_change;
745 }
746#endif
747 lex_lineno = CPP_BUFFER (pfile)->lineno;
748 debug_end_source_file (input_file_stack->line);
749
750 update_header_times (input_file_stack->name);
751 /* Hook for C++. */
752 extract_interface_info ();
753}
754
755static void
756cb_rename_file (pfile)
757 cpp_reader *pfile;
758{
759 cpp_buffer *ip = CPP_BUFFER (pfile);
760 /* Bleah, need a better interface to this. */
761 const char *flags = cpp_syshdr_flags (pfile, ip);
762 input_filename = ggc_alloc_string (ip->nominal_fname, -1);
763 lex_lineno = ip->lineno;
764 in_system_header = (flags[0] != 0);
765
766 update_header_times (ip->nominal_fname);
767 /* Hook for C++. */
768 extract_interface_info ();
769}
8b97c5f8
ZW
770
771static void
772cb_def_pragma (pfile)
773 cpp_reader *pfile;
774{
775 /* Issue a warning message if we have been asked to do so. Ignore
776 unknown pragmas in system headers unless an explicit
777 -Wunknown-pragmas has been given. */
778 if (warn_unknown_pragmas > in_system_header)
779 {
780 const unsigned char *space, *name;
781 const cpp_token *t = pfile->first_directive_token + 2;
782
783 space = t[0].val.node->name;
784 name = t[1].type == CPP_NAME ? t[1].val.node->name : 0;
785 if (name)
786 warning ("ignoring #pragma %s %s", space, name);
787 else
788 warning ("ignoring #pragma %s", space);
789 }
790}
0e5921e8
ZW
791#endif /* USE_CPPLIB */
792
/* Parse the hex digits of a '\uNNNN' or '\UNNNNNNNN' sequence.

   P points just past the 'u' or 'U', LIMIT is the end of the available
   text, and LENGTH is the number of hex digits expected (4 or 8).  The
   decoded value is stored in *CPTR and the updated input pointer is
   returned.  Running out of text or meeting a non-hex digit is reported
   as an error and ends the scan early; the offending character is not
   consumed.

   [lex.charset]: The character designated by the universal-character-name
   \UNNNNNNNN is that character whose character short name in ISO/IEC 10646
   is NNNNNNNN; the character designated by the universal-character-name
   \uNNNN is that character whose character short name in ISO/IEC 10646 is
   0000NNNN.  If the hexadecimal value for a universal character name is
   less than 0x20 or in the range 0x7F-0x9F (inclusive), or if the
   universal character name designates a character in the basic source
   character set, then the program is ill-formed.

   We assume that wchar_t is Unicode, so we don't need to do any
   mapping.  Is this ever wrong?  */

static const char *
read_ucs (p, limit, cptr, length)
     const char *p;
     const char *limit;
     unsigned int *cptr;
     int length;
{
  unsigned int code = 0;
  int c;

  while (length != 0)
    {
      if (p >= limit)
        {
          error ("incomplete universal-character-name");
          break;
        }

      c = *p;
      if (! ISXDIGIT (c))
        {
          error ("non hex digit '%c' in universal-character-name", c);
          break;
        }
      p++;

      code <<= 4;
      if (c >= 'a' && c <= 'f')
        code += c - 'a' + 10;
      else if (c >= 'A' && c <= 'F')
        code += c - 'A' + 10;
      else if (c >= '0' && c <= '9')
        code += c - '0';

      --length;
    }

#ifdef TARGET_EBCDIC
  sorry ("universal-character-name on EBCDIC target");
  *cptr = 0x3f;  /* EBCDIC invalid character */
  return p;
#endif

  if (code >= 0x20 && code < 0x7f)
    {
      /* ASCII printable character.  The C character set consists of all of
         these except $, @ and `.  We use hex escapes so that this also
         works with EBCDIC hosts.  */
      if (! (code == 0x24 || code == 0x40 || code == 0x60))
        error ("universal-character-name used for '%c'", code);
    }
  else if (code <= 0x9f || (code & 0x80000000) != 0)
    /* Control characters and values with the top bit set are rejected;
       everything else above 0x9f is a true extended character.  */
    error ("invalid universal-character-name");

  *cptr = code;
  return p;
}
e8bbfc4e 864
0e5921e8
ZW
865/* Read an escape sequence and write its character equivalent into *CPTR.
866 P is the input pointer, which is just after the backslash. LIMIT
867 is how much text we have.
868 Returns the updated input pointer. */
e8bbfc4e 869
0e5921e8
ZW
870static const char *
871readescape (p, limit, cptr)
872 const char *p;
873 const char *limit;
874 unsigned int *cptr;
e8bbfc4e 875{
0e5921e8 876 unsigned int c, code, count;
1c7b145e 877 unsigned firstdig = 0;
8696da34 878 int nonnull;
e8bbfc4e 879
0e5921e8
ZW
880 if (p == limit)
881 {
882 /* cpp has already issued an error for this. */
883 *cptr = 0;
884 return p;
885 }
886
887 c = *p++;
888
e8bbfc4e
RK
889 switch (c)
890 {
891 case 'x':
cde6e684 892 if (warn_traditional && !in_system_header)
e8bbfc4e
RK
893 warning ("the meaning of `\\x' varies with -traditional");
894
895 if (flag_traditional)
0e5921e8
ZW
896 {
897 *cptr = 'x';
898 return p;
899 }
e8bbfc4e
RK
900
901 code = 0;
902 count = 0;
8696da34 903 nonnull = 0;
0e5921e8 904 while (p < limit)
e8bbfc4e 905 {
0e5921e8 906 c = *p++;
fbb18613 907 if (! ISXDIGIT (c))
e8bbfc4e 908 {
0e5921e8 909 p--;
e8bbfc4e
RK
910 break;
911 }
912 code *= 16;
913 if (c >= 'a' && c <= 'f')
914 code += c - 'a' + 10;
915 if (c >= 'A' && c <= 'F')
916 code += c - 'A' + 10;
917 if (c >= '0' && c <= '9')
918 code += c - '0';
8696da34
RS
919 if (code != 0 || count != 0)
920 {
921 if (count == 0)
922 firstdig = code;
923 count++;
924 }
925 nonnull = 1;
e8bbfc4e 926 }
8696da34 927 if (! nonnull)
4082292a
AO
928 {
929 warning ("\\x used with no following hex digits");
0e5921e8
ZW
930 *cptr = 'x';
931 return p;
4082292a 932 }
be63d912
RS
933 else if (count == 0)
934 /* Digits are all 0's. Ok. */
935 ;
e8bbfc4e
RK
936 else if ((count - 1) * 4 >= TYPE_PRECISION (integer_type_node)
937 || (count > 1
fbb18613
JM
938 && (((unsigned)1
939 << (TYPE_PRECISION (integer_type_node)
940 - (count - 1) * 4))
e8bbfc4e
RK
941 <= firstdig)))
942 pedwarn ("hex escape out of range");
0e5921e8
ZW
943 *cptr = code;
944 return p;
e8bbfc4e
RK
945
946 case '0': case '1': case '2': case '3': case '4':
947 case '5': case '6': case '7':
948 code = 0;
0e5921e8 949 for (count = 0; count < 3; count++)
e8bbfc4e 950 {
0e5921e8
ZW
951 if (c < '0' || c > '7')
952 {
953 p--;
954 break;
955 }
e8bbfc4e 956 code = (code * 8) + (c - '0');
0e5921e8
ZW
957 if (p == limit)
958 break;
959 c = *p++;
e8bbfc4e 960 }
e8bbfc4e 961
0e5921e8
ZW
962 if (count == 3)
963 p--;
e8bbfc4e 964
0e5921e8
ZW
965 *cptr = code;
966 return p;
e8bbfc4e 967
0e5921e8
ZW
968 case '\\': case '\'': case '"': case '?':
969 *cptr = c;
970 return p;
e8bbfc4e 971
0e5921e8
ZW
972 case 'n': *cptr = TARGET_NEWLINE; return p;
973 case 't': *cptr = TARGET_TAB; return p;
974 case 'r': *cptr = TARGET_CR; return p;
975 case 'f': *cptr = TARGET_FF; return p;
976 case 'b': *cptr = TARGET_BS; return p;
977 case 'v': *cptr = TARGET_VT; return p;
978 case 'a':
979 if (warn_traditional && !in_system_header)
980 warning ("the meaning of '\\a' varies with -traditional");
981 *cptr = flag_traditional ? c : TARGET_BELL;
982 return p;
e8bbfc4e 983
0e5921e8
ZW
984 /* Warnings and support checks handled by read_ucs(). */
985 case 'u': case 'U':
986 if (c_language != clk_cplusplus && !flag_isoc99)
987 break;
e8bbfc4e 988
cde6e684 989 if (warn_traditional && !in_system_header)
0e5921e8 990 warning ("the meaning of '\\%c' varies with -traditional", c);
e8bbfc4e 991
0e5921e8
ZW
992 return read_ucs (p, limit, cptr, c == 'u' ? 4 : 8);
993
994 case 'e': case 'E':
dad112ca 995 if (pedantic)
0e5921e8
ZW
996 pedwarn ("non-ISO-standard escape sequence, '\\%c'", c);
997 *cptr = TARGET_ESC; return p;
e8bbfc4e 998
0e5921e8
ZW
999 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1000 '\%' is used to prevent SCCS from getting confused. */
1001 case '(': case '{': case '[': case '%':
e8bbfc4e 1002 if (pedantic)
0e5921e8
ZW
1003 pedwarn ("unknown escape sequence '\\%c'", c);
1004 *cptr = c;
1005 return p;
e8bbfc4e 1006 }
0e5921e8 1007
fbb18613 1008 if (ISGRAPH (c))
0e5921e8 1009 pedwarn ("unknown escape sequence '\\%c'", c);
e8bbfc4e 1010 else
0e5921e8
ZW
1011 pedwarn ("unknown escape sequence: '\\' followed by char 0x%.2x", c);
1012
1013 *cptr = c;
1014 return p;
e8bbfc4e 1015}
0e5921e8
ZW
1016
1017#if 0 /* not yet */
/* Returns nonzero if C is a universal-character-name.  Give an error if it
   is not one which may appear in an identifier, as per [extendid].

   Return value:
     0  C is in the ASCII range 0x00..0x7f (or the target is EBCDIC, in
	which case the answer is always 0);
     1  C is outside ASCII.  If C is not matched by any of the tables
	below, an error is reported before returning 1.

   Note that extended character support in identifiers has not yet been
   implemented.  It is my personal opinion that this is not a desirable
   feature.  Portable code cannot count on support for more than the basic
   identifier character set.  */

static inline int
is_extended_char (c)
     int c;
{
#ifdef TARGET_EBCDIC
  return 0;
#else
  /* ASCII.  0x7f (DEL) is still ASCII, so the bound is inclusive; this
     is the same one-byte boundary utf8_extend_token uses.  */
  if (c <= 0x7f)
    return 0;

  /* None of the valid chars are outside the Basic Multilingual Plane (the
     low 16 bits).  */
  if (c > 0xffff)
    {
      error ("universal-character-name '\\U%08x' not valid in identifier", c);
      return 1;
    }

  /* Latin */
  if ((c >= 0x00c0 && c <= 0x00d6)
      || (c >= 0x00d8 && c <= 0x00f6)
      || (c >= 0x00f8 && c <= 0x01f5)
      || (c >= 0x01fa && c <= 0x0217)
      || (c >= 0x0250 && c <= 0x02a8)
      || (c >= 0x1e00 && c <= 0x1e9a)
      || (c >= 0x1ea0 && c <= 0x1ef9))
    return 1;

  /* Greek */
  if ((c == 0x0384)
      || (c >= 0x0388 && c <= 0x038a)
      || (c == 0x038c)
      || (c >= 0x038e && c <= 0x03a1)
      || (c >= 0x03a3 && c <= 0x03ce)
      || (c >= 0x03d0 && c <= 0x03d6)
      || (c == 0x03da)
      || (c == 0x03dc)
      || (c == 0x03de)
      || (c == 0x03e0)
      || (c >= 0x03e2 && c <= 0x03f3)
      || (c >= 0x1f00 && c <= 0x1f15)
      || (c >= 0x1f18 && c <= 0x1f1d)
      || (c >= 0x1f20 && c <= 0x1f45)
      || (c >= 0x1f48 && c <= 0x1f4d)
      || (c >= 0x1f50 && c <= 0x1f57)
      || (c == 0x1f59)
      || (c == 0x1f5b)
      || (c == 0x1f5d)
      || (c >= 0x1f5f && c <= 0x1f7d)
      || (c >= 0x1f80 && c <= 0x1fb4)
      || (c >= 0x1fb6 && c <= 0x1fbc)
      || (c >= 0x1fc2 && c <= 0x1fc4)
      || (c >= 0x1fc6 && c <= 0x1fcc)
      || (c >= 0x1fd0 && c <= 0x1fd3)
      || (c >= 0x1fd6 && c <= 0x1fdb)
      || (c >= 0x1fe0 && c <= 0x1fec)
      || (c >= 0x1ff2 && c <= 0x1ff4)
      || (c >= 0x1ff6 && c <= 0x1ffc))
    return 1;

  /* Cyrillic */
  if ((c >= 0x0401 && c <= 0x040d)
      || (c >= 0x040f && c <= 0x044f)
      || (c >= 0x0451 && c <= 0x045c)
      || (c >= 0x045e && c <= 0x0481)
      || (c >= 0x0490 && c <= 0x04c4)
      || (c >= 0x04c7 && c <= 0x04c8)
      || (c >= 0x04cb && c <= 0x04cc)
      || (c >= 0x04d0 && c <= 0x04eb)
      || (c >= 0x04ee && c <= 0x04f5)
      || (c >= 0x04f8 && c <= 0x04f9))
    return 1;

  /* Armenian */
  if ((c >= 0x0531 && c <= 0x0556)
      || (c >= 0x0561 && c <= 0x0587))
    return 1;

  /* Hebrew */
  if ((c >= 0x05d0 && c <= 0x05ea)
      || (c >= 0x05f0 && c <= 0x05f4))
    return 1;

  /* Arabic */
  if ((c >= 0x0621 && c <= 0x063a)
      || (c >= 0x0640 && c <= 0x0652)
      || (c >= 0x0670 && c <= 0x06b7)
      || (c >= 0x06ba && c <= 0x06be)
      || (c >= 0x06c0 && c <= 0x06ce)
      || (c >= 0x06e5 && c <= 0x06e7))
    return 1;

  /* Devanagari */
  if ((c >= 0x0905 && c <= 0x0939)
      || (c >= 0x0958 && c <= 0x0962))
    return 1;

  /* Bengali */
  if ((c >= 0x0985 && c <= 0x098c)
      || (c >= 0x098f && c <= 0x0990)
      || (c >= 0x0993 && c <= 0x09a8)
      || (c >= 0x09aa && c <= 0x09b0)
      || (c == 0x09b2)
      || (c >= 0x09b6 && c <= 0x09b9)
      || (c >= 0x09dc && c <= 0x09dd)
      || (c >= 0x09df && c <= 0x09e1)
      || (c >= 0x09f0 && c <= 0x09f1))
    return 1;

  /* Gurmukhi */
  if ((c >= 0x0a05 && c <= 0x0a0a)
      || (c >= 0x0a0f && c <= 0x0a10)
      || (c >= 0x0a13 && c <= 0x0a28)
      || (c >= 0x0a2a && c <= 0x0a30)
      || (c >= 0x0a32 && c <= 0x0a33)
      || (c >= 0x0a35 && c <= 0x0a36)
      || (c >= 0x0a38 && c <= 0x0a39)
      || (c >= 0x0a59 && c <= 0x0a5c)
      || (c == 0x0a5e))
    return 1;

  /* Gujarati */
  if ((c >= 0x0a85 && c <= 0x0a8b)
      || (c == 0x0a8d)
      || (c >= 0x0a8f && c <= 0x0a91)
      || (c >= 0x0a93 && c <= 0x0aa8)
      || (c >= 0x0aaa && c <= 0x0ab0)
      || (c >= 0x0ab2 && c <= 0x0ab3)
      || (c >= 0x0ab5 && c <= 0x0ab9)
      || (c == 0x0ae0))
    return 1;

  /* Oriya */
  if ((c >= 0x0b05 && c <= 0x0b0c)
      || (c >= 0x0b0f && c <= 0x0b10)
      || (c >= 0x0b13 && c <= 0x0b28)
      || (c >= 0x0b2a && c <= 0x0b30)
      || (c >= 0x0b32 && c <= 0x0b33)
      || (c >= 0x0b36 && c <= 0x0b39)
      || (c >= 0x0b5c && c <= 0x0b5d)
      || (c >= 0x0b5f && c <= 0x0b61))
    return 1;

  /* Tamil */
  if ((c >= 0x0b85 && c <= 0x0b8a)
      || (c >= 0x0b8e && c <= 0x0b90)
      || (c >= 0x0b92 && c <= 0x0b95)
      || (c >= 0x0b99 && c <= 0x0b9a)
      || (c == 0x0b9c)
      || (c >= 0x0b9e && c <= 0x0b9f)
      || (c >= 0x0ba3 && c <= 0x0ba4)
      || (c >= 0x0ba8 && c <= 0x0baa)
      || (c >= 0x0bae && c <= 0x0bb5)
      || (c >= 0x0bb7 && c <= 0x0bb9))
    return 1;

  /* Telugu */
  if ((c >= 0x0c05 && c <= 0x0c0c)
      || (c >= 0x0c0e && c <= 0x0c10)
      || (c >= 0x0c12 && c <= 0x0c28)
      || (c >= 0x0c2a && c <= 0x0c33)
      || (c >= 0x0c35 && c <= 0x0c39)
      || (c >= 0x0c60 && c <= 0x0c61))
    return 1;

  /* Kannada */
  if ((c >= 0x0c85 && c <= 0x0c8c)
      || (c >= 0x0c8e && c <= 0x0c90)
      || (c >= 0x0c92 && c <= 0x0ca8)
      || (c >= 0x0caa && c <= 0x0cb3)
      || (c >= 0x0cb5 && c <= 0x0cb9)
      || (c >= 0x0ce0 && c <= 0x0ce1))
    return 1;

  /* Malayalam */
  if ((c >= 0x0d05 && c <= 0x0d0c)
      || (c >= 0x0d0e && c <= 0x0d10)
      || (c >= 0x0d12 && c <= 0x0d28)
      || (c >= 0x0d2a && c <= 0x0d39)
      || (c >= 0x0d60 && c <= 0x0d61))
    return 1;

  /* Thai */
  if ((c >= 0x0e01 && c <= 0x0e30)
      || (c >= 0x0e32 && c <= 0x0e33)
      || (c >= 0x0e40 && c <= 0x0e46)
      || (c >= 0x0e4f && c <= 0x0e5b))
    return 1;

  /* Lao.  The single letter between 0x0e8a and 0x0e94 is 0x0e8d; the
     value 0x0e0d that used to be tested here is a Thai letter already
     accepted above, which left 0x0e8d wrongly rejected.  */
  if ((c >= 0x0e81 && c <= 0x0e82)
      || (c == 0x0e84)
      || (c == 0x0e87)
      || (c == 0x0e88)
      || (c == 0x0e8a)
      || (c == 0x0e8d)
      || (c >= 0x0e94 && c <= 0x0e97)
      || (c >= 0x0e99 && c <= 0x0e9f)
      || (c >= 0x0ea1 && c <= 0x0ea3)
      || (c == 0x0ea5)
      || (c == 0x0ea7)
      || (c == 0x0eaa)
      || (c == 0x0eab)
      || (c >= 0x0ead && c <= 0x0eb0)
      || (c == 0x0eb2)
      || (c == 0x0eb3)
      || (c == 0x0ebd)
      || (c >= 0x0ec0 && c <= 0x0ec4)
      || (c == 0x0ec6))
    return 1;

  /* Georgian */
  if ((c >= 0x10a0 && c <= 0x10c5)
      || (c >= 0x10d0 && c <= 0x10f6))
    return 1;

  /* Hiragana */
  if ((c >= 0x3041 && c <= 0x3094)
      || (c >= 0x309b && c <= 0x309e))
    return 1;

  /* Katakana */
  if ((c >= 0x30a1 && c <= 0x30fe))
    return 1;

  /* Bopomofo */
  if ((c >= 0x3105 && c <= 0x312c))
    return 1;

  /* Hangul */
  if ((c >= 0x1100 && c <= 0x1159)
      || (c >= 0x1161 && c <= 0x11a2)
      || (c >= 0x11a8 && c <= 0x11f9))
    return 1;

  /* CJK Unified Ideographs */
  if ((c >= 0xf900 && c <= 0xfa2d)
      || (c >= 0xfb1f && c <= 0xfb36)
      || (c >= 0xfb38 && c <= 0xfb3c)
      || (c == 0xfb3e)
      || (c >= 0xfb40 && c <= 0xfb41)
      || (c >= 0xfb42 && c <= 0xfb44)
      || (c >= 0xfb46 && c <= 0xfbb1)
      || (c >= 0xfbd3 && c <= 0xfd3f)
      || (c >= 0xfd50 && c <= 0xfd8f)
      || (c >= 0xfd92 && c <= 0xfdc7)
      || (c >= 0xfdf0 && c <= 0xfdfb)
      || (c >= 0xfe70 && c <= 0xfe72)
      || (c == 0xfe74)
      || (c >= 0xfe76 && c <= 0xfefc)
      || (c >= 0xff21 && c <= 0xff3a)
      || (c >= 0xff41 && c <= 0xff5a)
      || (c >= 0xff66 && c <= 0xffbe)
      || (c >= 0xffc2 && c <= 0xffc7)
      || (c >= 0xffca && c <= 0xffcf)
      || (c >= 0xffd2 && c <= 0xffd7)
      || (c >= 0xffda && c <= 0xffdc)
      || (c >= 0x4e00 && c <= 0x9fa5))
    return 1;

  /* Not in any table: still a universal-character-name (hence the
     nonzero return), but not one permitted in an identifier.  */
  error ("universal-character-name '\\u%04x' not valid in identifier", c);
  return 1;
#endif
}
1291
/* Add the UTF-8 representation of C to the token_buffer.

   Values up to 0x7f are appended unchanged as a single byte.  Larger
   values are written as a lead byte followed by one to five
   continuation bytes, each appended with extend_token.  */

static void
utf8_extend_token (c)
     int c;
{
  int shift, mask;

  /* Pick the lead-byte marker bits (MASK) and the bit position of the
     payload carried by the lead byte (SHIFT) from the magnitude of C.  */
  if (c <= 0x0000007f)
    {
      extend_token (c);
      return;
    }
  else if (c <= 0x000007ff)
    shift = 6, mask = 0xc0;
  else if (c <= 0x0000ffff)
    shift = 12, mask = 0xe0;
  else if (c <= 0x001fffff)
    shift = 18, mask = 0xf0;
  else if (c <= 0x03ffffff)
    shift = 24, mask = 0xf8;
  else
    shift = 30, mask = 0xfc;

  extend_token (mask | (c >> shift));
  do
    {
      shift -= 6;
      /* A continuation byte is 10xxxxxx: only six payload bits may be
	 stored.  Without the 0x3f mask the bits above them leak into
	 the byte (e.g. 0xff was emitted as C3 FF instead of C3 BF).  */
      extend_token ((unsigned char) (0x80 | ((c >> shift) & 0x3f)));
    }
  while (shift);
}
0e5921e8 1324#endif
e8bbfc4e
RK
1325
1326#if 0
e8bbfc4e
RK
1327struct try_type
1328{
1329 tree *node_var;
1330 char unsigned_flag;
1331 char long_flag;
1332 char long_long_flag;
1333};
1334
75cb8865 1335struct try_type type_sequence[] =
e8bbfc4e
RK
1336{
1337 { &integer_type_node, 0, 0, 0},
1338 { &unsigned_type_node, 1, 0, 0},
1339 { &long_integer_type_node, 0, 1, 0},
1340 { &long_unsigned_type_node, 1, 1, 0},
1341 { &long_long_integer_type_node, 0, 1, 1},
1342 { &long_long_unsigned_type_node, 1, 1, 1}
1343};
1344#endif /* 0 */
1345\f
ed513abf
KG
1346struct pf_args
1347{
1348 /* Input */
0e5921e8
ZW
1349 const char *str;
1350 int fflag;
1351 int lflag;
ed513abf 1352 int base;
fbb18613 1353 /* Output */
ed513abf 1354 int conversion_errno;
ed513abf 1355 REAL_VALUE_TYPE value;
0e5921e8 1356 tree type;
ed513abf
KG
1357};
1358
1359static void
1360parse_float (data)
1361 PTR data;
1362{
1363 struct pf_args * args = (struct pf_args *) data;
0e5921e8
ZW
1364 const char *typename;
1365
fbb18613
JM
1366 args->conversion_errno = 0;
1367 args->type = double_type_node;
0e5921e8 1368 typename = "double";
ed513abf
KG
1369
1370 /* The second argument, machine_mode, of REAL_VALUE_ATOF
1371 tells the desired precision of the binary result
1372 of decimal-to-binary conversion. */
1373
0e5921e8 1374 if (args->fflag)
ed513abf 1375 {
0e5921e8
ZW
1376 if (args->lflag)
1377 error ("both 'f' and 'l' suffixes on floating constant");
ed513abf
KG
1378
1379 args->type = float_type_node;
0e5921e8 1380 typename = "float";
ed513abf 1381 }
0e5921e8 1382 else if (args->lflag)
ed513abf
KG
1383 {
1384 args->type = long_double_type_node;
0e5921e8 1385 typename = "long double";
ed513abf 1386 }
0e5921e8 1387 else if (flag_single_precision_constant)
ed513abf 1388 {
0e5921e8
ZW
1389 args->type = float_type_node;
1390 typename = "float";
ed513abf 1391 }
0e5921e8
ZW
1392
1393 errno = 0;
1394 if (args->base == 16)
1395 args->value = REAL_VALUE_HTOF (args->str, TYPE_MODE (args->type));
1396 else
1397 args->value = REAL_VALUE_ATOF (args->str, TYPE_MODE (args->type));
1398
1399 args->conversion_errno = errno;
1400 /* A diagnostic is required here by some ISO C testsuites.
1401 This is not pedwarn, because some people don't want
1402 an error for this. */
1403 if (REAL_VALUE_ISINF (args->value) && pedantic)
1404 warning ("floating point number exceeds range of '%s'", typename);
ed513abf
KG
1405}
1406
0e5921e8
ZW
/* Read the next token and return its token code (an enum cpp_ttype
   value such as CPP_NAME or CPP_EOF).  *VALUE is first cleared to
   NULL_TREE and then, for names, numbers, character constants and
   strings, set to the tree built for the token.

   When USE_CPPLIB is nonzero the token comes from cpp_get_token;
   otherwise characters are read one at a time with getch and
   tokenized here.  In both variants a stray character is reported
   with error () and lexing restarts at the `retry' label.  */
1407int
1408c_lex (value)
1409 tree *value;
fbb18613
JM
1410{
1411#if USE_CPPLIB
0e5921e8
ZW
1412 const cpp_token *tok;
1413 enum cpp_ttype type;
1414
1415 retry:
1416 timevar_push (TV_CPP);
1417 tok = cpp_get_token (&parse_in);
1418 timevar_pop (TV_CPP);
1419
1420 /* The C++ front end does horrible things with the current line
1421 number. To ensure an accurate line number, we must reset it
1422 every time we return a token. If we reset it from tok->line
1423 every time, we'll get line numbers inside macros referring to the
1424 macro definition; this is nice, but we don't want to change the
1425 behavior until integrated mode is the only option. So we keep our
1426 own idea of the line number, and reset it from tok->line at each
1427 new line (which never happens inside a macro). */
1428 if (tok->flags & BOL)
1429 lex_lineno = tok->line;
1430
1431 *value = NULL_TREE;
1432 lineno = lex_lineno;
1433 type = tok->type;
1434 switch (type)
1435 {
1436 case CPP_OPEN_BRACE: indent_level++; break;
1437 case CPP_CLOSE_BRACE: indent_level--; break;
1438
1439 /* Issue this error here, where we can get at tok->val.aux. */
1440 case CPP_OTHER:
1441 if (ISGRAPH (tok->val.aux))
1442 error ("stray '%c' in program", tok->val.aux);
1443 else
1444 error ("stray '\\%#o' in program", tok->val.aux);
1445 goto retry;
1446
1447 case CPP_DEFINED:
1448 type = CPP_NAME;
/* CPP_DEFINED is reported to the caller as CPP_NAME; the missing
   break is deliberate so that it shares the CPP_NAME handling.  */
1449 case CPP_NAME:
1450 *value = get_identifier ((const char *)tok->val.node->name);
1451 break;
fbb18613 1452
0e5921e8
ZW
1453 case CPP_INT:
1454 case CPP_FLOAT:
1455 case CPP_NUMBER:
1456 *value = lex_number ((const char *)tok->val.str.text, tok->val.str.len);
1457 break;
93868d11 1458
0e5921e8
ZW
1459 case CPP_CHAR:
1460 case CPP_WCHAR:
1461 *value = lex_charconst ((const char *)tok->val.str.text,
1462 tok->val.str.len, tok->type == CPP_WCHAR);
1463 break;
fbb18613 1464
0e5921e8
ZW
1465 case CPP_STRING:
1466 case CPP_WSTRING:
1467 case CPP_OSTRING:
1468 *value = lex_string ((const char *)tok->val.str.text,
1469 tok->val.str.len, tok->type == CPP_WSTRING);
1470 break;
fbb18613 1471
0e5921e8
ZW
1472 /* These tokens should not be visible outside cpplib. */
1473 case CPP_HEADER_NAME:
1474 case CPP_COMMENT:
1475 case CPP_MACRO_ARG:
1476 case CPP_PLACEMARKER:
1477 abort ();
1478
1479 default: break;
1480 }
1481
1482 return type;
1483
1484#else
/* Hand-written lexer, used when cpplib is not supplying the tokens.  */
1485 int c;
1486 char *p;
e8bbfc4e 1487 int wide_flag = 0;
e31c7eec 1488 int objc_flag = 0;
0e5921e8
ZW
1489 int charconst = 0;
1490
1491 *value = NULL_TREE;
e8bbfc4e 1492
0e5921e8
ZW
1493 retry:
1494 c = getch ();
e8bbfc4e
RK
1495
1496 /* Effectively do c = skip_white_space (c)
1497 but do it faster in the usual cases. */
1498 while (1)
1499 switch (c)
1500 {
e8bbfc4e
RK
1501 case ' ':
1502 case '\t':
1503 case '\f':
1504 case '\v':
0e5921e8 1505 c = getch ();
e8bbfc4e
RK
1506 break;
1507
1508 case '\n':
e8bbfc4e
RK
1509 c = skip_white_space (c);
/* No break: after the newline has been handed to skip_white_space
   we leave the loop through the default case.  */
1510 default:
1511 goto found_nonwhite;
1512 }
1513 found_nonwhite:
1514
0e5921e8 1515 lineno = lex_lineno;
e8bbfc4e
RK
1516
1517 switch (c)
1518 {
1519 case EOF:
0e5921e8 1520 return CPP_EOF;
e8bbfc4e 1521
e8bbfc4e
RK
1522 case 'L':
1523 /* Capital L may start a wide-string or wide-character constant. */
1524 {
0e5921e8
ZW
1525 register int c1 = getch();
1526 if (c1 == '\'')
e8bbfc4e
RK
1527 {
1528 wide_flag = 1;
1529 goto char_constant;
1530 }
0e5921e8 1531 if (c1 == '"')
e8bbfc4e
RK
1532 {
1533 wide_flag = 1;
1534 goto string_constant;
1535 }
0e5921e8 1536 put_back (c1);
e8bbfc4e
RK
1537 }
1538 goto letter;
1539
1540 case '@':
1541 if (!doing_objc_thang)
0e5921e8 1542 goto straychar;
e31c7eec 1543 else
e8bbfc4e 1544 {
e31c7eec 1545 /* '@' may start a constant string object. */
0e5921e8
ZW
1546 register int c1 = getch ();
1547 if (c1 == '"')
e31c7eec
TW
1548 {
1549 objc_flag = 1;
1550 goto string_constant;
1551 }
0e5921e8 1552 put_back (c1);
ddd5a7c1 1553 /* Fall through to treat '@' as the start of an identifier. */
e8bbfc4e
RK
1554 }
1555
e8bbfc4e
RK
1556 case 'A': case 'B': case 'C': case 'D': case 'E':
1557 case 'F': case 'G': case 'H': case 'I': case 'J':
1558 case 'K': case 'M': case 'N': case 'O':
1559 case 'P': case 'Q': case 'R': case 'S': case 'T':
1560 case 'U': case 'V': case 'W': case 'X': case 'Y':
1561 case 'Z':
1562 case 'a': case 'b': case 'c': case 'd': case 'e':
1563 case 'f': case 'g': case 'h': case 'i': case 'j':
1564 case 'k': case 'l': case 'm': case 'n': case 'o':
1565 case 'p': case 'q': case 'r': case 's': case 't':
1566 case 'u': case 'v': case 'w': case 'x': case 'y':
1567 case 'z':
1568 case '_':
f84cddb9 1569 case '$':
e8bbfc4e 1570 letter:
/* Collect the identifier into token_buffer, growing the buffer as
   needed, then intern it with get_identifier.  */
0e5921e8
ZW
1571 p = token_buffer;
1572 while (ISALNUM (c) || c == '_' || c == '$' || c == '@')
e8bbfc4e 1573 {
0e5921e8
ZW
1574 /* Make sure this char really belongs in an identifier. */
1575 if (c == '$')
f84cddb9 1576 {
0e5921e8
ZW
1577 if (! dollars_in_ident)
1578 error ("'$' in identifier");
1579 else if (pedantic)
1580 pedwarn ("'$' in identifier");
fbb18613 1581 }
e8bbfc4e 1582
0e5921e8
ZW
1583 if (p >= token_buffer + maxtoken)
1584 p = extend_token_buffer (p);
e8bbfc4e 1585
0e5921e8
ZW
1586 *p++ = c;
1587 c = getch();
1588 }
75cb8865 1589
0e5921e8 1590 put_back (c);
e8bbfc4e 1591
0e5921e8
ZW
1592 if (p >= token_buffer + maxtoken)
1593 p = extend_token_buffer (p);
1594 *p = 0;
e8bbfc4e 1595
0e5921e8
ZW
1596 *value = get_identifier (token_buffer);
1597 return CPP_NAME;
e8bbfc4e 1598
fbb18613 1599 case '.':
fbb18613
JM
1600 {
1601 /* It's hard to preserve tokenization on '.' because
1602 it could be a symbol by itself, or it could be the
1603 start of a floating point number and cpp won't tell us. */
0e5921e8 1604 int c1 = getch ();
fbb18613
JM
1605 if (c1 == '.')
1606 {
0e5921e8
ZW
1607 int c2 = getch ();
1608 if (c2 == '.')
1609 return CPP_ELLIPSIS;
1610
1611 put_back (c2);
1612 error ("parse error at '..'");
fbb18613 1613 }
0e5921e8
ZW
1614 else if (c1 == '*' && c_language == clk_cplusplus)
1615 return CPP_DOT_STAR;
1616
1617 put_back (c1);
fbb18613 1618 if (ISDIGIT (c1))
0e5921e8 1619 goto number;
fbb18613 1620 }
0e5921e8 1621 return CPP_DOT;
d669f5da 1622
0e5921e8 1623 case '0': case '1': case '2': case '3': case '4':
e8bbfc4e 1624 case '5': case '6': case '7': case '8': case '9':
fbb18613 1625 number:
0e5921e8
ZW
1626 p = token_buffer;
1627 /* Scan the next preprocessing number. All C numeric constants
1628 are preprocessing numbers, but not all preprocessing numbers
1629 are valid numeric constants. Preprocessing numbers fit the
1630 regular expression \.?[0-9]([0-9a-zA-Z_.]|[eEpP][+-])*
1631 See C99 section 6.4.8. */
1632 for (;;)
1633 {
1634 if (p >= token_buffer + maxtoken)
1635 p = extend_token_buffer (p);
cc144655 1636
0e5921e8
ZW
1637 *p++ = c;
1638 c = getch();
e8bbfc4e 1639
0e5921e8
ZW
1640 if (c == '+' || c == '-')
1641 {
1642 int d = p[-1];
1643 if (d == 'e' || d == 'E' || d == 'p' || d == 'P')
1644 continue;
1645 }
1646 if (ISALNUM (c) || c == '_' || c == '.')
1647 continue;
1648 break;
1649 }
1650 put_back (c);
e8bbfc4e 1651
0e5921e8
ZW
1652 *value = lex_number (token_buffer, p - token_buffer);
1653 return CPP_NUMBER;
e8bbfc4e 1654
0e5921e8
ZW
1655 case '\'':
1656 char_constant:
1657 charconst = 1;
/* No break: character constants share the scanning code below with
   strings; CHARCONST selects the closing delimiter and the result.  */
e8bbfc4e 1658
0e5921e8
ZW
1659 case '"':
1660 string_constant:
1661 {
1662 int delimiter = charconst ? '\'' : '"';
1663#ifdef MULTIBYTE_CHARS
1664 int longest_char = local_mb_cur_max ();
1665 (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
1666#endif
1667 c = getch ();
/* The body is stored from token_buffer[1] onwards; the same
   token_buffer + 1 is what is later passed to lex_charconst and
   lex_string.  */
1668 p = token_buffer + 1;
e8bbfc4e 1669
/* The scan also stops at EOF; nothing in this function diagnoses a
   constant that is cut short that way.  */
0e5921e8 1670 while (c != delimiter && c != EOF)
e8bbfc4e 1671 {
0e5921e8
ZW
1672 if (p + 2 > token_buffer + maxtoken)
1673 p = extend_token_buffer (p);
1674
1675 /* ignore_escape_flag is set for reading the filename in #line. */
1676 if (!ignore_escape_flag && c == '\\')
e8bbfc4e 1677 {
0e5921e8
ZW
1678 *p++ = c;
1679 *p++ = getch (); /* escaped character */
1680 c = getch ();
1681 continue;
e8bbfc4e
RK
1682 }
1683 else
1684 {
0e5921e8
ZW
1685#ifdef MULTIBYTE_CHARS
1686 int i;
1687 int char_len = -1;
1688 for (i = 0; i < longest_char; ++i)
e8bbfc4e 1689 {
0e5921e8
ZW
1690 if (p + i >= token_buffer + maxtoken)
1691 p = extend_token_buffer (p);
1692 p[i] = c;
e8bbfc4e 1693
0e5921e8
ZW
1694 char_len = local_mblen (p, i + 1);
1695 if (char_len != -1)
1696 break;
1697 c = getch ();
e8bbfc4e 1698 }
0e5921e8 1699 if (char_len == -1)
cc144655 1700 {
0e5921e8
ZW
1701 /* Replace all except the first byte. */
1702 put_back (c);
1703 for (--i; i > 0; --i)
1704 put_back (p[i]);
1705 char_len = 1;
cc144655 1706 }
0e5921e8
ZW
1707 /* mbtowc sometimes needs an extra char before accepting */
1708 else if (char_len <= i)
1709 put_back (c);
e8bbfc4e 1710
0e5921e8
ZW
1711 p += char_len;
1712#else
1713 *p++ = c;
1714#endif
1715 c = getch ();
e8bbfc4e
RK
1716 }
1717 }
0e5921e8 1718 }
e8bbfc4e 1719
0e5921e8
ZW
1720 if (charconst)
1721 {
1722 *value = lex_charconst (token_buffer + 1, p - (token_buffer + 1),
1723 wide_flag);
1724 return wide_flag ? CPP_WCHAR : CPP_CHAR;
1725 }
1726 else
1727 {
1728 *value = lex_string (token_buffer + 1, p - (token_buffer + 1),
1729 wide_flag);
1730 return wide_flag ? CPP_WSTRING : objc_flag ? CPP_OSTRING : CPP_STRING;
1731 }
e8bbfc4e 1732
0e5921e8
ZW
1733 case '+':
1734 case '-':
1735 case '&':
1736 case '|':
1737 case ':':
1738 case '<':
1739 case '>':
1740 case '*':
1741 case '/':
1742 case '%':
1743 case '^':
1744 case '!':
1745 case '=':
1746 {
1747 int c1;
1748 enum cpp_ttype type = CPP_EOF;
e8bbfc4e 1749
/* Map the first character to its one-character token code, then
   look at the next character for two- and three-character forms.  */
0e5921e8 1750 switch (c)
e8bbfc4e 1751 {
0e5921e8
ZW
1752 case '+': type = CPP_PLUS; break;
1753 case '-': type = CPP_MINUS; break;
1754 case '&': type = CPP_AND; break;
1755 case '|': type = CPP_OR; break;
1756 case ':': type = CPP_COLON; break;
1757 case '<': type = CPP_LESS; break;
1758 case '>': type = CPP_GREATER; break;
1759 case '*': type = CPP_MULT; break;
1760 case '/': type = CPP_DIV; break;
1761 case '%': type = CPP_MOD; break;
1762 case '^': type = CPP_XOR; break;
1763 case '!': type = CPP_NOT; break;
1764 case '=': type = CPP_EQ; break;
1765 }
e8bbfc4e 1766
0e5921e8 1767 c1 = getch ();
e8bbfc4e 1768
/* NOTE(review): the arithmetic below assumes that, for every code
   below CPP_LAST_EQ, the matching "operator followed by =" code lies
   exactly CPP_EQ_EQ - CPP_EQ further on in enum cpp_ttype -- confirm
   against the enum definition in cpplib.h.  */
0e5921e8
ZW
1769 if (c1 == '=' && type < CPP_LAST_EQ)
1770 return type + (CPP_EQ_EQ - CPP_EQ);
1771 else if (c == c1)
1772 switch (c)
1773 {
1774 case '+': return CPP_PLUS_PLUS;
1775 case '-': return CPP_MINUS_MINUS;
1776 case '&': return CPP_AND_AND;
1777 case '|': return CPP_OR_OR;
1778 case ':':
1779 if (c_language == clk_cplusplus)
1780 return CPP_SCOPE;
1781 break;
e8bbfc4e 1782
0e5921e8
ZW
1783 case '<': type = CPP_LSHIFT; goto do_triad;
1784 case '>': type = CPP_RSHIFT; goto do_triad;
1785 }
1786 else
1787 switch (c)
1788 {
1789 case '-':
1790 if (c1 == '>')
1791 {
1792 if (c_language == clk_cplusplus)
1793 {
1794 c1 = getch ();
1795 if (c1 == '*')
1796 return CPP_DEREF_STAR;
1797 put_back (c1);
1798 }
1799 return CPP_DEREF;
1800 }
1801 break;
ed513abf 1802
0e5921e8
ZW
1803 case '>':
1804 if (c1 == '?' && c_language == clk_cplusplus)
1805 { type = CPP_MAX; goto do_triad; }
1806 break;
e26ceb28 1807
0e5921e8
ZW
1808 case '<':
1809 if (c1 == ':' && flag_digraphs)
1810 return CPP_OPEN_SQUARE;
1811 if (c1 == '%' && flag_digraphs)
1812 { indent_level++; return CPP_OPEN_BRACE; }
1813 if (c1 == '?' && c_language == clk_cplusplus)
1814 { type = CPP_MIN; goto do_triad; }
1815 break;
e8bbfc4e 1816
0e5921e8
ZW
1817 case ':':
1818 if (c1 == '>' && flag_digraphs)
1819 return CPP_CLOSE_SQUARE;
1820 break;
1821 case '%':
1822 if (c1 == '>' && flag_digraphs)
1823 { indent_level--; return CPP_CLOSE_BRACE; }
1824 break;
1825 }
a47a0ed5 1826
/* No longer form matched: give the second character back and return
   the one-character token.  */
0e5921e8
ZW
1827 put_back (c1);
1828 return type;
e8bbfc4e 1829
/* A two-character operator has been recognized; a following '='
   turns it into the corresponding assignment operator.  */
0e5921e8
ZW
1830 do_triad:
1831 c1 = getch ();
1832 if (c1 == '=')
1833 type += (CPP_EQ_EQ - CPP_EQ);
1834 else
1835 put_back (c1);
1836 return type;
1837 }
e8bbfc4e 1838
0e5921e8
ZW
1839 case '~': return CPP_COMPL;
1840 case '?': return CPP_QUERY;
1841 case ',': return CPP_COMMA;
1842 case '(': return CPP_OPEN_PAREN;
1843 case ')': return CPP_CLOSE_PAREN;
1844 case '[': return CPP_OPEN_SQUARE;
1845 case ']': return CPP_CLOSE_SQUARE;
1846 case '{': indent_level++; return CPP_OPEN_BRACE;
1847 case '}': indent_level--; return CPP_CLOSE_BRACE;
1848 case ';': return CPP_SEMICOLON;
1849
1850 straychar:
1851 default:
1852 if (ISGRAPH (c))
1853 error ("stray '%c' in program", c);
1854 else
1855 error ("stray '\\%#o' in program", c);
1856 goto retry;
1857 }
1858 /* NOTREACHED */
1859#endif
1860}
8d9bfdc5 1861
8d9bfdc5 1862
/* Report MSGID with error () and jump to the `syntax_error' label of
   the function in which the macro is used.  The do/while (0) wrapper
   makes the expansion a single statement.  */
#define ERROR(msgid)		\
  do				\
    {				\
      error (msgid);		\
      goto syntax_error;	\
    }				\
  while (0)
75cb8865 1864
0e5921e8
ZW
1865static tree
1866lex_number (str, len)
1867 const char *str;
1868 unsigned int len;
1869{
1870 int base = 10;
1871 int count = 0;
1872 int largest_digit = 0;
1873 int numdigits = 0;
1874 int overflow = 0;
1875 int c;
1876 tree value;
1877 const char *p;
1878 enum anon1 { NOT_FLOAT = 0, AFTER_POINT, AFTER_EXPON } floatflag = NOT_FLOAT;
1879
1880 /* We actually store only HOST_BITS_PER_CHAR bits in each part.
1881 The code below which fills the parts array assumes that a host
1882 int is at least twice as wide as a host char, and that
1883 HOST_BITS_PER_WIDE_INT is an even multiple of HOST_BITS_PER_CHAR.
1884 Two HOST_WIDE_INTs is the largest int literal we can store.
1885 In order to detect overflow below, the number of parts (TOTAL_PARTS)
1886 must be exactly the number of parts needed to hold the bits
1887 of two HOST_WIDE_INTs. */
1888#define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2)
1889 unsigned int parts[TOTAL_PARTS];
1890
1891 /* Optimize for most frequent case. */
1892 if (len == 1)
1893 {
1894 if (*str == '0')
1895 return integer_zero_node;
1896 else if (*str == '1')
1897 return integer_one_node;
1898 else
1899 return build_int_2 (*str - '0', 0);
1900 }
e8bbfc4e 1901
0e5921e8
ZW
1902 for (count = 0; count < TOTAL_PARTS; count++)
1903 parts[count] = 0;
e8bbfc4e 1904
0e5921e8
ZW
1905 /* len is known to be >1 at this point. */
1906 p = str;
e8bbfc4e 1907
0e5921e8
ZW
1908 if (len > 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
1909 {
1910 base = 16;
1911 p = str + 2;
1912 }
1913 /* The ISDIGIT check is so we are not confused by a suffix on 0. */
1914 else if (str[0] == '0' && ISDIGIT (str[1]))
1915 {
1916 base = 8;
1917 p = str + 1;
1918 }
e8bbfc4e 1919
0e5921e8
ZW
1920 do
1921 {
1922 c = *p++;
88d92ca5 1923
0e5921e8
ZW
1924 if (c == '.')
1925 {
1926 if (base == 16 && pedantic && !flag_isoc99)
1927 pedwarn ("floating constant may not be in radix 16");
1928 else if (floatflag == AFTER_POINT)
1929 ERROR ("too many decimal points in floating constant");
1930 else if (floatflag == AFTER_EXPON)
1931 ERROR ("decimal point in exponent - impossible!");
1932 else
1933 floatflag = AFTER_POINT;
c832a30e 1934
0e5921e8
ZW
1935 if (base == 8)
1936 base = 10;
1937 }
1938 else if (c == '_')
1939 /* Possible future extension: silently ignore _ in numbers,
1940 permitting cosmetic grouping - e.g. 0x8000_0000 == 0x80000000
1941 but somewhat easier to read. Ada has this? */
1942 ERROR ("underscore in number");
1943 else
1944 {
1945 int n;
1946 /* It is not a decimal point.
1947 It should be a digit (perhaps a hex digit). */
0e0fda0d 1948
0e5921e8
ZW
1949 if (ISDIGIT (c))
1950 {
1951 n = c - '0';
1952 }
1953 else if (base <= 10 && (c == 'e' || c == 'E'))
1954 {
1955 base = 10;
1956 floatflag = AFTER_EXPON;
1957 break;
1958 }
1959 else if (base == 16 && (c == 'p' || c == 'P'))
1960 {
1961 floatflag = AFTER_EXPON;
1962 break; /* start of exponent */
1963 }
1964 else if (base == 16 && c >= 'a' && c <= 'f')
1965 {
1966 n = c - 'a' + 10;
1967 }
1968 else if (base == 16 && c >= 'A' && c <= 'F')
1969 {
1970 n = c - 'A' + 10;
1971 }
1972 else
1973 {
1974 p--;
1975 break; /* start of suffix */
1976 }
0e0fda0d 1977
0e5921e8
ZW
1978 if (n >= largest_digit)
1979 largest_digit = n;
1980 numdigits++;
0e0fda0d 1981
0e5921e8
ZW
1982 for (count = 0; count < TOTAL_PARTS; count++)
1983 {
1984 parts[count] *= base;
1985 if (count)
1986 {
1987 parts[count]
1988 += (parts[count-1] >> HOST_BITS_PER_CHAR);
1989 parts[count-1]
1990 &= (1 << HOST_BITS_PER_CHAR) - 1;
1991 }
1992 else
1993 parts[0] += n;
1994 }
e8bbfc4e 1995
0e5921e8
ZW
1996 /* If the highest-order part overflows (gets larger than
1997 a host char will hold) then the whole number has
1998 overflowed. Record this and truncate the highest-order
1999 part. */
2000 if (parts[TOTAL_PARTS - 1] >> HOST_BITS_PER_CHAR)
2001 {
2002 overflow = 1;
2003 parts[TOTAL_PARTS - 1] &= (1 << HOST_BITS_PER_CHAR) - 1;
2004 }
2005 }
2006 }
2007 while (p < str + len);
91b0989e 2008
0e5921e8
ZW
2009 /* This can happen on input like `int i = 0x;' */
2010 if (numdigits == 0)
2011 ERROR ("numeric constant with no digits");
91b0989e 2012
0e5921e8
ZW
2013 if (largest_digit >= base)
2014 ERROR ("numeric constant contains digits beyond the radix");
e8bbfc4e 2015
0e5921e8
ZW
2016 if (floatflag != NOT_FLOAT)
2017 {
2018 tree type;
2019 int imag, fflag, lflag, conversion_errno;
2020 REAL_VALUE_TYPE real;
2021 struct pf_args args;
2022 char *copy;
2023
2024 if (base == 16 && floatflag != AFTER_EXPON)
2025 ERROR ("hexadecimal floating constant has no exponent");
2026
2027 /* Read explicit exponent if any, and put it in tokenbuf. */
2028 if ((base == 10 && ((c == 'e') || (c == 'E')))
2029 || (base == 16 && (c == 'p' || c == 'P')))
2030 {
2031 if (p < str + len)
2032 c = *p++;
2033 if (p < str + len && (c == '+' || c == '-'))
2034 c = *p++;
2035 /* Exponent is decimal, even if string is a hex float. */
2036 if (! ISDIGIT (c))
2037 ERROR ("floating constant exponent has no digits");
2038 while (p < str + len && ISDIGIT (c))
2039 c = *p++;
2040 if (! ISDIGIT (c))
2041 p--;
2042 }
56f48ce9 2043
0e5921e8
ZW
2044 /* Copy the float constant now; we don't want any suffixes in the
2045 string passed to parse_float. */
2046 copy = alloca (p - str + 1);
2047 memcpy (copy, str, p - str);
2048 copy[p - str] = '\0';
e8bbfc4e 2049
0e5921e8
ZW
2050 /* Now parse suffixes. */
2051 fflag = lflag = imag = 0;
2052 while (p < str + len)
2053 switch (*p++)
e8bbfc4e 2054 {
0e5921e8
ZW
2055 case 'f': case 'F':
2056 if (fflag)
2057 ERROR ("more than one 'f' suffix on floating constant");
2058 else if (warn_traditional && !in_system_header)
2059 warning ("traditional C rejects the 'f' suffix");
e8bbfc4e 2060
0e5921e8
ZW
2061 fflag = 1;
2062 break;
e8bbfc4e 2063
0e5921e8
ZW
2064 case 'l': case 'L':
2065 if (lflag)
2066 ERROR ("more than one 'l' suffix on floating constant");
2067 else if (warn_traditional && !in_system_header)
2068 warning ("traditional C rejects the 'l' suffix");
56f48ce9 2069
0e5921e8
ZW
2070 lflag = 1;
2071 break;
e8bbfc4e 2072
0e5921e8
ZW
2073 case 'i': case 'I':
2074 case 'j': case 'J':
2075 if (imag)
2076 ERROR ("more than one 'i' or 'j' suffix on floating constant");
2077 else if (pedantic)
2078 pedwarn ("ISO C forbids imaginary numeric constants");
2079 imag = 1;
2080 break;
e8bbfc4e 2081
0e5921e8
ZW
2082 default:
2083 ERROR ("invalid suffix on floating constant");
e8bbfc4e
RK
2084 }
2085
0e5921e8
ZW
2086 /* Setup input for parse_float() */
2087 args.str = copy;
2088 args.fflag = fflag;
2089 args.lflag = lflag;
2090 args.base = base;
e8bbfc4e 2091
0e5921e8
ZW
2092 /* Convert string to a double, checking for overflow. */
2093 if (do_float_handler (parse_float, (PTR) &args))
2094 {
2095 /* Receive output from parse_float() */
2096 real = args.value;
2097 }
2098 else
2099 /* We got an exception from parse_float() */
2100 ERROR ("floating constant out of range");
e8bbfc4e 2101
0e5921e8
ZW
2102 /* Receive output from parse_float() */
2103 conversion_errno = args.conversion_errno;
2104 type = args.type;
2105
2106#ifdef ERANGE
2107 /* ERANGE is also reported for underflow,
2108 so test the value to distinguish overflow from that. */
2109 if (conversion_errno == ERANGE && !flag_traditional && pedantic
2110 && (REAL_VALUES_LESS (dconst1, real)
2111 || REAL_VALUES_LESS (real, dconstm1)))
2112 warning ("floating point number exceeds range of 'double'");
56f48ce9 2113#endif
e8bbfc4e 2114
0e5921e8
ZW
2115 /* Create a node with determined type and value. */
2116 if (imag)
2117 value = build_complex (NULL_TREE, convert (type, integer_zero_node),
2118 build_real (type, real));
2119 else
2120 value = build_real (type, real);
2121 }
2122 else
2123 {
2124 tree trad_type, ansi_type, type;
2125 HOST_WIDE_INT high, low;
2126 int spec_unsigned = 0;
2127 int spec_long = 0;
2128 int spec_long_long = 0;
2129 int spec_imag = 0;
2130 int suffix_lu = 0;
2131 int warn = 0, i;
2132
2133 trad_type = ansi_type = type = NULL_TREE;
2134 while (p < str + len)
2135 {
2136 c = *p++;
2137 switch (c)
2138 {
2139 case 'u': case 'U':
2140 if (spec_unsigned)
2141 error ("two 'u' suffixes on integer constant");
2142 else if (warn_traditional && !in_system_header)
2143 warning ("traditional C rejects the 'u' suffix");
2144
2145 spec_unsigned = 1;
2146 if (spec_long)
2147 suffix_lu = 1;
2148 break;
e8bbfc4e 2149
0e5921e8
ZW
2150 case 'l': case 'L':
2151 if (spec_long)
2152 {
2153 if (spec_long_long)
2154 error ("three 'l' suffixes on integer constant");
2155 else if (suffix_lu)
2156 error ("'lul' is not a valid integer suffix");
2157 else if (c != spec_long)
2158 error ("'Ll' and 'lL' are not valid integer suffixes");
2159 else if (pedantic && ! flag_isoc99
2160 && ! in_system_header && warn_long_long)
2161 pedwarn ("ISO C89 forbids long long integer constants");
2162 spec_long_long = 1;
2163 }
2164 spec_long = c;
2165 break;
56f48ce9 2166
0e5921e8
ZW
2167 case 'i': case 'I': case 'j': case 'J':
2168 if (spec_imag)
2169 error ("more than one 'i' or 'j' suffix on integer constant");
2170 else if (pedantic)
2171 pedwarn ("ISO C forbids imaginary numeric constants");
2172 spec_imag = 1;
2173 break;
56f48ce9 2174
0e5921e8
ZW
2175 default:
2176 ERROR ("invalid suffix on integer constant");
2177 }
2178 }
56f48ce9 2179
0e5921e8
ZW
2180 /* If the literal overflowed, pedwarn about it now. */
2181 if (overflow)
2182 {
2183 warn = 1;
2184 pedwarn ("integer constant is too large for this configuration of the compiler - truncated to %d bits", HOST_BITS_PER_WIDE_INT * 2);
2185 }
e8bbfc4e 2186
0e5921e8
ZW
2187 /* This is simplified by the fact that our constant
2188 is always positive. */
56f48ce9 2189
0e5921e8 2190 high = low = 0;
e8bbfc4e 2191
0e5921e8
ZW
2192 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
2193 {
2194 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
2195 / HOST_BITS_PER_CHAR)]
2196 << (i * HOST_BITS_PER_CHAR));
2197 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
2198 }
0468bc75 2199
0e5921e8
ZW
2200 value = build_int_2 (low, high);
2201 TREE_TYPE (value) = long_long_unsigned_type_node;
e8bbfc4e 2202
0e5921e8
ZW
2203 /* If warn_traditional, calculate both the ISO type and the
2204 traditional type, then see if they disagree.
2205 Otherwise, calculate only the type for the dialect in use. */
2206 if (warn_traditional || flag_traditional)
2207 {
2208 /* Calculate the traditional type. */
2209 /* Traditionally, any constant is signed; but if unsigned is
2210 specified explicitly, obey that. Use the smallest size
2211 with the right number of bits, except for one special
2212 case with decimal constants. */
2213 if (! spec_long && base != 10
2214 && int_fits_type_p (value, unsigned_type_node))
2215 trad_type = spec_unsigned ? unsigned_type_node : integer_type_node;
2216 /* A decimal constant must be long if it does not fit in
2217 type int. I think this is independent of whether the
2218 constant is signed. */
2219 else if (! spec_long && base == 10
2220 && int_fits_type_p (value, integer_type_node))
2221 trad_type = spec_unsigned ? unsigned_type_node : integer_type_node;
2222 else if (! spec_long_long)
2223 trad_type = (spec_unsigned
2224 ? long_unsigned_type_node
2225 : long_integer_type_node);
2226 else if (int_fits_type_p (value,
2227 spec_unsigned
2228 ? long_long_unsigned_type_node
2229 : long_long_integer_type_node))
2230 trad_type = (spec_unsigned
2231 ? long_long_unsigned_type_node
2232 : long_long_integer_type_node);
2233 else
2234 trad_type = (spec_unsigned
2235 ? widest_unsigned_literal_type_node
2236 : widest_integer_literal_type_node);
2237 }
2238 if (warn_traditional || ! flag_traditional)
2239 {
2240 /* Calculate the ISO type. */
2241 if (! spec_long && ! spec_unsigned
2242 && int_fits_type_p (value, integer_type_node))
2243 ansi_type = integer_type_node;
2244 else if (! spec_long && (base != 10 || spec_unsigned)
2245 && int_fits_type_p (value, unsigned_type_node))
2246 ansi_type = unsigned_type_node;
2247 else if (! spec_unsigned && !spec_long_long
2248 && int_fits_type_p (value, long_integer_type_node))
2249 ansi_type = long_integer_type_node;
2250 else if (! spec_long_long
2251 && int_fits_type_p (value, long_unsigned_type_node))
2252 ansi_type = long_unsigned_type_node;
2253 else if (! spec_unsigned
2254 && int_fits_type_p (value, long_long_integer_type_node))
2255 ansi_type = long_long_integer_type_node;
2256 else if (int_fits_type_p (value, long_long_unsigned_type_node))
2257 ansi_type = long_long_unsigned_type_node;
2258 else if (! spec_unsigned
2259 && int_fits_type_p (value, widest_integer_literal_type_node))
2260 ansi_type = widest_integer_literal_type_node;
2261 else
2262 ansi_type = widest_unsigned_literal_type_node;
2263 }
e8bbfc4e 2264
0e5921e8 2265 type = flag_traditional ? trad_type : ansi_type;
e8bbfc4e 2266
0e5921e8
ZW
2267 /* We assume that constants specified in a non-decimal
2268 base are bit patterns, and that the programmer really
2269 meant what they wrote. */
2270 if (warn_traditional && !in_system_header
2271 && base == 10 && trad_type != ansi_type)
2272 {
2273 if (TYPE_PRECISION (trad_type) != TYPE_PRECISION (ansi_type))
2274 warning ("width of integer constant changes with -traditional");
2275 else if (TREE_UNSIGNED (trad_type) != TREE_UNSIGNED (ansi_type))
2276 warning ("integer constant is unsigned in ISO C, signed with -traditional");
2277 else
2278 warning ("width of integer constant may change on other systems with -traditional");
2279 }
e8bbfc4e 2280
0e5921e8
ZW
2281 if (pedantic && !flag_traditional && !spec_long_long && !warn
2282 && (TYPE_PRECISION (long_integer_type_node) < TYPE_PRECISION (type)))
2283 {
2284 warn = 1;
2285 pedwarn ("integer constant larger than the maximum value of an unsigned long int");
2286 }
e8bbfc4e 2287
0e5921e8
ZW
2288 if (base == 10 && ! spec_unsigned && TREE_UNSIGNED (type))
2289 warning ("decimal constant is so large that it is unsigned");
e8bbfc4e 2290
0e5921e8
ZW
2291 if (spec_imag)
2292 {
2293 if (TYPE_PRECISION (type)
2294 <= TYPE_PRECISION (integer_type_node))
2295 value = build_complex (NULL_TREE, integer_zero_node,
2296 convert (integer_type_node, value));
2297 else
2298 ERROR ("complex integer constant is too wide for 'complex int'");
2299 }
2300 else if (flag_traditional && !int_fits_type_p (value, type))
2301 /* The traditional constant 0x80000000 is signed
2302 but doesn't fit in the range of int.
2303 This will change it to -0x80000000, which does fit. */
2304 {
2305 TREE_TYPE (value) = unsigned_type (type);
2306 value = convert (type, value);
2307 TREE_OVERFLOW (value) = TREE_CONSTANT_OVERFLOW (value) = 0;
2308 }
2309 else
2310 TREE_TYPE (value) = type;
e8bbfc4e 2311
0e5921e8
ZW
2312 /* If it's still an integer (not a complex), and it doesn't
2313 fit in the type we choose for it, then pedwarn. */
fbb18613 2314
0e5921e8
ZW
2315 if (! warn
2316 && TREE_CODE (TREE_TYPE (value)) == INTEGER_TYPE
2317 && ! int_fits_type_p (value, TREE_TYPE (value)))
2318 pedwarn ("integer constant is larger than the maximum value for its type");
2319 }
fbb18613 2320
0e5921e8
ZW
2321 if (p < str + len)
2322 error ("missing white space after number '%.*s'", (int) (p - str), str);
e8bbfc4e 2323
0e5921e8 2324 return value;
e8bbfc4e 2325
0e5921e8
ZW
2326 syntax_error:
2327 return integer_zero_node;
2328}
e8bbfc4e 2329
0e5921e8
ZW
2330static tree
2331lex_string (str, len, wide)
2332 const char *str;
2333 unsigned int len;
2334 int wide;
2335{
2336 tree value;
2337 char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
2338 char *q = buf;
2339 const char *p = str, *limit = str + len;
2340 unsigned int c;
2341 unsigned width = wide ? WCHAR_TYPE_SIZE
2342 : TYPE_PRECISION (char_type_node);
e9a25f70 2343
0e5921e8
ZW
2344#ifdef MULTIBYTE_CHARS
2345 /* Reset multibyte conversion state. */
2346 (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
2347#endif
e9a25f70 2348
0e5921e8
ZW
2349 while (p < limit)
2350 {
2351#ifdef MULTIBYTE_CHARS
2352 wchar_t wc;
2353 int char_len;
2354
2355 char_len = local_mbtowc (&wc, p, limit - p);
2356 if (char_len == -1)
2357 {
2358 warning ("Ignoring invalid multibyte character");
2359 char_len = 1;
2360 c = *p++;
2361 }
2362 else
2363 {
2364 p += char_len;
2365 c = wc;
2366 }
2367#else
2368 c = *p++;
2369#endif
2370
2371 if (c == '\\' && !ignore_escape_flag)
2372 {
2373 p = readescape (p, limit, &c);
2374 if (width < HOST_BITS_PER_INT
2375 && (unsigned) c >= ((unsigned)1 << width))
2376 pedwarn ("escape sequence out of range for character");
2377 }
2378
2379 /* Add this single character into the buffer either as a wchar_t
2380 or as a single byte. */
2381 if (wide)
2382 {
2383 unsigned charwidth = TYPE_PRECISION (char_type_node);
2384 unsigned bytemask = (1 << width) - 1;
2385 int byte;
2386
2387 for (byte = 0; byte < WCHAR_BYTES; ++byte)
2388 {
2389 int n;
2390 if (byte >= (int) sizeof (c))
2391 n = 0;
2392 else
2393 n = (c >> (byte * charwidth)) & bytemask;
2394 if (BYTES_BIG_ENDIAN)
2395 q[WCHAR_BYTES - byte - 1] = n;
2396 else
2397 q[byte] = n;
2398 }
2399 q += WCHAR_BYTES;
2400 }
2401 else
2402 {
2403 *q++ = c;
2404 }
e8bbfc4e
RK
2405 }
2406
0e5921e8
ZW
2407 /* Terminate the string value, either with a single byte zero
2408 or with a wide zero. */
e8bbfc4e 2409
0e5921e8
ZW
2410 if (wide)
2411 {
2412 memset (q, 0, WCHAR_BYTES);
2413 q += WCHAR_BYTES;
2414 }
2415 else
2416 {
2417 *q++ = '\0';
2418 }
2419
2420 value = build_string (q - buf, buf);
2421
2422 if (wide)
2423 TREE_TYPE (value) = wchar_array_type_node;
2424 else
2425 TREE_TYPE (value) = char_array_type_node;
e8bbfc4e
RK
2426 return value;
2427}
2428
0e5921e8
ZW
2429static tree
2430lex_charconst (str, len, wide)
2431 const char *str;
2432 unsigned int len;
2433 int wide;
e8bbfc4e 2434{
0e5921e8
ZW
2435 const char *limit = str + len;
2436 int result = 0;
2437 int num_chars = 0;
2438 int chars_seen = 0;
2439 unsigned width = TYPE_PRECISION (char_type_node);
2440 int max_chars;
2441 unsigned int c;
2442 tree value;
2443
2444#ifdef MULTIBYTE_CHARS
2445 int longest_char = local_mb_cur_max ();
2446 (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
2447#endif
2448
2449 max_chars = TYPE_PRECISION (integer_type_node) / width;
2450 if (wide)
2451 width = WCHAR_TYPE_SIZE;
2452
2453 while (str < limit)
2454 {
2455#ifdef MULTIBYTE_CHARS
2456 wchar_t wc;
2457 int char_len;
2458
2459 char_len = local_mbtowc (&wc, str, limit - str);
2460 if (char_len == -1)
2461 {
2462 warning ("Ignoring invalid multibyte character");
2463 char_len = 1;
2464 c = *str++;
2465 }
2466 else
2467 {
2468 p += char_len;
2469 c = wc;
2470 }
e8bbfc4e 2471#else
0e5921e8
ZW
2472 c = *str++;
2473#endif
2474
2475 ++chars_seen;
2476 if (c == '\\')
2477 {
2478 str = readescape (str, limit, &c);
2479 if (width < HOST_BITS_PER_INT
2480 && (unsigned) c >= ((unsigned)1 << width))
2481 pedwarn ("escape sequence out of range for character");
2482 }
2483#ifdef MAP_CHARACTER
2484 if (ISPRINT (c))
2485 c = MAP_CHARACTER (c);
e8bbfc4e 2486#endif
0e5921e8
ZW
2487
2488 /* Merge character into result; ignore excess chars. */
2489 num_chars += (width / TYPE_PRECISION (char_type_node));
2490 if (num_chars < max_chars + 1)
2491 {
2492 if (width < HOST_BITS_PER_INT)
2493 result = (result << width) | (c & ((1 << width) - 1));
2494 else
2495 result = c;
2496 }
2497 }
2498
2499 if (chars_seen == 0)
2500 error ("empty character constant");
2501 else if (num_chars > max_chars)
2502 {
2503 num_chars = max_chars;
2504 error ("character constant too long");
2505 }
2506 else if (chars_seen != 1 && ! flag_traditional && warn_multichar)
2507 warning ("multi-character character constant");
2508
2509 /* If char type is signed, sign-extend the constant. */
2510 if (! wide)
2511 {
2512 int num_bits = num_chars * width;
2513 if (num_bits == 0)
2514 /* We already got an error; avoid invalid shift. */
2515 value = build_int_2 (0, 0);
2516 else if (TREE_UNSIGNED (char_type_node)
2517 || ((result >> (num_bits - 1)) & 1) == 0)
2518 value = build_int_2 (result & (~(unsigned HOST_WIDE_INT) 0
2519 >> (HOST_BITS_PER_WIDE_INT - num_bits)),
2520 0);
2521 else
2522 value = build_int_2 (result | ~(~(unsigned HOST_WIDE_INT) 0
2523 >> (HOST_BITS_PER_WIDE_INT - num_bits)),
2524 -1);
2525 /* In C, a character constant has type 'int'; in C++, 'char'. */
2526 if (chars_seen <= 1 && c_language == clk_cplusplus)
2527 TREE_TYPE (value) = char_type_node;
2528 else
2529 TREE_TYPE (value) = integer_type_node;
2530 }
2531 else
2532 {
2533 value = build_int_2 (result, 0);
2534 TREE_TYPE (value) = wchar_type_node;
2535 }
2536
2537 return value;
e8bbfc4e 2538}
This page took 1.022706 seconds and 5 git commands to generate.