]> gcc.gnu.org Git - gcc.git/blob - gcc/c-lex.c
c-lex.c (process_directive): If not HANDLE_GENERIC_PRAGMAS, do not call dispatch_pragma.
[gcc.git] / gcc / c-lex.c
1 /* Lexical analyzer for C and Objective C.
2 Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
3 1998, 1999, 2000 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24
25 #include "rtl.h"
26 #include "tree.h"
27 #include "input.h"
28 #include "output.h"
29 #include "c-lex.h"
30 #include "c-tree.h"
31 #include "flags.h"
32 #include "timevar.h"
33 #include "cpplib.h"
34 #include "c-pragma.h"
35 #include "toplev.h"
36 #include "intl.h"
37 #include "ggc.h"
38 #include "tm_p.h"
39 #include "splay-tree.h"
40
41 /* MULTIBYTE_CHARS support only works for native compilers.
42 ??? Ideally what we want is to model widechar support after
43 the current floating point support. */
44 #ifdef CROSS_COMPILE
45 #undef MULTIBYTE_CHARS
46 #endif
47
48 #ifdef MULTIBYTE_CHARS
49 #include "mbchar.h"
50 #include <locale.h>
51 #endif /* MULTIBYTE_CHARS */
52 #ifndef GET_ENVIRONMENT
53 #define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME))
54 #endif
55
56 #if USE_CPPLIB
57 extern cpp_reader parse_in;
58 #else
59 /* Stream for reading from the input file. */
60 FILE *finput;
61 #endif
62
63 /* Private idea of the line number. See discussion in c_lex(). */
64 static int lex_lineno;
65
66 /* We may keep statistics about how long which files took to compile. */
67 static int header_time, body_time;
68 static splay_tree file_info_tree;
69
70 /* Cause the `yydebug' variable to be defined. */
71 #define YYDEBUG 1
72
73 #if !USE_CPPLIB
74
75 struct putback_buffer
76 {
77 unsigned char *buffer;
78 int buffer_size;
79 int index;
80 };
81
82 static struct putback_buffer putback = {NULL, 0, -1};
83
84 static inline int getch PARAMS ((void));
85
86 static inline int
87 getch ()
88 {
89 if (putback.index != -1)
90 {
91 int ch = putback.buffer[putback.index];
92 --putback.index;
93 return ch;
94 }
95 return getc (finput);
96 }
97
98 static inline void put_back PARAMS ((int));
99
100 static inline void
101 put_back (ch)
102 int ch;
103 {
104 if (ch != EOF)
105 {
106 if (putback.index == putback.buffer_size - 1)
107 {
108 putback.buffer_size += 16;
109 putback.buffer = xrealloc (putback.buffer, putback.buffer_size);
110 }
111 putback.buffer[++putback.index] = ch;
112 }
113 }
114
115 int linemode;
116
117 #endif
118
119 /* File used for outputting assembler code. */
120 extern FILE *asm_out_file;
121
122 #undef WCHAR_TYPE_SIZE
123 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
124
125 /* Number of bytes in a wide character. */
126 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
127
128 #if !USE_CPPLIB
129 static int maxtoken; /* Current nominal length of token buffer. */
130 static char *token_buffer; /* Pointer to token buffer.
131 Actual allocated length is maxtoken + 2. */
132 #endif
133
134 int indent_level; /* Number of { minus number of }. */
135 int pending_lang_change; /* If we need to switch languages - C++ only */
136 int c_header_level; /* depth in C headers - C++ only */
137
138 /* Nonzero tells yylex to ignore \ in string constants. */
139 static int ignore_escape_flag;
140
141 static const char *readescape PARAMS ((const char *, const char *,
142 unsigned int *));
143 static const char *read_ucs PARAMS ((const char *, const char *,
144 unsigned int *, int));
145 static void parse_float PARAMS ((PTR));
146 static tree lex_number PARAMS ((const char *, unsigned int));
147 static tree lex_string PARAMS ((const char *, unsigned int, int));
148 static tree lex_charconst PARAMS ((const char *, unsigned int, int));
149 static void update_header_times PARAMS ((const char *));
150 static int dump_one_header PARAMS ((splay_tree_node, void *));
151
152 #if !USE_CPPLIB
153 static int skip_white_space PARAMS ((int));
154 static char *extend_token_buffer PARAMS ((const char *));
155 static void extend_token_buffer_to PARAMS ((int));
156 static int read_line_number PARAMS ((int *));
157 static void process_directive PARAMS ((void));
158 #else
159 static void cb_ident PARAMS ((cpp_reader *, const unsigned char *,
160 unsigned int));
161 static void cb_enter_file PARAMS ((cpp_reader *));
162 static void cb_leave_file PARAMS ((cpp_reader *));
163 static void cb_rename_file PARAMS ((cpp_reader *));
164 static void cb_def_pragma PARAMS ((cpp_reader *));
165 #endif
166
167 \f
168 const char *
169 init_c_lex (filename)
170 const char *filename;
171 {
172 struct c_fileinfo *toplevel;
173
174 /* Set up filename timing. Must happen before cpp_start_read. */
175 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
176 0,
177 (splay_tree_delete_value_fn)free);
178 toplevel = get_fileinfo ("<top level>");
179 if (flag_detailed_statistics)
180 {
181 header_time = 0;
182 body_time = get_run_time ();
183 toplevel->time = body_time;
184 }
185
186 #ifdef MULTIBYTE_CHARS
187 /* Change to the native locale for multibyte conversions. */
188 setlocale (LC_CTYPE, "");
189 GET_ENVIRONMENT (literal_codeset, "LANG");
190 #endif
191
192 #if !USE_CPPLIB
193 /* Open input file. */
194 if (filename == 0 || !strcmp (filename, "-"))
195 {
196 finput = stdin;
197 filename = "stdin";
198 }
199 else
200 finput = fopen (filename, "r");
201 if (finput == 0)
202 pfatal_with_name (filename);
203
204 #ifdef IO_BUFFER_SIZE
205 setvbuf (finput, (char *) xmalloc (IO_BUFFER_SIZE), _IOFBF, IO_BUFFER_SIZE);
206 #endif
207 #else /* !USE_CPPLIB */
208
209 parse_in.cb.ident = cb_ident;
210 parse_in.cb.enter_file = cb_enter_file;
211 parse_in.cb.leave_file = cb_leave_file;
212 parse_in.cb.rename_file = cb_rename_file;
213 parse_in.cb.def_pragma = cb_def_pragma;
214
215 /* Make sure parse_in.digraphs matches flag_digraphs. */
216 CPP_OPTION (&parse_in, digraphs) = flag_digraphs;
217
218 if (! cpp_start_read (&parse_in, 0 /* no printer */, filename))
219 abort ();
220
221 if (filename == 0 || !strcmp (filename, "-"))
222 filename = "stdin";
223 #endif
224
225 #if !USE_CPPLIB
226 maxtoken = 40;
227 token_buffer = (char *) xmalloc (maxtoken + 2);
228 #endif
229 /* Start it at 0, because check_newline is called at the very beginning
230 and will increment it to 1. */
231 lineno = lex_lineno = 0;
232
233 return filename;
234 }
235
236 struct c_fileinfo *
237 get_fileinfo (name)
238 const char *name;
239 {
240 splay_tree_node n;
241 struct c_fileinfo *fi;
242
243 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
244 if (n)
245 return (struct c_fileinfo *) n->value;
246
247 fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
248 fi->time = 0;
249 fi->interface_only = 0;
250 fi->interface_unknown = 1;
251 splay_tree_insert (file_info_tree, (splay_tree_key) name,
252 (splay_tree_value) fi);
253 return fi;
254 }
255
256 static void
257 update_header_times (name)
258 const char *name;
259 {
260 /* Changing files again. This means currently collected time
261 is charged against header time, and body time starts back at 0. */
262 if (flag_detailed_statistics)
263 {
264 int this_time = get_run_time ();
265 struct c_fileinfo *file = get_fileinfo (name);
266 header_time += this_time - body_time;
267 file->time += this_time - body_time;
268 body_time = this_time;
269 }
270 }
271
272 static int
273 dump_one_header (n, dummy)
274 splay_tree_node n;
275 void *dummy ATTRIBUTE_UNUSED;
276 {
277 print_time ((const char *) n->key,
278 ((struct c_fileinfo *) n->value)->time);
279 return 0;
280 }
281
282 void
283 dump_time_statistics ()
284 {
285 struct c_fileinfo *file = get_fileinfo (input_filename);
286 int this_time = get_run_time ();
287 file->time += this_time - body_time;
288
289 fprintf (stderr, "\n******\n");
290 print_time ("header files (total)", header_time);
291 print_time ("main file (total)", this_time - body_time);
292 fprintf (stderr, "ratio = %g : 1\n",
293 (double)header_time / (double)(this_time - body_time));
294 fprintf (stderr, "\n******\n");
295
296 splay_tree_foreach (file_info_tree, dump_one_header, 0);
297 }
298
299 #if !USE_CPPLIB
300
301 /* If C is not whitespace, return C.
302 Otherwise skip whitespace and return first nonwhite char read. */
303
304 static int
305 skip_white_space (c)
306 register int c;
307 {
308 for (;;)
309 {
310 switch (c)
311 {
312 /* There is no need to process comments, backslash-newline,
313 or \r here. None can occur in the output of cpp. */
314
315 case '\n':
316 if (linemode)
317 {
318 put_back (c);
319 return EOF;
320 }
321 c = check_newline ();
322 break;
323
324 /* Per C99, horizontal whitespace is just these four characters. */
325 case ' ':
326 case '\t':
327 case '\f':
328 case '\v':
329 c = getch ();
330 break;
331
332 case '\\':
333 error ("stray '\\' in program");
334 c = getch ();
335 break;
336
337 default:
338 return (c);
339 }
340 }
341 }
342
/* Advance the input past any whitespace at the current position.  The
   first non-whitespace character found is pushed back so that it is
   the next one read.  */

void
position_after_white_space ()
{
  int first_nonwhite = skip_white_space (getch ());

  put_back (first_nonwhite);
}
354
355 /* Make the token buffer longer, preserving the data in it.
356 P should point to just beyond the last valid character in the old buffer.
357 The value we return is a pointer to the new buffer
358 at a place corresponding to P. */
359
360 static void
361 extend_token_buffer_to (size)
362 int size;
363 {
364 do
365 maxtoken = maxtoken * 2 + 10;
366 while (maxtoken < size);
367 token_buffer = (char *) xrealloc (token_buffer, maxtoken + 2);
368 }
369
370 static char *
371 extend_token_buffer (p)
372 const char *p;
373 {
374 int offset = p - token_buffer;
375 extend_token_buffer_to (offset);
376 return token_buffer + offset;
377 }
378 \f
379
380 static int
381 read_line_number (num)
382 int *num;
383 {
384 tree value;
385 enum cpp_ttype token = c_lex (&value);
386
387 if (token == CPP_NUMBER && TREE_CODE (value) == INTEGER_CST)
388 {
389 *num = TREE_INT_CST_LOW (value);
390 return 1;
391 }
392 else
393 {
394 if (token != CPP_EOF)
395 error ("invalid #-line");
396 return 0;
397 }
398 }
399
400 /* At the beginning of a line, increment the line number
401 and process any #-directive on this line.
402 If the line is a #-directive, read the entire line and return a newline.
403 Otherwise, return the line's first non-whitespace character. */
404
405 int
406 check_newline ()
407 {
408 register int c;
409
410 /* Loop till we get a nonblank, non-directive line. */
411 for (;;)
412 {
413 /* Read first nonwhite char on the line. */
414 do
415 c = getch ();
416 while (c == ' ' || c == '\t');
417
418 lex_lineno++;
419 if (c == '#')
420 {
421 process_directive ();
422 return '\n';
423 }
424
425 else if (c != '\n')
426 break;
427 }
428 return c;
429 }
430
431 static void
432 process_directive ()
433 {
434 enum cpp_ttype token;
435 tree value;
436 int saw_line;
437 enum { act_none, act_push, act_pop } action;
438 int action_number, l;
439 char *new_file;
440 #ifndef NO_IMPLICIT_EXTERN_C
441 int entering_c_header;
442 #endif
443
444 /* Don't read beyond this line. */
445 saw_line = 0;
446 linemode = 1;
447
448 token = c_lex (&value);
449
450 if (token == CPP_NAME)
451 {
452 /* If a letter follows, then if the word here is `line', skip
453 it and ignore it; otherwise, ignore the line, with an error
454 if the word isn't `pragma'. */
455
456 const char *name = IDENTIFIER_POINTER (value);
457
458 if (!strcmp (name, "pragma"))
459 {
460 #ifdef HANDLE_GENERIC_PRAGMAS
461 dispatch_pragma ();
462 #endif
463 goto skipline;
464 }
465 else if (!strcmp (name, "define"))
466 {
467 debug_define (lex_lineno, GET_DIRECTIVE_LINE ());
468 goto skipline;
469 }
470 else if (!strcmp (name, "undef"))
471 {
472 debug_undef (lex_lineno, GET_DIRECTIVE_LINE ());
473 goto skipline;
474 }
475 else if (!strcmp (name, "line"))
476 {
477 saw_line = 1;
478 token = c_lex (&value);
479 goto linenum;
480 }
481 else if (!strcmp (name, "ident"))
482 {
483 /* #ident. We expect a string constant here.
484 The pedantic warning and syntax error are now in cpp. */
485
486 token = c_lex (&value);
487 if (token != CPP_STRING || TREE_CODE (value) != STRING_CST)
488 goto skipline;
489
490 #ifdef ASM_OUTPUT_IDENT
491 if (! flag_no_ident)
492 {
493 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
494 }
495 #endif
496
497 /* Skip the rest of this line. */
498 goto skipline;
499 }
500
501 error ("undefined or invalid # directive `%s'", name);
502 goto skipline;
503 }
504
505 /* If the # is the only nonwhite char on the line,
506 just ignore it. Check the new newline. */
507 if (token == CPP_EOF)
508 goto skipline;
509
510 linenum:
511 /* Here we have either `#line' or `# <nonletter>'.
512 In either case, it should be a line number; a digit should follow. */
513
514 if (token != CPP_NUMBER || TREE_CODE (value) != INTEGER_CST)
515 {
516 error ("invalid #-line");
517 goto skipline;
518 }
519
520 /* subtract one, because it is the following line that
521 gets the specified number */
522
523 l = TREE_INT_CST_LOW (value) - 1;
524
525 /* More follows: it must be a string constant (filename).
526 It would be neat to use cpplib to quickly process the string, but
527 (1) we don't have a handy tokenization of the string, and
528 (2) I don't know how well that would work in the presense
529 of filenames that contain wide characters. */
530
531 if (saw_line)
532 {
533 /* Don't treat \ as special if we are processing #line 1 "...".
534 If you want it to be treated specially, use # 1 "...". */
535 ignore_escape_flag = 1;
536 }
537
538 /* Read the string constant. */
539 token = c_lex (&value);
540
541 ignore_escape_flag = 0;
542
543 if (token == CPP_EOF)
544 {
545 /* No more: store the line number and check following line. */
546 lex_lineno = l;
547 goto skipline;
548 }
549
550 if (token != CPP_STRING || TREE_CODE (value) != STRING_CST)
551 {
552 error ("invalid #line");
553 goto skipline;
554 }
555
556 new_file = TREE_STRING_POINTER (value);
557
558 if (main_input_filename == 0)
559 main_input_filename = new_file;
560
561 action = act_none;
562 action_number = 0;
563
564 /* Each change of file name
565 reinitializes whether we are now in a system header. */
566 in_system_header = 0;
567
568 if (!read_line_number (&action_number))
569 {
570 /* Update the name in the top element of input_file_stack. */
571 if (input_file_stack)
572 input_file_stack->name = input_filename;
573 }
574
575 /* `1' after file name means entering new file.
576 `2' after file name means just left a file. */
577
578 if (action_number == 1)
579 {
580 action = act_push;
581 read_line_number (&action_number);
582 }
583 else if (action_number == 2)
584 {
585 action = act_pop;
586 read_line_number (&action_number);
587 }
588 if (action_number == 3)
589 {
590 /* `3' after file name means this is a system header file. */
591 in_system_header = 1;
592 read_line_number (&action_number);
593 }
594 #ifndef NO_IMPLICIT_EXTERN_C
595 if (action_number == 4)
596 {
597 /* `4' after file name means this is a C header file. */
598 entering_c_header = 1;
599 read_line_number (&action_number);
600 }
601 #endif
602
603 /* Do the actions implied by the preceding numbers. */
604 if (action == act_push)
605 {
606 lineno = lex_lineno;
607 push_srcloc (input_filename, 1);
608 input_file_stack->indent_level = indent_level;
609 debug_start_source_file (input_filename);
610 #ifndef NO_IMPLICIT_EXTERN_C
611 if (c_header_level)
612 ++c_header_level;
613 else if (entering_c_header)
614 {
615 c_header_level = 1;
616 ++pending_lang_change;
617 }
618 #endif
619 }
620 else if (action == act_pop)
621 {
622 /* Popping out of a file. */
623 if (input_file_stack->next)
624 {
625 #ifndef NO_IMPLICIT_EXTERN_C
626 if (c_header_level && --c_header_level == 0)
627 {
628 if (entering_c_header)
629 warning ("badly nested C headers from preprocessor");
630 --pending_lang_change;
631 }
632 #endif
633 #if 0
634 if (indent_level != input_file_stack->indent_level)
635 {
636 warning_with_file_and_line
637 (input_filename, lex_lineno,
638 "This file contains more '%c's than '%c's.",
639 indent_level > input_file_stack->indent_level ? '{' : '}',
640 indent_level > input_file_stack->indent_level ? '}' : '{');
641 }
642 #endif
643 pop_srcloc ();
644 debug_end_source_file (input_file_stack->line);
645 }
646 else
647 error ("#-lines for entering and leaving files don't match");
648 }
649
650 update_header_times (new_file);
651
652 input_filename = new_file;
653 lex_lineno = l;
654
655 /* Hook for C++. */
656 extract_interface_info ();
657
658 /* skip the rest of this line. */
659 skipline:
660 linemode = 0;
661
662 while (getch () != '\n');
663 }
664 #else /* USE_CPPLIB */
665
666 /* Not yet handled: #pragma, #define, #undef.
667 No need to deal with linemarkers under normal conditions. */
668
669 static void
670 cb_ident (pfile, str, len)
671 cpp_reader *pfile ATTRIBUTE_UNUSED;
672 const unsigned char *str;
673 unsigned int len;
674 {
675 #ifdef ASM_OUTPUT_IDENT
676 if (! flag_no_ident)
677 {
678 /* Convert escapes in the string. */
679 tree value = lex_string ((const char *)str, len, 0);
680 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
681 }
682 #endif
683 }
684
685 static void
686 cb_enter_file (pfile)
687 cpp_reader *pfile;
688 {
689 cpp_buffer *ip = CPP_BUFFER (pfile);
690 /* Bleah, need a better interface to this. */
691 const char *flags = cpp_syshdr_flags (pfile, ip);
692
693 /* Mustn't stack the main buffer on the input stack. (Ick.) */
694 if (ip->prev)
695 {
696 lex_lineno = lineno = ip->prev->lineno - 1;
697 push_srcloc (ggc_alloc_string (ip->nominal_fname, -1), 1);
698 input_file_stack->indent_level = indent_level;
699 debug_start_source_file (ip->nominal_fname);
700 }
701 else
702 lex_lineno = 1;
703
704 update_header_times (ip->nominal_fname);
705
706 /* Hook for C++. */
707 extract_interface_info ();
708
709 in_system_header = (flags[0] != 0);
710 #ifndef NO_IMPLICIT_EXTERN_C
711 if (c_header_level)
712 ++c_header_level;
713 else if (flags[2] != 0)
714 {
715 c_header_level = 1;
716 ++pending_lang_change;
717 }
718 #endif
719 }
720
721 static void
722 cb_leave_file (pfile)
723 cpp_reader *pfile;
724 {
725 /* Bleah, need a better interface to this. */
726 const char *flags = cpp_syshdr_flags (pfile, CPP_BUFFER (pfile));
727 #if 0
728 if (indent_level != input_file_stack->indent_level)
729 {
730 warning_with_file_and_line
731 (input_filename, lex_lineno,
732 "This file contains more '%c's than '%c's.",
733 indent_level > input_file_stack->indent_level ? '{' : '}',
734 indent_level > input_file_stack->indent_level ? '}' : '{');
735 }
736 #endif
737 /* We get called for the main buffer, but we mustn't pop it. */
738 if (input_file_stack->next)
739 pop_srcloc ();
740 in_system_header = (flags[0] != 0);
741 #ifndef NO_IMPLICIT_EXTERN_C
742 if (c_header_level && --c_header_level == 0)
743 {
744 if (flags[2] != 0)
745 warning ("badly nested C headers from preprocessor");
746 --pending_lang_change;
747 }
748 #endif
749 lex_lineno = CPP_BUFFER (pfile)->lineno;
750 debug_end_source_file (input_file_stack->line);
751
752 update_header_times (input_file_stack->name);
753 /* Hook for C++. */
754 extract_interface_info ();
755 }
756
757 static void
758 cb_rename_file (pfile)
759 cpp_reader *pfile;
760 {
761 cpp_buffer *ip = CPP_BUFFER (pfile);
762 /* Bleah, need a better interface to this. */
763 const char *flags = cpp_syshdr_flags (pfile, ip);
764 input_filename = ggc_alloc_string (ip->nominal_fname, -1);
765 lex_lineno = ip->lineno;
766 in_system_header = (flags[0] != 0);
767
768 update_header_times (ip->nominal_fname);
769 /* Hook for C++. */
770 extract_interface_info ();
771 }
772
773 static void
774 cb_def_pragma (pfile)
775 cpp_reader *pfile;
776 {
777 /* Issue a warning message if we have been asked to do so. Ignore
778 unknown pragmas in system headers unless an explicit
779 -Wunknown-pragmas has been given. */
780 if (warn_unknown_pragmas > in_system_header)
781 {
782 const unsigned char *space, *name;
783 const cpp_token *t = pfile->first_directive_token + 2;
784
785 space = t[0].val.node->name;
786 name = t[1].type == CPP_NAME ? t[1].val.node->name : 0;
787 if (name)
788 warning ("ignoring #pragma %s %s", space, name);
789 else
790 warning ("ignoring #pragma %s", space);
791 }
792 }
793 #endif /* USE_CPPLIB */
794
/* Parse the hex digits of a '\uNNNN' or '\UNNNNNNNN' sequence.

   P points just after the `u' or `U', LIMIT is the end of the
   available text and LENGTH is the number of hex digits expected
   (the caller passes 4 or 8).  The value read is stored in *CPTR and
   the updated input pointer is returned.  Running out of text or
   meeting a non-hex digit is reported as an error and stops the scan
   at that point.

   [lex.charset]: The character designated by the universal-character-name
   \UNNNNNNNN is that character whose character short name in ISO/IEC 10646
   is NNNNNNNN; the character designated by the universal-character-name
   \uNNNN is that character whose character short name in ISO/IEC 10646 is
   0000NNNN.  If the hexadecimal value for a universal character name is
   less than 0x20 or in the range 0x7F-0x9F (inclusive), or if the
   universal character name designates a character in the basic source
   character set, then the program is ill-formed.

   We assume that wchar_t is Unicode, so we don't need to do any
   mapping.  Is this ever wrong?  */

static const char *
read_ucs (p, limit, cptr, length)
     const char *p;
     const char *limit;
     unsigned int *cptr;
     int length;
{
  unsigned int code = 0;
  unsigned int digit;
  int is_extended;
  int c;

  while (length != 0)
    {
      if (p >= limit)
        {
          error ("incomplete universal-character-name");
          break;
        }

      c = *p;
      if (! ISXDIGIT (c))
        {
          /* Leave P on the offending character.  */
          error ("non hex digit '%c' in universal-character-name", c);
          break;
        }
      p++;

      digit = 0;
      if (c >= '0' && c <= '9')
        digit = c - '0';
      else if (c >= 'a' && c <= 'f')
        digit = c - 'a' + 10;
      else if (c >= 'A' && c <= 'F')
        digit = c - 'A' + 10;

      code = (code << 4) + digit;
      --length;
    }

#ifdef TARGET_EBCDIC
  sorry ("universal-character-name on EBCDIC target");
  *cptr = 0x3f;  /* EBCDIC invalid character */
  return p;
#else
  /* Anything above 0x9f without the top bit set is a true extended
     character and is accepted as is.  */
  is_extended = code > 0x9f && (code & 0x80000000) == 0;
  if (!is_extended)
    {
      if (code >= 0x20 && code < 0x7f)
        {
          /* ASCII printable character.  The C character set consists of
             all of these except $, @ and `.  We use hex escapes so that
             this also works with EBCDIC hosts.  */
          if (code != 0x24 && code != 0x40 && code != 0x60)
            error ("universal-character-name used for '%c'", code);
        }
      else
        error ("invalid universal-character-name");
    }

  *cptr = code;
  return p;
#endif
}
866
867 /* Read an escape sequence and write its character equivalent into *CPTR.
868 P is the input pointer, which is just after the backslash. LIMIT
869 is how much text we have.
870 Returns the updated input pointer. */
871
872 static const char *
873 readescape (p, limit, cptr)
874 const char *p;
875 const char *limit;
876 unsigned int *cptr;
877 {
878 unsigned int c, code, count;
879 unsigned firstdig = 0;
880 int nonnull;
881
882 if (p == limit)
883 {
884 /* cpp has already issued an error for this. */
885 *cptr = 0;
886 return p;
887 }
888
889 c = *p++;
890
891 switch (c)
892 {
893 case 'x':
894 if (warn_traditional && !in_system_header)
895 warning ("the meaning of `\\x' varies with -traditional");
896
897 if (flag_traditional)
898 {
899 *cptr = 'x';
900 return p;
901 }
902
903 code = 0;
904 count = 0;
905 nonnull = 0;
906 while (p < limit)
907 {
908 c = *p++;
909 if (! ISXDIGIT (c))
910 {
911 p--;
912 break;
913 }
914 code *= 16;
915 if (c >= 'a' && c <= 'f')
916 code += c - 'a' + 10;
917 if (c >= 'A' && c <= 'F')
918 code += c - 'A' + 10;
919 if (c >= '0' && c <= '9')
920 code += c - '0';
921 if (code != 0 || count != 0)
922 {
923 if (count == 0)
924 firstdig = code;
925 count++;
926 }
927 nonnull = 1;
928 }
929 if (! nonnull)
930 {
931 warning ("\\x used with no following hex digits");
932 *cptr = 'x';
933 return p;
934 }
935 else if (count == 0)
936 /* Digits are all 0's. Ok. */
937 ;
938 else if ((count - 1) * 4 >= TYPE_PRECISION (integer_type_node)
939 || (count > 1
940 && (((unsigned)1
941 << (TYPE_PRECISION (integer_type_node)
942 - (count - 1) * 4))
943 <= firstdig)))
944 pedwarn ("hex escape out of range");
945 *cptr = code;
946 return p;
947
948 case '0': case '1': case '2': case '3': case '4':
949 case '5': case '6': case '7':
950 code = 0;
951 for (count = 0; count < 3; count++)
952 {
953 if (c < '0' || c > '7')
954 {
955 p--;
956 break;
957 }
958 code = (code * 8) + (c - '0');
959 if (p == limit)
960 break;
961 c = *p++;
962 }
963
964 if (count == 3)
965 p--;
966
967 *cptr = code;
968 return p;
969
970 case '\\': case '\'': case '"': case '?':
971 *cptr = c;
972 return p;
973
974 case 'n': *cptr = TARGET_NEWLINE; return p;
975 case 't': *cptr = TARGET_TAB; return p;
976 case 'r': *cptr = TARGET_CR; return p;
977 case 'f': *cptr = TARGET_FF; return p;
978 case 'b': *cptr = TARGET_BS; return p;
979 case 'v': *cptr = TARGET_VT; return p;
980 case 'a':
981 if (warn_traditional && !in_system_header)
982 warning ("the meaning of '\\a' varies with -traditional");
983 *cptr = flag_traditional ? c : TARGET_BELL;
984 return p;
985
986 /* Warnings and support checks handled by read_ucs(). */
987 case 'u': case 'U':
988 if (c_language != clk_cplusplus && !flag_isoc99)
989 break;
990
991 if (warn_traditional && !in_system_header)
992 warning ("the meaning of '\\%c' varies with -traditional", c);
993
994 return read_ucs (p, limit, cptr, c == 'u' ? 4 : 8);
995
996 case 'e': case 'E':
997 if (pedantic)
998 pedwarn ("non-ISO-standard escape sequence, '\\%c'", c);
999 *cptr = TARGET_ESC; return p;
1000
1001 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1002 '\%' is used to prevent SCCS from getting confused. */
1003 case '(': case '{': case '[': case '%':
1004 if (pedantic)
1005 pedwarn ("unknown escape sequence '\\%c'", c);
1006 *cptr = c;
1007 return p;
1008 }
1009
1010 if (ISGRAPH (c))
1011 pedwarn ("unknown escape sequence '\\%c'", c);
1012 else
1013 pedwarn ("unknown escape sequence: '\\' followed by char 0x%.2x", c);
1014
1015 *cptr = c;
1016 return p;
1017 }
1018
1019 #if 0 /* not yet */
1020 /* Returns nonzero if C is a universal-character-name. Give an error if it
1021 is not one which may appear in an identifier, as per [extendid].
1022
1023 Note that extended character support in identifiers has not yet been
1024 implemented. It is my personal opinion that this is not a desirable
1025 feature. Portable code cannot count on support for more than the basic
1026 identifier character set. */
1027
1028 static inline int
1029 is_extended_char (c)
1030 int c;
1031 {
1032 #ifdef TARGET_EBCDIC
1033 return 0;
1034 #else
1035 /* ASCII. */
1036 if (c < 0x7f)
1037 return 0;
1038
1039 /* None of the valid chars are outside the Basic Multilingual Plane (the
1040 low 16 bits). */
1041 if (c > 0xffff)
1042 {
1043 error ("universal-character-name '\\U%08x' not valid in identifier", c);
1044 return 1;
1045 }
1046
1047 /* Latin */
1048 if ((c >= 0x00c0 && c <= 0x00d6)
1049 || (c >= 0x00d8 && c <= 0x00f6)
1050 || (c >= 0x00f8 && c <= 0x01f5)
1051 || (c >= 0x01fa && c <= 0x0217)
1052 || (c >= 0x0250 && c <= 0x02a8)
1053 || (c >= 0x1e00 && c <= 0x1e9a)
1054 || (c >= 0x1ea0 && c <= 0x1ef9))
1055 return 1;
1056
1057 /* Greek */
1058 if ((c == 0x0384)
1059 || (c >= 0x0388 && c <= 0x038a)
1060 || (c == 0x038c)
1061 || (c >= 0x038e && c <= 0x03a1)
1062 || (c >= 0x03a3 && c <= 0x03ce)
1063 || (c >= 0x03d0 && c <= 0x03d6)
1064 || (c == 0x03da)
1065 || (c == 0x03dc)
1066 || (c == 0x03de)
1067 || (c == 0x03e0)
1068 || (c >= 0x03e2 && c <= 0x03f3)
1069 || (c >= 0x1f00 && c <= 0x1f15)
1070 || (c >= 0x1f18 && c <= 0x1f1d)
1071 || (c >= 0x1f20 && c <= 0x1f45)
1072 || (c >= 0x1f48 && c <= 0x1f4d)
1073 || (c >= 0x1f50 && c <= 0x1f57)
1074 || (c == 0x1f59)
1075 || (c == 0x1f5b)
1076 || (c == 0x1f5d)
1077 || (c >= 0x1f5f && c <= 0x1f7d)
1078 || (c >= 0x1f80 && c <= 0x1fb4)
1079 || (c >= 0x1fb6 && c <= 0x1fbc)
1080 || (c >= 0x1fc2 && c <= 0x1fc4)
1081 || (c >= 0x1fc6 && c <= 0x1fcc)
1082 || (c >= 0x1fd0 && c <= 0x1fd3)
1083 || (c >= 0x1fd6 && c <= 0x1fdb)
1084 || (c >= 0x1fe0 && c <= 0x1fec)
1085 || (c >= 0x1ff2 && c <= 0x1ff4)
1086 || (c >= 0x1ff6 && c <= 0x1ffc))
1087 return 1;
1088
1089 /* Cyrillic */
1090 if ((c >= 0x0401 && c <= 0x040d)
1091 || (c >= 0x040f && c <= 0x044f)
1092 || (c >= 0x0451 && c <= 0x045c)
1093 || (c >= 0x045e && c <= 0x0481)
1094 || (c >= 0x0490 && c <= 0x04c4)
1095 || (c >= 0x04c7 && c <= 0x04c8)
1096 || (c >= 0x04cb && c <= 0x04cc)
1097 || (c >= 0x04d0 && c <= 0x04eb)
1098 || (c >= 0x04ee && c <= 0x04f5)
1099 || (c >= 0x04f8 && c <= 0x04f9))
1100 return 1;
1101
1102 /* Armenian */
1103 if ((c >= 0x0531 && c <= 0x0556)
1104 || (c >= 0x0561 && c <= 0x0587))
1105 return 1;
1106
1107 /* Hebrew */
1108 if ((c >= 0x05d0 && c <= 0x05ea)
1109 || (c >= 0x05f0 && c <= 0x05f4))
1110 return 1;
1111
1112 /* Arabic */
1113 if ((c >= 0x0621 && c <= 0x063a)
1114 || (c >= 0x0640 && c <= 0x0652)
1115 || (c >= 0x0670 && c <= 0x06b7)
1116 || (c >= 0x06ba && c <= 0x06be)
1117 || (c >= 0x06c0 && c <= 0x06ce)
1118 || (c >= 0x06e5 && c <= 0x06e7))
1119 return 1;
1120
1121 /* Devanagari */
1122 if ((c >= 0x0905 && c <= 0x0939)
1123 || (c >= 0x0958 && c <= 0x0962))
1124 return 1;
1125
1126 /* Bengali */
1127 if ((c >= 0x0985 && c <= 0x098c)
1128 || (c >= 0x098f && c <= 0x0990)
1129 || (c >= 0x0993 && c <= 0x09a8)
1130 || (c >= 0x09aa && c <= 0x09b0)
1131 || (c == 0x09b2)
1132 || (c >= 0x09b6 && c <= 0x09b9)
1133 || (c >= 0x09dc && c <= 0x09dd)
1134 || (c >= 0x09df && c <= 0x09e1)
1135 || (c >= 0x09f0 && c <= 0x09f1))
1136 return 1;
1137
1138 /* Gurmukhi */
1139 if ((c >= 0x0a05 && c <= 0x0a0a)
1140 || (c >= 0x0a0f && c <= 0x0a10)
1141 || (c >= 0x0a13 && c <= 0x0a28)
1142 || (c >= 0x0a2a && c <= 0x0a30)
1143 || (c >= 0x0a32 && c <= 0x0a33)
1144 || (c >= 0x0a35 && c <= 0x0a36)
1145 || (c >= 0x0a38 && c <= 0x0a39)
1146 || (c >= 0x0a59 && c <= 0x0a5c)
1147 || (c == 0x0a5e))
1148 return 1;
1149
1150 /* Gujarati */
1151 if ((c >= 0x0a85 && c <= 0x0a8b)
1152 || (c == 0x0a8d)
1153 || (c >= 0x0a8f && c <= 0x0a91)
1154 || (c >= 0x0a93 && c <= 0x0aa8)
1155 || (c >= 0x0aaa && c <= 0x0ab0)
1156 || (c >= 0x0ab2 && c <= 0x0ab3)
1157 || (c >= 0x0ab5 && c <= 0x0ab9)
1158 || (c == 0x0ae0))
1159 return 1;
1160
1161 /* Oriya */
1162 if ((c >= 0x0b05 && c <= 0x0b0c)
1163 || (c >= 0x0b0f && c <= 0x0b10)
1164 || (c >= 0x0b13 && c <= 0x0b28)
1165 || (c >= 0x0b2a && c <= 0x0b30)
1166 || (c >= 0x0b32 && c <= 0x0b33)
1167 || (c >= 0x0b36 && c <= 0x0b39)
1168 || (c >= 0x0b5c && c <= 0x0b5d)
1169 || (c >= 0x0b5f && c <= 0x0b61))
1170 return 1;
1171
1172 /* Tamil */
1173 if ((c >= 0x0b85 && c <= 0x0b8a)
1174 || (c >= 0x0b8e && c <= 0x0b90)
1175 || (c >= 0x0b92 && c <= 0x0b95)
1176 || (c >= 0x0b99 && c <= 0x0b9a)
1177 || (c == 0x0b9c)
1178 || (c >= 0x0b9e && c <= 0x0b9f)
1179 || (c >= 0x0ba3 && c <= 0x0ba4)
1180 || (c >= 0x0ba8 && c <= 0x0baa)
1181 || (c >= 0x0bae && c <= 0x0bb5)
1182 || (c >= 0x0bb7 && c <= 0x0bb9))
1183 return 1;
1184
1185 /* Telugu */
1186 if ((c >= 0x0c05 && c <= 0x0c0c)
1187 || (c >= 0x0c0e && c <= 0x0c10)
1188 || (c >= 0x0c12 && c <= 0x0c28)
1189 || (c >= 0x0c2a && c <= 0x0c33)
1190 || (c >= 0x0c35 && c <= 0x0c39)
1191 || (c >= 0x0c60 && c <= 0x0c61))
1192 return 1;
1193
1194 /* Kannada */
1195 if ((c >= 0x0c85 && c <= 0x0c8c)
1196 || (c >= 0x0c8e && c <= 0x0c90)
1197 || (c >= 0x0c92 && c <= 0x0ca8)
1198 || (c >= 0x0caa && c <= 0x0cb3)
1199 || (c >= 0x0cb5 && c <= 0x0cb9)
1200 || (c >= 0x0ce0 && c <= 0x0ce1))
1201 return 1;
1202
1203 /* Malayalam */
1204 if ((c >= 0x0d05 && c <= 0x0d0c)
1205 || (c >= 0x0d0e && c <= 0x0d10)
1206 || (c >= 0x0d12 && c <= 0x0d28)
1207 || (c >= 0x0d2a && c <= 0x0d39)
1208 || (c >= 0x0d60 && c <= 0x0d61))
1209 return 1;
1210
1211 /* Thai */
1212 if ((c >= 0x0e01 && c <= 0x0e30)
1213 || (c >= 0x0e32 && c <= 0x0e33)
1214 || (c >= 0x0e40 && c <= 0x0e46)
1215 || (c >= 0x0e4f && c <= 0x0e5b))
1216 return 1;
1217
1218 /* Lao */
1219 if ((c >= 0x0e81 && c <= 0x0e82)
1220 || (c == 0x0e84)
1221 || (c == 0x0e87)
1222 || (c == 0x0e88)
1223 || (c == 0x0e8a)
1224 || (c == 0x0e0d)
1225 || (c >= 0x0e94 && c <= 0x0e97)
1226 || (c >= 0x0e99 && c <= 0x0e9f)
1227 || (c >= 0x0ea1 && c <= 0x0ea3)
1228 || (c == 0x0ea5)
1229 || (c == 0x0ea7)
1230 || (c == 0x0eaa)
1231 || (c == 0x0eab)
1232 || (c >= 0x0ead && c <= 0x0eb0)
1233 || (c == 0x0eb2)
1234 || (c == 0x0eb3)
1235 || (c == 0x0ebd)
1236 || (c >= 0x0ec0 && c <= 0x0ec4)
1237 || (c == 0x0ec6))
1238 return 1;
1239
1240 /* Georgian */
1241 if ((c >= 0x10a0 && c <= 0x10c5)
1242 || (c >= 0x10d0 && c <= 0x10f6))
1243 return 1;
1244
1245 /* Hiragana */
1246 if ((c >= 0x3041 && c <= 0x3094)
1247 || (c >= 0x309b && c <= 0x309e))
1248 return 1;
1249
1250 /* Katakana */
1251 if ((c >= 0x30a1 && c <= 0x30fe))
1252 return 1;
1253
1254 /* Bopmofo */
1255 if ((c >= 0x3105 && c <= 0x312c))
1256 return 1;
1257
1258 /* Hangul */
1259 if ((c >= 0x1100 && c <= 0x1159)
1260 || (c >= 0x1161 && c <= 0x11a2)
1261 || (c >= 0x11a8 && c <= 0x11f9))
1262 return 1;
1263
1264 /* CJK Unified Ideographs */
1265 if ((c >= 0xf900 && c <= 0xfa2d)
1266 || (c >= 0xfb1f && c <= 0xfb36)
1267 || (c >= 0xfb38 && c <= 0xfb3c)
1268 || (c == 0xfb3e)
1269 || (c >= 0xfb40 && c <= 0xfb41)
1270 || (c >= 0xfb42 && c <= 0xfb44)
1271 || (c >= 0xfb46 && c <= 0xfbb1)
1272 || (c >= 0xfbd3 && c <= 0xfd3f)
1273 || (c >= 0xfd50 && c <= 0xfd8f)
1274 || (c >= 0xfd92 && c <= 0xfdc7)
1275 || (c >= 0xfdf0 && c <= 0xfdfb)
1276 || (c >= 0xfe70 && c <= 0xfe72)
1277 || (c == 0xfe74)
1278 || (c >= 0xfe76 && c <= 0xfefc)
1279 || (c >= 0xff21 && c <= 0xff3a)
1280 || (c >= 0xff41 && c <= 0xff5a)
1281 || (c >= 0xff66 && c <= 0xffbe)
1282 || (c >= 0xffc2 && c <= 0xffc7)
1283 || (c >= 0xffca && c <= 0xffcf)
1284 || (c >= 0xffd2 && c <= 0xffd7)
1285 || (c >= 0xffda && c <= 0xffdc)
1286 || (c >= 0x4e00 && c <= 0x9fa5))
1287 return 1;
1288
1289 error ("universal-character-name '\\u%04x' not valid in identifier", c);
1290 return 1;
1291 #endif
1292 }
1293
/* Append the UTF-8 encoding of the value C to the token buffer, one
   byte at a time through extend_token.

   Values up to 0x7f are emitted unchanged as a single byte.  Larger
   values get a lead byte whose high bits (MASK) announce the length of
   the sequence, followed by continuation bytes of the form 10xxxxxx,
   each carrying the next six payload bits, most significant group
   first.  Sequences of up to six bytes are produced, so values above
   0x1fffff still use the long forms of the original UTF-8
   definition.  */

static void
utf8_extend_token (c)
     int c;
{
  int shift, mask;

  if (c <= 0x0000007f)
    {
      extend_token (c);
      return;
    }
  else if (c <= 0x000007ff)
    shift = 6, mask = 0xc0;
  else if (c <= 0x0000ffff)
    shift = 12, mask = 0xe0;
  else if (c <= 0x001fffff)
    shift = 18, mask = 0xf0;
  else if (c <= 0x03ffffff)
    shift = 24, mask = 0xf8;
  else
    shift = 30, mask = 0xfc;

  /* Lead byte: length marker plus the most significant payload bits.  */
  extend_token (mask | (c >> shift));
  do
    {
      shift -= 6;
      /* Keep only the six bits that belong to this continuation byte.
         Without the mask, higher payload bits would be OR-ed into the
         byte and corrupt both its 10xxxxxx marker and its value.  */
      extend_token ((unsigned char) (0x80 | ((c >> shift) & 0x3f)));
    }
  while (shift);
}
1326 #endif
1327
#if 0
/* Compiled out.  Each entry points at one integer type node and
   records, as 0/1 flags, whether that type is unsigned, long and
   long long.  The table lists int, unsigned int, long, unsigned long,
   long long and unsigned long long, in that order.  */
struct try_type
{
  tree *node_var;
  char unsigned_flag, long_flag, long_long_flag;
};

struct try_type type_sequence[] =
{
  /* node                            unsigned  long  long long  */
  { &integer_type_node,              0,        0,    0 },
  { &unsigned_type_node,             1,        0,    0 },
  { &long_integer_type_node,         0,        1,    0 },
  { &long_unsigned_type_node,        1,        1,    0 },
  { &long_long_integer_type_node,    0,        1,    1 },
  { &long_long_unsigned_type_node,   1,        1,    1 }
};
#endif /* 0 */
1347 \f
1348 struct pf_args
1349 {
1350 /* Input */
1351 const char *str;
1352 int fflag;
1353 int lflag;
1354 int base;
1355 /* Output */
1356 int conversion_errno;
1357 REAL_VALUE_TYPE value;
1358 tree type;
1359 };
1360
1361 static void
1362 parse_float (data)
1363 PTR data;
1364 {
1365 struct pf_args * args = (struct pf_args *) data;
1366 const char *typename;
1367
1368 args->conversion_errno = 0;
1369 args->type = double_type_node;
1370 typename = "double";
1371
1372 /* The second argument, machine_mode, of REAL_VALUE_ATOF
1373 tells the desired precision of the binary result
1374 of decimal-to-binary conversion. */
1375
1376 if (args->fflag)
1377 {
1378 if (args->lflag)
1379 error ("both 'f' and 'l' suffixes on floating constant");
1380
1381 args->type = float_type_node;
1382 typename = "float";
1383 }
1384 else if (args->lflag)
1385 {
1386 args->type = long_double_type_node;
1387 typename = "long double";
1388 }
1389 else if (flag_single_precision_constant)
1390 {
1391 args->type = float_type_node;
1392 typename = "float";
1393 }
1394
1395 errno = 0;
1396 if (args->base == 16)
1397 args->value = REAL_VALUE_HTOF (args->str, TYPE_MODE (args->type));
1398 else
1399 args->value = REAL_VALUE_ATOF (args->str, TYPE_MODE (args->type));
1400
1401 args->conversion_errno = errno;
1402 /* A diagnostic is required here by some ISO C testsuites.
1403 This is not pedwarn, because some people don't want
1404 an error for this. */
1405 if (REAL_VALUE_ISINF (args->value) && pedantic)
1406 warning ("floating point number exceeds range of '%s'", typename);
1407 }
1408
1409 int
1410 c_lex (value)
1411 tree *value;
1412 {
1413 #if USE_CPPLIB
1414 const cpp_token *tok;
1415 enum cpp_ttype type;
1416
1417 retry:
1418 timevar_push (TV_CPP);
1419 tok = cpp_get_token (&parse_in);
1420 timevar_pop (TV_CPP);
1421
1422 /* The C++ front end does horrible things with the current line
1423 number. To ensure an accurate line number, we must reset it
1424 every time we return a token. If we reset it from tok->line
1425 every time, we'll get line numbers inside macros referring to the
1426 macro definition; this is nice, but we don't want to change the
1427 behavior until integrated mode is the only option. So we keep our
1428 own idea of the line number, and reset it from tok->line at each
1429 new line (which never happens inside a macro). */
1430 if (tok->flags & BOL)
1431 lex_lineno = tok->line;
1432
1433 *value = NULL_TREE;
1434 lineno = lex_lineno;
1435 type = tok->type;
1436 switch (type)
1437 {
1438 case CPP_OPEN_BRACE: indent_level++; break;
1439 case CPP_CLOSE_BRACE: indent_level--; break;
1440
1441 /* Issue this error here, where we can get at tok->val.aux. */
1442 case CPP_OTHER:
1443 if (ISGRAPH (tok->val.aux))
1444 error ("stray '%c' in program", tok->val.aux);
1445 else
1446 error ("stray '\\%#o' in program", tok->val.aux);
1447 goto retry;
1448
1449 case CPP_DEFINED:
1450 type = CPP_NAME;
1451 case CPP_NAME:
1452 *value = get_identifier ((const char *)tok->val.node->name);
1453 break;
1454
1455 case CPP_INT:
1456 case CPP_FLOAT:
1457 case CPP_NUMBER:
1458 *value = lex_number ((const char *)tok->val.str.text, tok->val.str.len);
1459 break;
1460
1461 case CPP_CHAR:
1462 case CPP_WCHAR:
1463 *value = lex_charconst ((const char *)tok->val.str.text,
1464 tok->val.str.len, tok->type == CPP_WCHAR);
1465 break;
1466
1467 case CPP_STRING:
1468 case CPP_WSTRING:
1469 case CPP_OSTRING:
1470 *value = lex_string ((const char *)tok->val.str.text,
1471 tok->val.str.len, tok->type == CPP_WSTRING);
1472 break;
1473
1474 /* These tokens should not be visible outside cpplib. */
1475 case CPP_HEADER_NAME:
1476 case CPP_COMMENT:
1477 case CPP_MACRO_ARG:
1478 case CPP_PLACEMARKER:
1479 abort ();
1480
1481 default: break;
1482 }
1483
1484 return type;
1485
1486 #else
1487 int c;
1488 char *p;
1489 int wide_flag = 0;
1490 int objc_flag = 0;
1491 int charconst = 0;
1492
1493 *value = NULL_TREE;
1494
1495 retry:
1496 c = getch ();
1497
1498 /* Effectively do c = skip_white_space (c)
1499 but do it faster in the usual cases. */
1500 while (1)
1501 switch (c)
1502 {
1503 case ' ':
1504 case '\t':
1505 case '\f':
1506 case '\v':
1507 c = getch ();
1508 break;
1509
1510 case '\n':
1511 c = skip_white_space (c);
1512 default:
1513 goto found_nonwhite;
1514 }
1515 found_nonwhite:
1516
1517 lineno = lex_lineno;
1518
1519 switch (c)
1520 {
1521 case EOF:
1522 return CPP_EOF;
1523
1524 case 'L':
1525 /* Capital L may start a wide-string or wide-character constant. */
1526 {
1527 register int c1 = getch();
1528 if (c1 == '\'')
1529 {
1530 wide_flag = 1;
1531 goto char_constant;
1532 }
1533 if (c1 == '"')
1534 {
1535 wide_flag = 1;
1536 goto string_constant;
1537 }
1538 put_back (c1);
1539 }
1540 goto letter;
1541
1542 case '@':
1543 if (!doing_objc_thang)
1544 goto straychar;
1545 else
1546 {
1547 /* '@' may start a constant string object. */
1548 register int c1 = getch ();
1549 if (c1 == '"')
1550 {
1551 objc_flag = 1;
1552 goto string_constant;
1553 }
1554 put_back (c1);
1555 /* Fall through to treat '@' as the start of an identifier. */
1556 }
1557
1558 case 'A': case 'B': case 'C': case 'D': case 'E':
1559 case 'F': case 'G': case 'H': case 'I': case 'J':
1560 case 'K': case 'M': case 'N': case 'O':
1561 case 'P': case 'Q': case 'R': case 'S': case 'T':
1562 case 'U': case 'V': case 'W': case 'X': case 'Y':
1563 case 'Z':
1564 case 'a': case 'b': case 'c': case 'd': case 'e':
1565 case 'f': case 'g': case 'h': case 'i': case 'j':
1566 case 'k': case 'l': case 'm': case 'n': case 'o':
1567 case 'p': case 'q': case 'r': case 's': case 't':
1568 case 'u': case 'v': case 'w': case 'x': case 'y':
1569 case 'z':
1570 case '_':
1571 case '$':
1572 letter:
1573 p = token_buffer;
1574 while (ISALNUM (c) || c == '_' || c == '$' || c == '@')
1575 {
1576 /* Make sure this char really belongs in an identifier. */
1577 if (c == '$')
1578 {
1579 if (! dollars_in_ident)
1580 error ("'$' in identifier");
1581 else if (pedantic)
1582 pedwarn ("'$' in identifier");
1583 }
1584
1585 if (p >= token_buffer + maxtoken)
1586 p = extend_token_buffer (p);
1587
1588 *p++ = c;
1589 c = getch();
1590 }
1591
1592 put_back (c);
1593
1594 if (p >= token_buffer + maxtoken)
1595 p = extend_token_buffer (p);
1596 *p = 0;
1597
1598 *value = get_identifier (token_buffer);
1599 return CPP_NAME;
1600
1601 case '.':
1602 {
1603 /* It's hard to preserve tokenization on '.' because
1604 it could be a symbol by itself, or it could be the
1605 start of a floating point number and cpp won't tell us. */
1606 int c1 = getch ();
1607 if (c1 == '.')
1608 {
1609 int c2 = getch ();
1610 if (c2 == '.')
1611 return CPP_ELLIPSIS;
1612
1613 put_back (c2);
1614 error ("parse error at '..'");
1615 }
1616 else if (c1 == '*' && c_language == clk_cplusplus)
1617 return CPP_DOT_STAR;
1618
1619 put_back (c1);
1620 if (ISDIGIT (c1))
1621 goto number;
1622 }
1623 return CPP_DOT;
1624
1625 case '0': case '1': case '2': case '3': case '4':
1626 case '5': case '6': case '7': case '8': case '9':
1627 number:
1628 p = token_buffer;
1629 /* Scan the next preprocessing number. All C numeric constants
1630 are preprocessing numbers, but not all preprocessing numbers
1631 are valid numeric constants. Preprocessing numbers fit the
1632 regular expression \.?[0-9]([0-9a-zA-Z_.]|[eEpP][+-])*
1633 See C99 section 6.4.8. */
1634 for (;;)
1635 {
1636 if (p >= token_buffer + maxtoken)
1637 p = extend_token_buffer (p);
1638
1639 *p++ = c;
1640 c = getch();
1641
1642 if (c == '+' || c == '-')
1643 {
1644 int d = p[-1];
1645 if (d == 'e' || d == 'E' || d == 'p' || d == 'P')
1646 continue;
1647 }
1648 if (ISALNUM (c) || c == '_' || c == '.')
1649 continue;
1650 break;
1651 }
1652 put_back (c);
1653
1654 *value = lex_number (token_buffer, p - token_buffer);
1655 return CPP_NUMBER;
1656
1657 case '\'':
1658 char_constant:
1659 charconst = 1;
1660
1661 case '"':
1662 string_constant:
1663 {
1664 int delimiter = charconst ? '\'' : '"';
1665 #ifdef MULTIBYTE_CHARS
1666 int longest_char = local_mb_cur_max ();
1667 (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
1668 #endif
1669 c = getch ();
1670 p = token_buffer + 1;
1671
1672 while (c != delimiter && c != EOF)
1673 {
1674 if (p + 2 > token_buffer + maxtoken)
1675 p = extend_token_buffer (p);
1676
1677 /* ignore_escape_flag is set for reading the filename in #line. */
1678 if (!ignore_escape_flag && c == '\\')
1679 {
1680 *p++ = c;
1681 *p++ = getch (); /* escaped character */
1682 c = getch ();
1683 continue;
1684 }
1685 else
1686 {
1687 #ifdef MULTIBYTE_CHARS
1688 int i;
1689 int char_len = -1;
1690 for (i = 0; i < longest_char; ++i)
1691 {
1692 if (p + i >= token_buffer + maxtoken)
1693 p = extend_token_buffer (p);
1694 p[i] = c;
1695
1696 char_len = local_mblen (p, i + 1);
1697 if (char_len != -1)
1698 break;
1699 c = getch ();
1700 }
1701 if (char_len == -1)
1702 {
1703 /* Replace all except the first byte. */
1704 put_back (c);
1705 for (--i; i > 0; --i)
1706 put_back (p[i]);
1707 char_len = 1;
1708 }
1709 /* mbtowc sometimes needs an extra char before accepting */
1710 else if (char_len <= i)
1711 put_back (c);
1712
1713 p += char_len;
1714 #else
1715 *p++ = c;
1716 #endif
1717 c = getch ();
1718 }
1719 }
1720 }
1721
1722 if (charconst)
1723 {
1724 *value = lex_charconst (token_buffer + 1, p - (token_buffer + 1),
1725 wide_flag);
1726 return wide_flag ? CPP_WCHAR : CPP_CHAR;
1727 }
1728 else
1729 {
1730 *value = lex_string (token_buffer + 1, p - (token_buffer + 1),
1731 wide_flag);
1732 return wide_flag ? CPP_WSTRING : objc_flag ? CPP_OSTRING : CPP_STRING;
1733 }
1734
1735 case '+':
1736 case '-':
1737 case '&':
1738 case '|':
1739 case ':':
1740 case '<':
1741 case '>':
1742 case '*':
1743 case '/':
1744 case '%':
1745 case '^':
1746 case '!':
1747 case '=':
1748 {
1749 int c1;
1750 enum cpp_ttype type = CPP_EOF;
1751
1752 switch (c)
1753 {
1754 case '+': type = CPP_PLUS; break;
1755 case '-': type = CPP_MINUS; break;
1756 case '&': type = CPP_AND; break;
1757 case '|': type = CPP_OR; break;
1758 case ':': type = CPP_COLON; break;
1759 case '<': type = CPP_LESS; break;
1760 case '>': type = CPP_GREATER; break;
1761 case '*': type = CPP_MULT; break;
1762 case '/': type = CPP_DIV; break;
1763 case '%': type = CPP_MOD; break;
1764 case '^': type = CPP_XOR; break;
1765 case '!': type = CPP_NOT; break;
1766 case '=': type = CPP_EQ; break;
1767 }
1768
1769 c1 = getch ();
1770
1771 if (c1 == '=' && type < CPP_LAST_EQ)
1772 return type + (CPP_EQ_EQ - CPP_EQ);
1773 else if (c == c1)
1774 switch (c)
1775 {
1776 case '+': return CPP_PLUS_PLUS;
1777 case '-': return CPP_MINUS_MINUS;
1778 case '&': return CPP_AND_AND;
1779 case '|': return CPP_OR_OR;
1780 case ':':
1781 if (c_language == clk_cplusplus)
1782 return CPP_SCOPE;
1783 break;
1784
1785 case '<': type = CPP_LSHIFT; goto do_triad;
1786 case '>': type = CPP_RSHIFT; goto do_triad;
1787 }
1788 else
1789 switch (c)
1790 {
1791 case '-':
1792 if (c1 == '>')
1793 {
1794 if (c_language == clk_cplusplus)
1795 {
1796 c1 = getch ();
1797 if (c1 == '*')
1798 return CPP_DEREF_STAR;
1799 put_back (c1);
1800 }
1801 return CPP_DEREF;
1802 }
1803 break;
1804
1805 case '>':
1806 if (c1 == '?' && c_language == clk_cplusplus)
1807 { type = CPP_MAX; goto do_triad; }
1808 break;
1809
1810 case '<':
1811 if (c1 == ':' && flag_digraphs)
1812 return CPP_OPEN_SQUARE;
1813 if (c1 == '%' && flag_digraphs)
1814 { indent_level++; return CPP_OPEN_BRACE; }
1815 if (c1 == '?' && c_language == clk_cplusplus)
1816 { type = CPP_MIN; goto do_triad; }
1817 break;
1818
1819 case ':':
1820 if (c1 == '>' && flag_digraphs)
1821 return CPP_CLOSE_SQUARE;
1822 break;
1823 case '%':
1824 if (c1 == '>' && flag_digraphs)
1825 { indent_level--; return CPP_CLOSE_BRACE; }
1826 break;
1827 }
1828
1829 put_back (c1);
1830 return type;
1831
1832 do_triad:
1833 c1 = getch ();
1834 if (c1 == '=')
1835 type += (CPP_EQ_EQ - CPP_EQ);
1836 else
1837 put_back (c1);
1838 return type;
1839 }
1840
1841 case '~': return CPP_COMPL;
1842 case '?': return CPP_QUERY;
1843 case ',': return CPP_COMMA;
1844 case '(': return CPP_OPEN_PAREN;
1845 case ')': return CPP_CLOSE_PAREN;
1846 case '[': return CPP_OPEN_SQUARE;
1847 case ']': return CPP_CLOSE_SQUARE;
1848 case '{': indent_level++; return CPP_OPEN_BRACE;
1849 case '}': indent_level--; return CPP_CLOSE_BRACE;
1850 case ';': return CPP_SEMICOLON;
1851
1852 straychar:
1853 default:
1854 if (ISGRAPH (c))
1855 error ("stray '%c' in program", c);
1856 else
1857 error ("stray '\\%#o' in program", c);
1858 goto retry;
1859 }
1860 /* NOTREACHED */
1861 #endif
1862 }
1863
1864
1865 #define ERROR(msgid) do { error(msgid); goto syntax_error; } while(0)
1866
1867 static tree
1868 lex_number (str, len)
1869 const char *str;
1870 unsigned int len;
1871 {
1872 int base = 10;
1873 int count = 0;
1874 int largest_digit = 0;
1875 int numdigits = 0;
1876 int overflow = 0;
1877 int c;
1878 tree value;
1879 const char *p;
1880 enum anon1 { NOT_FLOAT = 0, AFTER_POINT, AFTER_EXPON } floatflag = NOT_FLOAT;
1881
1882 /* We actually store only HOST_BITS_PER_CHAR bits in each part.
1883 The code below which fills the parts array assumes that a host
1884 int is at least twice as wide as a host char, and that
1885 HOST_BITS_PER_WIDE_INT is an even multiple of HOST_BITS_PER_CHAR.
1886 Two HOST_WIDE_INTs is the largest int literal we can store.
1887 In order to detect overflow below, the number of parts (TOTAL_PARTS)
1888 must be exactly the number of parts needed to hold the bits
1889 of two HOST_WIDE_INTs. */
1890 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2)
1891 unsigned int parts[TOTAL_PARTS];
1892
1893 /* Optimize for most frequent case. */
1894 if (len == 1)
1895 {
1896 if (*str == '0')
1897 return integer_zero_node;
1898 else if (*str == '1')
1899 return integer_one_node;
1900 else
1901 return build_int_2 (*str - '0', 0);
1902 }
1903
1904 for (count = 0; count < TOTAL_PARTS; count++)
1905 parts[count] = 0;
1906
1907 /* len is known to be >1 at this point. */
1908 p = str;
1909
1910 if (len > 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
1911 {
1912 base = 16;
1913 p = str + 2;
1914 }
1915 /* The ISDIGIT check is so we are not confused by a suffix on 0. */
1916 else if (str[0] == '0' && ISDIGIT (str[1]))
1917 {
1918 base = 8;
1919 p = str + 1;
1920 }
1921
1922 do
1923 {
1924 c = *p++;
1925
1926 if (c == '.')
1927 {
1928 if (base == 16 && pedantic && !flag_isoc99)
1929 pedwarn ("floating constant may not be in radix 16");
1930 else if (floatflag == AFTER_POINT)
1931 ERROR ("too many decimal points in floating constant");
1932 else if (floatflag == AFTER_EXPON)
1933 ERROR ("decimal point in exponent - impossible!");
1934 else
1935 floatflag = AFTER_POINT;
1936
1937 if (base == 8)
1938 base = 10;
1939 }
1940 else if (c == '_')
1941 /* Possible future extension: silently ignore _ in numbers,
1942 permitting cosmetic grouping - e.g. 0x8000_0000 == 0x80000000
1943 but somewhat easier to read. Ada has this? */
1944 ERROR ("underscore in number");
1945 else
1946 {
1947 int n;
1948 /* It is not a decimal point.
1949 It should be a digit (perhaps a hex digit). */
1950
1951 if (ISDIGIT (c))
1952 {
1953 n = c - '0';
1954 }
1955 else if (base <= 10 && (c == 'e' || c == 'E'))
1956 {
1957 base = 10;
1958 floatflag = AFTER_EXPON;
1959 break;
1960 }
1961 else if (base == 16 && (c == 'p' || c == 'P'))
1962 {
1963 floatflag = AFTER_EXPON;
1964 break; /* start of exponent */
1965 }
1966 else if (base == 16 && c >= 'a' && c <= 'f')
1967 {
1968 n = c - 'a' + 10;
1969 }
1970 else if (base == 16 && c >= 'A' && c <= 'F')
1971 {
1972 n = c - 'A' + 10;
1973 }
1974 else
1975 {
1976 p--;
1977 break; /* start of suffix */
1978 }
1979
1980 if (n >= largest_digit)
1981 largest_digit = n;
1982 numdigits++;
1983
1984 for (count = 0; count < TOTAL_PARTS; count++)
1985 {
1986 parts[count] *= base;
1987 if (count)
1988 {
1989 parts[count]
1990 += (parts[count-1] >> HOST_BITS_PER_CHAR);
1991 parts[count-1]
1992 &= (1 << HOST_BITS_PER_CHAR) - 1;
1993 }
1994 else
1995 parts[0] += n;
1996 }
1997
1998 /* If the highest-order part overflows (gets larger than
1999 a host char will hold) then the whole number has
2000 overflowed. Record this and truncate the highest-order
2001 part. */
2002 if (parts[TOTAL_PARTS - 1] >> HOST_BITS_PER_CHAR)
2003 {
2004 overflow = 1;
2005 parts[TOTAL_PARTS - 1] &= (1 << HOST_BITS_PER_CHAR) - 1;
2006 }
2007 }
2008 }
2009 while (p < str + len);
2010
2011 /* This can happen on input like `int i = 0x;' */
2012 if (numdigits == 0)
2013 ERROR ("numeric constant with no digits");
2014
2015 if (largest_digit >= base)
2016 ERROR ("numeric constant contains digits beyond the radix");
2017
2018 if (floatflag != NOT_FLOAT)
2019 {
2020 tree type;
2021 int imag, fflag, lflag, conversion_errno;
2022 REAL_VALUE_TYPE real;
2023 struct pf_args args;
2024 char *copy;
2025
2026 if (base == 16 && floatflag != AFTER_EXPON)
2027 ERROR ("hexadecimal floating constant has no exponent");
2028
2029 /* Read explicit exponent if any, and put it in tokenbuf. */
2030 if ((base == 10 && ((c == 'e') || (c == 'E')))
2031 || (base == 16 && (c == 'p' || c == 'P')))
2032 {
2033 if (p < str + len)
2034 c = *p++;
2035 if (p < str + len && (c == '+' || c == '-'))
2036 c = *p++;
2037 /* Exponent is decimal, even if string is a hex float. */
2038 if (! ISDIGIT (c))
2039 ERROR ("floating constant exponent has no digits");
2040 while (p < str + len && ISDIGIT (c))
2041 c = *p++;
2042 if (! ISDIGIT (c))
2043 p--;
2044 }
2045
2046 /* Copy the float constant now; we don't want any suffixes in the
2047 string passed to parse_float. */
2048 copy = alloca (p - str + 1);
2049 memcpy (copy, str, p - str);
2050 copy[p - str] = '\0';
2051
2052 /* Now parse suffixes. */
2053 fflag = lflag = imag = 0;
2054 while (p < str + len)
2055 switch (*p++)
2056 {
2057 case 'f': case 'F':
2058 if (fflag)
2059 ERROR ("more than one 'f' suffix on floating constant");
2060 else if (warn_traditional && !in_system_header)
2061 warning ("traditional C rejects the 'f' suffix");
2062
2063 fflag = 1;
2064 break;
2065
2066 case 'l': case 'L':
2067 if (lflag)
2068 ERROR ("more than one 'l' suffix on floating constant");
2069 else if (warn_traditional && !in_system_header)
2070 warning ("traditional C rejects the 'l' suffix");
2071
2072 lflag = 1;
2073 break;
2074
2075 case 'i': case 'I':
2076 case 'j': case 'J':
2077 if (imag)
2078 ERROR ("more than one 'i' or 'j' suffix on floating constant");
2079 else if (pedantic)
2080 pedwarn ("ISO C forbids imaginary numeric constants");
2081 imag = 1;
2082 break;
2083
2084 default:
2085 ERROR ("invalid suffix on floating constant");
2086 }
2087
2088 /* Setup input for parse_float() */
2089 args.str = copy;
2090 args.fflag = fflag;
2091 args.lflag = lflag;
2092 args.base = base;
2093
2094 /* Convert string to a double, checking for overflow. */
2095 if (do_float_handler (parse_float, (PTR) &args))
2096 {
2097 /* Receive output from parse_float() */
2098 real = args.value;
2099 }
2100 else
2101 /* We got an exception from parse_float() */
2102 ERROR ("floating constant out of range");
2103
2104 /* Receive output from parse_float() */
2105 conversion_errno = args.conversion_errno;
2106 type = args.type;
2107
2108 #ifdef ERANGE
2109 /* ERANGE is also reported for underflow,
2110 so test the value to distinguish overflow from that. */
2111 if (conversion_errno == ERANGE && !flag_traditional && pedantic
2112 && (REAL_VALUES_LESS (dconst1, real)
2113 || REAL_VALUES_LESS (real, dconstm1)))
2114 warning ("floating point number exceeds range of 'double'");
2115 #endif
2116
2117 /* Create a node with determined type and value. */
2118 if (imag)
2119 value = build_complex (NULL_TREE, convert (type, integer_zero_node),
2120 build_real (type, real));
2121 else
2122 value = build_real (type, real);
2123 }
2124 else
2125 {
2126 tree trad_type, ansi_type, type;
2127 HOST_WIDE_INT high, low;
2128 int spec_unsigned = 0;
2129 int spec_long = 0;
2130 int spec_long_long = 0;
2131 int spec_imag = 0;
2132 int suffix_lu = 0;
2133 int warn = 0, i;
2134
2135 trad_type = ansi_type = type = NULL_TREE;
2136 while (p < str + len)
2137 {
2138 c = *p++;
2139 switch (c)
2140 {
2141 case 'u': case 'U':
2142 if (spec_unsigned)
2143 error ("two 'u' suffixes on integer constant");
2144 else if (warn_traditional && !in_system_header)
2145 warning ("traditional C rejects the 'u' suffix");
2146
2147 spec_unsigned = 1;
2148 if (spec_long)
2149 suffix_lu = 1;
2150 break;
2151
2152 case 'l': case 'L':
2153 if (spec_long)
2154 {
2155 if (spec_long_long)
2156 error ("three 'l' suffixes on integer constant");
2157 else if (suffix_lu)
2158 error ("'lul' is not a valid integer suffix");
2159 else if (c != spec_long)
2160 error ("'Ll' and 'lL' are not valid integer suffixes");
2161 else if (pedantic && ! flag_isoc99
2162 && ! in_system_header && warn_long_long)
2163 pedwarn ("ISO C89 forbids long long integer constants");
2164 spec_long_long = 1;
2165 }
2166 spec_long = c;
2167 break;
2168
2169 case 'i': case 'I': case 'j': case 'J':
2170 if (spec_imag)
2171 error ("more than one 'i' or 'j' suffix on integer constant");
2172 else if (pedantic)
2173 pedwarn ("ISO C forbids imaginary numeric constants");
2174 spec_imag = 1;
2175 break;
2176
2177 default:
2178 ERROR ("invalid suffix on integer constant");
2179 }
2180 }
2181
2182 /* If the literal overflowed, pedwarn about it now. */
2183 if (overflow)
2184 {
2185 warn = 1;
2186 pedwarn ("integer constant is too large for this configuration of the compiler - truncated to %d bits", HOST_BITS_PER_WIDE_INT * 2);
2187 }
2188
2189 /* This is simplified by the fact that our constant
2190 is always positive. */
2191
2192 high = low = 0;
2193
2194 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
2195 {
2196 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
2197 / HOST_BITS_PER_CHAR)]
2198 << (i * HOST_BITS_PER_CHAR));
2199 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
2200 }
2201
2202 value = build_int_2 (low, high);
2203 TREE_TYPE (value) = long_long_unsigned_type_node;
2204
2205 /* If warn_traditional, calculate both the ISO type and the
2206 traditional type, then see if they disagree.
2207 Otherwise, calculate only the type for the dialect in use. */
2208 if (warn_traditional || flag_traditional)
2209 {
2210 /* Calculate the traditional type. */
2211 /* Traditionally, any constant is signed; but if unsigned is
2212 specified explicitly, obey that. Use the smallest size
2213 with the right number of bits, except for one special
2214 case with decimal constants. */
2215 if (! spec_long && base != 10
2216 && int_fits_type_p (value, unsigned_type_node))
2217 trad_type = spec_unsigned ? unsigned_type_node : integer_type_node;
2218 /* A decimal constant must be long if it does not fit in
2219 type int. I think this is independent of whether the
2220 constant is signed. */
2221 else if (! spec_long && base == 10
2222 && int_fits_type_p (value, integer_type_node))
2223 trad_type = spec_unsigned ? unsigned_type_node : integer_type_node;
2224 else if (! spec_long_long)
2225 trad_type = (spec_unsigned
2226 ? long_unsigned_type_node
2227 : long_integer_type_node);
2228 else if (int_fits_type_p (value,
2229 spec_unsigned
2230 ? long_long_unsigned_type_node
2231 : long_long_integer_type_node))
2232 trad_type = (spec_unsigned
2233 ? long_long_unsigned_type_node
2234 : long_long_integer_type_node);
2235 else
2236 trad_type = (spec_unsigned
2237 ? widest_unsigned_literal_type_node
2238 : widest_integer_literal_type_node);
2239 }
2240 if (warn_traditional || ! flag_traditional)
2241 {
2242 /* Calculate the ISO type. */
2243 if (! spec_long && ! spec_unsigned
2244 && int_fits_type_p (value, integer_type_node))
2245 ansi_type = integer_type_node;
2246 else if (! spec_long && (base != 10 || spec_unsigned)
2247 && int_fits_type_p (value, unsigned_type_node))
2248 ansi_type = unsigned_type_node;
2249 else if (! spec_unsigned && !spec_long_long
2250 && int_fits_type_p (value, long_integer_type_node))
2251 ansi_type = long_integer_type_node;
2252 else if (! spec_long_long
2253 && int_fits_type_p (value, long_unsigned_type_node))
2254 ansi_type = long_unsigned_type_node;
2255 else if (! spec_unsigned
2256 && int_fits_type_p (value, long_long_integer_type_node))
2257 ansi_type = long_long_integer_type_node;
2258 else if (int_fits_type_p (value, long_long_unsigned_type_node))
2259 ansi_type = long_long_unsigned_type_node;
2260 else if (! spec_unsigned
2261 && int_fits_type_p (value, widest_integer_literal_type_node))
2262 ansi_type = widest_integer_literal_type_node;
2263 else
2264 ansi_type = widest_unsigned_literal_type_node;
2265 }
2266
2267 type = flag_traditional ? trad_type : ansi_type;
2268
2269 /* We assume that constants specified in a non-decimal
2270 base are bit patterns, and that the programmer really
2271 meant what they wrote. */
2272 if (warn_traditional && !in_system_header
2273 && base == 10 && trad_type != ansi_type)
2274 {
2275 if (TYPE_PRECISION (trad_type) != TYPE_PRECISION (ansi_type))
2276 warning ("width of integer constant changes with -traditional");
2277 else if (TREE_UNSIGNED (trad_type) != TREE_UNSIGNED (ansi_type))
2278 warning ("integer constant is unsigned in ISO C, signed with -traditional");
2279 else
2280 warning ("width of integer constant may change on other systems with -traditional");
2281 }
2282
2283 if (pedantic && !flag_traditional && !spec_long_long && !warn
2284 && (TYPE_PRECISION (long_integer_type_node) < TYPE_PRECISION (type)))
2285 {
2286 warn = 1;
2287 pedwarn ("integer constant larger than the maximum value of an unsigned long int");
2288 }
2289
2290 if (base == 10 && ! spec_unsigned && TREE_UNSIGNED (type))
2291 warning ("decimal constant is so large that it is unsigned");
2292
2293 if (spec_imag)
2294 {
2295 if (TYPE_PRECISION (type)
2296 <= TYPE_PRECISION (integer_type_node))
2297 value = build_complex (NULL_TREE, integer_zero_node,
2298 convert (integer_type_node, value));
2299 else
2300 ERROR ("complex integer constant is too wide for 'complex int'");
2301 }
2302 else if (flag_traditional && !int_fits_type_p (value, type))
2303 /* The traditional constant 0x80000000 is signed
2304 but doesn't fit in the range of int.
2305 This will change it to -0x80000000, which does fit. */
2306 {
2307 TREE_TYPE (value) = unsigned_type (type);
2308 value = convert (type, value);
2309 TREE_OVERFLOW (value) = TREE_CONSTANT_OVERFLOW (value) = 0;
2310 }
2311 else
2312 TREE_TYPE (value) = type;
2313
2314 /* If it's still an integer (not a complex), and it doesn't
2315 fit in the type we choose for it, then pedwarn. */
2316
2317 if (! warn
2318 && TREE_CODE (TREE_TYPE (value)) == INTEGER_TYPE
2319 && ! int_fits_type_p (value, TREE_TYPE (value)))
2320 pedwarn ("integer constant is larger than the maximum value for its type");
2321 }
2322
2323 if (p < str + len)
2324 error ("missing white space after number '%.*s'", (int) (p - str), str);
2325
2326 return value;
2327
2328 syntax_error:
2329 return integer_zero_node;
2330 }
2331
2332 static tree
2333 lex_string (str, len, wide)
2334 const char *str;
2335 unsigned int len;
2336 int wide;
2337 {
2338 tree value;
2339 char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
2340 char *q = buf;
2341 const char *p = str, *limit = str + len;
2342 unsigned int c;
2343 unsigned width = wide ? WCHAR_TYPE_SIZE
2344 : TYPE_PRECISION (char_type_node);
2345
2346 #ifdef MULTIBYTE_CHARS
2347 /* Reset multibyte conversion state. */
2348 (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
2349 #endif
2350
2351 while (p < limit)
2352 {
2353 #ifdef MULTIBYTE_CHARS
2354 wchar_t wc;
2355 int char_len;
2356
2357 char_len = local_mbtowc (&wc, p, limit - p);
2358 if (char_len == -1)
2359 {
2360 warning ("Ignoring invalid multibyte character");
2361 char_len = 1;
2362 c = *p++;
2363 }
2364 else
2365 {
2366 p += char_len;
2367 c = wc;
2368 }
2369 #else
2370 c = *p++;
2371 #endif
2372
2373 if (c == '\\' && !ignore_escape_flag)
2374 {
2375 p = readescape (p, limit, &c);
2376 if (width < HOST_BITS_PER_INT
2377 && (unsigned) c >= ((unsigned)1 << width))
2378 pedwarn ("escape sequence out of range for character");
2379 }
2380
2381 /* Add this single character into the buffer either as a wchar_t
2382 or as a single byte. */
2383 if (wide)
2384 {
2385 unsigned charwidth = TYPE_PRECISION (char_type_node);
2386 unsigned bytemask = (1 << width) - 1;
2387 int byte;
2388
2389 for (byte = 0; byte < WCHAR_BYTES; ++byte)
2390 {
2391 int n;
2392 if (byte >= (int) sizeof (c))
2393 n = 0;
2394 else
2395 n = (c >> (byte * charwidth)) & bytemask;
2396 if (BYTES_BIG_ENDIAN)
2397 q[WCHAR_BYTES - byte - 1] = n;
2398 else
2399 q[byte] = n;
2400 }
2401 q += WCHAR_BYTES;
2402 }
2403 else
2404 {
2405 *q++ = c;
2406 }
2407 }
2408
2409 /* Terminate the string value, either with a single byte zero
2410 or with a wide zero. */
2411
2412 if (wide)
2413 {
2414 memset (q, 0, WCHAR_BYTES);
2415 q += WCHAR_BYTES;
2416 }
2417 else
2418 {
2419 *q++ = '\0';
2420 }
2421
2422 value = build_string (q - buf, buf);
2423
2424 if (wide)
2425 TREE_TYPE (value) = wchar_array_type_node;
2426 else
2427 TREE_TYPE (value) = char_array_type_node;
2428 return value;
2429 }
2430
2431 static tree
2432 lex_charconst (str, len, wide)
2433 const char *str;
2434 unsigned int len;
2435 int wide;
2436 {
2437 const char *limit = str + len;
2438 int result = 0;
2439 int num_chars = 0;
2440 int chars_seen = 0;
2441 unsigned width = TYPE_PRECISION (char_type_node);
2442 int max_chars;
2443 unsigned int c;
2444 tree value;
2445
2446 #ifdef MULTIBYTE_CHARS
2447 int longest_char = local_mb_cur_max ();
2448 (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
2449 #endif
2450
2451 max_chars = TYPE_PRECISION (integer_type_node) / width;
2452 if (wide)
2453 width = WCHAR_TYPE_SIZE;
2454
2455 while (str < limit)
2456 {
2457 #ifdef MULTIBYTE_CHARS
2458 wchar_t wc;
2459 int char_len;
2460
2461 char_len = local_mbtowc (&wc, str, limit - str);
2462 if (char_len == -1)
2463 {
2464 warning ("Ignoring invalid multibyte character");
2465 char_len = 1;
2466 c = *str++;
2467 }
2468 else
2469 {
2470 p += char_len;
2471 c = wc;
2472 }
2473 #else
2474 c = *str++;
2475 #endif
2476
2477 ++chars_seen;
2478 if (c == '\\')
2479 {
2480 str = readescape (str, limit, &c);
2481 if (width < HOST_BITS_PER_INT
2482 && (unsigned) c >= ((unsigned)1 << width))
2483 pedwarn ("escape sequence out of range for character");
2484 }
2485 #ifdef MAP_CHARACTER
2486 if (ISPRINT (c))
2487 c = MAP_CHARACTER (c);
2488 #endif
2489
2490 /* Merge character into result; ignore excess chars. */
2491 num_chars += (width / TYPE_PRECISION (char_type_node));
2492 if (num_chars < max_chars + 1)
2493 {
2494 if (width < HOST_BITS_PER_INT)
2495 result = (result << width) | (c & ((1 << width) - 1));
2496 else
2497 result = c;
2498 }
2499 }
2500
2501 if (chars_seen == 0)
2502 error ("empty character constant");
2503 else if (num_chars > max_chars)
2504 {
2505 num_chars = max_chars;
2506 error ("character constant too long");
2507 }
2508 else if (chars_seen != 1 && ! flag_traditional && warn_multichar)
2509 warning ("multi-character character constant");
2510
2511 /* If char type is signed, sign-extend the constant. */
2512 if (! wide)
2513 {
2514 int num_bits = num_chars * width;
2515 if (num_bits == 0)
2516 /* We already got an error; avoid invalid shift. */
2517 value = build_int_2 (0, 0);
2518 else if (TREE_UNSIGNED (char_type_node)
2519 || ((result >> (num_bits - 1)) & 1) == 0)
2520 value = build_int_2 (result & (~(unsigned HOST_WIDE_INT) 0
2521 >> (HOST_BITS_PER_WIDE_INT - num_bits)),
2522 0);
2523 else
2524 value = build_int_2 (result | ~(~(unsigned HOST_WIDE_INT) 0
2525 >> (HOST_BITS_PER_WIDE_INT - num_bits)),
2526 -1);
2527 /* In C, a character constant has type 'int'; in C++, 'char'. */
2528 if (chars_seen <= 1 && c_language == clk_cplusplus)
2529 TREE_TYPE (value) = char_type_node;
2530 else
2531 TREE_TYPE (value) = integer_type_node;
2532 }
2533 else
2534 {
2535 value = build_int_2 (result, 0);
2536 TREE_TYPE (value) = wchar_type_node;
2537 }
2538
2539 return value;
2540 }
This page took 0.180542 seconds and 6 git commands to generate.