1 /* Lexical analyzer for C and Objective C.
2 Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
3 1998, 1999, 2000 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
39 #include "splay-tree.h"
41 /* MULTIBYTE_CHARS support only works for native compilers.
42 ??? Ideally what we want is to model widechar support after
43 the current floating point support. */
45 #undef MULTIBYTE_CHARS
48 #ifdef MULTIBYTE_CHARS
51 #endif /* MULTIBYTE_CHARS */
52 #ifndef GET_ENVIRONMENT
53 #define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME))
57 extern cpp_reader parse_in
;
59 /* Stream for reading from the input file. */
63 /* Private idea of the line number. See discussion in c_lex(). */
64 static int lex_lineno
;
66 /* We may keep statistics about how long each file took to compile. */
67 static int header_time
, body_time
;
68 static splay_tree file_info_tree
;
70 /* Cause the `yydebug' variable to be defined. */
77 unsigned char *buffer
;
82 static struct putback_buffer putback
= {NULL
, 0, -1};
84 static inline int getch
PARAMS ((void));
89 if (putback
.index
!= -1)
91 int ch
= putback
.buffer
[putback
.index
];
98 static inline void put_back
PARAMS ((int));
106 if (putback
.index
== putback
.buffer_size
- 1)
108 putback
.buffer_size
+= 16;
109 putback
.buffer
= xrealloc (putback
.buffer
, putback
.buffer_size
);
111 putback
.buffer
[++putback
.index
] = ch
;
119 /* File used for outputting assembler code. */
120 extern FILE *asm_out_file
;
122 #undef WCHAR_TYPE_SIZE
123 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
125 /* Number of bytes in a wide character. */
126 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
129 static int maxtoken
; /* Current nominal length of token buffer. */
130 static char *token_buffer
; /* Pointer to token buffer.
131 Actual allocated length is maxtoken + 2. */
134 int indent_level
; /* Number of { minus number of }. */
135 int pending_lang_change
; /* If we need to switch languages - C++ only */
136 int c_header_level
; /* depth in C headers - C++ only */
138 /* Nonzero tells yylex to ignore \ in string constants. */
139 static int ignore_escape_flag
;
141 static const char *readescape
PARAMS ((const char *, const char *,
143 static const char *read_ucs
PARAMS ((const char *, const char *,
144 unsigned int *, int));
145 static void parse_float
PARAMS ((PTR
));
146 static tree lex_number
PARAMS ((const char *, unsigned int));
147 static tree lex_string
PARAMS ((const char *, unsigned int, int));
148 static tree lex_charconst
PARAMS ((const char *, unsigned int, int));
149 static void update_header_times
PARAMS ((const char *));
150 static int dump_one_header
PARAMS ((splay_tree_node
, void *));
153 static int skip_white_space
PARAMS ((int));
154 static char *extend_token_buffer
PARAMS ((const char *));
155 static void extend_token_buffer_to
PARAMS ((int));
156 static int read_line_number
PARAMS ((int *));
157 static void process_directive
PARAMS ((void));
159 static void cb_ident
PARAMS ((cpp_reader
*, const unsigned char *,
161 static void cb_enter_file
PARAMS ((cpp_reader
*));
162 static void cb_leave_file
PARAMS ((cpp_reader
*));
163 static void cb_rename_file
PARAMS ((cpp_reader
*));
164 static void cb_def_pragma
PARAMS ((cpp_reader
*));
169 init_c_lex (filename
)
170 const char *filename
;
172 struct c_fileinfo
*toplevel
;
174 /* Set up filename timing. Must happen before cpp_start_read. */
175 file_info_tree
= splay_tree_new ((splay_tree_compare_fn
)strcmp
,
177 (splay_tree_delete_value_fn
)free
);
178 toplevel
= get_fileinfo ("<top level>");
179 if (flag_detailed_statistics
)
182 body_time
= get_run_time ();
183 toplevel
->time
= body_time
;
186 #ifdef MULTIBYTE_CHARS
187 /* Change to the native locale for multibyte conversions. */
188 setlocale (LC_CTYPE
, "");
189 GET_ENVIRONMENT (literal_codeset
, "LANG");
193 /* Open input file. */
194 if (filename
== 0 || !strcmp (filename
, "-"))
200 finput
= fopen (filename
, "r");
202 pfatal_with_name (filename
);
204 #ifdef IO_BUFFER_SIZE
205 setvbuf (finput
, (char *) xmalloc (IO_BUFFER_SIZE
), _IOFBF
, IO_BUFFER_SIZE
);
207 #else /* !USE_CPPLIB */
209 parse_in
.cb
.ident
= cb_ident
;
210 parse_in
.cb
.enter_file
= cb_enter_file
;
211 parse_in
.cb
.leave_file
= cb_leave_file
;
212 parse_in
.cb
.rename_file
= cb_rename_file
;
213 parse_in
.cb
.def_pragma
= cb_def_pragma
;
215 /* Make sure parse_in.digraphs matches flag_digraphs. */
216 CPP_OPTION (&parse_in
, digraphs
) = flag_digraphs
;
218 if (! cpp_start_read (&parse_in
, 0 /* no printer */, filename
))
221 if (filename
== 0 || !strcmp (filename
, "-"))
227 token_buffer
= (char *) xmalloc (maxtoken
+ 2);
229 /* Start it at 0, because check_newline is called at the very beginning
230 and will increment it to 1. */
231 lineno
= lex_lineno
= 0;
241 struct c_fileinfo
*fi
;
243 n
= splay_tree_lookup (file_info_tree
, (splay_tree_key
) name
);
245 return (struct c_fileinfo
*) n
->value
;
247 fi
= (struct c_fileinfo
*) xmalloc (sizeof (struct c_fileinfo
));
249 fi
->interface_only
= 0;
250 fi
->interface_unknown
= 1;
251 splay_tree_insert (file_info_tree
, (splay_tree_key
) name
,
252 (splay_tree_value
) fi
);
257 update_header_times (name
)
260 /* Changing files again. This means currently collected time
261 is charged against header time, and body time starts back at 0. */
262 if (flag_detailed_statistics
)
264 int this_time
= get_run_time ();
265 struct c_fileinfo
*file
= get_fileinfo (name
);
266 header_time
+= this_time
- body_time
;
267 file
->time
+= this_time
- body_time
;
268 body_time
= this_time
;
273 dump_one_header (n
, dummy
)
275 void *dummy ATTRIBUTE_UNUSED
;
277 print_time ((const char *) n
->key
,
278 ((struct c_fileinfo
*) n
->value
)->time
);
283 dump_time_statistics ()
285 struct c_fileinfo
*file
= get_fileinfo (input_filename
);
286 int this_time
= get_run_time ();
287 file
->time
+= this_time
- body_time
;
289 fprintf (stderr
, "\n******\n");
290 print_time ("header files (total)", header_time
);
291 print_time ("main file (total)", this_time
- body_time
);
292 fprintf (stderr
, "ratio = %g : 1\n",
293 (double)header_time
/ (double)(this_time
- body_time
));
294 fprintf (stderr
, "\n******\n");
296 splay_tree_foreach (file_info_tree
, dump_one_header
, 0);
301 /* If C is not whitespace, return C.
302 Otherwise skip whitespace and return first nonwhite char read. */
312 /* There is no need to process comments, backslash-newline,
313 or \r here. None can occur in the output of cpp. */
321 c
= check_newline ();
324 /* Per C99, horizontal whitespace is just these four characters. */
333 error ("stray '\\' in program");
343 /* Skips all of the white space at the current location in the input file. */
346 position_after_white_space ()
352 put_back (skip_white_space (c
));
355 /* Make the token buffer longer, preserving the data in it.
356 P should point to just beyond the last valid character in the old buffer.
357 The value we return is a pointer to the new buffer
358 at a place corresponding to P. */
361 extend_token_buffer_to (size
)
365 maxtoken
= maxtoken
* 2 + 10;
366 while (maxtoken
< size
);
367 token_buffer
= (char *) xrealloc (token_buffer
, maxtoken
+ 2);
371 extend_token_buffer (p
)
374 int offset
= p
- token_buffer
;
375 extend_token_buffer_to (offset
);
376 return token_buffer
+ offset
;
381 read_line_number (num
)
385 enum cpp_ttype token
= c_lex (&value
);
387 if (token
== CPP_NUMBER
&& TREE_CODE (value
) == INTEGER_CST
)
389 *num
= TREE_INT_CST_LOW (value
);
394 if (token
!= CPP_EOF
)
395 error ("invalid #-line");
400 /* At the beginning of a line, increment the line number
401 and process any #-directive on this line.
402 If the line is a #-directive, read the entire line and return a newline.
403 Otherwise, return the line's first non-whitespace character. */
410 /* Loop till we get a nonblank, non-directive line. */
413 /* Read first nonwhite char on the line. */
416 while (c
== ' ' || c
== '\t');
421 process_directive ();
434 enum cpp_ttype token
;
437 enum { act_none
, act_push
, act_pop
} action
;
438 int action_number
, l
;
440 #ifndef NO_IMPLICIT_EXTERN_C
441 int entering_c_header
;
444 /* Don't read beyond this line. */
448 token
= c_lex (&value
);
450 if (token
== CPP_NAME
)
452 /* If a letter follows, then if the word here is `line', skip
453 it and ignore it; otherwise, ignore the line, with an error
454 if the word isn't `pragma'. */
456 const char *name
= IDENTIFIER_POINTER (value
);
458 if (!strcmp (name
, "pragma"))
460 #ifdef HANDLE_GENERIC_PRAGMAS
465 else if (!strcmp (name
, "define"))
467 debug_define (lex_lineno
, GET_DIRECTIVE_LINE ());
470 else if (!strcmp (name
, "undef"))
472 debug_undef (lex_lineno
, GET_DIRECTIVE_LINE ());
475 else if (!strcmp (name
, "line"))
478 token
= c_lex (&value
);
481 else if (!strcmp (name
, "ident"))
483 /* #ident. We expect a string constant here.
484 The pedantic warning and syntax error are now in cpp. */
486 token
= c_lex (&value
);
487 if (token
!= CPP_STRING
|| TREE_CODE (value
) != STRING_CST
)
490 #ifdef ASM_OUTPUT_IDENT
493 ASM_OUTPUT_IDENT (asm_out_file
, TREE_STRING_POINTER (value
));
497 /* Skip the rest of this line. */
501 error ("undefined or invalid # directive `%s'", name
);
505 /* If the # is the only nonwhite char on the line,
506 just ignore it. Check the new newline. */
507 if (token
== CPP_EOF
)
511 /* Here we have either `#line' or `# <nonletter>'.
512 In either case, it should be a line number; a digit should follow. */
514 if (token
!= CPP_NUMBER
|| TREE_CODE (value
) != INTEGER_CST
)
516 error ("invalid #-line");
520 /* subtract one, because it is the following line that
521 gets the specified number */
523 l
= TREE_INT_CST_LOW (value
) - 1;
525 /* More follows: it must be a string constant (filename).
526 It would be neat to use cpplib to quickly process the string, but
527 (1) we don't have a handy tokenization of the string, and
528 (2) I don't know how well that would work in the presence
529 of filenames that contain wide characters. */
533 /* Don't treat \ as special if we are processing #line 1 "...".
534 If you want it to be treated specially, use # 1 "...". */
535 ignore_escape_flag
= 1;
538 /* Read the string constant. */
539 token
= c_lex (&value
);
541 ignore_escape_flag
= 0;
543 if (token
== CPP_EOF
)
545 /* No more: store the line number and check following line. */
550 if (token
!= CPP_STRING
|| TREE_CODE (value
) != STRING_CST
)
552 error ("invalid #line");
556 new_file
= TREE_STRING_POINTER (value
);
558 if (main_input_filename
== 0)
559 main_input_filename
= new_file
;
564 /* Each change of file name
565 reinitializes whether we are now in a system header. */
566 in_system_header
= 0;
568 if (!read_line_number (&action_number
))
570 /* Update the name in the top element of input_file_stack. */
571 if (input_file_stack
)
572 input_file_stack
->name
= input_filename
;
575 /* `1' after file name means entering new file.
576 `2' after file name means just left a file. */
578 if (action_number
== 1)
581 read_line_number (&action_number
);
583 else if (action_number
== 2)
586 read_line_number (&action_number
);
588 if (action_number
== 3)
590 /* `3' after file name means this is a system header file. */
591 in_system_header
= 1;
592 read_line_number (&action_number
);
594 #ifndef NO_IMPLICIT_EXTERN_C
595 if (action_number
== 4)
597 /* `4' after file name means this is a C header file. */
598 entering_c_header
= 1;
599 read_line_number (&action_number
);
603 /* Do the actions implied by the preceding numbers. */
604 if (action
== act_push
)
607 push_srcloc (input_filename
, 1);
608 input_file_stack
->indent_level
= indent_level
;
609 debug_start_source_file (input_filename
);
610 #ifndef NO_IMPLICIT_EXTERN_C
613 else if (entering_c_header
)
616 ++pending_lang_change
;
620 else if (action
== act_pop
)
622 /* Popping out of a file. */
623 if (input_file_stack
->next
)
625 #ifndef NO_IMPLICIT_EXTERN_C
626 if (c_header_level
&& --c_header_level
== 0)
628 if (entering_c_header
)
629 warning ("badly nested C headers from preprocessor");
630 --pending_lang_change
;
634 if (indent_level
!= input_file_stack
->indent_level
)
636 warning_with_file_and_line
637 (input_filename
, lex_lineno
,
638 "This file contains more '%c's than '%c's.",
639 indent_level
> input_file_stack
->indent_level
? '{' : '}',
640 indent_level
> input_file_stack
->indent_level
? '}' : '{');
644 debug_end_source_file (input_file_stack
->line
);
647 error ("#-lines for entering and leaving files don't match");
650 update_header_times (new_file
);
652 input_filename
= new_file
;
656 extract_interface_info ();
658 /* skip the rest of this line. */
662 while (getch () != '\n');
664 #else /* USE_CPPLIB */
666 /* Not yet handled: #pragma, #define, #undef.
667 No need to deal with linemarkers under normal conditions. */
670 cb_ident (pfile
, str
, len
)
671 cpp_reader
*pfile ATTRIBUTE_UNUSED
;
672 const unsigned char *str
;
675 #ifdef ASM_OUTPUT_IDENT
678 /* Convert escapes in the string. */
679 tree value
= lex_string ((const char *)str
, len
, 0);
680 ASM_OUTPUT_IDENT (asm_out_file
, TREE_STRING_POINTER (value
));
686 cb_enter_file (pfile
)
689 cpp_buffer
*ip
= CPP_BUFFER (pfile
);
690 /* Bleah, need a better interface to this. */
691 const char *flags
= cpp_syshdr_flags (pfile
, ip
);
693 /* Mustn't stack the main buffer on the input stack. (Ick.) */
696 lex_lineno
= lineno
= ip
->prev
->lineno
- 1;
697 push_srcloc (ggc_alloc_string (ip
->nominal_fname
, -1), 1);
698 input_file_stack
->indent_level
= indent_level
;
699 debug_start_source_file (ip
->nominal_fname
);
704 update_header_times (ip
->nominal_fname
);
707 extract_interface_info ();
709 in_system_header
= (flags
[0] != 0);
710 #ifndef NO_IMPLICIT_EXTERN_C
713 else if (flags
[2] != 0)
716 ++pending_lang_change
;
722 cb_leave_file (pfile
)
725 /* Bleah, need a better interface to this. */
726 const char *flags
= cpp_syshdr_flags (pfile
, CPP_BUFFER (pfile
));
728 if (indent_level
!= input_file_stack
->indent_level
)
730 warning_with_file_and_line
731 (input_filename
, lex_lineno
,
732 "This file contains more '%c's than '%c's.",
733 indent_level
> input_file_stack
->indent_level
? '{' : '}',
734 indent_level
> input_file_stack
->indent_level
? '}' : '{');
737 /* We get called for the main buffer, but we mustn't pop it. */
738 if (input_file_stack
->next
)
740 in_system_header
= (flags
[0] != 0);
741 #ifndef NO_IMPLICIT_EXTERN_C
742 if (c_header_level
&& --c_header_level
== 0)
745 warning ("badly nested C headers from preprocessor");
746 --pending_lang_change
;
749 lex_lineno
= CPP_BUFFER (pfile
)->lineno
;
750 debug_end_source_file (input_file_stack
->line
);
752 update_header_times (input_file_stack
->name
);
754 extract_interface_info ();
758 cb_rename_file (pfile
)
761 cpp_buffer
*ip
= CPP_BUFFER (pfile
);
762 /* Bleah, need a better interface to this. */
763 const char *flags
= cpp_syshdr_flags (pfile
, ip
);
764 input_filename
= ggc_alloc_string (ip
->nominal_fname
, -1);
765 lex_lineno
= ip
->lineno
;
766 in_system_header
= (flags
[0] != 0);
768 update_header_times (ip
->nominal_fname
);
770 extract_interface_info ();
774 cb_def_pragma (pfile
)
777 /* Issue a warning message if we have been asked to do so. Ignore
778 unknown pragmas in system headers unless an explicit
779 -Wunknown-pragmas has been given. */
780 if (warn_unknown_pragmas
> in_system_header
)
782 const unsigned char *space
, *name
;
783 const cpp_token
*t
= pfile
->first_directive_token
+ 2;
785 space
= t
[0].val
.node
->name
;
786 name
= t
[1].type
== CPP_NAME
? t
[1].val
.node
->name
: 0;
788 warning ("ignoring #pragma %s %s", space
, name
);
790 warning ("ignoring #pragma %s", space
);
793 #endif /* USE_CPPLIB */
795 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.
797 [lex.charset]: The character designated by the universal-character-name
798 \UNNNNNNNN is that character whose character short name in ISO/IEC 10646
799 is NNNNNNNN; the character designated by the universal-character-name
800 \uNNNN is that character whose character short name in ISO/IEC 10646 is
801 0000NNNN. If the hexadecimal value for a universal character name is
802 less than 0x20 or in the range 0x7F-0x9F (inclusive), or if the
803 universal character name designates a character in the basic source
804 character set, then the program is ill-formed.
806 We assume that wchar_t is Unicode, so we don't need to do any
807 mapping. Is this ever wrong? */
810 read_ucs (p
, limit
, cptr
, length
)
816 unsigned int code
= 0;
819 for (; length
; --length
)
823 error ("incomplete universal-character-name");
830 error ("non hex digit '%c' in universal-character-name", c
);
836 if (c
>= 'a' && c
<= 'f')
837 code
+= c
- 'a' + 10;
838 if (c
>= 'A' && c
<= 'F')
839 code
+= c
- 'A' + 10;
840 if (c
>= '0' && c
<= '9')
845 sorry ("universal-character-name on EBCDIC target");
846 *cptr
= 0x3f; /* EBCDIC invalid character */
850 if (code
> 0x9f && !(code
& 0x80000000))
851 /* True extended character, OK. */;
852 else if (code
>= 0x20 && code
< 0x7f)
854 /* ASCII printable character. The C character set consists of all of
855 these except $, @ and `. We use hex escapes so that this also
856 works with EBCDIC hosts. */
857 if (code
!= 0x24 && code
!= 0x40 && code
!= 0x60)
858 error ("universal-character-name used for '%c'", code
);
861 error ("invalid universal-character-name");
867 /* Read an escape sequence and write its character equivalent into *CPTR.
868 P is the input pointer, which is just after the backslash. LIMIT
869 is how much text we have.
870 Returns the updated input pointer. */
873 readescape (p
, limit
, cptr
)
878 unsigned int c
, code
, count
;
879 unsigned firstdig
= 0;
884 /* cpp has already issued an error for this. */
894 if (warn_traditional
&& !in_system_header
)
895 warning ("the meaning of `\\x' varies with -traditional");
897 if (flag_traditional
)
915 if (c
>= 'a' && c
<= 'f')
916 code
+= c
- 'a' + 10;
917 if (c
>= 'A' && c
<= 'F')
918 code
+= c
- 'A' + 10;
919 if (c
>= '0' && c
<= '9')
921 if (code
!= 0 || count
!= 0)
931 warning ("\\x used with no following hex digits");
936 /* Digits are all 0's. Ok. */
938 else if ((count
- 1) * 4 >= TYPE_PRECISION (integer_type_node
)
941 << (TYPE_PRECISION (integer_type_node
)
944 pedwarn ("hex escape out of range");
948 case '0': case '1': case '2': case '3': case '4':
949 case '5': case '6': case '7':
951 for (count
= 0; count
< 3; count
++)
953 if (c
< '0' || c
> '7')
958 code
= (code
* 8) + (c
- '0');
970 case '\\': case '\'': case '"': case '?':
974 case 'n': *cptr
= TARGET_NEWLINE
; return p
;
975 case 't': *cptr
= TARGET_TAB
; return p
;
976 case 'r': *cptr
= TARGET_CR
; return p
;
977 case 'f': *cptr
= TARGET_FF
; return p
;
978 case 'b': *cptr
= TARGET_BS
; return p
;
979 case 'v': *cptr
= TARGET_VT
; return p
;
981 if (warn_traditional
&& !in_system_header
)
982 warning ("the meaning of '\\a' varies with -traditional");
983 *cptr
= flag_traditional
? c
: TARGET_BELL
;
986 /* Warnings and support checks handled by read_ucs(). */
988 if (c_language
!= clk_cplusplus
&& !flag_isoc99
)
991 if (warn_traditional
&& !in_system_header
)
992 warning ("the meaning of '\\%c' varies with -traditional", c
);
994 return read_ucs (p
, limit
, cptr
, c
== 'u' ? 4 : 8);
998 pedwarn ("non-ISO-standard escape sequence, '\\%c'", c
);
999 *cptr
= TARGET_ESC
; return p
;
1001 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1002 '\%' is used to prevent SCCS from getting confused. */
1003 case '(': case '{': case '[': case '%':
1005 pedwarn ("unknown escape sequence '\\%c'", c
);
1011 pedwarn ("unknown escape sequence '\\%c'", c
);
1013 pedwarn ("unknown escape sequence: '\\' followed by char 0x%.2x", c
);
1020 /* Returns nonzero if C is a universal-character-name. Give an error if it
1021 is not one which may appear in an identifier, as per [extendid].
1023 Note that extended character support in identifiers has not yet been
1024 implemented. It is my personal opinion that this is not a desirable
1025 feature. Portable code cannot count on support for more than the basic
1026 identifier character set. */
1029 is_extended_char (c
)
1032 #ifdef TARGET_EBCDIC
1039 /* None of the valid chars are outside the Basic Multilingual Plane (the
1043 error ("universal-character-name '\\U%08x' not valid in identifier", c
);
1048 if ((c
>= 0x00c0 && c
<= 0x00d6)
1049 || (c
>= 0x00d8 && c
<= 0x00f6)
1050 || (c
>= 0x00f8 && c
<= 0x01f5)
1051 || (c
>= 0x01fa && c
<= 0x0217)
1052 || (c
>= 0x0250 && c
<= 0x02a8)
1053 || (c
>= 0x1e00 && c
<= 0x1e9a)
1054 || (c
>= 0x1ea0 && c
<= 0x1ef9))
1059 || (c
>= 0x0388 && c
<= 0x038a)
1061 || (c
>= 0x038e && c
<= 0x03a1)
1062 || (c
>= 0x03a3 && c
<= 0x03ce)
1063 || (c
>= 0x03d0 && c
<= 0x03d6)
1068 || (c
>= 0x03e2 && c
<= 0x03f3)
1069 || (c
>= 0x1f00 && c
<= 0x1f15)
1070 || (c
>= 0x1f18 && c
<= 0x1f1d)
1071 || (c
>= 0x1f20 && c
<= 0x1f45)
1072 || (c
>= 0x1f48 && c
<= 0x1f4d)
1073 || (c
>= 0x1f50 && c
<= 0x1f57)
1077 || (c
>= 0x1f5f && c
<= 0x1f7d)
1078 || (c
>= 0x1f80 && c
<= 0x1fb4)
1079 || (c
>= 0x1fb6 && c
<= 0x1fbc)
1080 || (c
>= 0x1fc2 && c
<= 0x1fc4)
1081 || (c
>= 0x1fc6 && c
<= 0x1fcc)
1082 || (c
>= 0x1fd0 && c
<= 0x1fd3)
1083 || (c
>= 0x1fd6 && c
<= 0x1fdb)
1084 || (c
>= 0x1fe0 && c
<= 0x1fec)
1085 || (c
>= 0x1ff2 && c
<= 0x1ff4)
1086 || (c
>= 0x1ff6 && c
<= 0x1ffc))
1090 if ((c
>= 0x0401 && c
<= 0x040d)
1091 || (c
>= 0x040f && c
<= 0x044f)
1092 || (c
>= 0x0451 && c
<= 0x045c)
1093 || (c
>= 0x045e && c
<= 0x0481)
1094 || (c
>= 0x0490 && c
<= 0x04c4)
1095 || (c
>= 0x04c7 && c
<= 0x04c8)
1096 || (c
>= 0x04cb && c
<= 0x04cc)
1097 || (c
>= 0x04d0 && c
<= 0x04eb)
1098 || (c
>= 0x04ee && c
<= 0x04f5)
1099 || (c
>= 0x04f8 && c
<= 0x04f9))
1103 if ((c
>= 0x0531 && c
<= 0x0556)
1104 || (c
>= 0x0561 && c
<= 0x0587))
1108 if ((c
>= 0x05d0 && c
<= 0x05ea)
1109 || (c
>= 0x05f0 && c
<= 0x05f4))
1113 if ((c
>= 0x0621 && c
<= 0x063a)
1114 || (c
>= 0x0640 && c
<= 0x0652)
1115 || (c
>= 0x0670 && c
<= 0x06b7)
1116 || (c
>= 0x06ba && c
<= 0x06be)
1117 || (c
>= 0x06c0 && c
<= 0x06ce)
1118 || (c
>= 0x06e5 && c
<= 0x06e7))
1122 if ((c
>= 0x0905 && c
<= 0x0939)
1123 || (c
>= 0x0958 && c
<= 0x0962))
1127 if ((c
>= 0x0985 && c
<= 0x098c)
1128 || (c
>= 0x098f && c
<= 0x0990)
1129 || (c
>= 0x0993 && c
<= 0x09a8)
1130 || (c
>= 0x09aa && c
<= 0x09b0)
1132 || (c
>= 0x09b6 && c
<= 0x09b9)
1133 || (c
>= 0x09dc && c
<= 0x09dd)
1134 || (c
>= 0x09df && c
<= 0x09e1)
1135 || (c
>= 0x09f0 && c
<= 0x09f1))
1139 if ((c
>= 0x0a05 && c
<= 0x0a0a)
1140 || (c
>= 0x0a0f && c
<= 0x0a10)
1141 || (c
>= 0x0a13 && c
<= 0x0a28)
1142 || (c
>= 0x0a2a && c
<= 0x0a30)
1143 || (c
>= 0x0a32 && c
<= 0x0a33)
1144 || (c
>= 0x0a35 && c
<= 0x0a36)
1145 || (c
>= 0x0a38 && c
<= 0x0a39)
1146 || (c
>= 0x0a59 && c
<= 0x0a5c)
1151 if ((c
>= 0x0a85 && c
<= 0x0a8b)
1153 || (c
>= 0x0a8f && c
<= 0x0a91)
1154 || (c
>= 0x0a93 && c
<= 0x0aa8)
1155 || (c
>= 0x0aaa && c
<= 0x0ab0)
1156 || (c
>= 0x0ab2 && c
<= 0x0ab3)
1157 || (c
>= 0x0ab5 && c
<= 0x0ab9)
1162 if ((c
>= 0x0b05 && c
<= 0x0b0c)
1163 || (c
>= 0x0b0f && c
<= 0x0b10)
1164 || (c
>= 0x0b13 && c
<= 0x0b28)
1165 || (c
>= 0x0b2a && c
<= 0x0b30)
1166 || (c
>= 0x0b32 && c
<= 0x0b33)
1167 || (c
>= 0x0b36 && c
<= 0x0b39)
1168 || (c
>= 0x0b5c && c
<= 0x0b5d)
1169 || (c
>= 0x0b5f && c
<= 0x0b61))
1173 if ((c
>= 0x0b85 && c
<= 0x0b8a)
1174 || (c
>= 0x0b8e && c
<= 0x0b90)
1175 || (c
>= 0x0b92 && c
<= 0x0b95)
1176 || (c
>= 0x0b99 && c
<= 0x0b9a)
1178 || (c
>= 0x0b9e && c
<= 0x0b9f)
1179 || (c
>= 0x0ba3 && c
<= 0x0ba4)
1180 || (c
>= 0x0ba8 && c
<= 0x0baa)
1181 || (c
>= 0x0bae && c
<= 0x0bb5)
1182 || (c
>= 0x0bb7 && c
<= 0x0bb9))
1186 if ((c
>= 0x0c05 && c
<= 0x0c0c)
1187 || (c
>= 0x0c0e && c
<= 0x0c10)
1188 || (c
>= 0x0c12 && c
<= 0x0c28)
1189 || (c
>= 0x0c2a && c
<= 0x0c33)
1190 || (c
>= 0x0c35 && c
<= 0x0c39)
1191 || (c
>= 0x0c60 && c
<= 0x0c61))
1195 if ((c
>= 0x0c85 && c
<= 0x0c8c)
1196 || (c
>= 0x0c8e && c
<= 0x0c90)
1197 || (c
>= 0x0c92 && c
<= 0x0ca8)
1198 || (c
>= 0x0caa && c
<= 0x0cb3)
1199 || (c
>= 0x0cb5 && c
<= 0x0cb9)
1200 || (c
>= 0x0ce0 && c
<= 0x0ce1))
1204 if ((c
>= 0x0d05 && c
<= 0x0d0c)
1205 || (c
>= 0x0d0e && c
<= 0x0d10)
1206 || (c
>= 0x0d12 && c
<= 0x0d28)
1207 || (c
>= 0x0d2a && c
<= 0x0d39)
1208 || (c
>= 0x0d60 && c
<= 0x0d61))
1212 if ((c
>= 0x0e01 && c
<= 0x0e30)
1213 || (c
>= 0x0e32 && c
<= 0x0e33)
1214 || (c
>= 0x0e40 && c
<= 0x0e46)
1215 || (c
>= 0x0e4f && c
<= 0x0e5b))
1219 if ((c
>= 0x0e81 && c
<= 0x0e82)
1225 || (c
>= 0x0e94 && c
<= 0x0e97)
1226 || (c
>= 0x0e99 && c
<= 0x0e9f)
1227 || (c
>= 0x0ea1 && c
<= 0x0ea3)
1232 || (c
>= 0x0ead && c
<= 0x0eb0)
1236 || (c
>= 0x0ec0 && c
<= 0x0ec4)
1241 if ((c
>= 0x10a0 && c
<= 0x10c5)
1242 || (c
>= 0x10d0 && c
<= 0x10f6))
1246 if ((c
>= 0x3041 && c
<= 0x3094)
1247 || (c
>= 0x309b && c
<= 0x309e))
1251 if ((c
>= 0x30a1 && c
<= 0x30fe))
1255 if ((c
>= 0x3105 && c
<= 0x312c))
1259 if ((c
>= 0x1100 && c
<= 0x1159)
1260 || (c
>= 0x1161 && c
<= 0x11a2)
1261 || (c
>= 0x11a8 && c
<= 0x11f9))
1264 /* CJK Unified Ideographs */
1265 if ((c
>= 0xf900 && c
<= 0xfa2d)
1266 || (c
>= 0xfb1f && c
<= 0xfb36)
1267 || (c
>= 0xfb38 && c
<= 0xfb3c)
1269 || (c
>= 0xfb40 && c
<= 0xfb41)
1270 || (c
>= 0xfb42 && c
<= 0xfb44)
1271 || (c
>= 0xfb46 && c
<= 0xfbb1)
1272 || (c
>= 0xfbd3 && c
<= 0xfd3f)
1273 || (c
>= 0xfd50 && c
<= 0xfd8f)
1274 || (c
>= 0xfd92 && c
<= 0xfdc7)
1275 || (c
>= 0xfdf0 && c
<= 0xfdfb)
1276 || (c
>= 0xfe70 && c
<= 0xfe72)
1278 || (c
>= 0xfe76 && c
<= 0xfefc)
1279 || (c
>= 0xff21 && c
<= 0xff3a)
1280 || (c
>= 0xff41 && c
<= 0xff5a)
1281 || (c
>= 0xff66 && c
<= 0xffbe)
1282 || (c
>= 0xffc2 && c
<= 0xffc7)
1283 || (c
>= 0xffca && c
<= 0xffcf)
1284 || (c
>= 0xffd2 && c
<= 0xffd7)
1285 || (c
>= 0xffda && c
<= 0xffdc)
1286 || (c
>= 0x4e00 && c
<= 0x9fa5))
1289 error ("universal-character-name '\\u%04x' not valid in identifier", c
);
1294 /* Add the UTF-8 representation of C to the token_buffer. */
1297 utf8_extend_token (c
)
1302 if (c
<= 0x0000007f)
1307 else if (c
<= 0x000007ff)
1308 shift
= 6, mask
= 0xc0;
1309 else if (c
<= 0x0000ffff)
1310 shift
= 12, mask
= 0xe0;
1311 else if (c
<= 0x001fffff)
1312 shift
= 18, mask
= 0xf0;
1313 else if (c
<= 0x03ffffff)
1314 shift
= 24, mask
= 0xf8;
1316 shift
= 30, mask
= 0xfc;
1318 extend_token (mask
| (c
>> shift
));
1322 extend_token ((unsigned char) (0x80 | (c
>> shift
)));
1334 char long_long_flag
;
1337 struct try_type type_sequence
[] =
1339 { &integer_type_node
, 0, 0, 0},
1340 { &unsigned_type_node
, 1, 0, 0},
1341 { &long_integer_type_node
, 0, 1, 0},
1342 { &long_unsigned_type_node
, 1, 1, 0},
1343 { &long_long_integer_type_node
, 0, 1, 1},
1344 { &long_long_unsigned_type_node
, 1, 1, 1}
1356 int conversion_errno
;
1357 REAL_VALUE_TYPE value
;
1365 struct pf_args
* args
= (struct pf_args
*) data
;
1366 const char *typename
;
1368 args
->conversion_errno
= 0;
1369 args
->type
= double_type_node
;
1370 typename
= "double";
1372 /* The second argument, machine_mode, of REAL_VALUE_ATOF
1373 tells the desired precision of the binary result
1374 of decimal-to-binary conversion. */
1379 error ("both 'f' and 'l' suffixes on floating constant");
1381 args
->type
= float_type_node
;
1384 else if (args
->lflag
)
1386 args
->type
= long_double_type_node
;
1387 typename
= "long double";
1389 else if (flag_single_precision_constant
)
1391 args
->type
= float_type_node
;
1396 if (args
->base
== 16)
1397 args
->value
= REAL_VALUE_HTOF (args
->str
, TYPE_MODE (args
->type
));
1399 args
->value
= REAL_VALUE_ATOF (args
->str
, TYPE_MODE (args
->type
));
1401 args
->conversion_errno
= errno
;
1402 /* A diagnostic is required here by some ISO C testsuites.
1403 This is not pedwarn, because some people don't want
1404 an error for this. */
1405 if (REAL_VALUE_ISINF (args
->value
) && pedantic
)
1406 warning ("floating point number exceeds range of '%s'", typename
);
1414 const cpp_token
*tok
;
1415 enum cpp_ttype type
;
1418 timevar_push (TV_CPP
);
1419 tok
= cpp_get_token (&parse_in
);
1420 timevar_pop (TV_CPP
);
1422 /* The C++ front end does horrible things with the current line
1423 number. To ensure an accurate line number, we must reset it
1424 every time we return a token. If we reset it from tok->line
1425 every time, we'll get line numbers inside macros referring to the
1426 macro definition; this is nice, but we don't want to change the
1427 behavior until integrated mode is the only option. So we keep our
1428 own idea of the line number, and reset it from tok->line at each
1429 new line (which never happens inside a macro). */
1430 if (tok
->flags
& BOL
)
1431 lex_lineno
= tok
->line
;
1434 lineno
= lex_lineno
;
1438 case CPP_OPEN_BRACE
: indent_level
++; break;
1439 case CPP_CLOSE_BRACE
: indent_level
--; break;
1441 /* Issue this error here, where we can get at tok->val.aux. */
1443 if (ISGRAPH (tok
->val
.aux
))
1444 error ("stray '%c' in program", tok
->val
.aux
);
1446 error ("stray '\\%#o' in program", tok
->val
.aux
);
1452 *value
= get_identifier ((const char *)tok
->val
.node
->name
);
1458 *value
= lex_number ((const char *)tok
->val
.str
.text
, tok
->val
.str
.len
);
1463 *value
= lex_charconst ((const char *)tok
->val
.str
.text
,
1464 tok
->val
.str
.len
, tok
->type
== CPP_WCHAR
);
1470 *value
= lex_string ((const char *)tok
->val
.str
.text
,
1471 tok
->val
.str
.len
, tok
->type
== CPP_WSTRING
);
1474 /* These tokens should not be visible outside cpplib. */
1475 case CPP_HEADER_NAME
:
1478 case CPP_PLACEMARKER
:
1498 /* Effectively do c = skip_white_space (c)
1499 but do it faster in the usual cases. */
1511 c
= skip_white_space (c
);
1513 goto found_nonwhite
;
1517 lineno
= lex_lineno
;
1525 /* Capital L may start a wide-string or wide-character constant. */
1527 register int c1
= getch();
1536 goto string_constant
;
1543 if (!doing_objc_thang
)
1547 /* '@' may start a constant string object. */
1548 register int c1
= getch ();
1552 goto string_constant
;
1555 /* Fall through to treat '@' as the start of an identifier. */
1558 case 'A': case 'B': case 'C': case 'D': case 'E':
1559 case 'F': case 'G': case 'H': case 'I': case 'J':
1560 case 'K': case 'M': case 'N': case 'O':
1561 case 'P': case 'Q': case 'R': case 'S': case 'T':
1562 case 'U': case 'V': case 'W': case 'X': case 'Y':
1564 case 'a': case 'b': case 'c': case 'd': case 'e':
1565 case 'f': case 'g': case 'h': case 'i': case 'j':
1566 case 'k': case 'l': case 'm': case 'n': case 'o':
1567 case 'p': case 'q': case 'r': case 's': case 't':
1568 case 'u': case 'v': case 'w': case 'x': case 'y':
1574 while (ISALNUM (c
) || c
== '_' || c
== '$' || c
== '@')
1576 /* Make sure this char really belongs in an identifier. */
1579 if (! dollars_in_ident
)
1580 error ("'$' in identifier");
1582 pedwarn ("'$' in identifier");
1585 if (p
>= token_buffer
+ maxtoken
)
1586 p
= extend_token_buffer (p
);
1594 if (p
>= token_buffer
+ maxtoken
)
1595 p
= extend_token_buffer (p
);
1598 *value
= get_identifier (token_buffer
);
1603 /* It's hard to preserve tokenization on '.' because
1604 it could be a symbol by itself, or it could be the
1605 start of a floating point number and cpp won't tell us. */
1611 return CPP_ELLIPSIS
;
1614 error ("parse error at '..'");
1616 else if (c1
== '*' && c_language
== clk_cplusplus
)
1617 return CPP_DOT_STAR
;
1625 case '0': case '1': case '2': case '3': case '4':
1626 case '5': case '6': case '7': case '8': case '9':
1629 /* Scan the next preprocessing number. All C numeric constants
1630 are preprocessing numbers, but not all preprocessing numbers
1631 are valid numeric constants. Preprocessing numbers fit the
1632 regular expression \.?[0-9]([0-9a-zA-Z_.]|[eEpP][+-])*
1633 See C99 section 6.4.8. */
1636 if (p
>= token_buffer
+ maxtoken
)
1637 p
= extend_token_buffer (p
);
1642 if (c
== '+' || c
== '-')
1645 if (d
== 'e' || d
== 'E' || d
== 'p' || d
== 'P')
1648 if (ISALNUM (c
) || c
== '_' || c
== '.')
1654 *value
= lex_number (token_buffer
, p
- token_buffer
);
1664 int delimiter
= charconst
? '\'' : '"';
1665 #ifdef MULTIBYTE_CHARS
1666 int longest_char
= local_mb_cur_max ();
1667 (void) local_mbtowc (NULL_PTR
, NULL_PTR
, 0);
1670 p
= token_buffer
+ 1;
1672 while (c
!= delimiter
&& c
!= EOF
)
1674 if (p
+ 2 > token_buffer
+ maxtoken
)
1675 p
= extend_token_buffer (p
);
1677 /* ignore_escape_flag is set for reading the filename in #line. */
1678 if (!ignore_escape_flag
&& c
== '\\')
1681 *p
++ = getch (); /* escaped character */
1687 #ifdef MULTIBYTE_CHARS
1690 for (i
= 0; i
< longest_char
; ++i
)
1692 if (p
+ i
>= token_buffer
+ maxtoken
)
1693 p
= extend_token_buffer (p
);
1696 char_len
= local_mblen (p
, i
+ 1);
1703 /* Replace all except the first byte. */
1705 for (--i
; i
> 0; --i
)
1709 /* mbtowc sometimes needs an extra char before accepting */
1710 else if (char_len
<= i
)
1724 *value
= lex_charconst (token_buffer
+ 1, p
- (token_buffer
+ 1),
1726 return wide_flag
? CPP_WCHAR
: CPP_CHAR
;
1730 *value
= lex_string (token_buffer
+ 1, p
- (token_buffer
+ 1),
1732 return wide_flag
? CPP_WSTRING
: objc_flag
? CPP_OSTRING
: CPP_STRING
;
1750 enum cpp_ttype type
= CPP_EOF
;
1754 case '+': type
= CPP_PLUS
; break;
1755 case '-': type
= CPP_MINUS
; break;
1756 case '&': type
= CPP_AND
; break;
1757 case '|': type
= CPP_OR
; break;
1758 case ':': type
= CPP_COLON
; break;
1759 case '<': type
= CPP_LESS
; break;
1760 case '>': type
= CPP_GREATER
; break;
1761 case '*': type
= CPP_MULT
; break;
1762 case '/': type
= CPP_DIV
; break;
1763 case '%': type
= CPP_MOD
; break;
1764 case '^': type
= CPP_XOR
; break;
1765 case '!': type
= CPP_NOT
; break;
1766 case '=': type
= CPP_EQ
; break;
1771 if (c1
== '=' && type
< CPP_LAST_EQ
)
1772 return type
+ (CPP_EQ_EQ
- CPP_EQ
);
1776 case '+': return CPP_PLUS_PLUS
;
1777 case '-': return CPP_MINUS_MINUS
;
1778 case '&': return CPP_AND_AND
;
1779 case '|': return CPP_OR_OR
;
1781 if (c_language
== clk_cplusplus
)
1785 case '<': type
= CPP_LSHIFT
; goto do_triad
;
1786 case '>': type
= CPP_RSHIFT
; goto do_triad
;
1794 if (c_language
== clk_cplusplus
)
1798 return CPP_DEREF_STAR
;
1806 if (c1
== '?' && c_language
== clk_cplusplus
)
1807 { type
= CPP_MAX
; goto do_triad
; }
1811 if (c1
== ':' && flag_digraphs
)
1812 return CPP_OPEN_SQUARE
;
1813 if (c1
== '%' && flag_digraphs
)
1814 { indent_level
++; return CPP_OPEN_BRACE
; }
1815 if (c1
== '?' && c_language
== clk_cplusplus
)
1816 { type
= CPP_MIN
; goto do_triad
; }
1820 if (c1
== '>' && flag_digraphs
)
1821 return CPP_CLOSE_SQUARE
;
1824 if (c1
== '>' && flag_digraphs
)
1825 { indent_level
--; return CPP_CLOSE_BRACE
; }
1835 type
+= (CPP_EQ_EQ
- CPP_EQ
);
1841 case '~': return CPP_COMPL
;
1842 case '?': return CPP_QUERY
;
1843 case ',': return CPP_COMMA
;
1844 case '(': return CPP_OPEN_PAREN
;
1845 case ')': return CPP_CLOSE_PAREN
;
1846 case '[': return CPP_OPEN_SQUARE
;
1847 case ']': return CPP_CLOSE_SQUARE
;
1848 case '{': indent_level
++; return CPP_OPEN_BRACE
;
1849 case '}': indent_level
--; return CPP_CLOSE_BRACE
;
1850 case ';': return CPP_SEMICOLON
;
1855 error ("stray '%c' in program", c
);
1857 error ("stray '\\%#o' in program", c
);
1865 #define ERROR(msgid) do { error(msgid); goto syntax_error; } while(0)
1868 lex_number (str
, len
)
1874 int largest_digit
= 0;
1880 enum anon1
{ NOT_FLOAT
= 0, AFTER_POINT
, AFTER_EXPON
} floatflag
= NOT_FLOAT
;
1882 /* We actually store only HOST_BITS_PER_CHAR bits in each part.
1883 The code below which fills the parts array assumes that a host
1884 int is at least twice as wide as a host char, and that
1885 HOST_BITS_PER_WIDE_INT is an even multiple of HOST_BITS_PER_CHAR.
1886 Two HOST_WIDE_INTs is the largest int literal we can store.
1887 In order to detect overflow below, the number of parts (TOTAL_PARTS)
1888 must be exactly the number of parts needed to hold the bits
1889 of two HOST_WIDE_INTs. */
1890 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2)
1891 unsigned int parts
[TOTAL_PARTS
];
1893 /* Optimize for most frequent case. */
1897 return integer_zero_node
;
1898 else if (*str
== '1')
1899 return integer_one_node
;
1901 return build_int_2 (*str
- '0', 0);
1904 for (count
= 0; count
< TOTAL_PARTS
; count
++)
1907 /* len is known to be >1 at this point. */
1910 if (len
> 2 && str
[0] == '0' && (str
[1] == 'x' || str
[1] == 'X'))
1915 /* The ISDIGIT check is so we are not confused by a suffix on 0. */
1916 else if (str
[0] == '0' && ISDIGIT (str
[1]))
1928 if (base
== 16 && pedantic
&& !flag_isoc99
)
1929 pedwarn ("floating constant may not be in radix 16");
1930 else if (floatflag
== AFTER_POINT
)
1931 ERROR ("too many decimal points in floating constant");
1932 else if (floatflag
== AFTER_EXPON
)
1933 ERROR ("decimal point in exponent - impossible!");
1935 floatflag
= AFTER_POINT
;
1941 /* Possible future extension: silently ignore _ in numbers,
1942 permitting cosmetic grouping - e.g. 0x8000_0000 == 0x80000000
1943 but somewhat easier to read. Ada has this? */
1944 ERROR ("underscore in number");
1948 /* It is not a decimal point.
1949 It should be a digit (perhaps a hex digit). */
1955 else if (base
<= 10 && (c
== 'e' || c
== 'E'))
1958 floatflag
= AFTER_EXPON
;
1961 else if (base
== 16 && (c
== 'p' || c
== 'P'))
1963 floatflag
= AFTER_EXPON
;
1964 break; /* start of exponent */
1966 else if (base
== 16 && c
>= 'a' && c
<= 'f')
1970 else if (base
== 16 && c
>= 'A' && c
<= 'F')
1977 break; /* start of suffix */
1980 if (n
>= largest_digit
)
1984 for (count
= 0; count
< TOTAL_PARTS
; count
++)
1986 parts
[count
] *= base
;
1990 += (parts
[count
-1] >> HOST_BITS_PER_CHAR
);
1992 &= (1 << HOST_BITS_PER_CHAR
) - 1;
1998 /* If the highest-order part overflows (gets larger than
1999 a host char will hold) then the whole number has
2000 overflowed. Record this and truncate the highest-order
2002 if (parts
[TOTAL_PARTS
- 1] >> HOST_BITS_PER_CHAR
)
2005 parts
[TOTAL_PARTS
- 1] &= (1 << HOST_BITS_PER_CHAR
) - 1;
2009 while (p
< str
+ len
);
2011 /* This can happen on input like `int i = 0x;' */
2013 ERROR ("numeric constant with no digits");
2015 if (largest_digit
>= base
)
2016 ERROR ("numeric constant contains digits beyond the radix");
2018 if (floatflag
!= NOT_FLOAT
)
2021 int imag
, fflag
, lflag
, conversion_errno
;
2022 REAL_VALUE_TYPE real
;
2023 struct pf_args args
;
2026 if (base
== 16 && floatflag
!= AFTER_EXPON
)
2027 ERROR ("hexadecimal floating constant has no exponent");
2029 /* Read explicit exponent if any, and put it in tokenbuf. */
2030 if ((base
== 10 && ((c
== 'e') || (c
== 'E')))
2031 || (base
== 16 && (c
== 'p' || c
== 'P')))
2035 if (p
< str
+ len
&& (c
== '+' || c
== '-'))
2037 /* Exponent is decimal, even if string is a hex float. */
2039 ERROR ("floating constant exponent has no digits");
2040 while (p
< str
+ len
&& ISDIGIT (c
))
2046 /* Copy the float constant now; we don't want any suffixes in the
2047 string passed to parse_float. */
2048 copy
= alloca (p
- str
+ 1);
2049 memcpy (copy
, str
, p
- str
);
2050 copy
[p
- str
] = '\0';
2052 /* Now parse suffixes. */
2053 fflag
= lflag
= imag
= 0;
2054 while (p
< str
+ len
)
2059 ERROR ("more than one 'f' suffix on floating constant");
2060 else if (warn_traditional
&& !in_system_header
)
2061 warning ("traditional C rejects the 'f' suffix");
2068 ERROR ("more than one 'l' suffix on floating constant");
2069 else if (warn_traditional
&& !in_system_header
)
2070 warning ("traditional C rejects the 'l' suffix");
2078 ERROR ("more than one 'i' or 'j' suffix on floating constant");
2080 pedwarn ("ISO C forbids imaginary numeric constants");
2085 ERROR ("invalid suffix on floating constant");
2088 /* Setup input for parse_float() */
2094 /* Convert string to a double, checking for overflow. */
2095 if (do_float_handler (parse_float
, (PTR
) &args
))
2097 /* Receive output from parse_float() */
2101 /* We got an exception from parse_float() */
2102 ERROR ("floating constant out of range");
2104 /* Receive output from parse_float() */
2105 conversion_errno
= args
.conversion_errno
;
2109 /* ERANGE is also reported for underflow,
2110 so test the value to distinguish overflow from that. */
2111 if (conversion_errno
== ERANGE
&& !flag_traditional
&& pedantic
2112 && (REAL_VALUES_LESS (dconst1
, real
)
2113 || REAL_VALUES_LESS (real
, dconstm1
)))
2114 warning ("floating point number exceeds range of 'double'");
2117 /* Create a node with determined type and value. */
2119 value
= build_complex (NULL_TREE
, convert (type
, integer_zero_node
),
2120 build_real (type
, real
));
2122 value
= build_real (type
, real
);
2126 tree trad_type
, ansi_type
, type
;
2127 HOST_WIDE_INT high
, low
;
2128 int spec_unsigned
= 0;
2130 int spec_long_long
= 0;
2135 trad_type
= ansi_type
= type
= NULL_TREE
;
2136 while (p
< str
+ len
)
2143 error ("two 'u' suffixes on integer constant");
2144 else if (warn_traditional
&& !in_system_header
)
2145 warning ("traditional C rejects the 'u' suffix");
2156 error ("three 'l' suffixes on integer constant");
2158 error ("'lul' is not a valid integer suffix");
2159 else if (c
!= spec_long
)
2160 error ("'Ll' and 'lL' are not valid integer suffixes");
2161 else if (pedantic
&& ! flag_isoc99
2162 && ! in_system_header
&& warn_long_long
)
2163 pedwarn ("ISO C89 forbids long long integer constants");
2169 case 'i': case 'I': case 'j': case 'J':
2171 error ("more than one 'i' or 'j' suffix on integer constant");
2173 pedwarn ("ISO C forbids imaginary numeric constants");
2178 ERROR ("invalid suffix on integer constant");
2182 /* If the literal overflowed, pedwarn about it now. */
2186 pedwarn ("integer constant is too large for this configuration of the compiler - truncated to %d bits", HOST_BITS_PER_WIDE_INT
* 2);
2189 /* This is simplified by the fact that our constant
2190 is always positive. */
2194 for (i
= 0; i
< HOST_BITS_PER_WIDE_INT
/ HOST_BITS_PER_CHAR
; i
++)
2196 high
|= ((HOST_WIDE_INT
) parts
[i
+ (HOST_BITS_PER_WIDE_INT
2197 / HOST_BITS_PER_CHAR
)]
2198 << (i
* HOST_BITS_PER_CHAR
));
2199 low
|= (HOST_WIDE_INT
) parts
[i
] << (i
* HOST_BITS_PER_CHAR
);
2202 value
= build_int_2 (low
, high
);
2203 TREE_TYPE (value
) = long_long_unsigned_type_node
;
2205 /* If warn_traditional, calculate both the ISO type and the
2206 traditional type, then see if they disagree.
2207 Otherwise, calculate only the type for the dialect in use. */
2208 if (warn_traditional
|| flag_traditional
)
2210 /* Calculate the traditional type. */
2211 /* Traditionally, any constant is signed; but if unsigned is
2212 specified explicitly, obey that. Use the smallest size
2213 with the right number of bits, except for one special
2214 case with decimal constants. */
2215 if (! spec_long
&& base
!= 10
2216 && int_fits_type_p (value
, unsigned_type_node
))
2217 trad_type
= spec_unsigned
? unsigned_type_node
: integer_type_node
;
2218 /* A decimal constant must be long if it does not fit in
2219 type int. I think this is independent of whether the
2220 constant is signed. */
2221 else if (! spec_long
&& base
== 10
2222 && int_fits_type_p (value
, integer_type_node
))
2223 trad_type
= spec_unsigned
? unsigned_type_node
: integer_type_node
;
2224 else if (! spec_long_long
)
2225 trad_type
= (spec_unsigned
2226 ? long_unsigned_type_node
2227 : long_integer_type_node
);
2228 else if (int_fits_type_p (value
,
2230 ? long_long_unsigned_type_node
2231 : long_long_integer_type_node
))
2232 trad_type
= (spec_unsigned
2233 ? long_long_unsigned_type_node
2234 : long_long_integer_type_node
);
2236 trad_type
= (spec_unsigned
2237 ? widest_unsigned_literal_type_node
2238 : widest_integer_literal_type_node
);
2240 if (warn_traditional
|| ! flag_traditional
)
2242 /* Calculate the ISO type. */
2243 if (! spec_long
&& ! spec_unsigned
2244 && int_fits_type_p (value
, integer_type_node
))
2245 ansi_type
= integer_type_node
;
2246 else if (! spec_long
&& (base
!= 10 || spec_unsigned
)
2247 && int_fits_type_p (value
, unsigned_type_node
))
2248 ansi_type
= unsigned_type_node
;
2249 else if (! spec_unsigned
&& !spec_long_long
2250 && int_fits_type_p (value
, long_integer_type_node
))
2251 ansi_type
= long_integer_type_node
;
2252 else if (! spec_long_long
2253 && int_fits_type_p (value
, long_unsigned_type_node
))
2254 ansi_type
= long_unsigned_type_node
;
2255 else if (! spec_unsigned
2256 && int_fits_type_p (value
, long_long_integer_type_node
))
2257 ansi_type
= long_long_integer_type_node
;
2258 else if (int_fits_type_p (value
, long_long_unsigned_type_node
))
2259 ansi_type
= long_long_unsigned_type_node
;
2260 else if (! spec_unsigned
2261 && int_fits_type_p (value
, widest_integer_literal_type_node
))
2262 ansi_type
= widest_integer_literal_type_node
;
2264 ansi_type
= widest_unsigned_literal_type_node
;
2267 type
= flag_traditional
? trad_type
: ansi_type
;
2269 /* We assume that constants specified in a non-decimal
2270 base are bit patterns, and that the programmer really
2271 meant what they wrote. */
2272 if (warn_traditional
&& !in_system_header
2273 && base
== 10 && trad_type
!= ansi_type
)
2275 if (TYPE_PRECISION (trad_type
) != TYPE_PRECISION (ansi_type
))
2276 warning ("width of integer constant changes with -traditional");
2277 else if (TREE_UNSIGNED (trad_type
) != TREE_UNSIGNED (ansi_type
))
2278 warning ("integer constant is unsigned in ISO C, signed with -traditional");
2280 warning ("width of integer constant may change on other systems with -traditional");
2283 if (pedantic
&& !flag_traditional
&& !spec_long_long
&& !warn
2284 && (TYPE_PRECISION (long_integer_type_node
) < TYPE_PRECISION (type
)))
2287 pedwarn ("integer constant larger than the maximum value of an unsigned long int");
2290 if (base
== 10 && ! spec_unsigned
&& TREE_UNSIGNED (type
))
2291 warning ("decimal constant is so large that it is unsigned");
2295 if (TYPE_PRECISION (type
)
2296 <= TYPE_PRECISION (integer_type_node
))
2297 value
= build_complex (NULL_TREE
, integer_zero_node
,
2298 convert (integer_type_node
, value
));
2300 ERROR ("complex integer constant is too wide for 'complex int'");
2302 else if (flag_traditional
&& !int_fits_type_p (value
, type
))
2303 /* The traditional constant 0x80000000 is signed
2304 but doesn't fit in the range of int.
2305 This will change it to -0x80000000, which does fit. */
2307 TREE_TYPE (value
) = unsigned_type (type
);
2308 value
= convert (type
, value
);
2309 TREE_OVERFLOW (value
) = TREE_CONSTANT_OVERFLOW (value
) = 0;
2312 TREE_TYPE (value
) = type
;
2314 /* If it's still an integer (not a complex), and it doesn't
2315 fit in the type we choose for it, then pedwarn. */
2318 && TREE_CODE (TREE_TYPE (value
)) == INTEGER_TYPE
2319 && ! int_fits_type_p (value
, TREE_TYPE (value
)))
2320 pedwarn ("integer constant is larger than the maximum value for its type");
2324 error ("missing white space after number '%.*s'", (int) (p
- str
), str
);
2329 return integer_zero_node
;
2333 lex_string (str
, len
, wide
)
2339 char *buf
= alloca ((len
+ 1) * (wide
? WCHAR_BYTES
: 1));
2341 const char *p
= str
, *limit
= str
+ len
;
2343 unsigned width
= wide
? WCHAR_TYPE_SIZE
2344 : TYPE_PRECISION (char_type_node
);
2346 #ifdef MULTIBYTE_CHARS
2347 /* Reset multibyte conversion state. */
2348 (void) local_mbtowc (NULL_PTR
, NULL_PTR
, 0);
2353 #ifdef MULTIBYTE_CHARS
2357 char_len
= local_mbtowc (&wc
, p
, limit
- p
);
2360 warning ("Ignoring invalid multibyte character");
2373 if (c
== '\\' && !ignore_escape_flag
)
2375 p
= readescape (p
, limit
, &c
);
2376 if (width
< HOST_BITS_PER_INT
2377 && (unsigned) c
>= ((unsigned)1 << width
))
2378 pedwarn ("escape sequence out of range for character");
2381 /* Add this single character into the buffer either as a wchar_t
2382 or as a single byte. */
2385 unsigned charwidth
= TYPE_PRECISION (char_type_node
);
2386 unsigned bytemask
= (1 << width
) - 1;
2389 for (byte
= 0; byte
< WCHAR_BYTES
; ++byte
)
2392 if (byte
>= (int) sizeof (c
))
2395 n
= (c
>> (byte
* charwidth
)) & bytemask
;
2396 if (BYTES_BIG_ENDIAN
)
2397 q
[WCHAR_BYTES
- byte
- 1] = n
;
2409 /* Terminate the string value, either with a single byte zero
2410 or with a wide zero. */
2414 memset (q
, 0, WCHAR_BYTES
);
2422 value
= build_string (q
- buf
, buf
);
2425 TREE_TYPE (value
) = wchar_array_type_node
;
2427 TREE_TYPE (value
) = char_array_type_node
;
2432 lex_charconst (str
, len
, wide
)
2437 const char *limit
= str
+ len
;
2441 unsigned width
= TYPE_PRECISION (char_type_node
);
2446 #ifdef MULTIBYTE_CHARS
2447 int longest_char
= local_mb_cur_max ();
2448 (void) local_mbtowc (NULL_PTR
, NULL_PTR
, 0);
2451 max_chars
= TYPE_PRECISION (integer_type_node
) / width
;
2453 width
= WCHAR_TYPE_SIZE
;
2457 #ifdef MULTIBYTE_CHARS
2461 char_len
= local_mbtowc (&wc
, str
, limit
- str
);
2464 warning ("Ignoring invalid multibyte character");
2480 str
= readescape (str
, limit
, &c
);
2481 if (width
< HOST_BITS_PER_INT
2482 && (unsigned) c
>= ((unsigned)1 << width
))
2483 pedwarn ("escape sequence out of range for character");
2485 #ifdef MAP_CHARACTER
2487 c
= MAP_CHARACTER (c
);
2490 /* Merge character into result; ignore excess chars. */
2491 num_chars
+= (width
/ TYPE_PRECISION (char_type_node
));
2492 if (num_chars
< max_chars
+ 1)
2494 if (width
< HOST_BITS_PER_INT
)
2495 result
= (result
<< width
) | (c
& ((1 << width
) - 1));
2501 if (chars_seen
== 0)
2502 error ("empty character constant");
2503 else if (num_chars
> max_chars
)
2505 num_chars
= max_chars
;
2506 error ("character constant too long");
2508 else if (chars_seen
!= 1 && ! flag_traditional
&& warn_multichar
)
2509 warning ("multi-character character constant");
2511 /* If char type is signed, sign-extend the constant. */
2514 int num_bits
= num_chars
* width
;
2516 /* We already got an error; avoid invalid shift. */
2517 value
= build_int_2 (0, 0);
2518 else if (TREE_UNSIGNED (char_type_node
)
2519 || ((result
>> (num_bits
- 1)) & 1) == 0)
2520 value
= build_int_2 (result
& (~(unsigned HOST_WIDE_INT
) 0
2521 >> (HOST_BITS_PER_WIDE_INT
- num_bits
)),
2524 value
= build_int_2 (result
| ~(~(unsigned HOST_WIDE_INT
) 0
2525 >> (HOST_BITS_PER_WIDE_INT
- num_bits
)),
2527 /* In C, a character constant has type 'int'; in C++, 'char'. */
2528 if (chars_seen
<= 1 && c_language
== clk_cplusplus
)
2529 TREE_TYPE (value
) = char_type_node
;
2531 TREE_TYPE (value
) = integer_type_node
;
2535 value
= build_int_2 (result
, 0);
2536 TREE_TYPE (value
) = wchar_type_node
;