gcc.gnu.org Git - gcc.git/blob - gcc/c-lex.c
* cfgloop.c (flow_loops_cfg_dump): Use bb->index, not i.
[gcc.git] / gcc / c-lex.c
1 /* Lexical analyzer for C and Objective C.
2 Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
3 1998, 1999, 2000 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24
25 #include "real.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "expr.h"
29 #include "input.h"
30 #include "output.h"
31 #include "c-tree.h"
32 #include "c-common.h"
33 #include "flags.h"
34 #include "timevar.h"
35 #include "cpplib.h"
36 #include "c-pragma.h"
37 #include "toplev.h"
38 #include "intl.h"
39 #include "tm_p.h"
40 #include "splay-tree.h"
41 #include "debug.h"
42
43 #ifdef MULTIBYTE_CHARS
44 #include "mbchar.h"
45 #include <locale.h>
46 #endif /* MULTIBYTE_CHARS */
47 #ifndef GET_ENVIRONMENT
48 #define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME))
49 #endif
50
51 /* The current line map. */
52 static const struct line_map *map;
53
54 /* The line used to refresh the lineno global variable after each token. */
55 static unsigned int src_lineno;
56
57 /* We may keep statistics about how long which files took to compile. */
58 static int header_time, body_time;
59 static splay_tree file_info_tree;
60
61 /* File used for outputting assembler code. */
62 extern FILE *asm_out_file;
63
64 #undef WCHAR_TYPE_SIZE
65 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
66
67 /* Number of bytes in a wide character. */
68 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
69
70 int pending_lang_change; /* If we need to switch languages - C++ only */
71 int c_header_level; /* depth in C headers - C++ only */
72
73 /* Nonzero tells yylex to ignore \ in string constants. */
74 static int ignore_escape_flag;
75
76 static tree lex_number PARAMS ((const char *, unsigned int));
77 static tree lex_string PARAMS ((const unsigned char *, unsigned int,
78 int));
79 static tree lex_charconst PARAMS ((const cpp_token *));
80 static void update_header_times PARAMS ((const char *));
81 static int dump_one_header PARAMS ((splay_tree_node, void *));
82 static void cb_line_change PARAMS ((cpp_reader *, const cpp_token *, int));
83 static void cb_ident PARAMS ((cpp_reader *, unsigned int,
84 const cpp_string *));
85 static void cb_file_change PARAMS ((cpp_reader *, const struct line_map *));
86 static void cb_def_pragma PARAMS ((cpp_reader *, unsigned int));
87 static void cb_define PARAMS ((cpp_reader *, unsigned int,
88 cpp_hashnode *));
89 static void cb_undef PARAMS ((cpp_reader *, unsigned int,
90 cpp_hashnode *));
91 \f
92 const char *
93 init_c_lex (filename)
94 const char *filename;
95 {
96 struct cpp_callbacks *cb;
97 struct c_fileinfo *toplevel;
98
99 /* Set up filename timing. Must happen before cpp_read_main_file. */
100 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
101 0,
102 (splay_tree_delete_value_fn)free);
103 toplevel = get_fileinfo ("<top level>");
104 if (flag_detailed_statistics)
105 {
106 header_time = 0;
107 body_time = get_run_time ();
108 toplevel->time = body_time;
109 }
110
111 #ifdef MULTIBYTE_CHARS
112 /* Change to the native locale for multibyte conversions. */
113 setlocale (LC_CTYPE, "");
114 GET_ENVIRONMENT (literal_codeset, "LANG");
115 #endif
116
117 cb = cpp_get_callbacks (parse_in);
118
119 cb->line_change = cb_line_change;
120 cb->ident = cb_ident;
121 cb->file_change = cb_file_change;
122 cb->def_pragma = cb_def_pragma;
123
124 /* Set the debug callbacks if we can use them. */
125 if (debug_info_level == DINFO_LEVEL_VERBOSE
126 && (write_symbols == DWARF_DEBUG || write_symbols == DWARF2_DEBUG
127 || write_symbols == VMS_AND_DWARF2_DEBUG))
128 {
129 cb->define = cb_define;
130 cb->undef = cb_undef;
131 }
132
133 /* Start it at 0. */
134 lineno = 0;
135
136 if (filename == NULL || !strcmp (filename, "-"))
137 filename = "";
138
139 return cpp_read_main_file (parse_in, filename, ident_hash);
140 }
141
142 /* A thin wrapper around the real parser that initializes the
143 integrated preprocessor after debug output has been initialized.
144 Also, make sure the start_source_file debug hook gets called for
145 the primary source file. */
146
147 void
148 c_common_parse_file (set_yydebug)
149 int set_yydebug ATTRIBUTE_UNUSED;
150 {
151 #if YYDEBUG != 0
152 yydebug = set_yydebug;
153 #else
154 warning ("YYDEBUG not defined");
155 #endif
156
157 (*debug_hooks->start_source_file) (lineno, input_filename);
158 cpp_finish_options (parse_in);
159
160 yyparse ();
161 free_parser_stacks ();
162 }
163
164 struct c_fileinfo *
165 get_fileinfo (name)
166 const char *name;
167 {
168 splay_tree_node n;
169 struct c_fileinfo *fi;
170
171 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
172 if (n)
173 return (struct c_fileinfo *) n->value;
174
175 fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
176 fi->time = 0;
177 fi->interface_only = 0;
178 fi->interface_unknown = 1;
179 splay_tree_insert (file_info_tree, (splay_tree_key) name,
180 (splay_tree_value) fi);
181 return fi;
182 }
183
184 static void
185 update_header_times (name)
186 const char *name;
187 {
188 /* Changing files again. This means currently collected time
189 is charged against header time, and body time starts back at 0. */
190 if (flag_detailed_statistics)
191 {
192 int this_time = get_run_time ();
193 struct c_fileinfo *file = get_fileinfo (name);
194 header_time += this_time - body_time;
195 file->time += this_time - body_time;
196 body_time = this_time;
197 }
198 }
199
200 static int
201 dump_one_header (n, dummy)
202 splay_tree_node n;
203 void *dummy ATTRIBUTE_UNUSED;
204 {
205 print_time ((const char *) n->key,
206 ((struct c_fileinfo *) n->value)->time);
207 return 0;
208 }
209
210 void
211 dump_time_statistics ()
212 {
213 struct c_fileinfo *file = get_fileinfo (input_filename);
214 int this_time = get_run_time ();
215 file->time += this_time - body_time;
216
217 fprintf (stderr, "\n******\n");
218 print_time ("header files (total)", header_time);
219 print_time ("main file (total)", this_time - body_time);
220 fprintf (stderr, "ratio = %g : 1\n",
221 (double)header_time / (double)(this_time - body_time));
222 fprintf (stderr, "\n******\n");
223
224 splay_tree_foreach (file_info_tree, dump_one_header, 0);
225 }
226
227 static void
228 cb_ident (pfile, line, str)
229 cpp_reader *pfile ATTRIBUTE_UNUSED;
230 unsigned int line ATTRIBUTE_UNUSED;
231 const cpp_string *str ATTRIBUTE_UNUSED;
232 {
233 #ifdef ASM_OUTPUT_IDENT
234 if (! flag_no_ident)
235 {
236 /* Convert escapes in the string. */
237 tree value = lex_string (str->text, str->len, 0);
238 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
239 }
240 #endif
241 }
242
243 /* Called at the start of every non-empty line. TOKEN is the first
244 lexed token on the line. Used for diagnostic line numbers. */
245 static void
246 cb_line_change (pfile, token, parsing_args)
247 cpp_reader *pfile ATTRIBUTE_UNUSED;
248 const cpp_token *token;
249 int parsing_args ATTRIBUTE_UNUSED;
250 {
251 src_lineno = SOURCE_LINE (map, token->line);
252 }
253
254 static void
255 cb_file_change (pfile, new_map)
256 cpp_reader *pfile ATTRIBUTE_UNUSED;
257 const struct line_map *new_map;
258 {
259 unsigned int to_line = SOURCE_LINE (new_map, new_map->to_line);
260
261 if (new_map->reason == LC_ENTER)
262 {
263 /* Don't stack the main buffer on the input stack;
264 we already did in compile_file. */
265 if (map == NULL)
266 main_input_filename = new_map->to_file;
267 else
268 {
269 int included_at = SOURCE_LINE (new_map - 1, new_map->from_line - 1);
270
271 lineno = included_at;
272 push_srcloc (new_map->to_file, 1);
273 (*debug_hooks->start_source_file) (included_at, new_map->to_file);
274 #ifndef NO_IMPLICIT_EXTERN_C
275 if (c_header_level)
276 ++c_header_level;
277 else if (new_map->sysp == 2)
278 {
279 c_header_level = 1;
280 ++pending_lang_change;
281 }
282 #endif
283 }
284 }
285 else if (new_map->reason == LC_LEAVE)
286 {
287 #ifndef NO_IMPLICIT_EXTERN_C
288 if (c_header_level && --c_header_level == 0)
289 {
290 if (new_map->sysp == 2)
291 warning ("badly nested C headers from preprocessor");
292 --pending_lang_change;
293 }
294 #endif
295 pop_srcloc ();
296
297 (*debug_hooks->end_source_file) (to_line);
298 }
299
300 update_header_times (new_map->to_file);
301 in_system_header = new_map->sysp != 0;
302 input_filename = new_map->to_file;
303 lineno = to_line;
304 map = new_map;
305
306 /* Hook for C++. */
307 extract_interface_info ();
308 }
309
310 static void
311 cb_def_pragma (pfile, line)
312 cpp_reader *pfile;
313 unsigned int line;
314 {
315 /* Issue a warning message if we have been asked to do so. Ignore
316 unknown pragmas in system headers unless an explicit
317 -Wunknown-pragmas has been given. */
318 if (warn_unknown_pragmas > in_system_header)
319 {
320 const unsigned char *space, *name = 0;
321 const cpp_token *s;
322
323 s = cpp_get_token (pfile);
324 space = cpp_token_as_text (pfile, s);
325 s = cpp_get_token (pfile);
326 if (s->type == CPP_NAME)
327 name = cpp_token_as_text (pfile, s);
328
329 lineno = SOURCE_LINE (map, line);
330 if (name)
331 warning ("ignoring #pragma %s %s", space, name);
332 else
333 warning ("ignoring #pragma %s", space);
334 }
335 }
336
337 /* #define callback for DWARF and DWARF2 debug info. */
338 static void
339 cb_define (pfile, line, node)
340 cpp_reader *pfile;
341 unsigned int line;
342 cpp_hashnode *node;
343 {
344 (*debug_hooks->define) (SOURCE_LINE (map, line),
345 (const char *) cpp_macro_definition (pfile, node));
346 }
347
348 /* #undef callback for DWARF and DWARF2 debug info. */
349 static void
350 cb_undef (pfile, line, node)
351 cpp_reader *pfile ATTRIBUTE_UNUSED;
352 unsigned int line;
353 cpp_hashnode *node;
354 {
355 (*debug_hooks->undef) (SOURCE_LINE (map, line),
356 (const char *) NODE_NAME (node));
357 }
358
359 #if 0 /* not yet */
/* Returns nonzero if C is a universal-character-name, and gives an
   error if it is not one that may appear in an identifier, as per
   [extendid].  Values below 0x7f yield 0; on TARGET_EBCDIC the
   answer is always 0.

   Note that extended character support in identifiers has not yet
   been implemented.  It is my personal opinion that this is not a
   desirable feature.  Portable code cannot count on support for more
   than the basic identifier character set.  */

static inline int
is_extended_char (c)
     int c;
{
#ifdef TARGET_EBCDIC
  return 0;
#else
  /* Inclusive code point ranges accepted in identifiers, grouped by
     script.  A single code point is written as a range whose two
     ends are equal.  */
  static const struct { int lo, hi; } valid_ranges[] =
  {
    /* Latin */
    { 0x00c0, 0x00d6 }, { 0x00d8, 0x00f6 }, { 0x00f8, 0x01f5 },
    { 0x01fa, 0x0217 }, { 0x0250, 0x02a8 }, { 0x1e00, 0x1e9a },
    { 0x1ea0, 0x1ef9 },

    /* Greek */
    { 0x0384, 0x0384 }, { 0x0388, 0x038a }, { 0x038c, 0x038c },
    { 0x038e, 0x03a1 }, { 0x03a3, 0x03ce }, { 0x03d0, 0x03d6 },
    { 0x03da, 0x03da }, { 0x03dc, 0x03dc }, { 0x03de, 0x03de },
    { 0x03e0, 0x03e0 }, { 0x03e2, 0x03f3 }, { 0x1f00, 0x1f15 },
    { 0x1f18, 0x1f1d }, { 0x1f20, 0x1f45 }, { 0x1f48, 0x1f4d },
    { 0x1f50, 0x1f57 }, { 0x1f59, 0x1f59 }, { 0x1f5b, 0x1f5b },
    { 0x1f5d, 0x1f5d }, { 0x1f5f, 0x1f7d }, { 0x1f80, 0x1fb4 },
    { 0x1fb6, 0x1fbc }, { 0x1fc2, 0x1fc4 }, { 0x1fc6, 0x1fcc },
    { 0x1fd0, 0x1fd3 }, { 0x1fd6, 0x1fdb }, { 0x1fe0, 0x1fec },
    { 0x1ff2, 0x1ff4 }, { 0x1ff6, 0x1ffc },

    /* Cyrillic */
    { 0x0401, 0x040d }, { 0x040f, 0x044f }, { 0x0451, 0x045c },
    { 0x045e, 0x0481 }, { 0x0490, 0x04c4 }, { 0x04c7, 0x04c8 },
    { 0x04cb, 0x04cc }, { 0x04d0, 0x04eb }, { 0x04ee, 0x04f5 },
    { 0x04f8, 0x04f9 },

    /* Armenian */
    { 0x0531, 0x0556 }, { 0x0561, 0x0587 },

    /* Hebrew */
    { 0x05d0, 0x05ea }, { 0x05f0, 0x05f4 },

    /* Arabic */
    { 0x0621, 0x063a }, { 0x0640, 0x0652 }, { 0x0670, 0x06b7 },
    { 0x06ba, 0x06be }, { 0x06c0, 0x06ce }, { 0x06e5, 0x06e7 },

    /* Devanagari */
    { 0x0905, 0x0939 }, { 0x0958, 0x0962 },

    /* Bengali */
    { 0x0985, 0x098c }, { 0x098f, 0x0990 }, { 0x0993, 0x09a8 },
    { 0x09aa, 0x09b0 }, { 0x09b2, 0x09b2 }, { 0x09b6, 0x09b9 },
    { 0x09dc, 0x09dd }, { 0x09df, 0x09e1 }, { 0x09f0, 0x09f1 },

    /* Gurmukhi */
    { 0x0a05, 0x0a0a }, { 0x0a0f, 0x0a10 }, { 0x0a13, 0x0a28 },
    { 0x0a2a, 0x0a30 }, { 0x0a32, 0x0a33 }, { 0x0a35, 0x0a36 },
    { 0x0a38, 0x0a39 }, { 0x0a59, 0x0a5c }, { 0x0a5e, 0x0a5e },

    /* Gujarati */
    { 0x0a85, 0x0a8b }, { 0x0a8d, 0x0a8d }, { 0x0a8f, 0x0a91 },
    { 0x0a93, 0x0aa8 }, { 0x0aaa, 0x0ab0 }, { 0x0ab2, 0x0ab3 },
    { 0x0ab5, 0x0ab9 }, { 0x0ae0, 0x0ae0 },

    /* Oriya */
    { 0x0b05, 0x0b0c }, { 0x0b0f, 0x0b10 }, { 0x0b13, 0x0b28 },
    { 0x0b2a, 0x0b30 }, { 0x0b32, 0x0b33 }, { 0x0b36, 0x0b39 },
    { 0x0b5c, 0x0b5d }, { 0x0b5f, 0x0b61 },

    /* Tamil */
    { 0x0b85, 0x0b8a }, { 0x0b8e, 0x0b90 }, { 0x0b92, 0x0b95 },
    { 0x0b99, 0x0b9a }, { 0x0b9c, 0x0b9c }, { 0x0b9e, 0x0b9f },
    { 0x0ba3, 0x0ba4 }, { 0x0ba8, 0x0baa }, { 0x0bae, 0x0bb5 },
    { 0x0bb7, 0x0bb9 },

    /* Telugu */
    { 0x0c05, 0x0c0c }, { 0x0c0e, 0x0c10 }, { 0x0c12, 0x0c28 },
    { 0x0c2a, 0x0c33 }, { 0x0c35, 0x0c39 }, { 0x0c60, 0x0c61 },

    /* Kannada */
    { 0x0c85, 0x0c8c }, { 0x0c8e, 0x0c90 }, { 0x0c92, 0x0ca8 },
    { 0x0caa, 0x0cb3 }, { 0x0cb5, 0x0cb9 }, { 0x0ce0, 0x0ce1 },

    /* Malayalam */
    { 0x0d05, 0x0d0c }, { 0x0d0e, 0x0d10 }, { 0x0d12, 0x0d28 },
    { 0x0d2a, 0x0d39 }, { 0x0d60, 0x0d61 },

    /* Thai */
    { 0x0e01, 0x0e30 }, { 0x0e32, 0x0e33 }, { 0x0e40, 0x0e46 },
    { 0x0e4f, 0x0e5b },

    /* Lao.  NOTE(review): 0x0e0d is already inside the Thai range
       0x0e01-0x0e30 above, so this entry adds nothing; it may have
       been meant as 0x0e8d - confirm against [extendid].  */
    { 0x0e81, 0x0e82 }, { 0x0e84, 0x0e84 }, { 0x0e87, 0x0e87 },
    { 0x0e88, 0x0e88 }, { 0x0e8a, 0x0e8a }, { 0x0e0d, 0x0e0d },
    { 0x0e94, 0x0e97 }, { 0x0e99, 0x0e9f }, { 0x0ea1, 0x0ea3 },
    { 0x0ea5, 0x0ea5 }, { 0x0ea7, 0x0ea7 }, { 0x0eaa, 0x0eaa },
    { 0x0eab, 0x0eab }, { 0x0ead, 0x0eb0 }, { 0x0eb2, 0x0eb2 },
    { 0x0eb3, 0x0eb3 }, { 0x0ebd, 0x0ebd }, { 0x0ec0, 0x0ec4 },
    { 0x0ec6, 0x0ec6 },

    /* Georgian */
    { 0x10a0, 0x10c5 }, { 0x10d0, 0x10f6 },

    /* Hiragana */
    { 0x3041, 0x3094 }, { 0x309b, 0x309e },

    /* Katakana */
    { 0x30a1, 0x30fe },

    /* Bopmofo */
    { 0x3105, 0x312c },

    /* Hangul */
    { 0x1100, 0x1159 }, { 0x1161, 0x11a2 }, { 0x11a8, 0x11f9 },

    /* CJK Unified Ideographs */
    { 0xf900, 0xfa2d }, { 0xfb1f, 0xfb36 }, { 0xfb38, 0xfb3c },
    { 0xfb3e, 0xfb3e }, { 0xfb40, 0xfb41 }, { 0xfb42, 0xfb44 },
    { 0xfb46, 0xfbb1 }, { 0xfbd3, 0xfd3f }, { 0xfd50, 0xfd8f },
    { 0xfd92, 0xfdc7 }, { 0xfdf0, 0xfdfb }, { 0xfe70, 0xfe72 },
    { 0xfe74, 0xfe74 }, { 0xfe76, 0xfefc }, { 0xff21, 0xff3a },
    { 0xff41, 0xff5a }, { 0xff66, 0xffbe }, { 0xffc2, 0xffc7 },
    { 0xffca, 0xffcf }, { 0xffd2, 0xffd7 }, { 0xffda, 0xffdc },
    { 0x4e00, 0x9fa5 }
  };
  unsigned int i;

  /* ASCII.  */
  if (c < 0x7f)
    return 0;

  /* None of the valid chars are outside the Basic Multilingual Plane
     (the low 16 bits).  */
  if (c > 0xffff)
    {
      error ("universal-character-name '\\U%08x' not valid in identifier", c);
      return 1;
    }

  for (i = 0; i < sizeof valid_ranges / sizeof valid_ranges[0]; i++)
    if (c >= valid_ranges[i].lo && c <= valid_ranges[i].hi)
      return 1;

  /* Still a universal-character-name, just not one allowed here.  */
  error ("universal-character-name '\\u%04x' not valid in identifier", c);
  return 1;
#endif
}
633
/* Add the UTF-8 representation of C to the token_buffer.

   Values up to 0x7f are added as a single byte.  Larger values are
   added as a lead byte, whose high bits encode the sequence length,
   followed by one continuation byte for each further group of six
   bits, most significant group first.  */

static void
utf8_extend_token (c)
     int c;
{
  int shift, mask;

  if (c <= 0x0000007f)
    {
      extend_token (c);
      return;
    }
  else if (c <= 0x000007ff)
    shift = 6, mask = 0xc0;
  else if (c <= 0x0000ffff)
    shift = 12, mask = 0xe0;
  else if (c <= 0x001fffff)
    shift = 18, mask = 0xf0;
  else if (c <= 0x03ffffff)
    shift = 24, mask = 0xf8;
  else
    shift = 30, mask = 0xfc;

  /* Lead byte: length marker plus the topmost payload bits.  */
  extend_token (mask | (c >> shift));
  do
    {
      shift -= 6;
      /* A continuation byte is 10xxxxxx: exactly six payload bits.
         Mask with 0x3f so that higher bits of C cannot spill into the
         marker bits (e.g. 0x7ff must end in 0xbf, not 0xff).  */
      extend_token ((unsigned char) (0x80 | ((c >> shift) & 0x3f)));
    }
  while (shift);
}
666 #endif
667
668 #if 0
669 struct try_type
670 {
671 tree *const node_var;
672 const char unsigned_flag;
673 const char long_flag;
674 const char long_long_flag;
675 };
676
677 struct try_type type_sequence[] =
678 {
679 { &integer_type_node, 0, 0, 0},
680 { &unsigned_type_node, 1, 0, 0},
681 { &long_integer_type_node, 0, 1, 0},
682 { &long_unsigned_type_node, 1, 1, 0},
683 { &long_long_integer_type_node, 0, 1, 1},
684 { &long_long_unsigned_type_node, 1, 1, 1}
685 };
686 #endif /* 0 */
687 \f
688 int
689 c_lex (value)
690 tree *value;
691 {
692 const cpp_token *tok;
693
694 retry:
695 timevar_push (TV_CPP);
696 do
697 tok = cpp_get_token (parse_in);
698 while (tok->type == CPP_PADDING);
699 timevar_pop (TV_CPP);
700
701 /* The C++ front end does horrible things with the current line
702 number. To ensure an accurate line number, we must reset it
703 every time we return a token. */
704 lineno = src_lineno;
705
706 *value = NULL_TREE;
707 switch (tok->type)
708 {
709 /* Issue this error here, where we can get at tok->val.c. */
710 case CPP_OTHER:
711 if (ISGRAPH (tok->val.c))
712 error ("stray '%c' in program", tok->val.c);
713 else
714 error ("stray '\\%o' in program", tok->val.c);
715 goto retry;
716
717 case CPP_NAME:
718 *value = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
719 break;
720
721 case CPP_NUMBER:
722 *value = lex_number ((const char *)tok->val.str.text, tok->val.str.len);
723 break;
724
725 case CPP_CHAR:
726 case CPP_WCHAR:
727 *value = lex_charconst (tok);
728 break;
729
730 case CPP_STRING:
731 case CPP_WSTRING:
732 *value = lex_string (tok->val.str.text, tok->val.str.len,
733 tok->type == CPP_WSTRING);
734 break;
735
736 /* These tokens should not be visible outside cpplib. */
737 case CPP_HEADER_NAME:
738 case CPP_COMMENT:
739 case CPP_MACRO_ARG:
740 abort ();
741
742 default: break;
743 }
744
745 return tok->type;
746 }
747
748 #define ERROR(msgid) do { error(msgid); goto syntax_error; } while(0)
749
750 static tree
751 lex_number (str, len)
752 const char *str;
753 unsigned int len;
754 {
755 int base = 10;
756 int count = 0;
757 int largest_digit = 0;
758 int numdigits = 0;
759 int overflow = 0;
760 int c;
761 tree value;
762 const char *p;
763 enum anon1 { NOT_FLOAT = 0, AFTER_POINT, AFTER_EXPON } floatflag = NOT_FLOAT;
764
765 /* We actually store only HOST_BITS_PER_CHAR bits in each part.
766 The code below which fills the parts array assumes that a host
767 int is at least twice as wide as a host char, and that
768 HOST_BITS_PER_WIDE_INT is an even multiple of HOST_BITS_PER_CHAR.
769 Two HOST_WIDE_INTs is the largest int literal we can store.
770 In order to detect overflow below, the number of parts (TOTAL_PARTS)
771 must be exactly the number of parts needed to hold the bits
772 of two HOST_WIDE_INTs. */
773 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2)
774 unsigned int parts[TOTAL_PARTS];
775
776 /* Optimize for most frequent case. */
777 if (len == 1)
778 {
779 if (*str == '0')
780 return integer_zero_node;
781 else if (*str == '1')
782 return integer_one_node;
783 else
784 return build_int_2 (*str - '0', 0);
785 }
786
787 for (count = 0; count < TOTAL_PARTS; count++)
788 parts[count] = 0;
789
790 /* len is known to be >1 at this point. */
791 p = str;
792
793 if (len > 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
794 {
795 base = 16;
796 p = str + 2;
797 }
798 /* The ISDIGIT check is so we are not confused by a suffix on 0. */
799 else if (str[0] == '0' && ISDIGIT (str[1]))
800 {
801 base = 8;
802 p = str + 1;
803 }
804
805 do
806 {
807 c = *p++;
808
809 if (c == '.')
810 {
811 if (floatflag == AFTER_POINT)
812 ERROR ("too many decimal points in floating constant");
813 else if (floatflag == AFTER_EXPON)
814 ERROR ("decimal point in exponent - impossible!");
815 else
816 floatflag = AFTER_POINT;
817
818 if (base == 8)
819 base = 10;
820 }
821 else if (c == '_')
822 /* Possible future extension: silently ignore _ in numbers,
823 permitting cosmetic grouping - e.g. 0x8000_0000 == 0x80000000
824 but somewhat easier to read. Ada has this? */
825 ERROR ("underscore in number");
826 else
827 {
828 int n;
829 /* It is not a decimal point.
830 It should be a digit (perhaps a hex digit). */
831
832 if (ISDIGIT (c)
833 || (base == 16 && ISXDIGIT (c)))
834 {
835 n = hex_value (c);
836 }
837 else if (base <= 10 && (c == 'e' || c == 'E'))
838 {
839 base = 10;
840 floatflag = AFTER_EXPON;
841 break;
842 }
843 else if (base == 16 && (c == 'p' || c == 'P'))
844 {
845 floatflag = AFTER_EXPON;
846 break; /* start of exponent */
847 }
848 else
849 {
850 p--;
851 break; /* start of suffix */
852 }
853
854 if (n >= largest_digit)
855 largest_digit = n;
856 numdigits++;
857
858 for (count = 0; count < TOTAL_PARTS; count++)
859 {
860 parts[count] *= base;
861 if (count)
862 {
863 parts[count]
864 += (parts[count-1] >> HOST_BITS_PER_CHAR);
865 parts[count-1]
866 &= (1 << HOST_BITS_PER_CHAR) - 1;
867 }
868 else
869 parts[0] += n;
870 }
871
872 /* If the highest-order part overflows (gets larger than
873 a host char will hold) then the whole number has
874 overflowed. Record this and truncate the highest-order
875 part. */
876 if (parts[TOTAL_PARTS - 1] >> HOST_BITS_PER_CHAR)
877 {
878 overflow = 1;
879 parts[TOTAL_PARTS - 1] &= (1 << HOST_BITS_PER_CHAR) - 1;
880 }
881 }
882 }
883 while (p < str + len);
884
885 /* This can happen on input like `int i = 0x;' */
886 if (numdigits == 0)
887 ERROR ("numeric constant with no digits");
888
889 if (largest_digit >= base)
890 ERROR ("numeric constant contains digits beyond the radix");
891
892 if (floatflag != NOT_FLOAT)
893 {
894 tree type;
895 const char *typename;
896 int imag, fflag, lflag;
897 REAL_VALUE_TYPE real;
898 char *copy;
899
900 if (base == 16 && floatflag != AFTER_EXPON)
901 ERROR ("hexadecimal floating constant has no exponent");
902
903 /* Read explicit exponent if any, and put it in tokenbuf. */
904 if ((base == 10 && ((c == 'e') || (c == 'E')))
905 || (base == 16 && (c == 'p' || c == 'P')))
906 {
907 if (p < str + len)
908 c = *p++;
909 if (p < str + len && (c == '+' || c == '-'))
910 c = *p++;
911 /* Exponent is decimal, even if string is a hex float. */
912 if (! ISDIGIT (c))
913 ERROR ("floating constant exponent has no digits");
914 while (p < str + len && ISDIGIT (c))
915 c = *p++;
916 if (! ISDIGIT (c))
917 p--;
918 }
919
920 /* Copy the float constant now; we don't want any suffixes in the
921 string passed to parse_float. */
922 copy = alloca (p - str + 1);
923 memcpy (copy, str, p - str);
924 copy[p - str] = '\0';
925
926 /* Now parse suffixes. */
927 fflag = lflag = imag = 0;
928 while (p < str + len)
929 switch (*p++)
930 {
931 case 'f': case 'F':
932 if (fflag)
933 ERROR ("more than one 'f' suffix on floating constant");
934 else if (warn_traditional && !in_system_header
935 && ! cpp_sys_macro_p (parse_in))
936 warning ("traditional C rejects the 'f' suffix");
937
938 fflag = 1;
939 break;
940
941 case 'l': case 'L':
942 if (lflag)
943 ERROR ("more than one 'l' suffix on floating constant");
944 else if (warn_traditional && !in_system_header
945 && ! cpp_sys_macro_p (parse_in))
946 warning ("traditional C rejects the 'l' suffix");
947
948 lflag = 1;
949 break;
950
951 case 'i': case 'I':
952 case 'j': case 'J':
953 if (imag)
954 ERROR ("more than one 'i' or 'j' suffix on floating constant");
955 else if (pedantic)
956 pedwarn ("ISO C forbids imaginary numeric constants");
957 imag = 1;
958 break;
959
960 default:
961 ERROR ("invalid suffix on floating constant");
962 }
963
964 type = double_type_node;
965 typename = "double";
966
967 if (fflag)
968 {
969 if (lflag)
970 ERROR ("both 'f' and 'l' suffixes on floating constant");
971
972 type = float_type_node;
973 typename = "float";
974 }
975 else if (lflag)
976 {
977 type = long_double_type_node;
978 typename = "long double";
979 }
980 else if (flag_single_precision_constant)
981 {
982 type = float_type_node;
983 typename = "float";
984 }
985
986 /* Warn about this only after we know we're not issuing an error. */
987 if (base == 16 && pedantic && !flag_isoc99)
988 pedwarn ("hexadecimal floating constants are only valid in C99");
989
990 /* The second argument, machine_mode, of REAL_VALUE_ATOF
991 tells the desired precision of the binary result
992 of decimal-to-binary conversion. */
993 if (base == 16)
994 real = REAL_VALUE_HTOF (copy, TYPE_MODE (type));
995 else
996 real = REAL_VALUE_ATOF (copy, TYPE_MODE (type));
997
998 /* A diagnostic is required here by some ISO C testsuites.
999 This is not pedwarn, because some people don't want
1000 an error for this. */
1001 if (REAL_VALUE_ISINF (real) && pedantic)
1002 warning ("floating point number exceeds range of 'double'");
1003
1004 /* Create a node with determined type and value. */
1005 if (imag)
1006 value = build_complex (NULL_TREE, convert (type, integer_zero_node),
1007 build_real (type, real));
1008 else
1009 value = build_real (type, real);
1010 }
1011 else
1012 {
1013 tree trad_type, type;
1014 HOST_WIDE_INT high, low;
1015 int spec_unsigned = 0;
1016 int spec_long = 0;
1017 int spec_long_long = 0;
1018 int spec_imag = 0;
1019 int suffix_lu = 0;
1020 int warn = 0, i;
1021
1022 trad_type = type = NULL_TREE;
1023 while (p < str + len)
1024 {
1025 c = *p++;
1026 switch (c)
1027 {
1028 case 'u': case 'U':
1029 if (spec_unsigned)
1030 error ("two 'u' suffixes on integer constant");
1031 else if (warn_traditional && !in_system_header
1032 && ! cpp_sys_macro_p (parse_in))
1033 warning ("traditional C rejects the 'u' suffix");
1034
1035 spec_unsigned = 1;
1036 if (spec_long)
1037 suffix_lu = 1;
1038 break;
1039
1040 case 'l': case 'L':
1041 if (spec_long)
1042 {
1043 if (spec_long_long)
1044 error ("three 'l' suffixes on integer constant");
1045 else if (suffix_lu)
1046 error ("'lul' is not a valid integer suffix");
1047 else if (c != spec_long)
1048 error ("'Ll' and 'lL' are not valid integer suffixes");
1049 else if (pedantic && ! flag_isoc99
1050 && ! in_system_header && warn_long_long)
1051 pedwarn ("ISO C89 forbids long long integer constants");
1052 spec_long_long = 1;
1053 }
1054 spec_long = c;
1055 break;
1056
1057 case 'i': case 'I': case 'j': case 'J':
1058 if (spec_imag)
1059 error ("more than one 'i' or 'j' suffix on integer constant");
1060 else if (pedantic)
1061 pedwarn ("ISO C forbids imaginary numeric constants");
1062 spec_imag = 1;
1063 break;
1064
1065 default:
1066 ERROR ("invalid suffix on integer constant");
1067 }
1068 }
1069
1070 /* If the literal overflowed, pedwarn about it now. */
1071 if (overflow)
1072 {
1073 warn = 1;
1074 pedwarn ("integer constant is too large for this configuration of the compiler - truncated to %d bits", HOST_BITS_PER_WIDE_INT * 2);
1075 }
1076
1077 /* This is simplified by the fact that our constant
1078 is always positive. */
1079
1080 high = low = 0;
1081
1082 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1083 {
1084 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1085 / HOST_BITS_PER_CHAR)]
1086 << (i * HOST_BITS_PER_CHAR));
1087 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1088 }
1089
1090 value = build_int_2 (low, high);
1091 TREE_TYPE (value) = long_long_unsigned_type_node;
1092
1093 /* If warn_traditional, calculate both the ISO type and the
1094 traditional type, then see if they disagree. */
1095 if (warn_traditional)
1096 {
1097 /* Traditionally, any constant is signed; but if unsigned is
1098 specified explicitly, obey that. Use the smallest size
1099 with the right number of bits, except for one special
1100 case with decimal constants. */
1101 if (! spec_long && base != 10
1102 && int_fits_type_p (value, unsigned_type_node))
1103 trad_type = spec_unsigned ? unsigned_type_node : integer_type_node;
1104 /* A decimal constant must be long if it does not fit in
1105 type int. I think this is independent of whether the
1106 constant is signed. */
1107 else if (! spec_long && base == 10
1108 && int_fits_type_p (value, integer_type_node))
1109 trad_type = spec_unsigned ? unsigned_type_node : integer_type_node;
1110 else if (! spec_long_long)
1111 trad_type = (spec_unsigned
1112 ? long_unsigned_type_node
1113 : long_integer_type_node);
1114 else if (int_fits_type_p (value,
1115 spec_unsigned
1116 ? long_long_unsigned_type_node
1117 : long_long_integer_type_node))
1118 trad_type = (spec_unsigned
1119 ? long_long_unsigned_type_node
1120 : long_long_integer_type_node);
1121 else
1122 trad_type = (spec_unsigned
1123 ? widest_unsigned_literal_type_node
1124 : widest_integer_literal_type_node);
1125 }
1126
1127 /* Calculate the ISO type. */
1128 if (! spec_long && ! spec_unsigned
1129 && int_fits_type_p (value, integer_type_node))
1130 type = integer_type_node;
1131 else if (! spec_long && (base != 10 || spec_unsigned)
1132 && int_fits_type_p (value, unsigned_type_node))
1133 type = unsigned_type_node;
1134 else if (! spec_unsigned && !spec_long_long
1135 && int_fits_type_p (value, long_integer_type_node))
1136 type = long_integer_type_node;
1137 else if (! spec_long_long
1138 && int_fits_type_p (value, long_unsigned_type_node))
1139 type = long_unsigned_type_node;
1140 else if (! spec_unsigned
1141 && int_fits_type_p (value, long_long_integer_type_node))
1142 type = long_long_integer_type_node;
1143 else if (int_fits_type_p (value, long_long_unsigned_type_node))
1144 type = long_long_unsigned_type_node;
1145 else if (! spec_unsigned
1146 && int_fits_type_p (value, widest_integer_literal_type_node))
1147 type = widest_integer_literal_type_node;
1148 else
1149 type = widest_unsigned_literal_type_node;
1150
1151 /* We assume that constants specified in a non-decimal
1152 base are bit patterns, and that the programmer really
1153 meant what they wrote. */
1154 if (warn_traditional && !in_system_header
1155 && base == 10 && trad_type != type)
1156 {
1157 if (TYPE_PRECISION (trad_type) != TYPE_PRECISION (type))
1158 warning ("width of integer constant is different in traditional C");
1159 else if (TREE_UNSIGNED (trad_type) != TREE_UNSIGNED (type))
1160 warning ("integer constant is unsigned in ISO C, signed in traditional C");
1161 else
1162 warning ("width of integer constant may change on other systems in traditional C");
1163 }
1164
1165 if (pedantic && (flag_isoc99 || !spec_long_long)
1166 && !warn
1167 && ((flag_isoc99
1168 ? TYPE_PRECISION (long_long_integer_type_node)
1169 : TYPE_PRECISION (long_integer_type_node)) < TYPE_PRECISION (type)))
1170 {
1171 warn = 1;
1172 pedwarn ("integer constant larger than the maximum value of %s",
1173 (flag_isoc99
1174 ? (TREE_UNSIGNED (type)
1175 ? _("an unsigned long long int")
1176 : _("a long long int"))
1177 : _("an unsigned long int")));
1178 }
1179
1180 if (base == 10 && ! spec_unsigned && TREE_UNSIGNED (type))
1181 warning ("decimal constant is so large that it is unsigned");
1182
1183 if (spec_imag)
1184 {
1185 if (TYPE_PRECISION (type)
1186 <= TYPE_PRECISION (integer_type_node))
1187 value = build_complex (NULL_TREE, integer_zero_node,
1188 convert (integer_type_node, value));
1189 else
1190 ERROR ("complex integer constant is too wide for 'complex int'");
1191 }
1192 else
1193 TREE_TYPE (value) = type;
1194
1195 /* If it's still an integer (not a complex), and it doesn't
1196 fit in the type we choose for it, then pedwarn. */
1197
1198 if (! warn
1199 && TREE_CODE (TREE_TYPE (value)) == INTEGER_TYPE
1200 && ! int_fits_type_p (value, TREE_TYPE (value)))
1201 pedwarn ("integer constant is larger than the maximum value for its type");
1202 }
1203
1204 if (p < str + len)
1205 error ("missing white space after number '%.*s'", (int) (p - str), str);
1206
1207 return value;
1208
1209 syntax_error:
1210 return integer_zero_node;
1211 }
1212
1213 static tree
1214 lex_string (str, len, wide)
1215 const unsigned char *str;
1216 unsigned int len;
1217 int wide;
1218 {
1219 tree value;
1220 char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
1221 char *q = buf;
1222 const unsigned char *p = str, *limit = str + len;
1223 cppchar_t c;
1224
1225 #ifdef MULTIBYTE_CHARS
1226 /* Reset multibyte conversion state. */
1227 (void) local_mbtowc (NULL, NULL, 0);
1228 #endif
1229
1230 while (p < limit)
1231 {
1232 #ifdef MULTIBYTE_CHARS
1233 wchar_t wc;
1234 int char_len;
1235
1236 char_len = local_mbtowc (&wc, (const char *) p, limit - p);
1237 if (char_len == -1)
1238 {
1239 warning ("ignoring invalid multibyte character");
1240 char_len = 1;
1241 c = *p++;
1242 }
1243 else
1244 {
1245 p += char_len;
1246 c = wc;
1247 }
1248 #else
1249 c = *p++;
1250 #endif
1251
1252 if (c == '\\' && !ignore_escape_flag)
1253 c = cpp_parse_escape (parse_in, &p, limit, wide);
1254
1255 /* Add this single character into the buffer either as a wchar_t,
1256 a multibyte sequence, or as a single byte. */
1257 if (wide)
1258 {
1259 unsigned charwidth = TYPE_PRECISION (char_type_node);
1260 unsigned bytemask = (1 << charwidth) - 1;
1261 int byte;
1262
1263 for (byte = 0; byte < WCHAR_BYTES; ++byte)
1264 {
1265 int n;
1266 if (byte >= (int) sizeof (c))
1267 n = 0;
1268 else
1269 n = (c >> (byte * charwidth)) & bytemask;
1270 if (BYTES_BIG_ENDIAN)
1271 q[WCHAR_BYTES - byte - 1] = n;
1272 else
1273 q[byte] = n;
1274 }
1275 q += WCHAR_BYTES;
1276 }
1277 #ifdef MULTIBYTE_CHARS
1278 else if (char_len > 1)
1279 {
1280 /* We're dealing with a multibyte character. */
1281 for ( ; char_len >0; --char_len)
1282 {
1283 *q++ = *(p - char_len);
1284 }
1285 }
1286 #endif
1287 else
1288 {
1289 *q++ = c;
1290 }
1291 }
1292
1293 /* Terminate the string value, either with a single byte zero
1294 or with a wide zero. */
1295
1296 if (wide)
1297 {
1298 memset (q, 0, WCHAR_BYTES);
1299 q += WCHAR_BYTES;
1300 }
1301 else
1302 {
1303 *q++ = '\0';
1304 }
1305
1306 value = build_string (q - buf, buf);
1307
1308 if (wide)
1309 TREE_TYPE (value) = wchar_array_type_node;
1310 else
1311 TREE_TYPE (value) = char_array_type_node;
1312 return value;
1313 }
1314
1315 /* Converts a (possibly wide) character constant token into a tree. */
1316 static tree
1317 lex_charconst (token)
1318 const cpp_token *token;
1319 {
1320 cppchar_t result;
1321 tree type, value;
1322 unsigned int chars_seen;
1323 int unsignedp;
1324
1325 result = cpp_interpret_charconst (parse_in, token,
1326 &chars_seen, &unsignedp);
1327
1328 /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
1329 before possibly widening to HOST_WIDE_INT for build_int_2. */
1330 if (unsignedp || (cppchar_signed_t) result >= 0)
1331 value = build_int_2 (result, 0);
1332 else
1333 value = build_int_2 ((cppchar_signed_t) result, -1);
1334
1335 if (token->type == CPP_WCHAR)
1336 type = wchar_type_node;
1337 /* In C, a character constant has type 'int'.
1338 In C++ 'char', but multi-char charconsts have type 'int'. */
1339 else if ((c_language == clk_c || c_language == clk_objective_c)
1340 || chars_seen > 1)
1341 type = integer_type_node;
1342 else
1343 type = char_type_node;
1344
1345 TREE_TYPE (value) = type;
1346 return value;
1347 }
This page took 0.097535 seconds and 5 git commands to generate.