gcc.gnu.org Git - gcc.git/blame - gcc/c-lex.c
testsuite_hooks.h: Suppress runtime exception thrown by missing named locale.
[gcc.git] / gcc / c-lex.c
CommitLineData
b9305c66 1/* Mainly the interface between cpplib and the C front ends.
517cbe13 2 Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
5793b276 3 1998, 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
e8bbfc4e 4
1322177d 5This file is part of GCC.
e8bbfc4e 6
1322177d
LB
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 2, or (at your option) any later
10version.
e8bbfc4e 11
1322177d
LB
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
e8bbfc4e
RK
16
17You should have received a copy of the GNU General Public License
1322177d
LB
18along with GCC; see the file COPYING. If not, write to the Free
19Software Foundation, 59 Temple Place - Suite 330, Boston, MA
2002111-1307, USA. */
e8bbfc4e 21
e9a25f70 22#include "config.h"
670ee920 23#include "system.h"
4977bab6
ZW
24#include "coretypes.h"
25#include "tm.h"
e8bbfc4e 26
11ad4784 27#include "real.h"
e8bbfc4e
RK
28#include "rtl.h"
29#include "tree.h"
eb3aaa5b 30#include "expr.h"
e8bbfc4e 31#include "input.h"
d6f4ec51 32#include "output.h"
e8bbfc4e 33#include "c-tree.h"
52dabb6c 34#include "c-common.h"
e8bbfc4e 35#include "flags.h"
0e5921e8 36#include "timevar.h"
8b97c5f8 37#include "cpplib.h"
3d6f7931 38#include "c-pragma.h"
5f6da302 39#include "toplev.h"
ab87f8c8 40#include "intl.h"
7bdb32b9 41#include "tm_p.h"
0e5921e8 42#include "splay-tree.h"
7f905405 43#include "debug.h"
ab87f8c8 44
e8bbfc4e 45#ifdef MULTIBYTE_CHARS
56f48ce9 46#include "mbchar.h"
e8bbfc4e 47#include <locale.h>
56f48ce9 48#endif /* MULTIBYTE_CHARS */
e8bbfc4e 49
67821e3a 50/* The current line map. */
47d89cf3 51static const struct line_map *map;
67821e3a 52
97293897
NB
53/* The line used to refresh the lineno global variable after each token. */
54static unsigned int src_lineno;
55
0e5921e8
ZW
56/* We may keep statistics about how long which files took to compile. */
57static int header_time, body_time;
58static splay_tree file_info_tree;
3ab6dd7c 59
e8bbfc4e
RK
60/* File used for outputting assembler code. */
61extern FILE *asm_out_file;
62
12a39b12
JM
63#undef WCHAR_TYPE_SIZE
64#define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
e8bbfc4e
RK
65
66/* Number of bytes in a wide character. */
67#define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
68
0e5921e8
ZW
69int pending_lang_change; /* If we need to switch languages - C++ only */
70int c_header_level; /* depth in C headers - C++ only */
fbb18613
JM
71
72/* Nonzero tells yylex to ignore \ in string constants. */
73static int ignore_escape_flag;
e9a25f70 74
ceeedfc1
NB
75static tree interpret_integer PARAMS ((const cpp_token *, unsigned int));
76static tree interpret_float PARAMS ((const cpp_token *, unsigned int));
77static enum integer_type_kind
78 narrowest_unsigned_type PARAMS ((tree, unsigned int));
79static enum integer_type_kind
80 narrowest_signed_type PARAMS ((tree, unsigned int));
62ae2529
JJ
81static tree lex_string PARAMS ((const unsigned char *, unsigned int,
82 int));
c8a96070 83static tree lex_charconst PARAMS ((const cpp_token *));
0e5921e8
ZW
84static void update_header_times PARAMS ((const char *));
85static int dump_one_header PARAMS ((splay_tree_node, void *));
97293897 86static void cb_line_change PARAMS ((cpp_reader *, const cpp_token *, int));
8bbbef34
NB
87static void cb_ident PARAMS ((cpp_reader *, unsigned int,
88 const cpp_string *));
47d89cf3 89static void cb_file_change PARAMS ((cpp_reader *, const struct line_map *));
8bbbef34
NB
90static void cb_def_pragma PARAMS ((cpp_reader *, unsigned int));
91static void cb_define PARAMS ((cpp_reader *, unsigned int,
92 cpp_hashnode *));
93static void cb_undef PARAMS ((cpp_reader *, unsigned int,
94 cpp_hashnode *));
e31c7eec 95\f
63973df3
NB
96void
97init_c_lex ()
e3d1fd32 98{
b61c5ed0 99 struct cpp_callbacks *cb;
0e5921e8
ZW
100 struct c_fileinfo *toplevel;
101
f5e99456 102 /* Set up filename timing. Must happen before cpp_read_main_file. */
0e5921e8
ZW
103 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
104 0,
105 (splay_tree_delete_value_fn)free);
a8a05998 106 toplevel = get_fileinfo ("<top level>");
0e5921e8
ZW
107 if (flag_detailed_statistics)
108 {
109 header_time = 0;
110 body_time = get_run_time ();
111 toplevel->time = body_time;
112 }
113
b61c5ed0
NB
114 cb = cpp_get_callbacks (parse_in);
115
9d10c9a9 116 cb->register_builtins = cb_register_builtins;
97293897 117 cb->line_change = cb_line_change;
b61c5ed0
NB
118 cb->ident = cb_ident;
119 cb->file_change = cb_file_change;
120 cb->def_pragma = cb_def_pragma;
17211ab5
GK
121 cb->valid_pch = c_common_valid_pch;
122 cb->read_pch = c_common_read_pch;
0e5921e8 123
65289a3a
NB
124 /* Set the debug callbacks if we can use them. */
125 if (debug_info_level == DINFO_LEVEL_VERBOSE
7a0c8d71
DR
126 && (write_symbols == DWARF_DEBUG || write_symbols == DWARF2_DEBUG
127 || write_symbols == VMS_AND_DWARF2_DEBUG))
65289a3a 128 {
b61c5ed0
NB
129 cb->define = cb_define;
130 cb->undef = cb_undef;
65289a3a 131 }
e3d1fd32
PB
132}
133
3aac38d7 134/* A thin wrapper around the real parser that initializes the
bebc7e8b
ZW
135 integrated preprocessor after debug output has been initialized.
136 Also, make sure the start_source_file debug hook gets called for
137 the primary source file. */
3aac38d7 138
52dabb6c 139void
ff45c01e
NB
140c_common_parse_file (set_yydebug)
141 int set_yydebug ATTRIBUTE_UNUSED;
3aac38d7 142{
ff45c01e
NB
143#if YYDEBUG != 0
144 yydebug = set_yydebug;
145#else
146 warning ("YYDEBUG not defined");
147#endif
148
bebc7e8b 149 (*debug_hooks->start_source_file) (lineno, input_filename);
f5e99456 150 cpp_finish_options (parse_in);
3aac38d7 151
17211ab5
GK
152 pch_init();
153
52dabb6c 154 yyparse ();
94a50397 155 free_parser_stacks ();
3aac38d7
RH
156}
157
0e5921e8
ZW
158struct c_fileinfo *
159get_fileinfo (name)
160 const char *name;
e3d1fd32 161{
0e5921e8
ZW
162 splay_tree_node n;
163 struct c_fileinfo *fi;
164
165 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
166 if (n)
167 return (struct c_fileinfo *) n->value;
168
169 fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
170 fi->time = 0;
171 fi->interface_only = 0;
172 fi->interface_unknown = 1;
173 splay_tree_insert (file_info_tree, (splay_tree_key) name,
174 (splay_tree_value) fi);
175 return fi;
e56e519d 176}
e3d1fd32 177
0e5921e8
ZW
178static void
179update_header_times (name)
180 const char *name;
e8bbfc4e 181{
0e5921e8
ZW
182 /* Changing files again. This means currently collected time
183 is charged against header time, and body time starts back at 0. */
184 if (flag_detailed_statistics)
e8bbfc4e 185 {
0e5921e8
ZW
186 int this_time = get_run_time ();
187 struct c_fileinfo *file = get_fileinfo (name);
188 header_time += this_time - body_time;
189 file->time += this_time - body_time;
190 body_time = this_time;
e8bbfc4e
RK
191 }
192}
193
0e5921e8
ZW
194static int
195dump_one_header (n, dummy)
196 splay_tree_node n;
197 void *dummy ATTRIBUTE_UNUSED;
e8bbfc4e 198{
0e5921e8
ZW
199 print_time ((const char *) n->key,
200 ((struct c_fileinfo *) n->value)->time);
201 return 0;
e8bbfc4e 202}
e8bbfc4e
RK
203
204void
0e5921e8 205dump_time_statistics ()
e8bbfc4e 206{
0e5921e8
ZW
207 struct c_fileinfo *file = get_fileinfo (input_filename);
208 int this_time = get_run_time ();
209 file->time += this_time - body_time;
210
211 fprintf (stderr, "\n******\n");
212 print_time ("header files (total)", header_time);
213 print_time ("main file (total)", this_time - body_time);
214 fprintf (stderr, "ratio = %g : 1\n",
215 (double)header_time / (double)(this_time - body_time));
216 fprintf (stderr, "\n******\n");
217
218 splay_tree_foreach (file_info_tree, dump_one_header, 0);
e8bbfc4e 219}
a6124a42 220
0e5921e8 221static void
8bbbef34 222cb_ident (pfile, line, str)
27e2564a 223 cpp_reader *pfile ATTRIBUTE_UNUSED;
8bbbef34 224 unsigned int line ATTRIBUTE_UNUSED;
f5720527 225 const cpp_string *str ATTRIBUTE_UNUSED;
0e5921e8 226{
0e5921e8 227#ifdef ASM_OUTPUT_IDENT
27e2564a 228 if (! flag_no_ident)
0e5921e8 229 {
27e2564a 230 /* Convert escapes in the string. */
4977bab6 231 tree value ATTRIBUTE_UNUSED = lex_string (str->text, str->len, 0);
27e2564a 232 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
0e5921e8
ZW
233 }
234#endif
27e2564a
NB
235}
236
97293897
NB
237/* Called at the start of every non-empty line. TOKEN is the first
238 lexed token on the line. Used for diagnostic line numbers. */
239static void
240cb_line_change (pfile, token, parsing_args)
241 cpp_reader *pfile ATTRIBUTE_UNUSED;
242 const cpp_token *token;
243 int parsing_args ATTRIBUTE_UNUSED;
244{
245 src_lineno = SOURCE_LINE (map, token->line);
246}
247
27e2564a 248static void
47d89cf3 249cb_file_change (pfile, new_map)
27e2564a 250 cpp_reader *pfile ATTRIBUTE_UNUSED;
47d89cf3 251 const struct line_map *new_map;
27e2564a 252{
47d89cf3 253 unsigned int to_line = SOURCE_LINE (new_map, new_map->to_line);
d82fc108 254
47d89cf3 255 if (new_map->reason == LC_ENTER)
fbb18613 256 {
5bea1ccf
JM
257 /* Don't stack the main buffer on the input stack;
258 we already did in compile_file. */
47d89cf3
NB
259 if (map == NULL)
260 main_input_filename = new_map->to_file;
d82fc108 261 else
0e5921e8 262 {
f78ce0b7
JB
263 int included_at = SOURCE_LINE (new_map - 1, new_map->from_line - 1);
264
265 lineno = included_at;
47d89cf3 266 push_srcloc (new_map->to_file, 1);
f78ce0b7 267 (*debug_hooks->start_source_file) (included_at, new_map->to_file);
27e2564a
NB
268#ifndef NO_IMPLICIT_EXTERN_C
269 if (c_header_level)
270 ++c_header_level;
47d89cf3 271 else if (new_map->sysp == 2)
27e2564a
NB
272 {
273 c_header_level = 1;
274 ++pending_lang_change;
275 }
0e5921e8 276#endif
27e2564a 277 }
fbb18613 278 }
47d89cf3 279 else if (new_map->reason == LC_LEAVE)
fbb18613 280 {
0e5921e8 281#ifndef NO_IMPLICIT_EXTERN_C
47d89cf3
NB
282 if (c_header_level && --c_header_level == 0)
283 {
284 if (new_map->sysp == 2)
285 warning ("badly nested C headers from preprocessor");
286 --pending_lang_change;
287 }
47d89cf3
NB
288#endif
289 pop_srcloc ();
290
291 (*debug_hooks->end_source_file) (to_line);
e8bbfc4e 292 }
fbb18613 293
47d89cf3
NB
294 update_header_times (new_map->to_file);
295 in_system_header = new_map->sysp != 0;
296 input_filename = new_map->to_file;
297 lineno = to_line;
298 map = new_map;
0e5921e8 299
0e5921e8
ZW
300 /* Hook for C++. */
301 extract_interface_info ();
302}
8b97c5f8
ZW
303
304static void
8bbbef34 305cb_def_pragma (pfile, line)
8b97c5f8 306 cpp_reader *pfile;
67821e3a 307 unsigned int line;
8b97c5f8
ZW
308{
309 /* Issue a warning message if we have been asked to do so. Ignore
310 unknown pragmas in system headers unless an explicit
ec5c56db 311 -Wunknown-pragmas has been given. */
8b97c5f8
ZW
312 if (warn_unknown_pragmas > in_system_header)
313 {
06470238 314 const unsigned char *space, *name;
4ed5bcfb 315 const cpp_token *s;
23356f93 316
06470238 317 space = name = (const unsigned char *) "";
4ed5bcfb 318 s = cpp_get_token (pfile);
06470238
NB
319 if (s->type != CPP_EOF)
320 {
321 space = cpp_token_as_text (pfile, s);
322 s = cpp_get_token (pfile);
323 if (s->type == CPP_NAME)
324 name = cpp_token_as_text (pfile, s);
325 }
8b97c5f8 326
67821e3a 327 lineno = SOURCE_LINE (map, line);
06470238 328 warning ("ignoring #pragma %s %s", space, name);
8b97c5f8
ZW
329 }
330}
0e5921e8 331
65289a3a
NB
332/* #define callback for DWARF and DWARF2 debug info. */
333static void
8bbbef34 334cb_define (pfile, line, node)
65289a3a 335 cpp_reader *pfile;
67821e3a 336 unsigned int line;
65289a3a
NB
337 cpp_hashnode *node;
338{
67821e3a 339 (*debug_hooks->define) (SOURCE_LINE (map, line),
7f905405 340 (const char *) cpp_macro_definition (pfile, node));
65289a3a
NB
341}
342
343/* #undef callback for DWARF and DWARF2 debug info. */
344static void
8bbbef34 345cb_undef (pfile, line, node)
67821e3a
NB
346 cpp_reader *pfile ATTRIBUTE_UNUSED;
347 unsigned int line;
65289a3a
NB
348 cpp_hashnode *node;
349{
67821e3a 350 (*debug_hooks->undef) (SOURCE_LINE (map, line),
7f905405 351 (const char *) NODE_NAME (node));
65289a3a
NB
352}
353
0e5921e8
ZW
354#if 0 /* not yet */
/* Returns nonzero if C is a universal-character-name.  Give an error if it
   is not one which may appear in an identifier, as per [extendid].

   Values below 0x7f yield 0.  Every other value yields 1; an error is
   reported first when the value lies above 0xffff or in none of the
   ranges tabulated below.  On TARGET_EBCDIC the result is always 0.

   Note that extended character support in identifiers has not yet been
   implemented.  It is my personal opinion that this is not a desirable
   feature.  Portable code cannot count on support for more than the basic
   identifier character set.  */

static inline int
is_extended_char (c)
     int c;
{
#ifdef TARGET_EBCDIC
  return 0;
#else
  /* Inclusive ranges of values accepted in identifiers.  */
  static const struct
  {
    int first;
    int last;
  } allowed[] = {
    /* Latin */
    { 0x00c0, 0x00d6 }, { 0x00d8, 0x00f6 }, { 0x00f8, 0x01f5 },
    { 0x01fa, 0x0217 }, { 0x0250, 0x02a8 }, { 0x1e00, 0x1e9a },
    { 0x1ea0, 0x1ef9 },

    /* Greek */
    { 0x0384, 0x0384 }, { 0x0388, 0x038a }, { 0x038c, 0x038c },
    { 0x038e, 0x03a1 }, { 0x03a3, 0x03ce }, { 0x03d0, 0x03d6 },
    { 0x03da, 0x03da }, { 0x03dc, 0x03dc }, { 0x03de, 0x03de },
    { 0x03e0, 0x03e0 }, { 0x03e2, 0x03f3 }, { 0x1f00, 0x1f15 },
    { 0x1f18, 0x1f1d }, { 0x1f20, 0x1f45 }, { 0x1f48, 0x1f4d },
    { 0x1f50, 0x1f57 }, { 0x1f59, 0x1f59 }, { 0x1f5b, 0x1f5b },
    { 0x1f5d, 0x1f5d }, { 0x1f5f, 0x1f7d }, { 0x1f80, 0x1fb4 },
    { 0x1fb6, 0x1fbc }, { 0x1fc2, 0x1fc4 }, { 0x1fc6, 0x1fcc },
    { 0x1fd0, 0x1fd3 }, { 0x1fd6, 0x1fdb }, { 0x1fe0, 0x1fec },
    { 0x1ff2, 0x1ff4 }, { 0x1ff6, 0x1ffc },

    /* Cyrillic */
    { 0x0401, 0x040d }, { 0x040f, 0x044f }, { 0x0451, 0x045c },
    { 0x045e, 0x0481 }, { 0x0490, 0x04c4 }, { 0x04c7, 0x04c8 },
    { 0x04cb, 0x04cc }, { 0x04d0, 0x04eb }, { 0x04ee, 0x04f5 },
    { 0x04f8, 0x04f9 },

    /* Armenian */
    { 0x0531, 0x0556 }, { 0x0561, 0x0587 },

    /* Hebrew */
    { 0x05d0, 0x05ea }, { 0x05f0, 0x05f4 },

    /* Arabic */
    { 0x0621, 0x063a }, { 0x0640, 0x0652 }, { 0x0670, 0x06b7 },
    { 0x06ba, 0x06be }, { 0x06c0, 0x06ce }, { 0x06e5, 0x06e7 },

    /* Devanagari */
    { 0x0905, 0x0939 }, { 0x0958, 0x0962 },

    /* Bengali */
    { 0x0985, 0x098c }, { 0x098f, 0x0990 }, { 0x0993, 0x09a8 },
    { 0x09aa, 0x09b0 }, { 0x09b2, 0x09b2 }, { 0x09b6, 0x09b9 },
    { 0x09dc, 0x09dd }, { 0x09df, 0x09e1 }, { 0x09f0, 0x09f1 },

    /* Gurmukhi */
    { 0x0a05, 0x0a0a }, { 0x0a0f, 0x0a10 }, { 0x0a13, 0x0a28 },
    { 0x0a2a, 0x0a30 }, { 0x0a32, 0x0a33 }, { 0x0a35, 0x0a36 },
    { 0x0a38, 0x0a39 }, { 0x0a59, 0x0a5c }, { 0x0a5e, 0x0a5e },

    /* Gujarati */
    { 0x0a85, 0x0a8b }, { 0x0a8d, 0x0a8d }, { 0x0a8f, 0x0a91 },
    { 0x0a93, 0x0aa8 }, { 0x0aaa, 0x0ab0 }, { 0x0ab2, 0x0ab3 },
    { 0x0ab5, 0x0ab9 }, { 0x0ae0, 0x0ae0 },

    /* Oriya */
    { 0x0b05, 0x0b0c }, { 0x0b0f, 0x0b10 }, { 0x0b13, 0x0b28 },
    { 0x0b2a, 0x0b30 }, { 0x0b32, 0x0b33 }, { 0x0b36, 0x0b39 },
    { 0x0b5c, 0x0b5d }, { 0x0b5f, 0x0b61 },

    /* Tamil */
    { 0x0b85, 0x0b8a }, { 0x0b8e, 0x0b90 }, { 0x0b92, 0x0b95 },
    { 0x0b99, 0x0b9a }, { 0x0b9c, 0x0b9c }, { 0x0b9e, 0x0b9f },
    { 0x0ba3, 0x0ba4 }, { 0x0ba8, 0x0baa }, { 0x0bae, 0x0bb5 },
    { 0x0bb7, 0x0bb9 },

    /* Telugu */
    { 0x0c05, 0x0c0c }, { 0x0c0e, 0x0c10 }, { 0x0c12, 0x0c28 },
    { 0x0c2a, 0x0c33 }, { 0x0c35, 0x0c39 }, { 0x0c60, 0x0c61 },

    /* Kannada */
    { 0x0c85, 0x0c8c }, { 0x0c8e, 0x0c90 }, { 0x0c92, 0x0ca8 },
    { 0x0caa, 0x0cb3 }, { 0x0cb5, 0x0cb9 }, { 0x0ce0, 0x0ce1 },

    /* Malayalam */
    { 0x0d05, 0x0d0c }, { 0x0d0e, 0x0d10 }, { 0x0d12, 0x0d28 },
    { 0x0d2a, 0x0d39 }, { 0x0d60, 0x0d61 },

    /* Thai */
    { 0x0e01, 0x0e30 }, { 0x0e32, 0x0e33 }, { 0x0e40, 0x0e46 },
    { 0x0e4f, 0x0e5b },

    /* Lao.  NOTE(review): 0x0e0d falls inside the first Thai range
       above, so this entry is redundant as written; possibly 0x0e8d
       was meant -- confirm against the table in the standard.  */
    { 0x0e81, 0x0e82 }, { 0x0e84, 0x0e84 }, { 0x0e87, 0x0e87 },
    { 0x0e88, 0x0e88 }, { 0x0e8a, 0x0e8a }, { 0x0e0d, 0x0e0d },
    { 0x0e94, 0x0e97 }, { 0x0e99, 0x0e9f }, { 0x0ea1, 0x0ea3 },
    { 0x0ea5, 0x0ea5 }, { 0x0ea7, 0x0ea7 }, { 0x0eaa, 0x0eaa },
    { 0x0eab, 0x0eab }, { 0x0ead, 0x0eb0 }, { 0x0eb2, 0x0eb2 },
    { 0x0eb3, 0x0eb3 }, { 0x0ebd, 0x0ebd }, { 0x0ec0, 0x0ec4 },
    { 0x0ec6, 0x0ec6 },

    /* Georgian */
    { 0x10a0, 0x10c5 }, { 0x10d0, 0x10f6 },

    /* Hiragana */
    { 0x3041, 0x3094 }, { 0x309b, 0x309e },

    /* Katakana */
    { 0x30a1, 0x30fe },

    /* Bopomofo */
    { 0x3105, 0x312c },

    /* Hangul */
    { 0x1100, 0x1159 }, { 0x1161, 0x11a2 }, { 0x11a8, 0x11f9 },

    /* CJK Unified Ideographs */
    { 0xf900, 0xfa2d }, { 0xfb1f, 0xfb36 }, { 0xfb38, 0xfb3c },
    { 0xfb3e, 0xfb3e }, { 0xfb40, 0xfb41 }, { 0xfb42, 0xfb44 },
    { 0xfb46, 0xfbb1 }, { 0xfbd3, 0xfd3f }, { 0xfd50, 0xfd8f },
    { 0xfd92, 0xfdc7 }, { 0xfdf0, 0xfdfb }, { 0xfe70, 0xfe72 },
    { 0xfe74, 0xfe74 }, { 0xfe76, 0xfefc }, { 0xff21, 0xff3a },
    { 0xff41, 0xff5a }, { 0xff66, 0xffbe }, { 0xffc2, 0xffc7 },
    { 0xffca, 0xffcf }, { 0xffd2, 0xffd7 }, { 0xffda, 0xffdc },
    { 0x4e00, 0x9fa5 }
  };
  unsigned int i;

  /* ASCII.  */
  if (c < 0x7f)
    return 0;

  /* None of the valid chars are outside the Basic Multilingual Plane (the
     low 16 bits).  */
  if (c > 0xffff)
    {
      error ("universal-character-name '\\U%08x' not valid in identifier", c);
      return 1;
    }

  for (i = 0; i < sizeof allowed / sizeof allowed[0]; i++)
    if (c >= allowed[i].first && c <= allowed[i].last)
      return 1;

  error ("universal-character-name '\\u%04x' not valid in identifier", c);
  return 1;
#endif
}
628
/* Add the UTF-8 representation of C to the token_buffer.

   Values up to 0x7f are added as a single byte.  Larger values are
   added as a lead byte, whose high bits give the sequence length,
   followed by continuation bytes that each carry six payload bits in
   the form 10xxxxxx.  */

static void
utf8_extend_token (c)
     int c;
{
  int shift, mask;

  if (c <= 0x0000007f)
    {
      extend_token (c);
      return;
    }
  else if (c <= 0x000007ff)
    shift = 6, mask = 0xc0;
  else if (c <= 0x0000ffff)
    shift = 12, mask = 0xe0;
  else if (c <= 0x001fffff)
    shift = 18, mask = 0xf0;
  else if (c <= 0x03ffffff)
    shift = 24, mask = 0xf8;
  else
    shift = 30, mask = 0xfc;

  /* Lead byte: length marker plus the most significant payload bits.  */
  extend_token (mask | (c >> shift));
  do
    {
      shift -= 6;
      /* Keep only the six bits belonging to this continuation byte;
         without the mask, higher bits of C would leak into the byte
         and corrupt the 10xxxxxx marker.  */
      extend_token ((unsigned char) (0x80 | ((c >> shift) & 0x3f)));
    }
  while (shift);
}
0e5921e8 661#endif
e8bbfc4e 662\f
0e5921e8
ZW
663int
664c_lex (value)
665 tree *value;
fbb18613 666{
4ed5bcfb 667 const cpp_token *tok;
0e5921e8
ZW
668
669 retry:
670 timevar_push (TV_CPP);
4ed5bcfb
NB
671 do
672 tok = cpp_get_token (parse_in);
673 while (tok->type == CPP_PADDING);
0e5921e8
ZW
674 timevar_pop (TV_CPP);
675
676 /* The C++ front end does horrible things with the current line
677 number. To ensure an accurate line number, we must reset it
23356f93 678 every time we return a token. */
97293897 679 lineno = src_lineno;
0e5921e8
ZW
680
681 *value = NULL_TREE;
a23c9413 682 switch (tok->type)
0e5921e8 683 {
4ed5bcfb 684 /* Issue this error here, where we can get at tok->val.c. */
0e5921e8 685 case CPP_OTHER:
4ed5bcfb
NB
686 if (ISGRAPH (tok->val.c))
687 error ("stray '%c' in program", tok->val.c);
0e5921e8 688 else
4ed5bcfb 689 error ("stray '\\%o' in program", tok->val.c);
0e5921e8
ZW
690 goto retry;
691
0e5921e8 692 case CPP_NAME:
4ed5bcfb 693 *value = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
0e5921e8 694 break;
fbb18613 695
0e5921e8 696 case CPP_NUMBER:
ceeedfc1
NB
697 {
698 unsigned int flags = cpp_classify_number (parse_in, tok);
699
700 switch (flags & CPP_N_CATEGORY)
701 {
702 case CPP_N_INVALID:
703 /* cpplib has issued an error. */
e8f2b18d 704 *value = error_mark_node;
ceeedfc1
NB
705 break;
706
707 case CPP_N_INTEGER:
708 *value = interpret_integer (tok, flags);
709 break;
710
711 case CPP_N_FLOATING:
712 *value = interpret_float (tok, flags);
713 break;
714
715 default:
716 abort ();
717 }
718 }
0e5921e8 719 break;
93868d11 720
0e5921e8
ZW
721 case CPP_CHAR:
722 case CPP_WCHAR:
4ed5bcfb 723 *value = lex_charconst (tok);
0e5921e8 724 break;
fbb18613 725
0e5921e8
ZW
726 case CPP_STRING:
727 case CPP_WSTRING:
62ae2529
JJ
728 *value = lex_string (tok->val.str.text, tok->val.str.len,
729 tok->type == CPP_WSTRING);
0e5921e8 730 break;
fbb18613 731
0e5921e8
ZW
732 /* These tokens should not be visible outside cpplib. */
733 case CPP_HEADER_NAME:
734 case CPP_COMMENT:
735 case CPP_MACRO_ARG:
0e5921e8
ZW
736 abort ();
737
738 default: break;
739 }
740
a23c9413 741 return tok->type;
0e5921e8 742}
8d9bfdc5 743
ceeedfc1
NB
744/* Returns the narrowest C-visible unsigned type, starting with the
745 minimum specified by FLAGS, that can fit VALUE, or itk_none if
746 there isn't one. */
747static enum integer_type_kind
748narrowest_unsigned_type (value, flags)
749 tree value;
750 unsigned int flags;
0e5921e8 751{
ceeedfc1 752 enum integer_type_kind itk;
56f48ce9 753
ceeedfc1
NB
754 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
755 itk = itk_unsigned_int;
756 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
757 itk = itk_unsigned_long;
758 else
759 itk = itk_unsigned_long_long;
e8bbfc4e 760
ceeedfc1
NB
761 /* int_fits_type_p must think the type of its first argument is
762 wider than its second argument, or it won't do the proper check. */
763 TREE_TYPE (value) = widest_unsigned_literal_type_node;
e8bbfc4e 764
ceeedfc1
NB
765 for (; itk < itk_none; itk += 2 /* skip unsigned types */)
766 if (int_fits_type_p (value, integer_types[itk]))
767 return itk;
56f48ce9 768
ceeedfc1
NB
769 return itk_none;
770}
e8bbfc4e 771
ceeedfc1
NB
772/* Ditto, but narrowest signed type. */
773static enum integer_type_kind
774narrowest_signed_type (value, flags)
775 tree value;
776 unsigned int flags;
777{
778 enum integer_type_kind itk;
e8bbfc4e 779
ceeedfc1
NB
780 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
781 itk = itk_int;
782 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
783 itk = itk_long;
784 else
785 itk = itk_long_long;
e8bbfc4e 786
ceeedfc1
NB
787 /* int_fits_type_p must think the type of its first argument is
788 wider than its second argument, or it won't do the proper check. */
789 TREE_TYPE (value) = widest_unsigned_literal_type_node;
e8bbfc4e 790
ceeedfc1
NB
791 for (; itk < itk_none; itk += 2 /* skip signed types */)
792 if (int_fits_type_p (value, integer_types[itk]))
793 return itk;
15e5ad76 794
ceeedfc1
NB
795 return itk_none;
796}
15e5ad76 797
ceeedfc1
NB
798/* Interpret TOKEN, an integer with FLAGS as classified by cpplib. */
799static tree
800interpret_integer (token, flags)
801 const cpp_token *token;
802 unsigned int flags;
803{
804 tree value, type;
805 enum integer_type_kind itk;
806 cpp_num integer;
807 cpp_options *options = cpp_get_options (parse_in);
808
809 integer = cpp_interpret_integer (parse_in, token, flags);
810 integer = cpp_num_sign_extend (integer, options->precision);
811 value = build_int_2_wide (integer.low, integer.high);
812
813 /* The type of a constant with a U suffix is straightforward. */
814 if (flags & CPP_N_UNSIGNED)
815 itk = narrowest_unsigned_type (value, flags);
0e5921e8
ZW
816 else
817 {
ceeedfc1
NB
818 /* The type of a potentially-signed integer constant varies
819 depending on the base it's in, the standard in use, and the
820 length suffixes. */
821 enum integer_type_kind itk_u = narrowest_unsigned_type (value, flags);
822 enum integer_type_kind itk_s = narrowest_signed_type (value, flags);
823
824 /* In both C89 and C99, octal and hex constants may be signed or
825 unsigned, whichever fits tighter. We do not warn about this
826 choice differing from the traditional choice, as the constant
827 is probably a bit pattern and either way will work. */
828 if ((flags & CPP_N_RADIX) != CPP_N_DECIMAL)
829 itk = MIN (itk_u, itk_s);
830 else
0e5921e8 831 {
ceeedfc1
NB
832 /* In C99, decimal constants are always signed.
833 In C89, decimal constants that don't fit in long have
8d9afc4e 834 undefined behavior; we try to make them unsigned long.
ceeedfc1
NB
835 In GCC's extended C89, that last is true of decimal
836 constants that don't fit in long long, too. */
837
838 itk = itk_s;
839 if (itk_s > itk_u && itk_s > itk_long)
0e5921e8 840 {
ceeedfc1 841 if (!flag_isoc99)
0e5921e8 842 {
ceeedfc1
NB
843 if (itk_u < itk_unsigned_long)
844 itk_u = itk_unsigned_long;
845 itk = itk_u;
56508306 846 warning ("this decimal constant is unsigned only in ISO C90");
0e5921e8 847 }
ceeedfc1 848 else if (warn_traditional)
56508306 849 warning ("this decimal constant would be unsigned in ISO C90");
0e5921e8
ZW
850 }
851 }
ceeedfc1 852 }
56f48ce9 853
ceeedfc1
NB
854 if (itk == itk_none)
855 /* cpplib has already issued a warning for overflow. */
856 type = ((flags & CPP_N_UNSIGNED)
857 ? widest_unsigned_literal_type_node
858 : widest_integer_literal_type_node);
859 else
860 type = integer_types[itk];
e8bbfc4e 861
ceeedfc1
NB
862 if (itk > itk_unsigned_long
863 && (flags & CPP_N_WIDTH) != CPP_N_LARGE
864 && ! in_system_header && ! flag_isoc99)
865 pedwarn ("integer constant is too large for \"%s\" type",
866 (flags & CPP_N_UNSIGNED) ? "unsigned long" : "long");
0468bc75 867
ceeedfc1 868 TREE_TYPE (value) = type;
e8bbfc4e 869
ceeedfc1
NB
870 /* Convert imaginary to a complex type. */
871 if (flags & CPP_N_IMAGINARY)
872 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
e8bbfc4e 873
ceeedfc1
NB
874 return value;
875}
e8bbfc4e 876
ceeedfc1
NB
877/* Interpret TOKEN, a floating point number with FLAGS as classified
878 by cpplib. */
879static tree
880interpret_float (token, flags)
881 const cpp_token *token;
882 unsigned int flags;
883{
884 tree type;
885 tree value;
886 REAL_VALUE_TYPE real;
887 char *copy;
888 size_t copylen;
889 const char *typename;
e8bbfc4e 890
ceeedfc1
NB
891 /* FIXME: make %T work in error/warning, then we don't need typename. */
892 if ((flags & CPP_N_WIDTH) == CPP_N_LARGE)
893 {
894 type = long_double_type_node;
895 typename = "long double";
896 }
897 else if ((flags & CPP_N_WIDTH) == CPP_N_SMALL
898 || flag_single_precision_constant)
899 {
900 type = float_type_node;
901 typename = "float";
902 }
903 else
904 {
905 type = double_type_node;
906 typename = "double";
907 }
e8bbfc4e 908
ceeedfc1
NB
909 /* Copy the constant to a nul-terminated buffer. If the constant
910 has any suffixes, cut them off; REAL_VALUE_ATOF/ REAL_VALUE_HTOF
911 can't handle them. */
912 copylen = token->val.str.len;
913 if ((flags & CPP_N_WIDTH) != CPP_N_MEDIUM)
914 /* Must be an F or L suffix. */
915 copylen--;
916 if (flags & CPP_N_IMAGINARY)
917 /* I or J suffix. */
918 copylen--;
919
920 copy = alloca (copylen + 1);
921 memcpy (copy, token->val.str.text, copylen);
922 copy[copylen] = '\0';
923
efdc7e19
RH
924 real_from_string (&real, copy);
925 real_convert (&real, TYPE_MODE (type), &real);
fbb18613 926
ceeedfc1
NB
927 /* A diagnostic is required for "soft" overflow by some ISO C
928 testsuites. This is not pedwarn, because some people don't want
929 an error for this.
930 ??? That's a dubious reason... is this a mandatory diagnostic or
931 isn't it? -- zw, 2001-08-21. */
932 if (REAL_VALUE_ISINF (real) && pedantic)
933 warning ("floating constant exceeds range of \"%s\"", typename);
fbb18613 934
ceeedfc1
NB
935 /* Create a node with determined type and value. */
936 value = build_real (type, real);
937 if (flags & CPP_N_IMAGINARY)
938 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
e8bbfc4e 939
0e5921e8 940 return value;
0e5921e8 941}
e8bbfc4e 942
0e5921e8
ZW
943static tree
944lex_string (str, len, wide)
62ae2529 945 const unsigned char *str;
0e5921e8
ZW
946 unsigned int len;
947 int wide;
948{
949 tree value;
950 char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
951 char *q = buf;
62ae2529 952 const unsigned char *p = str, *limit = str + len;
4268e8bb 953 cppchar_t c;
e9a25f70 954
0e5921e8
ZW
955#ifdef MULTIBYTE_CHARS
956 /* Reset multibyte conversion state. */
9714cf43 957 (void) local_mbtowc (NULL, NULL, 0);
0e5921e8 958#endif
e9a25f70 959
0e5921e8
ZW
960 while (p < limit)
961 {
962#ifdef MULTIBYTE_CHARS
963 wchar_t wc;
964 int char_len;
965
62ae2529 966 char_len = local_mbtowc (&wc, (const char *) p, limit - p);
0e5921e8
ZW
967 if (char_len == -1)
968 {
53fcdc76 969 warning ("ignoring invalid multibyte character");
0e5921e8
ZW
970 char_len = 1;
971 c = *p++;
972 }
973 else
974 {
975 p += char_len;
976 c = wc;
977 }
978#else
979 c = *p++;
980#endif
981
982 if (c == '\\' && !ignore_escape_flag)
4268e8bb 983 c = cpp_parse_escape (parse_in, &p, limit, wide);
0e5921e8 984
64cdc383
MH
985 /* Add this single character into the buffer either as a wchar_t,
986 a multibyte sequence, or as a single byte. */
0e5921e8
ZW
987 if (wide)
988 {
989 unsigned charwidth = TYPE_PRECISION (char_type_node);
5c80f6e6 990 unsigned bytemask = (1 << charwidth) - 1;
0e5921e8
ZW
991 int byte;
992
993 for (byte = 0; byte < WCHAR_BYTES; ++byte)
994 {
995 int n;
996 if (byte >= (int) sizeof (c))
997 n = 0;
998 else
999 n = (c >> (byte * charwidth)) & bytemask;
1000 if (BYTES_BIG_ENDIAN)
1001 q[WCHAR_BYTES - byte - 1] = n;
1002 else
1003 q[byte] = n;
1004 }
1005 q += WCHAR_BYTES;
1006 }
64cdc383
MH
1007#ifdef MULTIBYTE_CHARS
1008 else if (char_len > 1)
1009 {
4b7e68e7 1010 /* We're dealing with a multibyte character. */
64cdc383
MH
1011 for ( ; char_len >0; --char_len)
1012 {
1013 *q++ = *(p - char_len);
1014 }
1015 }
1016#endif
0e5921e8
ZW
1017 else
1018 {
1019 *q++ = c;
1020 }
e8bbfc4e
RK
1021 }
1022
0e5921e8
ZW
1023 /* Terminate the string value, either with a single byte zero
1024 or with a wide zero. */
e8bbfc4e 1025
0e5921e8
ZW
1026 if (wide)
1027 {
1028 memset (q, 0, WCHAR_BYTES);
1029 q += WCHAR_BYTES;
1030 }
1031 else
1032 {
1033 *q++ = '\0';
1034 }
1035
1036 value = build_string (q - buf, buf);
1037
1038 if (wide)
1039 TREE_TYPE (value) = wchar_array_type_node;
1040 else
1041 TREE_TYPE (value) = char_array_type_node;
e8bbfc4e
RK
1042 return value;
1043}
1044
c8a96070 1045/* Converts a (possibly wide) character constant token into a tree. */
0e5921e8 1046static tree
c8a96070
NB
1047lex_charconst (token)
1048 const cpp_token *token;
e8bbfc4e 1049{
4268e8bb 1050 cppchar_t result;
9340544b 1051 tree type, value;
c8a96070 1052 unsigned int chars_seen;
4268e8bb 1053 int unsignedp;
ceeedfc1 1054
a5a49440 1055 result = cpp_interpret_charconst (parse_in, token,
4268e8bb 1056 &chars_seen, &unsignedp);
9340544b 1057
4268e8bb
NB
1058 /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
1059 before possibly widening to HOST_WIDE_INT for build_int_2. */
1060 if (unsignedp || (cppchar_signed_t) result >= 0)
1061 value = build_int_2 (result, 0);
1062 else
1063 value = build_int_2 ((cppchar_signed_t) result, -1);
9340544b 1064
4268e8bb
NB
1065 if (token->type == CPP_WCHAR)
1066 type = wchar_type_node;
1067 /* In C, a character constant has type 'int'.
1068 In C++ 'char', but multi-char charconsts have type 'int'. */
0f7866e7 1069 else if ((c_language == clk_c) || chars_seen > 1)
4268e8bb
NB
1070 type = integer_type_node;
1071 else
1072 type = char_type_node;
9340544b 1073
4268e8bb 1074 TREE_TYPE (value) = type;
0e5921e8 1075 return value;
e8bbfc4e 1076}
This page took 1.079842 seconds and 5 git commands to generate.