1 /* Parse C expressions for CCCP.
2 Copyright (C) 1987, 1992 Free Software Foundation.
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 2, or (at your option) any
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
18 In other words, you are welcome to use, share and improve this program.
19 You are forbidden to forbid anyone else to use, share and improve
20 what you give them. Help stamp out software-hoarding!
22 Adapted from expread.y of GDB by Paul Rubin, July 1986.
24 /* Parse a C expression from text in a string */
29 /* #define YYDEBUG 1 */
31 #ifdef MULTIBYTE_CHARS
38 typedef unsigned char U_CHAR;
40 /* This is used for communicating lists of keywords with cccp.c. */
48 /* Define a generic NULL if one hasn't already been defined. */
55 #if defined (USE_PROTOTYPES) ? USE_PROTOTYPES : defined (__STDC__)
56 #define GENERIC_PTR void *
58 #define GENERIC_PTR char *
63 #define NULL_PTR ((GENERIC_PTR)0)
70 static jmp_buf parse_return_error;
72 /* Nonzero means count most punctuation as part of a name. */
73 static int keyword_parsing = 0;
75 /* some external tables of character types */
76 extern unsigned char is_idstart[], is_idchar[], is_hor_space[];
78 extern char *xmalloc ();
80 /* Flag for -pedantic. */
83 /* Flag for -traditional. */
84 extern int traditional;
86 #ifndef CHAR_TYPE_SIZE
87 #define CHAR_TYPE_SIZE BITS_PER_UNIT
91 #define INT_TYPE_SIZE BITS_PER_WORD
94 #ifndef LONG_TYPE_SIZE
95 #define LONG_TYPE_SIZE BITS_PER_WORD
98 #ifndef WCHAR_TYPE_SIZE
99 #define WCHAR_TYPE_SIZE INT_TYPE_SIZE
104 struct constant {long value; int unsignedp;} integer;
105 struct name {U_CHAR *address; int length;} name;
106 struct arglist *keywords;
111 %type <integer> exp exp1 start
112 %type <keywords> keywords
113 %token <integer> INT CHAR
115 %token <integer> ERROR
125 %left '<' '>' LEQ GEQ
136 { expression_value = $1.value; }
139 /* Expressions, including the comma operator. */
143 pedwarn ("comma operator in operand of `#if'");
147 /* Expressions, not including the comma operator. */
148 exp : '-' exp %prec UNARY
149 { $$.value = - $2.value;
150 $$.unsignedp = $2.unsignedp; }
151 | '!' exp %prec UNARY
152 { $$.value = ! $2.value;
154 | '+' exp %prec UNARY
156 | '~' exp %prec UNARY
157 { $$.value = ~ $2.value;
158 $$.unsignedp = $2.unsignedp; }
160 { $$.value = check_assertion ($2.address, $2.length,
164 { keyword_parsing = 1; }
166 { $$.value = check_assertion ($2.address, $2.length,
174 /* Binary operators in order of decreasing precedence. */
176 { $$.unsignedp = $1.unsignedp || $3.unsignedp;
178 $$.value = (unsigned) $1.value * $3.value;
180 $$.value = $1.value * $3.value; }
184 error ("division by zero in #if");
187 $$.unsignedp = $1.unsignedp || $3.unsignedp;
189 $$.value = (unsigned) $1.value / $3.value;
191 $$.value = $1.value / $3.value; }
195 error ("division by zero in #if");
198 $$.unsignedp = $1.unsignedp || $3.unsignedp;
200 $$.value = (unsigned) $1.value % $3.value;
202 $$.value = $1.value % $3.value; }
204 { $$.value = $1.value + $3.value;
205 $$.unsignedp = $1.unsignedp || $3.unsignedp; }
207 { $$.value = $1.value - $3.value;
208 $$.unsignedp = $1.unsignedp || $3.unsignedp; }
210 { $$.unsignedp = $1.unsignedp;
212 $$.value = (unsigned) $1.value << $3.value;
214 $$.value = $1.value << $3.value; }
216 { $$.unsignedp = $1.unsignedp;
218 $$.value = (unsigned) $1.value >> $3.value;
220 $$.value = $1.value >> $3.value; }
222 { $$.value = ($1.value == $3.value);
225 { $$.value = ($1.value != $3.value);
229 if ($1.unsignedp || $3.unsignedp)
230 $$.value = (unsigned) $1.value <= $3.value;
232 $$.value = $1.value <= $3.value; }
235 if ($1.unsignedp || $3.unsignedp)
236 $$.value = (unsigned) $1.value >= $3.value;
238 $$.value = $1.value >= $3.value; }
241 if ($1.unsignedp || $3.unsignedp)
242 $$.value = (unsigned) $1.value < $3.value;
244 $$.value = $1.value < $3.value; }
247 if ($1.unsignedp || $3.unsignedp)
248 $$.value = (unsigned) $1.value > $3.value;
250 $$.value = $1.value > $3.value; }
252 { $$.value = $1.value & $3.value;
253 $$.unsignedp = $1.unsignedp || $3.unsignedp; }
255 { $$.value = $1.value ^ $3.value;
256 $$.unsignedp = $1.unsignedp || $3.unsignedp; }
258 { $$.value = $1.value | $3.value;
259 $$.unsignedp = $1.unsignedp || $3.unsignedp; }
261 { $$.value = ($1.value && $3.value);
264 { $$.value = ($1.value || $3.value);
266 | exp '?' exp ':' exp
267 { $$.value = $1.value ? $3.value : $5.value;
268 $$.unsignedp = $3.unsignedp || $5.unsignedp; }
270 { $$ = yylval.integer; }
272 { $$ = yylval.integer; }
280 | '(' keywords ')' keywords
281 { struct arglist *temp;
282 $$ = (struct arglist *) xmalloc (sizeof (struct arglist));
284 $$->name = (U_CHAR *) "(";
287 while (temp != 0 && temp->next != 0)
289 temp->next = (struct arglist *) xmalloc (sizeof (struct arglist));
290 temp->next->next = $4;
291 temp->next->name = (U_CHAR *) ")";
292 temp->next->length = 1; }
294 { $$ = (struct arglist *) xmalloc (sizeof (struct arglist));
295 $$->name = $1.address;
296 $$->length = $1.length;
301 /* During parsing of a C expression, the pointer to the next character
302 is in this variable. */
306 /* Take care of parsing a number (anything that starts with a digit).
307 Set yylval and return the token type; update lexptr.
308 LEN is the number of characters in it. */
310 /* maybe needs to actually deal with floating point numbers */
316 register char *p = lexptr;
319 register int base = 10;
320 register int len = olen;
322 for (c = 0; c < len; c++)
324 /* It's a float since it contains a point. */
325 yyerror ("floating point numbers not allowed in #if expressions");
329 yylval.integer.unsignedp = 0;
331 if (len >= 3 && (!strncmp (p, "0x", 2) || !strncmp (p, "0X", 2))) {
342 if (c >= 'A' && c <= 'Z') c += 'a' - 'A';
344 if (c >= '0' && c <= '9') {
347 } else if (base == 16 && c >= 'a' && c <= 'f') {
351 /* `l' means long, and `u' means unsigned. */
353 if (c == 'l' || c == 'L')
355 else if (c == 'u' || c == 'U')
356 yylval.integer.unsignedp = 1;
365 /* Don't look for any more digits after the suffixes. */
371 yyerror ("Invalid number in #if expression");
375 /* If too big to be signed, consider it unsigned. */
377 yylval.integer.unsignedp = 1;
380 yylval.integer.value = n;
389 static struct token tokentab2[] = {
403 /* Read one token, getting characters through lexptr. */
409 register int namelen;
410 register char *tokstart;
411 register struct token *toktab;
418 /* See if it is a special token of length 2. */
419 if (! keyword_parsing)
420 for (toktab = tokentab2; toktab->operator != NULL; toktab++)
421 if (c == *toktab->operator && tokstart[1] == toktab->operator[1]) {
423 if (toktab->token == ERROR)
425 char *buf = (char *) alloca (40);
426 sprintf (buf, "`%s' not allowed in operand of `#if'", toktab->operator);
429 return toktab->token;
444 /* Capital L may start a wide-string or wide-character constant. */
445 if (lexptr[1] == '\'')
451 if (lexptr[1] == '"')
455 goto string_constant;
463 if (keyword_parsing) {
464 char *start_ptr = lexptr - 1;
468 c = parse_escape (&lexptr);
472 yylval.name.address = (U_CHAR *) tokstart;
473 yylval.name.length = lexptr - start_ptr;
477 /* This code for reading a character constant
478 handles multicharacter constants and wide characters.
479 It is mostly copied from c-lex.c. */
481 register int result = 0;
482 register num_chars = 0;
483 unsigned width = CHAR_TYPE_SIZE;
489 width = WCHAR_TYPE_SIZE;
490 #ifdef MULTIBYTE_CHARS
491 max_chars = MB_CUR_MAX;
497 max_chars = LONG_TYPE_SIZE / width;
499 token_buffer = (char *) alloca (max_chars + 1);
505 if (c == '\'' || c == EOF)
510 c = parse_escape (&lexptr);
511 if (width < HOST_BITS_PER_INT
512 && (unsigned) c >= (1 << width))
513 pedwarn ("escape sequence out of range for character");
518 /* Merge character into result; ignore excess chars. */
519 if (num_chars < max_chars + 1)
521 if (width < HOST_BITS_PER_INT)
522 result = (result << width) | (c & ((1 << width) - 1));
525 token_buffer[num_chars - 1] = c;
529 token_buffer[num_chars] = 0;
532 error ("malformatted character constant");
533 else if (num_chars == 0)
534 error ("empty character constant");
535 else if (num_chars > max_chars)
537 num_chars = max_chars;
538 error ("character constant too long");
540 else if (num_chars != 1 && ! traditional)
541 warning ("multi-character character constant");
543 /* If char type is signed, sign-extend the constant. */
546 int num_bits = num_chars * width;
548 if (lookup ("__CHAR_UNSIGNED__", sizeof ("__CHAR_UNSIGNED__")-1, -1)
549 || ((result >> (num_bits - 1)) & 1) == 0)
551 = result & ((unsigned) ~0 >> (HOST_BITS_PER_INT - num_bits));
554 = result | ~((unsigned) ~0 >> (HOST_BITS_PER_INT - num_bits));
558 #ifdef MULTIBYTE_CHARS
559 /* Set the initial shift state and convert the next sequence. */
561 /* In all locales L'\0' is zero and mbtowc will return zero,
564 || (num_chars == 1 && token_buffer[0] != '\0'))
567 (void) mbtowc (NULL_PTR, NULL_PTR, 0);
568 if (mbtowc (& wc, token_buffer, num_chars) == num_chars)
571 warning ("Ignoring invalid multibyte character");
574 yylval.integer.value = result;
578 /* This is always a signed type. */
579 yylval.integer.unsignedp = 0;
583 /* some of these chars are invalid in constant expressions;
584 maybe do something about them later */
617 if (keyword_parsing) {
618 char *start_ptr = lexptr;
623 c = parse_escape (&lexptr);
627 yylval.name.address = (U_CHAR *) tokstart;
628 yylval.name.length = lexptr - start_ptr;
631 yyerror ("string constants not allowed in #if expressions");
635 if (c >= '0' && c <= '9' && !keyword_parsing) {
638 c = tokstart[namelen], is_idchar[c] || c == '.';
641 return parse_number (namelen);
644 /* It is a name. See how long it is. */
646 if (keyword_parsing) {
647 for (namelen = 0;; namelen++) {
648 if (is_hor_space[tokstart[namelen]])
650 if (tokstart[namelen] == '(' || tokstart[namelen] == ')')
652 if (tokstart[namelen] == '"' || tokstart[namelen] == '\'')
656 if (!is_idstart[c]) {
657 yyerror ("Invalid token in expression");
661 for (namelen = 0; is_idchar[tokstart[namelen]]; namelen++)
666 yylval.name.address = (U_CHAR *) tokstart;
667 yylval.name.length = namelen;
672 /* Parse a C escape sequence. STRING_PTR points to a variable
673 containing a pointer to the string to parse. That pointer
674 is updated past the characters we use. The value of the
675 escape sequence is returned.
677 A negative value means the sequence \ newline was seen,
678 which is supposed to be equivalent to nothing at all.
680 If \ is followed by a null character, we return a negative
681 value and leave the string pointer pointing at the null character.
683 If \ is followed by 000, we return 0 and leave the string pointer
684 after the zeros. A value of 0 does not mean end of string. */
687 parse_escape (string_ptr)
690 register int c = *(*string_ptr)++;
702 return TARGET_NEWLINE;
715 c = *(*string_ptr)++;
717 c = parse_escape (string_ptr);
720 return (c & 0200) | (c & 037);
731 register int i = c - '0';
732 register int count = 0;
735 c = *(*string_ptr)++;
736 if (c >= '0' && c <= '7')
737 i = (i << 3) + c - '0';
744 if ((i & ~((1 << CHAR_TYPE_SIZE) - 1)) != 0)
746 i &= (1 << CHAR_TYPE_SIZE) - 1;
747 warning ("octal character constant does not fit in a byte");
756 c = *(*string_ptr)++;
757 if (c >= '0' && c <= '9')
758 i = (i << 4) + c - '0';
759 else if (c >= 'a' && c <= 'f')
760 i = (i << 4) + c - 'a' + 10;
761 else if (c >= 'A' && c <= 'F')
762 i = (i << 4) + c - 'A' + 10;
769 if ((i & ~((1 << BITS_PER_UNIT) - 1)) != 0)
771 i &= (1 << BITS_PER_UNIT) - 1;
772 warning ("hex character constant does not fit in a byte");
786 longjmp (parse_return_error, 1);
789 /* This page contains the entry point to this file. */
791 /* Parse STRING as an expression, and complain if this fails
792 to use up all of the contents of STRING. */
793 /* We do not support C comments. They should be removed before
794 this function is called. */
797 parse_c_expression (string)
802 if (lexptr == 0 || *lexptr == 0) {
803 error ("empty #if expression");
804 return 0; /* don't include the #if group */
807 /* if there is some sort of scanning error, just return 0 and assume
808 the parsing routine has printed an error message somewhere.
809 there is surely a better thing to do than this. */
810 if (setjmp (parse_return_error))
814 return 0; /* actually this is never reached
815 the way things stand. */
817 error ("Junk after end of expression.");
819 return expression_value; /* set by yyparse () */
822 #ifdef TEST_EXP_READER
825 /* Main program for testing purposes. */
835 initialize_random_junk ();
838 printf ("enter expression: ");
840 while ((buf[n] = getchar ()) != '\n' && buf[n] != EOF)
845 printf ("parser returned %d\n", parse_c_expression (buf));
/* Table telling whether a char can be part of a C identifier.  */
unsigned char is_idchar[256];
/* Table telling whether a char can be the first char of a C identifier.  */
unsigned char is_idstart[256];
/* Table telling whether a char is horizontal space.  isspace () thinks that
   newline is space; this is not a good idea for this program.
   This is `unsigned char' so that the definition agrees with the
   `extern unsigned char ... is_hor_space[]' declaration earlier in this
   file; a plain `char' definition is a conflicting type.  */
unsigned char is_hor_space[256];
860 * initialize random junk in the hash table and maybe other places
862 initialize_random_junk ()
867 * Set up is_idchar and is_idstart tables. These should be
868 * faster than saying (is_alpha (c) || c == '_'), etc.
869 * Must set up these things before calling any routines that
872 for (i = 'a'; i <= 'z'; i++) {
873 ++is_idchar[i - 'a' + 'A'];
875 ++is_idstart[i - 'a' + 'A'];
878 for (i = '0'; i <= '9'; i++)
882 #if DOLLARS_IN_IDENTIFIERS
887 /* horizontal space table */
889 ++is_hor_space['\t'];
894 printf ("error: %s\n", msg);
899 printf ("warning: %s\n", msg);
903 lookup (name, len, hash)
908 return (DEFAULT_SIGNED_CHAR) ? 0 : ((struct hashnode *) -1);