cpplib: more charconst fixes. [was: 4 GCC regressions, 4 new...]

Sun May 5 08:49:00 GMT 2002

This CPP arithmetic precision work has been well worth it; it has exposed
a bunch of cross-compiler bugs.  My recent charconst improvements broke
the powerpc-eabisim cross (1 regression in charconst.c, the rest were
not my patch).  They exposed a latent bug that was being papered over
before.

The problem was

int c;
c = 'very long';

producing an "overflow in implicit constant conversion" message.
cpplib allows as many chars in a charconst as can fit in the type it
uses for valuing arbitrary charconsts on the host, which can produce
some non-wide constants that are very large.

This is wrong of course; it should only allow however many fit into the
target int (both C and C++).  So CPP needs another precision variable
with this information, and to use it in the cpp_interpret_charconst.
Done with the patch below.  This required setting the precision from
the front end as it is not accessible to cpplib directly.  So whilst
I was at it I also set the char_precision and wchar_precision properly.
This leaves CPP arithmetic precision still to be set correctly
and implemented.

I also took the opportunity to further expand on the sanity checks
performed by cpplib if ENABLE_CHECKING.  I think the patch also obsoletes
MAX_CHAR_TYPE_SIZE as cpplib assumes a target wchar_t is at least as
big as a target char; I'll remove it in some future patch.  Finally,
the test file charconst.c uses int for all assignments; it should really
use int for the charconsts and __WCHAR_TYPE__ for the wide charconsts,
and the comment that we don't do a test for L'ab' because it depends on
sizeof (wchar_t) is wrong; it was only a problem previously because the
charconst code was so broken.  So I've corrected these issues too.

I've confirmed this patch fixes the "regression", and bootstrapped
x86 Linux.

Incidentally, looking into this made me think we don't do the right
thing for multi-character charconsts.  We give them the same signedness
as target char; I tend to think they should be signed just like their
"int" type is.  Also, if target char is signed, we calculate their value
in the most wacky way, since each char's value is not sign-extended
to int, but kept in the narrow target char and or-ed into the target int.
Finally, in view of the way we calculate their value (first char is most
significant), it seems odd to ignore excess trailing chars rather than
excess leading chars like normal C arithmetic would when overflowing.
Do you agree with these points, Zack?  I'll come up with a patch for
these issues, too.

Neil.

	* c-common.c (c_common_init): Set up CPP arithmetic.
	* cppinit.c (cpp_create_reader): Default CPP arithmetic to
	something reasonable for the host.
	(sanity_checks): Add checks.
	(cpp_read_main_file): Call sanity_checks() from here...
	(cpp_post_options): ... not here.
	* cpplex.c (cpp_interpret_charconst): Get max_chars right.
	* cpplib.h (struct cpp_options): New member int_precision.
testsuite:
	* gcc.dg/cpp/charconst.c: Update tests.

============================================================
Index: gcc/c-common.c

--- gcc/c-common.c	29 Apr 2002 18:40:42 -0000	1.316
+++ gcc/c-common.c	5 May 2002 15:19:12 -0000
@@ -4299,6 +4299,14 @@ const char *
 c_common_init (filename)
      const char *filename;
 {
+  cpp_options *options = cpp_get_options (parse_in);
+
+  /* Set up preprocessor arithmetic.  Must be done after call to
+     c_common_nodes_and_builtins for wchar_type_node to be good.  */
+  options->char_precision = TYPE_PRECISION (char_type_node);
+  options->int_precision = TYPE_PRECISION (integer_type_node);
+  options->wchar_precision = TYPE_PRECISION (wchar_type_node);
+
   /* NULL is passed up to toplev.c and we exit quickly.  */
   if (flag_preprocess_only)
     {
============================================================
Index: gcc/cppinit.c
--- gcc/cppinit.c	4 May 2002 20:14:58 -0000	1.218
+++ gcc/cppinit.c	5 May 2002 15:19:17 -0000
@@ -502,14 +502,13 @@ cpp_create_reader (lang)
   CPP_OPTION (pfile, pending) =
     (struct cpp_pending *) xcalloc (1, sizeof (struct cpp_pending));
 
-  /* CPP arithmetic done to existing rules for now.  */
+  /* Default CPP arithmetic to something sensible for the host for the
+     benefit of dumb users like fix-header.  */
 #define BITS_PER_HOST_WIDEST_INT (CHAR_BIT * sizeof (HOST_WIDEST_INT))
   CPP_OPTION (pfile, precision) = BITS_PER_HOST_WIDEST_INT;
-#ifndef MAX_CHAR_TYPE_SIZE
-#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
-#endif
-  CPP_OPTION (pfile, char_precision) = MAX_CHAR_TYPE_SIZE;
-  CPP_OPTION (pfile, wchar_precision) = MAX_WCHAR_TYPE_SIZE;
+  CPP_OPTION (pfile, char_precision) = CHAR_BIT;
+  CPP_OPTION (pfile, wchar_precision) = CHAR_BIT * sizeof (int);
+  CPP_OPTION (pfile, int_precision) = CHAR_BIT * sizeof (int);
 
   /* It's simplest to just create this struct whether or not it will
      be needed.  */
@@ -923,6 +922,50 @@ free_chain (head)
     }
 }
 
+/* Sanity-checks are dependent on command-line options, so it is
+   called as a subroutine of cpp_read_main_file ().  */
+#if ENABLE_CHECKING
+static void sanity_checks PARAMS ((cpp_reader *));
+static void sanity_checks (pfile)
+     cpp_reader *pfile;
+{
+  cppchar_t test = 0;
+
+  /* Sanity checks for assumptions about CPP arithmetic and target
+     type precisions made by cpplib.  */
+  test--;
+  if (test < 1)
+    cpp_error (pfile, DL_FATAL, "cppchar_t must be an unsigned type");
+
+  if (CPP_OPTION (pfile, precision) > BITS_PER_HOST_WIDEST_INT)
+    cpp_error (pfile, DL_FATAL,
+	       "preprocessor arithmetic has maximum precision of %u bits; target requires %u bits",
+	       BITS_PER_HOST_WIDEST_INT, CPP_OPTION (pfile, precision));
+
+  if (CPP_OPTION (pfile, precision) < CPP_OPTION (pfile, int_precision))
+    cpp_error (pfile, DL_FATAL,
+	       "CPP arithmetic must be at least as precise as a target int");
+
+  if (CPP_OPTION (pfile, char_precision) < 8)
+    cpp_error (pfile, DL_FATAL, "target char is less than 8 bits wide");
+
+  if (CPP_OPTION (pfile, wchar_precision) < CPP_OPTION (pfile, char_precision))
+    cpp_error (pfile, DL_FATAL,
+	       "target wchar_t is narrower than target char");
+
+  if (CPP_OPTION (pfile, int_precision) < CPP_OPTION (pfile, char_precision))
+    cpp_error (pfile, DL_FATAL,
+	       "target int is narrower than target char");
+
+  if (CPP_OPTION (pfile, wchar_precision) > BITS_PER_CPPCHAR_T)
+    cpp_error (pfile, DL_FATAL,
+	       "CPP on this host cannot handle wide character constants over %u bits, but the target requires %u bits",
+	       BITS_PER_CPPCHAR_T, CPP_OPTION (pfile, wchar_precision));
+}
+#else
+# define sanity_checks(PFILE)
+#endif
+
 /* This is called after options have been parsed, and partially
    processed.  Setup for processing input from the file named FNAME,
    or stdin if it is the empty string.  Return the original filename
@@ -933,6 +976,8 @@ cpp_read_main_file (pfile, fname, table)
      const char *fname;
      hash_table *table;
 {
+  sanity_checks (pfile);
+
   /* The front ends don't set up the hash table until they have
      finished processing the command line options, so initializing the
      hashtable is deferred until now.  */
@@ -1788,46 +1833,12 @@ cpp_handle_options (pfile, argc, argv)
   return i;
 }
 
-/* Sanity-checks are dependent on command-line options, so it is
-   called as a subroutine of cpp_post_options ().  */
-#if ENABLE_CHECKING
-static void sanity_checks PARAMS ((cpp_reader *));
-static void sanity_checks (pfile)
-     cpp_reader *pfile;
-{
-  cppchar_t test = 0;
-  size_t max_prec;
-
-  /* Sanity checks for CPP arithmetic.  */
-  test--;
-  if (test < 1)
-    cpp_error (pfile, DL_FATAL, "cppchar_t must be an unsigned type");
-
-  if (CPP_OPTION (pfile, precision) > BITS_PER_HOST_WIDEST_INT)
-    cpp_error (pfile, DL_FATAL,
-	       "preprocessor arithmetic has maximum precision of %u bits; target requires %u bits",
-	       BITS_PER_HOST_WIDEST_INT, CPP_OPTION (pfile, precision));
-
-  max_prec = CPP_OPTION (pfile, char_precision);
-  if (max_prec < CPP_OPTION (pfile, wchar_precision))
-    max_prec = CPP_OPTION (pfile, wchar_precision);
-  if (max_prec > BITS_PER_CPPCHAR_T)
-    cpp_error (pfile, DL_FATAL,
-	       "CPP on this host cannot handle (wide) character constants over %u bits, but the target requires %u bits",
-	       BITS_PER_CPPCHAR_T, max_prec);
-}
-#else
-# define sanity_checks(PFILE)
-#endif
-
 /* Extra processing when all options are parsed, after all calls to
    cpp_handle_option[s].  Consistency checks etc.  */
 void
 cpp_post_options (pfile)
      cpp_reader *pfile;
 {
-  sanity_checks (pfile);
-
   if (pfile->print_version)
     {
       fprintf (stderr, _("GNU CPP version %s (cpplib)"), version_string);
============================================================
Index: gcc/cpplex.c
--- gcc/cpplex.c	4 May 2002 19:42:01 -0000	1.200
+++ gcc/cpplex.c	5 May 2002 15:19:22 -0000
@@ -1883,11 +1883,13 @@ cpp_interpret_charconst (pfile, token, w
   if (token->type == CPP_CHAR)
     {
       width = CPP_OPTION (pfile, char_precision);
+      max_chars = CPP_OPTION (pfile, int_precision) / width;
       unsigned_p = CPP_OPTION (pfile, signed_char) == 0;
     }
   else
     {
       width = CPP_OPTION (pfile, wchar_precision);
+      max_chars = 1;
       unsigned_p = WCHAR_UNSIGNED;
     }
 
@@ -1895,7 +1897,6 @@ cpp_interpret_charconst (pfile, token, w
     mask = ((cppchar_t) 1 << width) - 1;
   else
     mask = ~0;
-  max_chars = BITS_PER_CPPCHAR_T / width;
 
   while (str < limit)
     {
============================================================
Index: gcc/cpplib.h
--- gcc/cpplib.h	4 May 2002 20:14:58 -0000	1.213
+++ gcc/cpplib.h	5 May 2002 15:19:24 -0000
@@ -251,9 +251,9 @@ struct cpp_options
   /* -fleading_underscore sets this to "_".  */
   const char *user_label_prefix;
 
-  /* Precision for target CPP arithmetic, target characters and target
-     wide characters, respectively.  */
-  size_t precision, char_precision, wchar_precision;
+  /* Precision for target CPP arithmetic, target characters, target
+     ints and target wide characters, respectively.  */
+  size_t precision, char_precision, int_precision, wchar_precision;
 
   /* The language we're preprocessing.  */
   enum c_lang lang;
============================================================
Index: gcc/testsuite/gcc.dg/cpp/charconst.c
--- gcc/testsuite/gcc.dg/cpp/charconst.c	23 May 2001 22:50:28 -0000	1.1
+++ gcc/testsuite/gcc.dg/cpp/charconst.c	5 May 2002 15:19:24 -0000
@@ -21,13 +21,16 @@
 
 void foo ()
 {
-  int c = '';		/* { dg-warning "empty" "empty charconst" } */
-  c = L'';		/* { dg-warning "empty" "empty wide charconst" } */
+  int c;
+  __WCHAR_TYPE__ w;
+
+  c = '';		/* { dg-warning "empty" "empty charconst" } */
+  w = L'';		/* { dg-warning "empty" "empty wide charconst" } */
 
   c = 'very long';	/* { dg-warning "too long" "long charconst" } */
-  c = L'very long';	/* { dg-warning "too long" "long wide charconst" } */
+  w = L'very long';	/* { dg-warning "too long" "long wide charconst" } */
 
-  /* Don't do this test for L'ab'; it depends upon sizeof (wchar_t).  */
-  c = 'ab';		/* { dg-warning "multi-char" "multi-character" } */
- 
+  c = 'ab';		/* { dg-warning "multi-char" "multi-char" } */
+  /* Wide charconsts cannot contain more than one wide character.  */
+  w = L'ab';		/* { dg-warning "too long" "multi-char wide" } */
 }