This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Further patch to string constant initializer handling


Another fix for a bug found in reading the code while fixing something
else: the rules for what arrays can be initialized by what strings
were wrong.  Arrays of character type, which appears to exclude enums
that happen to be compatible with a character type, can be initialized
by narrow string constants.  Arrays of element type compatible with
wchar_t, which clearly includes compatible enums, can be initialized
by wide string constants.  We were inconsistent about the handling of
enums, most code paths not letting them get through to digest_init but
digest_init accepting them, albeit a part of digest_init other than
that meant to handle string constants as array initializers.  We also
wrongly allowed arrays of signed and unsigned versions of wchar_t to
be initialized by wide strings; the array must have element type
compatible with wchar_t, not the corresponding type of opposite
signedness.

With regard to arrays with element type a qualified version of
wchar_t, there is a clear defect in the standard not allowing for
these.  I've added a draft DR on this to my list of potential and
draft DRs <http://www.srcf.ucam.org/~jsm28/gcc/#drs>, to be considered
by the UK C Panel at a future meeting for possible submission to WG14.

I think the known C90 status as regards compiler code (there's also
the need for proper documentation of implementation-defined behavior)
after this patch is now back to the state that the known problems are
documented in the dependencies of bug 16620, i.e. I don't have more
miscellanous conformance issues like this, identified through reading
code or otherwise, sitting around that aren't filed in Bugzilla.  (If
anyone has such issues, without immediate plans to fix them, then
please file them in Bugzilla.)

Bootstrapped with no regressions on i686-pc-linux-gnu.  Applied to
mainline.

-- 
Joseph S. Myers               http://www.srcf.ucam.org/~jsm28/gcc/
    jsm@polyomino.org.uk (personal mail)
    jsm28@gcc.gnu.org (Bugzilla assignments and CCs)

2004-07-25  Joseph S. Myers  <jsm@polyomino.org.uk>

	* c-typeck.c (digest_init): Don't allow arrays of signed or
	unsigned variants of wchar_t to be initialized by wide string
	constants.  Do allow arrays of enumerated types compatible with
	wchar_t to be initialized by wide string constants.  Refine tests
	distinguishing wide and narrow strings and arrays.  Give specific
	error for arrays of other integer types initialized by string
	constants.
	(output_init_element, process_init_element): Check for
	INTEGRAL_TYPE_P rather than just for INTEGER_TYPE when
	initializing with string constants.

testsuite:
2004-07-25  Joseph S. Myers  <jsm@polyomino.org.uk>

	* gcc.dg/init-string-2.c: New test.

diff -rupN GCC.orig/gcc/c-typeck.c GCC/gcc/c-typeck.c
--- GCC.orig/gcc/c-typeck.c	2004-07-21 23:57:58.000000000 +0000
+++ GCC/gcc/c-typeck.c	2004-07-24 21:35:29.000000000 +0000
@@ -3930,37 +3930,41 @@ digest_init (tree type, tree init, bool 
   /* Initialization of an array of chars from a string constant
      optionally enclosed in braces.  */
 
-  if (code == ARRAY_TYPE)
+  if (code == ARRAY_TYPE && inside_init
+      && TREE_CODE (inside_init) == STRING_CST)
     {
       tree typ1 = TYPE_MAIN_VARIANT (TREE_TYPE (type));
-      if ((typ1 == char_type_node
-	   || typ1 == signed_char_type_node
-	   || typ1 == unsigned_char_type_node
-	   || typ1 == unsigned_wchar_type_node
-	   || typ1 == signed_wchar_type_node)
-	  && ((inside_init && TREE_CODE (inside_init) == STRING_CST)))
+      /* Note that an array could be both an array of character type
+	 and an array of wchar_t if wchar_t is signed char or unsigned
+	 char.  */
+      bool char_array = (typ1 == char_type_node
+			 || typ1 == signed_char_type_node
+			 || typ1 == unsigned_char_type_node);
+      bool wchar_array = !!comptypes (typ1, wchar_type_node);
+      if (char_array || wchar_array)
 	{
 	  struct c_expr expr;
+	  bool char_string;
 	  expr.value = inside_init;
 	  expr.original_code = (strict_string ? STRING_CST : ERROR_MARK);
 	  maybe_warn_string_init (type, expr);
 
+	  char_string
+	    = (TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (inside_init)))
+	       == char_type_node);
+
 	  if (comptypes (TYPE_MAIN_VARIANT (TREE_TYPE (inside_init)),
 			 TYPE_MAIN_VARIANT (type)))
 	    return inside_init;
 
-	  if ((TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (inside_init)))
-	       != char_type_node)
-	      && TYPE_PRECISION (typ1) == TYPE_PRECISION (char_type_node))
+	  if (!wchar_array && !char_string)
 	    {
 	      error_init ("char-array initialized from wide string");
 	      return error_mark_node;
 	    }
-	  if ((TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (inside_init)))
-	       == char_type_node)
-	      && TYPE_PRECISION (typ1) != TYPE_PRECISION (char_type_node))
+	  if (char_string && !char_array)
 	    {
-	      error_init ("int-array initialized from non-wide string");
+	      error_init ("wchar_t-array initialized from non-wide string");
 	      return error_mark_node;
 	    }
 
@@ -3982,6 +3986,12 @@ digest_init (tree type, tree init, bool 
 
 	  return inside_init;
 	}
+      else if (INTEGRAL_TYPE_P (typ1))
+	{
+	  error_init ("array of inappropriate type initialized "
+		      "from string constant");
+	  return error_mark_node;
+	}
     }
 
   /* Build a VECTOR_CST from a *constant* vector constructor.  If the
@@ -5476,7 +5486,7 @@ output_init_element (tree value, bool st
       || (TREE_CODE (TREE_TYPE (value)) == ARRAY_TYPE
 	  && !(TREE_CODE (value) == STRING_CST
 	       && TREE_CODE (type) == ARRAY_TYPE
-	       && TREE_CODE (TREE_TYPE (type)) == INTEGER_TYPE)
+	       && INTEGRAL_TYPE_P (TREE_TYPE (type)))
 	  && !comptypes (TYPE_MAIN_VARIANT (TREE_TYPE (value)),
 			 TYPE_MAIN_VARIANT (type))))
     value = default_conversion (value);
@@ -5776,7 +5786,7 @@ process_init_element (struct c_expr valu
   if (string_flag
       && constructor_type
       && TREE_CODE (constructor_type) == ARRAY_TYPE
-      && TREE_CODE (TREE_TYPE (constructor_type)) == INTEGER_TYPE
+      && INTEGRAL_TYPE_P (TREE_TYPE (constructor_type))
       && integer_zerop (constructor_unfilled_index))
     {
       if (constructor_stack->replacement_value.value)
@@ -5855,7 +5865,7 @@ process_init_element (struct c_expr valu
 	  /* Accept a string constant to initialize a subarray.  */
 	  if (value.value != 0
 	      && fieldcode == ARRAY_TYPE
-	      && TREE_CODE (TREE_TYPE (fieldtype)) == INTEGER_TYPE
+	      && INTEGRAL_TYPE_P (TREE_TYPE (fieldtype))
 	      && string_flag)
 	    value.value = orig_value;
 	  /* Otherwise, if we have come to a subaggregate,
@@ -5943,7 +5953,7 @@ process_init_element (struct c_expr valu
 	  /* Accept a string constant to initialize a subarray.  */
 	  if (value.value != 0
 	      && fieldcode == ARRAY_TYPE
-	      && TREE_CODE (TREE_TYPE (fieldtype)) == INTEGER_TYPE
+	      && INTEGRAL_TYPE_P (TREE_TYPE (fieldtype))
 	      && string_flag)
 	    value.value = orig_value;
 	  /* Otherwise, if we have come to a subaggregate,
@@ -5983,7 +5993,7 @@ process_init_element (struct c_expr valu
 	  /* Accept a string constant to initialize a subarray.  */
 	  if (value.value != 0
 	      && eltcode == ARRAY_TYPE
-	      && TREE_CODE (TREE_TYPE (elttype)) == INTEGER_TYPE
+	      && INTEGRAL_TYPE_P (TREE_TYPE (elttype))
 	      && string_flag)
 	    value.value = orig_value;
 	  /* Otherwise, if we have come to a subaggregate,
diff -rupN GCC.orig/gcc/testsuite/gcc.dg/init-string-2.c GCC/gcc/testsuite/gcc.dg/init-string-2.c
--- GCC.orig/gcc/testsuite/gcc.dg/init-string-2.c	1970-01-01 00:00:00.000000000 +0000
+++ GCC/gcc/testsuite/gcc.dg/init-string-2.c	2004-07-23 22:15:58.000000000 +0000
@@ -0,0 +1,56 @@
+/* Character arrays but not arrays of compatible enum type may be
+   initialized by narrow string literals.  Arrays of type compatible
+   with wchar_t, including compatible enums, may be initialized by
+   wide string literals.  Use -fshort-enums -fshort-wchar so the
+   relevant circumstances can be obtained portably; may still fail if
+   char, short and int do not all have distinct precisions.  */
+/* { dg-do compile } */
+/* { dg-options "-std=c99 -pedantic-errors -fshort-enums -fshort-wchar" } */
+
+#include <limits.h>
+#include <stddef.h>
+
+typedef enum { schar_min = SCHAR_MIN, schar_max = SCHAR_MAX } schar;
+typedef enum { uchar_max = UCHAR_MAX } uchar;
+typedef enum { shrt_min = SHRT_MIN, shrt_max = SHRT_MAX } sshrt;
+typedef enum { ushrt_max = USHRT_MAX } ushrt;
+
+char a0[] = "foo";
+const signed char a2[4] = "foo";
+volatile unsigned char a3[3] = "foo";
+wchar_t a4[] = L"foo";
+const wchar_t a5[3] = L"foo";
+volatile ushrt a6[] = L"foo";
+
+schar a7[] = "foo"; /* { dg-error "string constant" "a7" } */
+uchar a8[] = "foo"; /* { dg-error "string constant" "a8" } */
+const schar a9[] = "foo"; /* { dg-error "string constant" "a9" } */
+short a10[] = L"foo"; /* { dg-error "string constant" "a10" } */
+const sshrt a11[] = L"foo"; /* { dg-error "string constant" "a11" } */
+char a12[] = L"foo"; /* { dg-error "from wide string" "a12" } */
+wchar_t a13[] = "foo"; /* { dg-error "non-wide string" "a13" } */
+
+char b0[] = { "foo" };
+const signed char b2[4] = { "foo" };
+volatile unsigned char b3[3] = { "foo" };
+wchar_t b4[] = { L"foo" };
+const wchar_t b5[3] = { L"foo" };
+volatile ushrt b6[] = { L"foo" };
+
+schar b7[] = { "foo" }; /* { dg-error "string constant" "b7" } */
+uchar b8[] = { "foo" }; /* { dg-error "string constant" "b8" } */
+const schar b9[] = { "foo" }; /* { dg-error "string constant" "b9" } */
+short b10[] = { L"foo" }; /* { dg-error "string constant" "b10" } */
+const sshrt b11[] = { L"foo" }; /* { dg-error "string constant" "b11" } */
+char b12[] = { L"foo" }; /* { dg-error "from wide string" "b12" } */
+wchar_t b13[] = { "foo" }; /* { dg-error "non-wide string" "b13" } */
+
+struct s { signed char a[10]; int b; ushrt c[10]; };
+
+struct s c = { "foo", 0, L"bar" };
+struct s d = { .c = L"bar", .a = "foo" };
+
+ushrt *e = (ushrt [7]){ L"bar" };
+
+wchar_t f[5][5] = { L"foo", L"bar" };
+ushrt g[5][5] = { L"foo", L"bar" };


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]