This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Revision 2: Handle string constant initializers for array of char16_t or char32_t


Oracle has a full copyright assignment in place with the FSF.

This patch addresses the problem reported by Holger Hopp and Jakub Jelinek:

	http://gcc.gnu.org/ml/gcc-patches/2008-04/msg01444.html
	http://gcc.gnu.org/ml/gcc-patches/2008-04/msg01454.html

and the first review sparking this revision:

	http://gcc.gnu.org/ml/gcc-patches/2008-04/msg01553.html

This patch replaces the first version.  It addresses the problem that the
char16_t/char32_t support failed to ensure that string constant
initializers for arrays of char16_t and char32_t were handled correctly,
both in C and C++.  It also addresses the feedback from Joseph Myers
concerning incremental initializers.

The fix for the incremental initializer handling removes a gcc_assert for
the bitsize of the elements in the initializer string constant.  This is
safe and correct, because with char16_t and char32_t added in, non-char
doesn't mean wchar_t anymore.  It is safe to not have the assert there,
because other code earlier in the code paths leading here ensures that the
string constant is of the right type.

This problem doesn't seem to be logged in bugzilla as a bug, hence the lack
of reference to a bugzilla bug id.

ChangeLog entries:
------------------
gcc/ChangeLog:
2008-04-21  Kris Van Hees <kris.van.hees@oracle.com>
	  
	* c-typeck.c (digest_init): Support char16_t and char32_t.
	(set_nonincremental_init_from_string): Likewise.

gcc/cp/ChangeLog:
2008-04-21  Kris Van Hees <kris.van.hees@oracle.com>

	* typeck2.c (digest_init): Support char16_t and char32_t.

gcc/testsuite/ChangeLog:
2008-04-21  Kris Van Hees <kris.van.hees@oracle.com>

	Tests for char16_t and char32_t support.
	* g++.dg/ext/utf-array.C: New test.
	* g++.dg/ext/utf-array-short-wchar.C: New test.
	* gcc.dg/utf-array.c: New test.
	* gcc.dg/utf-array-short-wchar.c: New test.
	* gcc.dg/utf-inc-init.c: New test.

Bootstrapping and testing:
--------------------------
The source tree was built on the following platforms (target == host):

	i686-linux
	x86_64-linux

Builds were done for both the unpatched tree and the patched tree, and
testsuite (make -k check) summary results were verified to be identical,
except for the added tests in the patched tree.  This was done to ensure
that the patch does not introduce regressions.

Index: gcc/testsuite/gcc.dg/utf-array.c
===================================================================
--- gcc/testsuite/gcc.dg/utf-array.c	(revision 0)
+++ gcc/testsuite/gcc.dg/utf-array.c	(revision 0)
@@ -0,0 +1,41 @@
+/* Contributed by Kris Van Hees <kris.van.hees@oracle.com> */
+/* Expected errors for char16_t/char32_t string literals. */
+/* { dg-do compile } */
+/* { dg-options "-std=gnu99" } */
+
+#include <wchar.h>
+
+typedef short unsigned int	char16_t;
+typedef unsigned int		char32_t;
+
+const char	s_0[]	= "ab";
+const char	s_1[]	= u"ab";	/* { dg-error "from wide string" } */
+const char	s_2[]	= U"ab";	/* { dg-error "from wide string" } */
+const char	s_3[]	= L"ab";	/* { dg-error "from wide string" } */
+
+const char16_t	s16_0[]	= "ab";		/* { dg-error "from non-wide" } */
+const char16_t	s16_1[]	= u"ab";
+const char16_t	s16_2[]	= U"ab";	/* { dg-error "from incompatible" } */
+const char16_t	s16_3[]	= L"ab";	/* { dg-error "from incompatible" } */
+
+const char16_t	s16_4[0] = u"ab";	/* { dg-warning "chars is too long" } */
+const char16_t	s16_5[1] = u"ab";	/* { dg-warning "chars is too long" } */
+const char16_t	s16_6[2] = u"ab";
+const char16_t	s16_7[3] = u"ab";
+const char16_t	s16_8[4] = u"ab";
+
+const char32_t	s32_0[]	= "ab";		/* { dg-error "from non-wide" } */
+const char32_t	s32_1[]	= u"ab";	/* { dg-error "from incompatible" } */
+const char32_t	s32_2[]	= U"ab";
+const char32_t	s32_3[]	= L"ab";	/* { dg-error "from incompatible" } */
+
+const char32_t	s32_4[0] = U"ab";	/* { dg-warning "chars is too long" } */
+const char32_t	s32_5[1] = U"ab";	/* { dg-warning "chars is too long" } */
+const char32_t	s32_6[2] = U"ab";
+const char32_t	s32_7[3] = U"ab";
+const char32_t	s32_8[4] = U"ab";
+
+const wchar_t	sw_0[]	= "ab";		/* { dg-error "from non-wide" } */
+const wchar_t	sw_1[]	= u"ab";	/* { dg-error "from incompatible" } */
+const wchar_t	sw_2[]	= U"ab";	/* { dg-error "from incompatible" } */
+const wchar_t	sw_3[]	= L"ab";
Index: gcc/testsuite/gcc.dg/utf-inc-init.c
===================================================================
--- gcc/testsuite/gcc.dg/utf-inc-init.c	(revision 0)
+++ gcc/testsuite/gcc.dg/utf-inc-init.c	(revision 0)
@@ -0,0 +1,45 @@
+/* Contributed by Kris Van Hees <kris.van.hees@oracle.com> */
+/* Test incremental initializers for char16_t/char32_t arrays. */
+/* { dg-do run } */
+/* { dg-options "-std=gnu99" } */
+
+typedef __SIZE_TYPE__ size_t;
+typedef short unsigned int char16_t;
+typedef unsigned int char32_t;
+
+extern int memcmp (const void *, const void *, size_t);
+extern void abort (void);
+extern void exit (int);
+
+struct A {
+  char16_t S[6];
+  int M;
+} a[] = { { { u"foo" }, 1 }, [0].S[2] = u'x', [0].S[4] = u'y' };
+struct A b[] = { { { u"foo" }, 1 }, [0] = { .S[0] = u'b' } };
+struct A c[] = { { { u"foo" }, 1 }, [0].S = { u"a" }, [0].M = 2 };
+
+struct B {
+  char32_t S[6];
+  int M;
+} d[] = { { { U"foo" }, 1 }, [0].S[2] = U'x', [0].S[4] = U'y' };
+struct B e[] = { { { U"foo" }, 1 }, [0] = { .S[0] = U'b' } };
+struct B f[] = { { { U"foo" }, 1 }, [0].S = { U"a" }, [0].M = 2 };
+
+int main (void)
+{
+  if (memcmp (a[0].S, u"fox\0y", 6 * sizeof(char16_t)) || a[0].M != 1)
+    abort ();
+  if (memcmp (b[0].S, u"b\0\0\0\0", 6 * sizeof(char16_t)) || b[0].M)
+    abort ();
+  if (memcmp (c[0].S, u"a\0\0\0\0", 6 * sizeof(char16_t)) || c[0].M != 2)
+    abort ();
+  /* memcmp counts bytes, so every length is scaled by the element size.  */
+  if (memcmp (d[0].S, U"fox\0y", 6 * sizeof(char32_t)) || d[0].M != 1)
+    abort ();
+  if (memcmp (e[0].S, U"b\0\0\0\0", 6 * sizeof(char32_t)) || e[0].M)
+    abort ();
+  if (memcmp (f[0].S, U"a\0\0\0\0", 6 * sizeof(char32_t)) || f[0].M != 2)
+    abort ();
+
+  exit(0);
+}
Index: gcc/testsuite/gcc.dg/utf-array-short-wchar.c
===================================================================
--- gcc/testsuite/gcc.dg/utf-array-short-wchar.c	(revision 0)
+++ gcc/testsuite/gcc.dg/utf-array-short-wchar.c	(revision 0)
@@ -0,0 +1,41 @@
+/* Contributed by Kris Van Hees <kris.van.hees@oracle.com> */
+/* Expected errors for char16_t/char32_t string literals. */
+/* { dg-do compile } */
+/* { dg-options "-std=gnu99 -fshort-wchar" } */
+
+#include <wchar.h>
+
+typedef short unsigned int	char16_t;
+typedef unsigned int		char32_t;
+
+const char	s_0[]	= "ab";
+const char	s_1[]	= u"ab";	/* { dg-error "from wide string" } */
+const char	s_2[]	= U"ab";	/* { dg-error "from wide string" } */
+const char	s_3[]	= L"ab";	/* { dg-error "from wide string" } */
+
+const char16_t	s16_0[]	= "ab";		/* { dg-error "from non-wide" } */
+const char16_t	s16_1[]	= u"ab";
+const char16_t	s16_2[]	= U"ab";	/* { dg-error "from incompatible" } */
+const char16_t	s16_3[]	= L"ab";
+
+const char16_t	s16_4[0] = u"ab";	/* { dg-warning "chars is too long" } */
+const char16_t	s16_5[1] = u"ab";	/* { dg-warning "chars is too long" } */
+const char16_t	s16_6[2] = u"ab";
+const char16_t	s16_7[3] = u"ab";
+const char16_t	s16_8[4] = u"ab";
+
+const char32_t	s32_0[]	= "ab";		/* { dg-error "from non-wide" } */
+const char32_t	s32_1[]	= u"ab";	/* { dg-error "from incompatible" } */
+const char32_t	s32_2[]	= U"ab";
+const char32_t	s32_3[]	= L"ab";	/* { dg-error "from incompatible" } */
+
+const char32_t	s32_4[0] = U"ab";	/* { dg-warning "chars is too long" } */
+const char32_t	s32_5[1] = U"ab";	/* { dg-warning "chars is too long" } */
+const char32_t	s32_6[2] = U"ab";
+const char32_t	s32_7[3] = U"ab";
+const char32_t	s32_8[4] = U"ab";
+
+const wchar_t	sw_0[]	= "ab";		/* { dg-error "from non-wide" } */
+const wchar_t	sw_1[]	= u"ab";
+const wchar_t	sw_2[]	= U"ab";	/* { dg-error "from incompatible" } */
+const wchar_t	sw_3[]	= L"ab";
Index: gcc/testsuite/g++.dg/ext/utf-array.C
===================================================================
--- gcc/testsuite/g++.dg/ext/utf-array.C	(revision 0)
+++ gcc/testsuite/g++.dg/ext/utf-array.C	(revision 0)
@@ -0,0 +1,36 @@
+/* Contributed by Kris Van Hees <kris.van.hees@oracle.com> */
+/* Expected errors for char16_t/char32_t string literals. */
+/* { dg-do compile } */
+/* { dg-options "-std=c++0x" } */
+
+const char	s_0[]	= "ab";
+const char	s_1[]	= u"ab";	/* { dg-error "from wide string" } */
+const char	s_2[]	= U"ab";	/* { dg-error "from wide string" } */
+const char	s_3[]	= L"ab";	/* { dg-error "from wide string" } */
+
+const char16_t	s16_0[]	= "ab";		/* { dg-error "from non-wide" } */
+const char16_t	s16_1[]	= u"ab";
+const char16_t	s16_2[]	= U"ab";	/* { dg-error "from incompatible" } */
+const char16_t	s16_3[]	= L"ab";	/* { dg-error "from incompatible" } */
+
+const char16_t	s16_4[0] = u"ab";	/* { dg-warning "chars is too long" } */
+const char16_t	s16_5[1] = u"ab";	/* { dg-warning "chars is too long" } */
+const char16_t	s16_6[2] = u"ab";	/* { dg-warning "chars is too long" } */
+const char16_t	s16_7[3] = u"ab";
+const char16_t	s16_8[4] = u"ab";
+
+const char32_t	s32_0[]	= "ab";		/* { dg-error "from non-wide" } */
+const char32_t	s32_1[]	= u"ab";	/* { dg-error "from incompatible" } */
+const char32_t	s32_2[]	= U"ab";
+const char32_t	s32_3[]	= L"ab";	/* { dg-error "from incompatible" } */
+
+const char32_t	s32_4[0] = U"ab";	/* { dg-warning "chars is too long" } */
+const char32_t	s32_5[1] = U"ab";	/* { dg-warning "chars is too long" } */
+const char32_t	s32_6[2] = U"ab";	/* { dg-warning "chars is too long" } */
+const char32_t	s32_7[3] = U"ab";
+const char32_t	s32_8[4] = U"ab";
+
+const wchar_t	sw_0[]	= "ab";		/* { dg-error "from non-wide" } */
+const wchar_t	sw_1[]	= u"ab";	/* { dg-error "from incompatible" } */
+const wchar_t	sw_2[]	= U"ab";	/* { dg-error "from incompatible" } */
+const wchar_t	sw_3[]	= L"ab";
Index: gcc/testsuite/g++.dg/ext/utf-array-short-wchar.C
===================================================================
--- gcc/testsuite/g++.dg/ext/utf-array-short-wchar.C	(revision 0)
+++ gcc/testsuite/g++.dg/ext/utf-array-short-wchar.C	(revision 0)
@@ -0,0 +1,36 @@
+/* Contributed by Kris Van Hees <kris.van.hees@oracle.com> */
+/* Expected errors for char16_t/char32_t string literals. */
+/* { dg-do compile } */
+/* { dg-options "-std=c++0x -fshort-wchar" } */
+
+const char	s_0[]	= "ab";
+const char	s_1[]	= u"ab";	/* { dg-error "from wide string" } */
+const char	s_2[]	= U"ab";	/* { dg-error "from wide string" } */
+const char	s_3[]	= L"ab";	/* { dg-error "from wide string" } */
+
+const char16_t	s16_0[]	= "ab";		/* { dg-error "from non-wide" } */
+const char16_t	s16_1[]	= u"ab";
+const char16_t	s16_2[]	= U"ab";	/* { dg-error "from incompatible" } */
+const char16_t	s16_3[]	= L"ab";	/* { dg-error "from incompatible" } */
+
+const char16_t	s16_4[0] = u"ab";	/* { dg-warning "chars is too long" } */
+const char16_t	s16_5[1] = u"ab";	/* { dg-warning "chars is too long" } */
+const char16_t	s16_6[2] = u"ab";	/* { dg-warning "chars is too long" } */
+const char16_t	s16_7[3] = u"ab";
+const char16_t	s16_8[4] = u"ab";
+
+const char32_t	s32_0[]	= "ab";		/* { dg-error "from non-wide" } */
+const char32_t	s32_1[]	= u"ab";	/* { dg-error "from incompatible" } */
+const char32_t	s32_2[]	= U"ab";
+const char32_t	s32_3[]	= L"ab";	/* { dg-error "from incompatible" } */
+
+const char32_t	s32_4[0] = U"ab";	/* { dg-warning "chars is too long" } */
+const char32_t	s32_5[1] = U"ab";	/* { dg-warning "chars is too long" } */
+const char32_t	s32_6[2] = U"ab";	/* { dg-warning "chars is too long" } */
+const char32_t	s32_7[3] = U"ab";
+const char32_t	s32_8[4] = U"ab";
+
+const wchar_t	sw_0[]	= "ab";		/* { dg-error "from non-wide" } */
+const wchar_t	sw_1[]	= u"ab";	/* { dg-error "from incompatible" } */
+const wchar_t	sw_2[]	= U"ab";	/* { dg-error "from incompatible" } */
+const wchar_t	sw_3[]	= L"ab";
Index: gcc/cp/typeck2.c
===================================================================
--- gcc/cp/typeck2.c	(revision 134495)
+++ gcc/cp/typeck2.c	(working copy)
@@ -671,17 +671,26 @@ digest_init (tree type, tree init)
 	{
 	  tree char_type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (init)));
 
-	  if (char_type != char_type_node
-	      && TYPE_PRECISION (typ1) == BITS_PER_UNIT)
+	  if (TYPE_PRECISION (typ1) == BITS_PER_UNIT)
 	    {
-	      error ("char-array initialized from wide string");
-	      return error_mark_node;
+	      if (char_type != char_type_node)
+		{
+		  error ("char-array initialized from wide string");
+		  return error_mark_node;
+		}
 	    }
-	  if (char_type == char_type_node
-	      && TYPE_PRECISION (typ1) != BITS_PER_UNIT)
+	  else
 	    {
-	      error ("int-array initialized from non-wide string");
-	      return error_mark_node;
+	      if (char_type == char_type_node)
+		{
+		  error ("int-array initialized from non-wide string");
+		  return error_mark_node;
+		}
+	      else if (char_type != typ1)
+		{
+		  error ("int-array initialized from incompatible wide string");
+		  return error_mark_node;
+		}
 	    }
 
 	  TREE_TYPE (init) = type;
Index: gcc/c-typeck.c
===================================================================
--- gcc/c-typeck.c	(revision 134495)
+++ gcc/c-typeck.c	(working copy)
@@ -4708,47 +4708,56 @@ digest_init (tree type, tree init, bool 
 			 || typ1 == signed_char_type_node
 			 || typ1 == unsigned_char_type_node);
       bool wchar_array = !!comptypes (typ1, wchar_type_node);
-      if (char_array || wchar_array)
+      bool char16_array = !!comptypes (typ1, char16_type_node);
+      bool char32_array = !!comptypes (typ1, char32_type_node);
+
+      if (char_array || wchar_array || char16_array || char32_array)
 	{
 	  struct c_expr expr;
-	  bool char_string;
+	  tree typ2 = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (inside_init)));
 	  expr.value = inside_init;
 	  expr.original_code = (strict_string ? STRING_CST : ERROR_MARK);
 	  maybe_warn_string_init (type, expr);
 
-	  char_string
-	    = (TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (inside_init)))
-	       == char_type_node);
-
 	  if (comptypes (TYPE_MAIN_VARIANT (TREE_TYPE (inside_init)),
 			 TYPE_MAIN_VARIANT (type)))
 	    return inside_init;
 
-	  if (!wchar_array && !char_string)
+	  if (char_array)
 	    {
-	      error_init ("char-array initialized from wide string");
-	      return error_mark_node;
+	      if (typ2 != char_type_node)
+		{
+		  error_init ("char-array initialized from wide string");
+		  return error_mark_node;
+		}
 	    }
-	  if (char_string && !char_array)
+	  else
 	    {
-	      error_init ("wchar_t-array initialized from non-wide string");
-	      return error_mark_node;
+	      if (typ2 == char_type_node)
+		{
+		  error_init ("wide character array initialized from non-wide "
+			      "string");
+		  return error_mark_node;
+		}
+	      else if (!comptypes (typ1, typ2))
+		{
+		  error_init ("wide character array initialized from "
+			      "incompatible wide string");
+		  return error_mark_node;
+		}
 	    }
 
 	  TREE_TYPE (inside_init) = type;
 	  if (TYPE_DOMAIN (type) != 0
 	      && TYPE_SIZE (type) != 0
 	      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
-	      /* Subtract 1 (or sizeof (wchar_t))
+	      /* Subtract the size of a single (possibly wide) character
 		 because it's ok to ignore the terminating null char
 		 that is counted in the length of the constant.  */
 	      && 0 > compare_tree_int (TYPE_SIZE_UNIT (type),
 				       TREE_STRING_LENGTH (inside_init)
-				       - ((TYPE_PRECISION (typ1)
-					   != TYPE_PRECISION (char_type_node))
-					  ? (TYPE_PRECISION (wchar_type_node)
-					     / BITS_PER_UNIT)
-					  : 1)))
+				       - (TYPE_PRECISION (typ1)
+					  / BITS_PER_UNIT)))
 	    pedwarn_init ("initializer-string for array of chars is too long");
 
 	  return inside_init;
@@ -6096,15 +6105,7 @@ set_nonincremental_init_from_string (tre
 
   gcc_assert (TREE_CODE (constructor_type) == ARRAY_TYPE);
 
-  if (TYPE_PRECISION (TREE_TYPE (TREE_TYPE (str)))
-      == TYPE_PRECISION (char_type_node))
-    wchar_bytes = 1;
-  else
-    {
-      gcc_assert (TYPE_PRECISION (TREE_TYPE (TREE_TYPE (str)))
-		  == TYPE_PRECISION (wchar_type_node));
-      wchar_bytes = TYPE_PRECISION (wchar_type_node) / BITS_PER_UNIT;
-    }
+  wchar_bytes = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (str))) / BITS_PER_UNIT;
   charwidth = TYPE_PRECISION (char_type_node);
   type = TREE_TYPE (constructor_type);
   p = TREE_STRING_POINTER (str);


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]