This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Handle string constant initializers for array of char16_t or char32_t


Oracle has a full copyright assignment in place with the FSF.

This patch addresses the problem reported by Holger Hopp and Jakub Jelinek:

	http://gcc.gnu.org/ml/gcc-patches/2008-04/msg01444.html
	http://gcc.gnu.org/ml/gcc-patches/2008-04/msg01454.html

The UTF-16/32 support did indeed fail to ensure that string constant
initializers for arrays of char16_t and char32_t are handled correctly in
C and C++.  The patch below addresses this problem and provides tests for
it.

This problem doesn't seem to be logged in bugzilla as a bug, hence the lack
of reference to a bugzilla bug id.

ChangeLog entries:
------------------
gcc/ChangeLog:
2008-04-20  Kris Van Hees <kris.van.hees@oracle.com>
	  
	* c-typeck.c (digest_init): Support char16_t and char32_t.

gcc/cp/ChangeLog:
2008-04-20  Kris Van Hees <kris.van.hees@oracle.com>

	* typeck2.c (digest_init): Support char16_t and char32_t.

gcc/testsuite/ChangeLog:
2008-04-20  Kris Van Hees <kris.van.hees@oracle.com>

	Tests for char16_t and char32_t support.
	* g++.dg/ext/utf-array.C: New.
	* g++.dg/ext/utf-array-short-wchar.C: New.
	* gcc.dg/utf-array.c: New.
	* gcc.dg/utf-array-short-wchar.c: New.

Bootstrapping and testing:
--------------------------
The source tree was built on the following platforms (target == host):

	i686-linux
	x86_64-linux

Builds were done for both the unpatched tree and the patched tree, and
testsuite (make -k check) summary results were verified to be identical,
except for the added tests in the patched tree.  This was done to ensure
that the patch does not introduce regressions.

Index: gcc/testsuite/gcc.dg/utf-array.c
===================================================================
--- gcc/testsuite/gcc.dg/utf-array.c	(revision 0)
+++ gcc/testsuite/gcc.dg/utf-array.c	(revision 0)
@@ -0,0 +1,41 @@
+/* Contributed by Kris Van Hees <kris.van.hees@oracle.com> */
+/* Expected errors for char16_t/char32_t string literals. */
+/* { dg-do compile } */
+/* { dg-options "-std=gnu99" } */
+
+#include <wchar.h>
+
+typedef short unsigned int	char16_t;
+typedef unsigned int		char32_t;
+
+const char	s_0[]	= "ab";
+const char	s_1[]	= u"ab";	/* { dg-error "from wide string" } */
+const char	s_2[]	= U"ab";	/* { dg-error "from wide string" } */
+const char	s_3[]	= L"ab";	/* { dg-error "from wide string" } */
+
+const char16_t	s16_0[]	= "ab";		/* { dg-error "from non-wide" } */
+const char16_t	s16_1[]	= u"ab";
+const char16_t	s16_2[]	= U"ab";	/* { dg-error "from incompatible" } */
+const char16_t	s16_3[]	= L"ab";	/* { dg-error "from incompatible" } */
+
+const char16_t	s16_4[0] = u"ab";	/* { dg-warning "chars is too long" } */
+const char16_t	s16_5[1] = u"ab";	/* { dg-warning "chars is too long" } */
+const char16_t	s16_6[2] = u"ab";
+const char16_t	s16_7[3] = u"ab";
+const char16_t	s16_8[4] = u"ab";
+
+const char32_t	s32_0[]	= "ab";		/* { dg-error "from non-wide" } */
+const char32_t	s32_1[]	= u"ab";	/* { dg-error "from incompatible" } */
+const char32_t	s32_2[]	= U"ab";
+const char32_t	s32_3[]	= L"ab";	/* { dg-error "from incompatible" } */
+
+const char32_t	s32_4[0] = U"ab";	/* { dg-warning "chars is too long" } */
+const char32_t	s32_5[1] = U"ab";	/* { dg-warning "chars is too long" } */
+const char32_t	s32_6[2] = U"ab";
+const char32_t	s32_7[3] = U"ab";
+const char32_t	s32_8[4] = U"ab";
+
+const wchar_t	sw_0[]	= "ab";		/* { dg-error "from non-wide" } */
+const wchar_t	sw_1[]	= u"ab";	/* { dg-error "from incompatible" } */
+const wchar_t	sw_2[]	= U"ab";	/* { dg-error "from incompatible" } */
+const wchar_t	sw_3[]	= L"ab";
Index: gcc/testsuite/gcc.dg/utf-array-short-wchar.c
===================================================================
--- gcc/testsuite/gcc.dg/utf-array-short-wchar.c	(revision 0)
+++ gcc/testsuite/gcc.dg/utf-array-short-wchar.c	(revision 0)
@@ -0,0 +1,41 @@
+/* Contributed by Kris Van Hees <kris.van.hees@oracle.com> */
+/* Expected errors for char16_t/char32_t string literals. */
+/* { dg-do compile } */
+/* { dg-options "-std=gnu99 -fshort-wchar" } */
+
+#include <wchar.h>
+
+typedef short unsigned int	char16_t;
+typedef unsigned int		char32_t;
+
+const char	s_0[]	= "ab";
+const char	s_1[]	= u"ab";	/* { dg-error "from wide string" } */
+const char	s_2[]	= U"ab";	/* { dg-error "from wide string" } */
+const char	s_3[]	= L"ab";	/* { dg-error "from wide string" } */
+
+const char16_t	s16_0[]	= "ab";		/* { dg-error "from non-wide" } */
+const char16_t	s16_1[]	= u"ab";
+const char16_t	s16_2[]	= U"ab";	/* { dg-error "from incompatible" } */
+const char16_t	s16_3[]	= L"ab";
+
+const char16_t	s16_4[0] = u"ab";	/* { dg-warning "chars is too long" } */
+const char16_t	s16_5[1] = u"ab";	/* { dg-warning "chars is too long" } */
+const char16_t	s16_6[2] = u"ab";
+const char16_t	s16_7[3] = u"ab";
+const char16_t	s16_8[4] = u"ab";
+
+const char32_t	s32_0[]	= "ab";		/* { dg-error "from non-wide" } */
+const char32_t	s32_1[]	= u"ab";	/* { dg-error "from incompatible" } */
+const char32_t	s32_2[]	= U"ab";
+const char32_t	s32_3[]	= L"ab";	/* { dg-error "from incompatible" } */
+
+const char32_t	s32_4[0] = U"ab";	/* { dg-warning "chars is too long" } */
+const char32_t	s32_5[1] = U"ab";	/* { dg-warning "chars is too long" } */
+const char32_t	s32_6[2] = U"ab";
+const char32_t	s32_7[3] = U"ab";
+const char32_t	s32_8[4] = U"ab";
+
+const wchar_t	sw_0[]	= "ab";		/* { dg-error "from non-wide" } */
+const wchar_t	sw_1[]	= u"ab";
+const wchar_t	sw_2[]	= U"ab";	/* { dg-error "from incompatible" } */
+const wchar_t	sw_3[]	= L"ab";
Index: gcc/testsuite/g++.dg/ext/utf-array.C
===================================================================
--- gcc/testsuite/g++.dg/ext/utf-array.C	(revision 0)
+++ gcc/testsuite/g++.dg/ext/utf-array.C	(revision 0)
@@ -0,0 +1,36 @@
+/* Contributed by Kris Van Hees <kris.van.hees@oracle.com> */
+/* Expected errors for char16_t/char32_t string literals. */
+/* { dg-do compile } */
+/* { dg-options "-std=c++0x" } */
+
+const char	s_0[]	= "ab";
+const char	s_1[]	= u"ab";	/* { dg-error "from wide string" } */
+const char	s_2[]	= U"ab";	/* { dg-error "from wide string" } */
+const char	s_3[]	= L"ab";	/* { dg-error "from wide string" } */
+
+const char16_t	s16_0[]	= "ab";		/* { dg-error "from non-wide" } */
+const char16_t	s16_1[]	= u"ab";
+const char16_t	s16_2[]	= U"ab";	/* { dg-error "from incompatible" } */
+const char16_t	s16_3[]	= L"ab";	/* { dg-error "from incompatible" } */
+
+const char16_t	s16_4[0] = u"ab";	/* { dg-warning "chars is too long" } */
+const char16_t	s16_5[1] = u"ab";	/* { dg-warning "chars is too long" } */
+const char16_t	s16_6[2] = u"ab";	/* { dg-warning "chars is too long" } */
+const char16_t	s16_7[3] = u"ab";
+const char16_t	s16_8[4] = u"ab";
+
+const char32_t	s32_0[]	= "ab";		/* { dg-error "from non-wide" } */
+const char32_t	s32_1[]	= u"ab";	/* { dg-error "from incompatible" } */
+const char32_t	s32_2[]	= U"ab";
+const char32_t	s32_3[]	= L"ab";	/* { dg-error "from incompatible" } */
+
+const char32_t	s32_4[0] = U"ab";	/* { dg-warning "chars is too long" } */
+const char32_t	s32_5[1] = U"ab";	/* { dg-warning "chars is too long" } */
+const char32_t	s32_6[2] = U"ab";	/* { dg-warning "chars is too long" } */
+const char32_t	s32_7[3] = U"ab";
+const char32_t	s32_8[4] = U"ab";
+
+const wchar_t	sw_0[]	= "ab";		/* { dg-error "from non-wide" } */
+const wchar_t	sw_1[]	= u"ab";	/* { dg-error "from incompatible" } */
+const wchar_t	sw_2[]	= U"ab";	/* { dg-error "from incompatible" } */
+const wchar_t	sw_3[]	= L"ab";
Index: gcc/testsuite/g++.dg/ext/utf-array-short-wchar.C
===================================================================
--- gcc/testsuite/g++.dg/ext/utf-array-short-wchar.C	(revision 0)
+++ gcc/testsuite/g++.dg/ext/utf-array-short-wchar.C	(revision 0)
@@ -0,0 +1,36 @@
+/* Contributed by Kris Van Hees <kris.van.hees@oracle.com> */
+/* Expected errors for char16_t/char32_t string literals. */
+/* { dg-do compile } */
+/* { dg-options "-std=c++0x -fshort-wchar" } */
+
+const char	s_0[]	= "ab";
+const char	s_1[]	= u"ab";	/* { dg-error "from wide string" } */
+const char	s_2[]	= U"ab";	/* { dg-error "from wide string" } */
+const char	s_3[]	= L"ab";	/* { dg-error "from wide string" } */
+
+const char16_t	s16_0[]	= "ab";		/* { dg-error "from non-wide" } */
+const char16_t	s16_1[]	= u"ab";
+const char16_t	s16_2[]	= U"ab";	/* { dg-error "from incompatible" } */
+const char16_t	s16_3[]	= L"ab";	/* { dg-error "from incompatible" } */
+
+const char16_t	s16_4[0] = u"ab";	/* { dg-warning "chars is too long" } */
+const char16_t	s16_5[1] = u"ab";	/* { dg-warning "chars is too long" } */
+const char16_t	s16_6[2] = u"ab";	/* { dg-warning "chars is too long" } */
+const char16_t	s16_7[3] = u"ab";
+const char16_t	s16_8[4] = u"ab";
+
+const char32_t	s32_0[]	= "ab";		/* { dg-error "from non-wide" } */
+const char32_t	s32_1[]	= u"ab";	/* { dg-error "from incompatible" } */
+const char32_t	s32_2[]	= U"ab";
+const char32_t	s32_3[]	= L"ab";	/* { dg-error "from incompatible" } */
+
+const char32_t	s32_4[0] = U"ab";	/* { dg-warning "chars is too long" } */
+const char32_t	s32_5[1] = U"ab";	/* { dg-warning "chars is too long" } */
+const char32_t	s32_6[2] = U"ab";	/* { dg-warning "chars is too long" } */
+const char32_t	s32_7[3] = U"ab";
+const char32_t	s32_8[4] = U"ab";
+
+const wchar_t	sw_0[]	= "ab";		/* { dg-error "from non-wide" } */
+const wchar_t	sw_1[]	= u"ab";	/* { dg-error "from incompatible" } */
+const wchar_t	sw_2[]	= U"ab";	/* { dg-error "from incompatible" } */
+const wchar_t	sw_3[]	= L"ab";
Index: gcc/cp/typeck2.c
===================================================================
--- gcc/cp/typeck2.c	(revision 134495)
+++ gcc/cp/typeck2.c	(working copy)
@@ -671,17 +671,26 @@ digest_init (tree type, tree init)
 	{
 	  tree char_type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (init)));
 
-	  if (char_type != char_type_node
-	      && TYPE_PRECISION (typ1) == BITS_PER_UNIT)
+	  if (TYPE_PRECISION (typ1) == BITS_PER_UNIT)
 	    {
-	      error ("char-array initialized from wide string");
-	      return error_mark_node;
+	      if (char_type != char_type_node)
+		{
+		  error ("char-array initialized from wide string");
+		  return error_mark_node;
+		}
 	    }
-	  if (char_type == char_type_node
-	      && TYPE_PRECISION (typ1) != BITS_PER_UNIT)
+	  else
 	    {
-	      error ("int-array initialized from non-wide string");
-	      return error_mark_node;
+	      if (char_type == char_type_node)
+		{
+		  error ("int-array initialized from non-wide string");
+		  return error_mark_node;
+		}
+	      else if (char_type != typ1)
+		{
+		  error ("int-array initialized from incompatible wide string");
+		  return error_mark_node;
+		}
 	    }
 
 	  TREE_TYPE (init) = type;
Index: gcc/c-typeck.c
===================================================================
--- gcc/c-typeck.c	(revision 134495)
+++ gcc/c-typeck.c	(working copy)
@@ -4708,47 +4708,56 @@ digest_init (tree type, tree init, bool 
 			 || typ1 == signed_char_type_node
 			 || typ1 == unsigned_char_type_node);
       bool wchar_array = !!comptypes (typ1, wchar_type_node);
-      if (char_array || wchar_array)
+      bool char16_array = !!comptypes (typ1, char16_type_node);
+      bool char32_array = !!comptypes (typ1, char32_type_node);
+
+      if (char_array || wchar_array || char16_array || char32_array)
 	{
 	  struct c_expr expr;
-	  bool char_string;
+	  tree typ2 = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (inside_init)));
 	  expr.value = inside_init;
 	  expr.original_code = (strict_string ? STRING_CST : ERROR_MARK);
 	  maybe_warn_string_init (type, expr);
 
-	  char_string
-	    = (TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (inside_init)))
-	       == char_type_node);
-
 	  if (comptypes (TYPE_MAIN_VARIANT (TREE_TYPE (inside_init)),
 			 TYPE_MAIN_VARIANT (type)))
 	    return inside_init;
 
-	  if (!wchar_array && !char_string)
+	  if (char_array)
 	    {
-	      error_init ("char-array initialized from wide string");
-	      return error_mark_node;
+	      if (typ2 != char_type_node)
+		{
+		  error_init ("char-array initialized from wide string");
+		  return error_mark_node;
+		}
 	    }
-	  if (char_string && !char_array)
+	  else
 	    {
-	      error_init ("wchar_t-array initialized from non-wide string");
-	      return error_mark_node;
+	      if (typ2 == char_type_node)
+		{
+		  error_init ("wide character array initialized from non-wide "
+			      "string");
+		  return error_mark_node;
+		}
+	      else if (!comptypes(typ1, typ2))
+		{
+		  error_init ("wide character array initialized from "
+			      "incompatible wide string");
+		  return error_mark_node;
+		}
 	    }
 
 	  TREE_TYPE (inside_init) = type;
 	  if (TYPE_DOMAIN (type) != 0
 	      && TYPE_SIZE (type) != 0
 	      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
-	      /* Subtract 1 (or sizeof (wchar_t))
+	      /* Subtract the size of a single (possibly wide) character
 		 because it's ok to ignore the terminating null char
 		 that is counted in the length of the constant.  */
 	      && 0 > compare_tree_int (TYPE_SIZE_UNIT (type),
 				       TREE_STRING_LENGTH (inside_init)
-				       - ((TYPE_PRECISION (typ1)
-					   != TYPE_PRECISION (char_type_node))
-					  ? (TYPE_PRECISION (wchar_type_node)
-					     / BITS_PER_UNIT)
-					  : 1)))
+				       - (TYPE_PRECISION (typ1)
+					  / BITS_PER_UNIT)))
 	    pedwarn_init ("initializer-string for array of chars is too long");
 
 	  return inside_init;


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]