This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Handle string constant initializers for array of char16_t or char32_t
Oracle has a full copyright assignment in place with the FSF.
This patch addresses the problem reported by Holger Hopp and Jakub Jelinek:
http://gcc.gnu.org/ml/gcc-patches/2008-04/msg01444.html
http://gcc.gnu.org/ml/gcc-patches/2008-04/msg01454.html
The UTF-16/32 support did indeed fail to ensure that string constant
initializers for arrays of char16_t and char32_t are handled correctly
in C and C++. The patch below addresses this problem and provides tests
for it.
This problem doesn't seem to be logged in bugzilla as a bug, hence the lack
of reference to a bugzilla bug id.
ChangeLog entries:
------------------
gcc/ChangeLog:
2008-04-20 Kris Van Hees <kris.van.hees@oracle.com>
* c-typeck.c (digest_init): Support char16_t and char32_t.
gcc/cp/ChangeLog:
2008-04-20 Kris Van Hees <kris.van.hees@oracle.com>
* typeck2.c (digest_init): Support char16_t and char32_t.
gcc/testsuite/ChangeLog:
2008-04-20 Kris Van Hees <kris.van.hees@oracle.com>
Tests for char16_t and char32_t support.
* g++.dg/ext/utf-array.C: New
* g++.dg/ext/utf-array-short-wchar.C: New
* gcc.dg/utf-array.c: New
* gcc.dg/utf-array-short-wchar.c: New
Bootstrapping and testing:
--------------------------
The source tree was built on the following platforms (target == host):
i686-linux
x86_64-linux
Builds were done for both the unpatched tree and the patched tree, and
testsuite (make -k check) summary results were verified to be identical,
except for the added tests in the patched tree. This was done to ensure
that the patch does not introduce regressions.
Index: gcc/testsuite/gcc.dg/utf-array.c
===================================================================
--- gcc/testsuite/gcc.dg/utf-array.c (revision 0)
+++ gcc/testsuite/gcc.dg/utf-array.c (revision 0)
@@ -0,0 +1,41 @@
+/* Contributed by Kris Van Hees <kris.van.hees@oracle.com> */
+/* Expected diagnostics for char/char16_t/char32_t/wchar_t arrays initialized from string literals. */
+/* { dg-do compile } */
+/* { dg-options "-std=gnu99" } */
+
+#include <wchar.h>
+
+typedef short unsigned int char16_t;
+typedef unsigned int char32_t;
+
+const char s_0[] = "ab";
+const char s_1[] = u"ab"; /* { dg-error "from wide string" } */
+const char s_2[] = U"ab"; /* { dg-error "from wide string" } */
+const char s_3[] = L"ab"; /* { dg-error "from wide string" } */
+
+const char16_t s16_0[] = "ab"; /* { dg-error "from non-wide" } */
+const char16_t s16_1[] = u"ab";
+const char16_t s16_2[] = U"ab"; /* { dg-error "from incompatible" } */
+const char16_t s16_3[] = L"ab"; /* { dg-error "from incompatible" } */
+
+const char16_t s16_4[0] = u"ab"; /* { dg-warning "chars is too long" } */
+const char16_t s16_5[1] = u"ab"; /* { dg-warning "chars is too long" } */
+const char16_t s16_6[2] = u"ab";
+const char16_t s16_7[3] = u"ab";
+const char16_t s16_8[4] = u"ab";
+
+const char32_t s32_0[] = "ab"; /* { dg-error "from non-wide" } */
+const char32_t s32_1[] = u"ab"; /* { dg-error "from incompatible" } */
+const char32_t s32_2[] = U"ab";
+const char32_t s32_3[] = L"ab"; /* { dg-error "from incompatible" } */
+
+const char32_t s32_4[0] = U"ab"; /* { dg-warning "chars is too long" } */
+const char32_t s32_5[1] = U"ab"; /* { dg-warning "chars is too long" } */
+const char32_t s32_6[2] = U"ab";
+const char32_t s32_7[3] = U"ab";
+const char32_t s32_8[4] = U"ab";
+
+const wchar_t sw_0[] = "ab"; /* { dg-error "from non-wide" } */
+const wchar_t sw_1[] = u"ab"; /* { dg-error "from incompatible" } */
+const wchar_t sw_2[] = U"ab"; /* { dg-error "from incompatible" } */
+const wchar_t sw_3[] = L"ab";
Index: gcc/testsuite/gcc.dg/utf-array-short-wchar.c
===================================================================
--- gcc/testsuite/gcc.dg/utf-array-short-wchar.c (revision 0)
+++ gcc/testsuite/gcc.dg/utf-array-short-wchar.c (revision 0)
@@ -0,0 +1,41 @@
+/* Contributed by Kris Van Hees <kris.van.hees@oracle.com> */
+/* Expected diagnostics for char16_t/char32_t/wchar_t array initializers under -fshort-wchar. */
+/* { dg-do compile } */
+/* { dg-options "-std=gnu99 -fshort-wchar" } */
+
+#include <wchar.h>
+
+typedef short unsigned int char16_t;
+typedef unsigned int char32_t;
+
+const char s_0[] = "ab";
+const char s_1[] = u"ab"; /* { dg-error "from wide string" } */
+const char s_2[] = U"ab"; /* { dg-error "from wide string" } */
+const char s_3[] = L"ab"; /* { dg-error "from wide string" } */
+
+const char16_t s16_0[] = "ab"; /* { dg-error "from non-wide" } */
+const char16_t s16_1[] = u"ab";
+const char16_t s16_2[] = U"ab"; /* { dg-error "from incompatible" } */
+const char16_t s16_3[] = L"ab";
+
+const char16_t s16_4[0] = u"ab"; /* { dg-warning "chars is too long" } */
+const char16_t s16_5[1] = u"ab"; /* { dg-warning "chars is too long" } */
+const char16_t s16_6[2] = u"ab";
+const char16_t s16_7[3] = u"ab";
+const char16_t s16_8[4] = u"ab";
+
+const char32_t s32_0[] = "ab"; /* { dg-error "from non-wide" } */
+const char32_t s32_1[] = u"ab"; /* { dg-error "from incompatible" } */
+const char32_t s32_2[] = U"ab";
+const char32_t s32_3[] = L"ab"; /* { dg-error "from incompatible" } */
+
+const char32_t s32_4[0] = U"ab"; /* { dg-warning "chars is too long" } */
+const char32_t s32_5[1] = U"ab"; /* { dg-warning "chars is too long" } */
+const char32_t s32_6[2] = U"ab";
+const char32_t s32_7[3] = U"ab";
+const char32_t s32_8[4] = U"ab";
+
+const wchar_t sw_0[] = "ab"; /* { dg-error "from non-wide" } */
+const wchar_t sw_1[] = u"ab";
+const wchar_t sw_2[] = U"ab"; /* { dg-error "from incompatible" } */
+const wchar_t sw_3[] = L"ab";
Index: gcc/testsuite/g++.dg/ext/utf-array.C
===================================================================
--- gcc/testsuite/g++.dg/ext/utf-array.C (revision 0)
+++ gcc/testsuite/g++.dg/ext/utf-array.C (revision 0)
@@ -0,0 +1,36 @@
+/* Contributed by Kris Van Hees <kris.van.hees@oracle.com> */
+/* Expected diagnostics for char/char16_t/char32_t/wchar_t arrays initialized from string literals. */
+/* { dg-do compile } */
+/* { dg-options "-std=c++0x" } */
+
+const char s_0[] = "ab";
+const char s_1[] = u"ab"; /* { dg-error "from wide string" } */
+const char s_2[] = U"ab"; /* { dg-error "from wide string" } */
+const char s_3[] = L"ab"; /* { dg-error "from wide string" } */
+
+const char16_t s16_0[] = "ab"; /* { dg-error "from non-wide" } */
+const char16_t s16_1[] = u"ab";
+const char16_t s16_2[] = U"ab"; /* { dg-error "from incompatible" } */
+const char16_t s16_3[] = L"ab"; /* { dg-error "from incompatible" } */
+
+const char16_t s16_4[0] = u"ab"; /* { dg-warning "chars is too long" } */
+const char16_t s16_5[1] = u"ab"; /* { dg-warning "chars is too long" } */
+const char16_t s16_6[2] = u"ab"; /* { dg-warning "chars is too long" } */
+const char16_t s16_7[3] = u"ab";
+const char16_t s16_8[4] = u"ab";
+
+const char32_t s32_0[] = "ab"; /* { dg-error "from non-wide" } */
+const char32_t s32_1[] = u"ab"; /* { dg-error "from incompatible" } */
+const char32_t s32_2[] = U"ab";
+const char32_t s32_3[] = L"ab"; /* { dg-error "from incompatible" } */
+
+const char32_t s32_4[0] = U"ab"; /* { dg-warning "chars is too long" } */
+const char32_t s32_5[1] = U"ab"; /* { dg-warning "chars is too long" } */
+const char32_t s32_6[2] = U"ab"; /* { dg-warning "chars is too long" } */
+const char32_t s32_7[3] = U"ab";
+const char32_t s32_8[4] = U"ab";
+
+const wchar_t sw_0[] = "ab"; /* { dg-error "from non-wide" } */
+const wchar_t sw_1[] = u"ab"; /* { dg-error "from incompatible" } */
+const wchar_t sw_2[] = U"ab"; /* { dg-error "from incompatible" } */
+const wchar_t sw_3[] = L"ab";
Index: gcc/testsuite/g++.dg/ext/utf-array-short-wchar.C
===================================================================
--- gcc/testsuite/g++.dg/ext/utf-array-short-wchar.C (revision 0)
+++ gcc/testsuite/g++.dg/ext/utf-array-short-wchar.C (revision 0)
@@ -0,0 +1,36 @@
+/* Contributed by Kris Van Hees <kris.van.hees@oracle.com> */
+/* Expected diagnostics for char16_t/char32_t/wchar_t array initializers under -fshort-wchar. */
+/* { dg-do compile } */
+/* { dg-options "-std=c++0x -fshort-wchar" } */
+
+const char s_0[] = "ab";
+const char s_1[] = u"ab"; /* { dg-error "from wide string" } */
+const char s_2[] = U"ab"; /* { dg-error "from wide string" } */
+const char s_3[] = L"ab"; /* { dg-error "from wide string" } */
+
+const char16_t s16_0[] = "ab"; /* { dg-error "from non-wide" } */
+const char16_t s16_1[] = u"ab";
+const char16_t s16_2[] = U"ab"; /* { dg-error "from incompatible" } */
+const char16_t s16_3[] = L"ab"; /* { dg-error "from incompatible" } */
+
+const char16_t s16_4[0] = u"ab"; /* { dg-warning "chars is too long" } */
+const char16_t s16_5[1] = u"ab"; /* { dg-warning "chars is too long" } */
+const char16_t s16_6[2] = u"ab"; /* { dg-warning "chars is too long" } */
+const char16_t s16_7[3] = u"ab";
+const char16_t s16_8[4] = u"ab";
+
+const char32_t s32_0[] = "ab"; /* { dg-error "from non-wide" } */
+const char32_t s32_1[] = u"ab"; /* { dg-error "from incompatible" } */
+const char32_t s32_2[] = U"ab";
+const char32_t s32_3[] = L"ab"; /* { dg-error "from incompatible" } */
+
+const char32_t s32_4[0] = U"ab"; /* { dg-warning "chars is too long" } */
+const char32_t s32_5[1] = U"ab"; /* { dg-warning "chars is too long" } */
+const char32_t s32_6[2] = U"ab"; /* { dg-warning "chars is too long" } */
+const char32_t s32_7[3] = U"ab";
+const char32_t s32_8[4] = U"ab";
+
+const wchar_t sw_0[] = "ab"; /* { dg-error "from non-wide" } */
+const wchar_t sw_1[] = u"ab"; /* { dg-error "from incompatible" } */
+const wchar_t sw_2[] = U"ab"; /* { dg-error "from incompatible" } */
+const wchar_t sw_3[] = L"ab";
Index: gcc/cp/typeck2.c
===================================================================
--- gcc/cp/typeck2.c (revision 134495)
+++ gcc/cp/typeck2.c (working copy)
@@ -671,17 +671,26 @@ digest_init (tree type, tree init)
{
tree char_type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (init)));
- if (char_type != char_type_node
- && TYPE_PRECISION (typ1) == BITS_PER_UNIT)
+ if (TYPE_PRECISION (typ1) == BITS_PER_UNIT)
{
- error ("char-array initialized from wide string");
- return error_mark_node;
+ if (char_type != char_type_node)
+ {
+ error ("char-array initialized from wide string");
+ return error_mark_node;
+ }
}
- if (char_type == char_type_node
- && TYPE_PRECISION (typ1) != BITS_PER_UNIT)
+ else
{
- error ("int-array initialized from non-wide string");
- return error_mark_node;
+ if (char_type == char_type_node)
+ {
+ error ("int-array initialized from non-wide string");
+ return error_mark_node;
+ }
+ else if (char_type != typ1)
+ {
+ error ("int-array initialized from incompatible wide string");
+ return error_mark_node;
+ }
}
TREE_TYPE (init) = type;
Index: gcc/c-typeck.c
===================================================================
--- gcc/c-typeck.c (revision 134495)
+++ gcc/c-typeck.c (working copy)
@@ -4708,47 +4708,56 @@ digest_init (tree type, tree init, bool
|| typ1 == signed_char_type_node
|| typ1 == unsigned_char_type_node);
bool wchar_array = !!comptypes (typ1, wchar_type_node);
- if (char_array || wchar_array)
+ bool char16_array = !!comptypes (typ1, char16_type_node);
+ bool char32_array = !!comptypes (typ1, char32_type_node);
+
+ if (char_array || wchar_array || char16_array || char32_array)
{
struct c_expr expr;
- bool char_string;
+ tree typ2 = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (inside_init)));
expr.value = inside_init;
expr.original_code = (strict_string ? STRING_CST : ERROR_MARK);
maybe_warn_string_init (type, expr);
- char_string
- = (TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (inside_init)))
- == char_type_node);
-
if (comptypes (TYPE_MAIN_VARIANT (TREE_TYPE (inside_init)),
TYPE_MAIN_VARIANT (type)))
return inside_init;
- if (!wchar_array && !char_string)
+ if (char_array)
{
- error_init ("char-array initialized from wide string");
- return error_mark_node;
+ if (typ2 != char_type_node)
+ {
+ error_init ("char-array initialized from wide string");
+ return error_mark_node;
+ }
}
- if (char_string && !char_array)
+ else
{
- error_init ("wchar_t-array initialized from non-wide string");
- return error_mark_node;
+ if (typ2 == char_type_node)
+ {
+ error_init ("wide character array initialized from non-wide "
+ "string");
+ return error_mark_node;
+ }
+ else if (!comptypes(typ1, typ2))
+ {
+ error_init ("wide character array initialized from "
+ "incompatible wide string");
+ return error_mark_node;
+ }
}
TREE_TYPE (inside_init) = type;
if (TYPE_DOMAIN (type) != 0
&& TYPE_SIZE (type) != 0
&& TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
- /* Subtract 1 (or sizeof (wchar_t))
+ /* Subtract the size of a single (possibly wide) character
because it's ok to ignore the terminating null char
that is counted in the length of the constant. */
&& 0 > compare_tree_int (TYPE_SIZE_UNIT (type),
TREE_STRING_LENGTH (inside_init)
- - ((TYPE_PRECISION (typ1)
- != TYPE_PRECISION (char_type_node))
- ? (TYPE_PRECISION (wchar_type_node)
- / BITS_PER_UNIT)
- : 1)))
+ - (TYPE_PRECISION (typ1)
+ / BITS_PER_UNIT)))
pedwarn_init ("initializer-string for array of chars is too long");
return inside_init;