This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Revision 2: Handle string constant initializers for array of char16_t or char32_t
Oracle has a full copyright assignment in place with the FSF.
This patch addresses the problem reported by Holger Hopp and Jakub Jelinek:
http://gcc.gnu.org/ml/gcc-patches/2008-04/msg01444.html
http://gcc.gnu.org/ml/gcc-patches/2008-04/msg01454.html
and the first review sparking this revision:
http://gcc.gnu.org/ml/gcc-patches/2008-04/msg01553.html
This patch replaces the first version. It addresses the problem that the
char16_t/char32_t support failed to ensure that string constant
initializers for arrays of char16_t and char32_t were handled correctly,
both in C and C++. It also addresses the feedback from Joseph Myers
concerning incremental initializers.
The fix for the incremental initializer handling removes a gcc_assert for
the bitsize of the elements in the initializer string constant. This is
safe and correct, because with char16_t and char32_t added in, non-char
doesn't mean wchar_t anymore. It is safe to not have the assert there,
because other code earlier in the code paths leading here ensures that the
string constant is of the right type.
This problem doesn't seem to be logged in bugzilla as a bug, hence the lack
of reference to a bugzilla bug id.
ChangeLog entries:
------------------
gcc/ChangeLog:
2008-04-21 Kris Van Hees <kris.van.hees@oracle.com>
* c-typeck.c (digest_init): Support char16_t and char32_t.
(set_nonincremental_init_from_string): Idem.
gcc/cp/ChangeLog:
2008-04-21 Kris Van Hees <kris.van.hees@oracle.com>
* typeck2.c (digest_init): Support char16_t and char32_t.
gcc/testsuite/ChangeLog:
2008-04-21 Kris Van Hees <kris.van.hees@oracle.com>
Tests for char16_t and char32_t support.
* g++.dg/ext/utf-array.C: New
* g++.dg/ext/utf-array-short-wchar.C: New
* gcc.dg/utf-array.c: New
* gcc.dg/utf-array-short-wchar.c: New
* gcc.dg/utf-inc-init.c: New
Bootstrapping and testing:
--------------------------
The source tree was built on the following platforms (target == host):
i686-linux
x86_64-linux
Builds were done for both the unpatched tree and the patched tree, and
testsuite (make -k check) summary results were verified to be identical,
except for the added tests in the patched tree. This was done to ensure
that the patch does not introduce regressions.
Index: gcc/testsuite/gcc.dg/utf-array.c
===================================================================
--- gcc/testsuite/gcc.dg/utf-array.c (revision 0)
+++ gcc/testsuite/gcc.dg/utf-array.c (revision 0)
@@ -0,0 +1,41 @@
+/* Contributed by Kris Van Hees <kris.van.hees@oracle.com> */
+/* Expected errors for char16_t/char32_t string literals. */
+/* { dg-do compile } */
+/* { dg-options "-std=gnu99" } */
+
+#include <wchar.h>
+
+typedef short unsigned int char16_t;
+typedef unsigned int char32_t;
+
+const char s_0[] = "ab";
+const char s_1[] = u"ab"; /* { dg-error "from wide string" } */
+const char s_2[] = U"ab"; /* { dg-error "from wide string" } */
+const char s_3[] = L"ab"; /* { dg-error "from wide string" } */
+
+const char16_t s16_0[] = "ab"; /* { dg-error "from non-wide" } */
+const char16_t s16_1[] = u"ab";
+const char16_t s16_2[] = U"ab"; /* { dg-error "from incompatible" } */
+const char16_t s16_3[] = L"ab"; /* { dg-error "from incompatible" } */
+
+const char16_t s16_4[0] = u"ab"; /* { dg-warning "chars is too long" } */
+const char16_t s16_5[1] = u"ab"; /* { dg-warning "chars is too long" } */
+const char16_t s16_6[2] = u"ab";
+const char16_t s16_7[3] = u"ab";
+const char16_t s16_8[4] = u"ab";
+
+const char32_t s32_0[] = "ab"; /* { dg-error "from non-wide" } */
+const char32_t s32_1[] = u"ab"; /* { dg-error "from incompatible" } */
+const char32_t s32_2[] = U"ab";
+const char32_t s32_3[] = L"ab"; /* { dg-error "from incompatible" } */
+
+const char32_t s32_4[0] = U"ab"; /* { dg-warning "chars is too long" } */
+const char32_t s32_5[1] = U"ab"; /* { dg-warning "chars is too long" } */
+const char32_t s32_6[2] = U"ab";
+const char32_t s32_7[3] = U"ab";
+const char32_t s32_8[4] = U"ab";
+
+const wchar_t sw_0[] = "ab"; /* { dg-error "from non-wide" } */
+const wchar_t sw_1[] = u"ab"; /* { dg-error "from incompatible" } */
+const wchar_t sw_2[] = U"ab"; /* { dg-error "from incompatible" } */
+const wchar_t sw_3[] = L"ab";
Index: gcc/testsuite/gcc.dg/utf-inc-init.c
===================================================================
--- gcc/testsuite/gcc.dg/utf-inc-init.c (revision 0)
+++ gcc/testsuite/gcc.dg/utf-inc-init.c (revision 0)
@@ -0,0 +1,49 @@
+/* Contributed by Kris Van Hees <kris.van.hees@oracle.com> */
+/* Test incremental initializers for char16_t/char32_t arrays. */
+/* { dg-do run } */
+/* { dg-options "-std=gnu99" } */
+
+typedef __SIZE_TYPE__ size_t;
+typedef short unsigned int char16_t;
+typedef unsigned int char32_t;
+
+extern int memcmp (const void *, const void *, size_t);
+extern void abort (void);
+extern void exit (int);
+
+/* A string-initialized char16_t array later overridden by designators.  */
+struct A {
+  char16_t S[6];
+  int M;
+} a[] = { { { u"foo" }, 1 }, [0].S[2] = u'x', [0].S[4] = u'y' };
+struct A b[] = { { { u"foo" }, 1 }, [0] = { .S[0] = u'b' } };
+struct A c[] = { { { u"foo" }, 1 }, [0].S = { u"a" }, [0].M = 2 };
+
+/* Same layout as struct A, with char32_t elements.  */
+struct B {
+  char32_t S[6];
+  int M;
+} d[] = { { { U"foo" }, 1 }, [0].S[2] = U'x', [0].S[4] = U'y' };
+struct B e[] = { { { U"foo" }, 1 }, [0] = { .S[0] = U'b' } };
+struct B f[] = { { { U"foo" }, 1 }, [0].S = { U"a" }, [0].M = 2 };
+
+/* Compare all six elements of every array (memcmp counts bytes, so scale
+   by the element size) and abort on any mismatch.  */
+int main (void)
+{
+  if (memcmp (a[0].S, u"fox\0y", 6 * sizeof (char16_t)) || a[0].M != 1)
+    abort ();
+  if (memcmp (b[0].S, u"b\0\0\0\0", 6 * sizeof (char16_t)) || b[0].M)
+    abort ();
+  if (memcmp (c[0].S, u"a\0\0\0\0", 6 * sizeof (char16_t)) || c[0].M != 2)
+    abort ();
+
+  if (memcmp (d[0].S, U"fox\0y", 6 * sizeof (char32_t)) || d[0].M != 1)
+    abort ();
+  if (memcmp (e[0].S, U"b\0\0\0\0", 6 * sizeof (char32_t)) || e[0].M)
+    abort ();
+  if (memcmp (f[0].S, U"a\0\0\0\0", 6 * sizeof (char32_t)) || f[0].M != 2)
+    abort ();
+
+  exit (0);
+}
Index: gcc/testsuite/gcc.dg/utf-array-short-wchar.c
===================================================================
--- gcc/testsuite/gcc.dg/utf-array-short-wchar.c (revision 0)
+++ gcc/testsuite/gcc.dg/utf-array-short-wchar.c (revision 0)
@@ -0,0 +1,41 @@
+/* Contributed by Kris Van Hees <kris.van.hees@oracle.com> */
+/* Expected errors for char16_t/char32_t string literals. */
+/* { dg-do compile } */
+/* { dg-options "-std=gnu99 -fshort-wchar" } */
+
+#include <wchar.h>
+
+typedef short unsigned int char16_t;
+typedef unsigned int char32_t;
+
+const char s_0[] = "ab";
+const char s_1[] = u"ab"; /* { dg-error "from wide string" } */
+const char s_2[] = U"ab"; /* { dg-error "from wide string" } */
+const char s_3[] = L"ab"; /* { dg-error "from wide string" } */
+
+const char16_t s16_0[] = "ab"; /* { dg-error "from non-wide" } */
+const char16_t s16_1[] = u"ab";
+const char16_t s16_2[] = U"ab"; /* { dg-error "from incompatible" } */
+const char16_t s16_3[] = L"ab";
+
+const char16_t s16_4[0] = u"ab"; /* { dg-warning "chars is too long" } */
+const char16_t s16_5[1] = u"ab"; /* { dg-warning "chars is too long" } */
+const char16_t s16_6[2] = u"ab";
+const char16_t s16_7[3] = u"ab";
+const char16_t s16_8[4] = u"ab";
+
+const char32_t s32_0[] = "ab"; /* { dg-error "from non-wide" } */
+const char32_t s32_1[] = u"ab"; /* { dg-error "from incompatible" } */
+const char32_t s32_2[] = U"ab";
+const char32_t s32_3[] = L"ab"; /* { dg-error "from incompatible" } */
+
+const char32_t s32_4[0] = U"ab"; /* { dg-warning "chars is too long" } */
+const char32_t s32_5[1] = U"ab"; /* { dg-warning "chars is too long" } */
+const char32_t s32_6[2] = U"ab";
+const char32_t s32_7[3] = U"ab";
+const char32_t s32_8[4] = U"ab";
+
+const wchar_t sw_0[] = "ab"; /* { dg-error "from non-wide" } */
+const wchar_t sw_1[] = u"ab";
+const wchar_t sw_2[] = U"ab"; /* { dg-error "from incompatible" } */
+const wchar_t sw_3[] = L"ab";
Index: gcc/testsuite/g++.dg/ext/utf-array.C
===================================================================
--- gcc/testsuite/g++.dg/ext/utf-array.C (revision 0)
+++ gcc/testsuite/g++.dg/ext/utf-array.C (revision 0)
@@ -0,0 +1,36 @@
+/* Contributed by Kris Van Hees <kris.van.hees@oracle.com> */
+/* Expected errors for char16_t/char32_t string literals. */
+/* { dg-do compile } */
+/* { dg-options "-std=c++0x" } */
+
+const char s_0[] = "ab";
+const char s_1[] = u"ab"; /* { dg-error "from wide string" } */
+const char s_2[] = U"ab"; /* { dg-error "from wide string" } */
+const char s_3[] = L"ab"; /* { dg-error "from wide string" } */
+
+const char16_t s16_0[] = "ab"; /* { dg-error "from non-wide" } */
+const char16_t s16_1[] = u"ab";
+const char16_t s16_2[] = U"ab"; /* { dg-error "from incompatible" } */
+const char16_t s16_3[] = L"ab"; /* { dg-error "from incompatible" } */
+
+const char16_t s16_4[0] = u"ab"; /* { dg-warning "chars is too long" } */
+const char16_t s16_5[1] = u"ab"; /* { dg-warning "chars is too long" } */
+const char16_t s16_6[2] = u"ab"; /* { dg-warning "chars is too long" } */
+const char16_t s16_7[3] = u"ab";
+const char16_t s16_8[4] = u"ab";
+
+const char32_t s32_0[] = "ab"; /* { dg-error "from non-wide" } */
+const char32_t s32_1[] = u"ab"; /* { dg-error "from incompatible" } */
+const char32_t s32_2[] = U"ab";
+const char32_t s32_3[] = L"ab"; /* { dg-error "from incompatible" } */
+
+const char32_t s32_4[0] = U"ab"; /* { dg-warning "chars is too long" } */
+const char32_t s32_5[1] = U"ab"; /* { dg-warning "chars is too long" } */
+const char32_t s32_6[2] = U"ab"; /* { dg-warning "chars is too long" } */
+const char32_t s32_7[3] = U"ab";
+const char32_t s32_8[4] = U"ab";
+
+const wchar_t sw_0[] = "ab"; /* { dg-error "from non-wide" } */
+const wchar_t sw_1[] = u"ab"; /* { dg-error "from incompatible" } */
+const wchar_t sw_2[] = U"ab"; /* { dg-error "from incompatible" } */
+const wchar_t sw_3[] = L"ab";
Index: gcc/testsuite/g++.dg/ext/utf-array-short-wchar.C
===================================================================
--- gcc/testsuite/g++.dg/ext/utf-array-short-wchar.C (revision 0)
+++ gcc/testsuite/g++.dg/ext/utf-array-short-wchar.C (revision 0)
@@ -0,0 +1,36 @@
+/* Contributed by Kris Van Hees <kris.van.hees@oracle.com> */
+/* Expected errors for char16_t/char32_t string literals. */
+/* { dg-do compile } */
+/* { dg-options "-std=c++0x -fshort-wchar" } */
+
+const char s_0[] = "ab";
+const char s_1[] = u"ab"; /* { dg-error "from wide string" } */
+const char s_2[] = U"ab"; /* { dg-error "from wide string" } */
+const char s_3[] = L"ab"; /* { dg-error "from wide string" } */
+
+const char16_t s16_0[] = "ab"; /* { dg-error "from non-wide" } */
+const char16_t s16_1[] = u"ab";
+const char16_t s16_2[] = U"ab"; /* { dg-error "from incompatible" } */
+const char16_t s16_3[] = L"ab"; /* { dg-error "from incompatible" } */
+
+const char16_t s16_4[0] = u"ab"; /* { dg-warning "chars is too long" } */
+const char16_t s16_5[1] = u"ab"; /* { dg-warning "chars is too long" } */
+const char16_t s16_6[2] = u"ab"; /* { dg-warning "chars is too long" } */
+const char16_t s16_7[3] = u"ab";
+const char16_t s16_8[4] = u"ab";
+
+const char32_t s32_0[] = "ab"; /* { dg-error "from non-wide" } */
+const char32_t s32_1[] = u"ab"; /* { dg-error "from incompatible" } */
+const char32_t s32_2[] = U"ab";
+const char32_t s32_3[] = L"ab"; /* { dg-error "from incompatible" } */
+
+const char32_t s32_4[0] = U"ab"; /* { dg-warning "chars is too long" } */
+const char32_t s32_5[1] = U"ab"; /* { dg-warning "chars is too long" } */
+const char32_t s32_6[2] = U"ab"; /* { dg-warning "chars is too long" } */
+const char32_t s32_7[3] = U"ab";
+const char32_t s32_8[4] = U"ab";
+
+const wchar_t sw_0[] = "ab"; /* { dg-error "from non-wide" } */
+const wchar_t sw_1[] = u"ab"; /* { dg-error "from incompatible" } */
+const wchar_t sw_2[] = U"ab"; /* { dg-error "from incompatible" } */
+const wchar_t sw_3[] = L"ab";
Index: gcc/cp/typeck2.c
===================================================================
--- gcc/cp/typeck2.c (revision 134495)
+++ gcc/cp/typeck2.c (working copy)
@@ -671,17 +671,26 @@ digest_init (tree type, tree init)
{
tree char_type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (init)));
- if (char_type != char_type_node
- && TYPE_PRECISION (typ1) == BITS_PER_UNIT)
+ if (TYPE_PRECISION (typ1) == BITS_PER_UNIT)
{
- error ("char-array initialized from wide string");
- return error_mark_node;
+ if (char_type != char_type_node)
+ {
+ error ("char-array initialized from wide string");
+ return error_mark_node;
+ }
}
- if (char_type == char_type_node
- && TYPE_PRECISION (typ1) != BITS_PER_UNIT)
+ else
{
- error ("int-array initialized from non-wide string");
- return error_mark_node;
+ if (char_type == char_type_node)
+ {
+ error ("int-array initialized from non-wide string");
+ return error_mark_node;
+ }
+ else if (char_type != typ1)
+ {
+ error ("int-array initialized from incompatible wide string");
+ return error_mark_node;
+ }
}
TREE_TYPE (init) = type;
Index: gcc/c-typeck.c
===================================================================
--- gcc/c-typeck.c (revision 134495)
+++ gcc/c-typeck.c (working copy)
@@ -4708,47 +4708,56 @@ digest_init (tree type, tree init, bool
|| typ1 == signed_char_type_node
|| typ1 == unsigned_char_type_node);
bool wchar_array = !!comptypes (typ1, wchar_type_node);
- if (char_array || wchar_array)
+ bool char16_array = !!comptypes (typ1, char16_type_node);
+ bool char32_array = !!comptypes (typ1, char32_type_node);
+
+ if (char_array || wchar_array || char16_array || char32_array)
{
struct c_expr expr;
- bool char_string;
+ tree typ2 = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (inside_init)));
expr.value = inside_init;
expr.original_code = (strict_string ? STRING_CST : ERROR_MARK);
maybe_warn_string_init (type, expr);
- char_string
- = (TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (inside_init)))
- == char_type_node);
-
if (comptypes (TYPE_MAIN_VARIANT (TREE_TYPE (inside_init)),
TYPE_MAIN_VARIANT (type)))
return inside_init;
- if (!wchar_array && !char_string)
+ if (char_array)
{
- error_init ("char-array initialized from wide string");
- return error_mark_node;
+ if (typ2 != char_type_node)
+ {
+ error_init ("char-array initialized from wide string");
+ return error_mark_node;
+ }
}
- if (char_string && !char_array)
+ else
{
- error_init ("wchar_t-array initialized from non-wide string");
- return error_mark_node;
+ if (typ2 == char_type_node)
+ {
+ error_init ("wide character array initialized from non-wide "
+ "string");
+ return error_mark_node;
+ }
+ else if (!comptypes (typ1, typ2))
+ {
+ error_init ("wide character array initialized from "
+ "incompatible wide string");
+ return error_mark_node;
+ }
}
TREE_TYPE (inside_init) = type;
if (TYPE_DOMAIN (type) != 0
&& TYPE_SIZE (type) != 0
&& TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
- /* Subtract 1 (or sizeof (wchar_t))
+ /* Subtract the size of a single (possibly wide) character
because it's ok to ignore the terminating null char
that is counted in the length of the constant. */
&& 0 > compare_tree_int (TYPE_SIZE_UNIT (type),
TREE_STRING_LENGTH (inside_init)
- - ((TYPE_PRECISION (typ1)
- != TYPE_PRECISION (char_type_node))
- ? (TYPE_PRECISION (wchar_type_node)
- / BITS_PER_UNIT)
- : 1)))
+ - (TYPE_PRECISION (typ1)
+ / BITS_PER_UNIT)))
pedwarn_init ("initializer-string for array of chars is too long");
return inside_init;
@@ -6096,15 +6105,7 @@ set_nonincremental_init_from_string (tre
gcc_assert (TREE_CODE (constructor_type) == ARRAY_TYPE);
- if (TYPE_PRECISION (TREE_TYPE (TREE_TYPE (str)))
- == TYPE_PRECISION (char_type_node))
- wchar_bytes = 1;
- else
- {
- gcc_assert (TYPE_PRECISION (TREE_TYPE (TREE_TYPE (str)))
- == TYPE_PRECISION (wchar_type_node));
- wchar_bytes = TYPE_PRECISION (wchar_type_node) / BITS_PER_UNIT;
- }
+ wchar_bytes = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (str))) / BITS_PER_UNIT;
charwidth = TYPE_PRECISION (char_type_node);
type = TREE_TYPE (constructor_type);
p = TREE_STRING_POINTER (str);