This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Patch: PR java/5923
- From: Tom Tromey <tromey at redhat dot com>
- To: Java Patch List <java-patches at gcc dot gnu dot org>
- Cc: Gcc Patch List <gcc-patches at gcc dot gnu dot org>, Alexandre Petit-Bianco <apbianco at redhat dot com>
- Date: 12 Mar 2002 16:21:54 -0700
- Subject: Patch: PR java/5923
- Reply-to: tromey at redhat dot com
This patch fixes PR 5923.
This appears to be some old code that assumed string constants should be
represented in UCS-2. However, that doesn't work -- every character below
U+0100 has a zero high byte in UCS-2, so the string is truncated at the
first \0.
This patch changes the code to only check that the UTF-8 is well-formed
and then build the string value directly from the original UTF-8 bytes.
Tested on x86 Red Hat Linux 6.2.
Ok for trunk and branch?
Tom
Index: ChangeLog
from Tom Tromey <tromey@redhat.com>
* jcf-parse.c (get_constant) [CONSTANT_String]: String values are
UTF-8, not UCS-2. Fixes PR java/5923.
Index: jcf-parse.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/java/jcf-parse.c,v
retrieving revision 1.107
diff -u -r1.107 jcf-parse.c
--- jcf-parse.c 2002/03/03 21:10:09 1.107
+++ jcf-parse.c 2002/03/12 22:19:41
@@ -350,13 +350,11 @@
unsigned char *str_ptr;
unsigned char *str;
const unsigned char *utf8;
- int i, str_len;
+ int i;
- /* Count the number of Unicode characters in the string,
- while checking for a malformed Utf8 string. */
+ /* Check for a malformed Utf8 string. */
utf8 = (const unsigned char *) utf8_ptr;
i = utf8_len;
- str_len = 0;
while (i > 0)
{
int char_len = UT8_CHAR_LENGTH (*utf8);
@@ -365,48 +363,10 @@
utf8 += char_len;
i -= char_len;
- str_len++;
}
- /* Allocate a scratch buffer, convert the string to UCS2, and copy it
- into the new space. */
- str_ptr = (unsigned char *) alloca (2 * str_len);
- str = str_ptr;
- utf8 = (const unsigned char *)utf8_ptr;
-
- for (i = 0; i < str_len; i++)
- {
- int char_value;
- int char_len = UT8_CHAR_LENGTH (*utf8);
- switch (char_len)
- {
- case 1:
- char_value = *utf8++;
- break;
- case 2:
- char_value = *utf8++ & 0x1F;
- char_value = (char_value << 6) | (*utf8++ & 0x3F);
- break;
- case 3:
- char_value = *utf8++ & 0x0F;
- char_value = (char_value << 6) | (*utf8++ & 0x3F);
- char_value = (char_value << 6) | (*utf8++ & 0x3F);
- break;
- default:
- goto bad;
- }
- if (BYTES_BIG_ENDIAN)
- {
- *str++ = char_value >> 8;
- *str++ = char_value & 0xFF;
- }
- else
- {
- *str++ = char_value & 0xFF;
- *str++ = char_value >> 8;
- }
- }
- value = build_string (str - str_ptr, str_ptr);
+ /* Allocate a new string value. */
+ value = build_string (utf8_len, utf8_ptr);
TREE_TYPE (value) = build_pointer_type (string_type_node);
}
break;