This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] Java: mangling of unicode characters.
Jason Merrill writes:
> Yes, that's what I meant.
OK. So I'm checking this in. Java hackers will have to rebuild their
runtime entirely. Note that it's been tested on x86/alpha/PPC linux.
./A
2001-02-08 Alexandre Petit-Bianco <apbianco@cygnus.com>
* Make-lang.in (JAVA_OBJS): Added java/mangle_name.o
(JVGENMAIN_OBJS): Likewise.
* java-tree.h (append_gpp_mangled_name): New prototype.
* jcf-parse.c (ggc_mark_jcf): Argument now `void *.'
Removed cast calling `ggc_add_root.'
* jvgenmain.c (mangle_obstack): New global, initialized.
(main): Use it.
(do_mangle_class): Constify local `ptr.'
Removed macro `MANGLE_NAME.' Removed cast in `for.' Call
append_gpp_mangled_name and update `count' if necessary.
Use `mangle_obstack.'
* mangle.c (append_unicode_mangled_name): Removed.
(append_gpp_mangled_name): Likewise.
(unicode_mangling_length): Likewise.
(mangle_member_name): Return type set to `void.'
(mangle_field_decl): Don't append `U' in escaped names.
(mangle_method_decl): Likewise.
(mangle_member_name): Just use `append_gpp_mangled_name.'
* mangle_name.c: New file.
Index: Make-lang.in
===================================================================
RCS file: /cvs/gcc/egcs/gcc/java/Make-lang.in,v
retrieving revision 1.50
diff -u -p -r1.50 Make-lang.in
--- Make-lang.in 2001/02/05 05:46:15 1.50
+++ Make-lang.in 2001/02/08 18:59:39
@@ -100,6 +100,7 @@ $(srcdir)/java/keyword.h: $(srcdir)/java
JAVA_OBJS = java/parse.o java/class.o java/decl.o java/expr.o \
java/constants.o java/lang.o java/typeck.o java/except.o java/verify.o \
java/zextract.o java/jcf-io.o java/jcf-parse.o java/mangle.o \
+ java/mangle_name.o \
java/jcf-write.o java/buffer.o java/check-init.o java/jcf-depend.o \
java/jcf-path.o java/xref.o java/boehm.o mkdeps.o
@@ -111,7 +112,7 @@ JVSCAN_OBJS = java/parse-scan.o java/jv-
JCFDUMP_OBJS = java/jcf-dump.o java/jcf-io.o java/jcf-depend.o java/jcf-path.o \
java/zextract.o errors.o version.o mkdeps.o
-JVGENMAIN_OBJS = java/jvgenmain.o
+JVGENMAIN_OBJS = java/jvgenmain.o java/mangle_name.o
# Use loose warnings for this front end.
java-warn =
Index: java-tree.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/java/java-tree.h,v
retrieving revision 1.96
diff -u -p -r1.96 java-tree.h
--- java-tree.h 2001/02/04 22:44:02 1.96
+++ java-tree.h 2001/02/08 18:59:44
@@ -1115,6 +1115,7 @@ extern tree java_mangle_class_field PARA
extern tree java_mangle_class_field_from_string PARAMS ((struct obstack *, char *));
extern tree java_mangle_vtable PARAMS ((struct obstack *, tree));
extern const char *lang_printable_name_wls PARAMS ((tree, int));
+extern void append_gpp_mangled_name PARAMS ((const char *, int));
/* We use ARGS_SIZE_RTX to indicate that gcc/expr.h has been included
to declare `enum expand_modifier'. */
Index: jvgenmain.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/java/jvgenmain.c,v
retrieving revision 1.20
diff -u -p -r1.20 jvgenmain.c
--- jvgenmain.c 2001/02/02 00:15:43 1.20
+++ jvgenmain.c 2001/02/08 18:59:44
@@ -34,7 +34,8 @@ The Free Software Foundation is independ
static char * do_mangle_classname PARAMS ((const char *string));
-struct obstack name_obstack;
+struct obstack name_obstack;
+struct obstack *mangle_obstack = &name_obstack;
void
gcc_obstack_init (obstack)
@@ -92,7 +93,7 @@ main (int argc, const char **argv)
classname = argv[i];
- gcc_obstack_init (&name_obstack);
+ gcc_obstack_init (mangle_obstack);
mangled_classname = do_mangle_classname (classname);
if (i < argc - 1 && strcmp (argv[i + 1], "-") != 0)
@@ -150,30 +151,23 @@ static char *
do_mangle_classname (string)
const char *string;
{
- char *ptr;
+ const char *ptr;
int count = 0;
-#define MANGLE_NAME() \
- { \
- char buffer [128]; \
- sprintf (buffer, "%d", count); \
- obstack_grow (&name_obstack, buffer, strlen (buffer)); \
- obstack_grow (&name_obstack, & ptr [-count], count); \
- count = 0; \
- }
-
- obstack_grow (&name_obstack, "_ZN", 3);
+ obstack_grow (mangle_obstack, "_ZN", 3);
- for (ptr = (char *)string; *ptr; ptr++ )
+ for (ptr = string; *ptr; ptr++ )
{
if (ptr[0] == '.')
{
- MANGLE_NAME ();
+ append_gpp_mangled_name (&ptr [-count], count);
+ count = 0;
}
else
count++;
}
- MANGLE_NAME ();
- obstack_grow0 (&name_obstack, "6class$E", 8);
- return obstack_finish (&name_obstack);
+ append_gpp_mangled_name (&ptr [-count], count);
+ /* Use obstack_grow0 so that the finished name is NUL-terminated. */
+ obstack_grow0 (mangle_obstack, "6class$E", 8);
+ return obstack_finish (mangle_obstack);
}
Index: mangle.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/java/mangle.c,v
retrieving revision 1.13
diff -u -p -r1.13 mangle.c
--- mangle.c 2001/02/04 22:44:03 1.13
+++ mangle.c 2001/02/08 18:59:46
@@ -56,10 +56,7 @@ static void init_mangling PARAMS ((struc
static tree finish_mangling PARAMS ((void));
static void compression_table_add PARAMS ((tree));
-static void append_unicode_mangled_name PARAMS ((const char *, int));
-static void append_gpp_mangled_name PARAMS ((const char *, int));
-static int unicode_mangling_length PARAMS ((const char *, int));
-static int mangle_member_name PARAMS ((tree));
+static void mangle_member_name PARAMS ((tree));
/* We use an incoming obstack, always to be provided to the interface
functions. */
@@ -122,19 +119,14 @@ static void
mangle_field_decl (decl)
tree decl;
{
- tree name = DECL_NAME (decl);
- int field_name_needs_escapes = 0;
-
/* Mangle the name of the this the field belongs to */
mangle_record_type (DECL_CONTEXT (decl), /* from_pointer = */ 0);
/* Mangle the name of the field */
- field_name_needs_escapes = mangle_member_name (name);
+ mangle_member_name (DECL_NAME (decl));
/* Terminate the mangled name */
obstack_1grow (mangle_obstack, 'E');
- if (field_name_needs_escapes)
- obstack_1grow (mangle_obstack, 'U');
}
/* This mangles a method decl, first mangling its name and then all
@@ -146,7 +138,6 @@ mangle_method_decl (mdecl)
{
tree method_name = DECL_NAME (mdecl);
tree arglist;
- int method_name_needs_escapes = 0;
/* Mangle the name of the type that contains mdecl */
mangle_record_type (DECL_CONTEXT (mdecl), /* from_pointer = */ 0);
@@ -166,7 +157,7 @@ mangle_method_decl (mdecl)
obstack_grow (mangle_obstack, "C1", 2);
}
else
- method_name_needs_escapes = mangle_member_name (method_name);
+ mangle_member_name (method_name);
obstack_1grow (mangle_obstack, 'E');
/* We mangled type.methodName. Now onto the arguments. */
@@ -183,32 +174,19 @@ mangle_method_decl (mdecl)
for (arg = arglist; arg != end_params_node; arg = TREE_CHAIN (arg))
mangle_type (TREE_VALUE (arg));
}
-
- /* Terminate the mangled name */
- if (method_name_needs_escapes)
- obstack_1grow (mangle_obstack, 'U');
}
/* This mangles a member name, like a function name or a field
- name. Handle cases were `name' is a C++ keyword. Return a non zero
- value if unicode encoding was required. */
+ name. If `name' is a C++ keyword, a disambiguating character is
+ appended to it. */
-static int
+static void
mangle_member_name (name)
tree name;
{
- const char * name_string = IDENTIFIER_POINTER (name);
- int len = IDENTIFIER_LENGTH (name);
- int to_return = 0;
+ append_gpp_mangled_name (IDENTIFIER_POINTER (name),
+ IDENTIFIER_LENGTH (name));
- if (unicode_mangling_length (name_string, len) > 0)
- {
- append_unicode_mangled_name (name_string, len);
- to_return = 1;
- }
- else
- append_gpp_mangled_name (name_string, len);
-
/* If NAME happens to be a C++ keyword, add `$' or `.' or `_'. */
if (cxx_keyword_p (IDENTIFIER_POINTER (name), IDENTIFIER_LENGTH (name)))
{
@@ -221,102 +199,6 @@ mangle_member_name (name)
obstack_1grow (mangle_obstack, '_');
#endif /* NO_DOT_IN_LABEL */
#endif /* NO_DOLLAR_IN_LABEL */
- }
-
- return to_return;
-}
-
-/* Assuming (NAME, LEN) is a Utf8-encoding string, calculate
- the length of the string as mangled (a la g++) including Unicode escapes.
- If no escapes are needed, return 0. */
-
-static int
-unicode_mangling_length (name, len)
- const char *name;
- int len;
-{
- const unsigned char *ptr;
- const unsigned char *limit = (const unsigned char *)name + len;
- int need_escapes = 0;
- int num_chars = 0;
- int underscores = 0;
- for (ptr = (const unsigned char *) name; ptr < limit; )
- {
- int ch = UTF8_GET(ptr, limit);
- if (ch < 0)
- error ("internal error - invalid Utf8 name");
- if (ch >= '0' && ch <= '9')
- need_escapes += num_chars == 0;
- else if (ch == '_')
- underscores++;
- else if (ch != '$' && (ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z'))
- need_escapes++;
- num_chars++;
- }
- if (need_escapes)
- return num_chars + 4 * (need_escapes + underscores);
- else
- return 0;
-}
-
-/* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string
- appropriately mangled (with Unicode escapes) to OBSTACK. */
-
-static void
-append_unicode_mangled_name (name, len)
- const char *name;
- int len;
-{
- const unsigned char *ptr;
- const unsigned char *limit = (const unsigned char *)name + len;
- for (ptr = (const unsigned char *) name; ptr < limit; )
- {
- int ch = UTF8_GET(ptr, limit);
- int emit_escape;
- if (ch < 0)
- {
- error ("internal error - bad Utf8 string");
- break;
- }
- if (ch >= '0' && ch <= '9')
- emit_escape = (ptr == (const unsigned char *) name);
- else
- emit_escape = (ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z');
- if (emit_escape)
- {
- char buf[6];
- sprintf (buf, "_%04x", ch);
- obstack_grow (mangle_obstack, buf, 5);
- }
- else
- {
- obstack_1grow (mangle_obstack, ch);
- }
- }
-}
-
-/* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string
- appropriately mangled (with Unicode escapes if needed) to OBSTACK. */
-
-static void
-append_gpp_mangled_name (name, len)
- const char *name;
- int len;
-{
- int encoded_len = unicode_mangling_length (name, len);
- int needs_escapes = encoded_len > 0;
- char buf[6];
- if (needs_escapes)
- {
- sprintf (buf, "U%d", encoded_len);
- obstack_grow (mangle_obstack, buf, strlen(buf));
- append_unicode_mangled_name (name, len);
- }
- else
- {
- sprintf (buf, "%d", len);
- obstack_grow (mangle_obstack, buf, strlen(buf));
- obstack_grow (mangle_obstack, name, len);
}
}
Index: mangle_name.c
===================================================================
RCS file: mangle_name.c
diff -N mangle_name.c
--- /dev/null Tue May 5 13:32:27 1998
+++ mangle_name.c Thu Feb 8 10:59:46 2001
@@ -0,0 +1,257 @@
+/* Shared functions related to mangling names for the GNU compiler
+   for the Java(TM) language.
+   Copyright (C) 2001 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.
+
+Java and all Java-based marks are trademarks or registered trademarks
+of Sun Microsystems, Inc. in the United States and other countries.
+The Free Software Foundation is independent of Sun Microsystems, Inc.  */
+
+/* Written by Alexandre Petit-Bianco <apbianco@cygnus.com> */
+
+#include "config.h"
+#include "system.h"
+#include "jcf.h"
+#include "tree.h"
+#include "java-tree.h"
+#include "obstack.h"
+#include "toplev.h"
+
+/* Room for the decimal representation of any `int' length, a sign
+   and the terminating NUL.  */
+#define MANGLE_LENGTH_BUFSIZE 32
+
+#ifndef HAVE_AS_UTF8
+static int plain_mangled_char_p PARAMS ((int));
+static void append_unicode_mangled_name PARAMS ((const char *, int));
+static int unicode_mangling_length PARAMS ((const char *, int));
+#endif
+
+extern struct obstack *mangle_obstack;
+
+/* If the assembler doesn't support UTF8 in symbol names, some
+   characters might need to be escaped.  */
+
+#ifndef HAVE_AS_UTF8
+
+/* Return nonzero if CH can be copied to the mangled name unchanged.
+   `_' and `U' are deliberately excluded: they take part in the `__U'
+   escape introducer and are handled separately by the callers.  */
+
+static int
+plain_mangled_char_p (ch)
+     int ch;
+{
+  return ((ch >= '0' && ch <= '9')
+#ifndef NO_DOLLAR_IN_LABEL
+          || ch == '$'
+#endif
+          || (ch >= 'a' && ch <= 'z')
+          || (ch >= 'A' && ch <= 'Z' && ch != 'U'));
+}
+
+/* Assuming (NAME, LEN) is a Utf8-encoded string, emit the string
+   appropriately mangled (with Unicode escapes if needed) to
+   MANGLE_OBSTACK, preceded by its mangled length in decimal.  Note
+   that `java', `lang' and `Object' are used so frequently that they
+   could be cached.  */
+
+void
+append_gpp_mangled_name (name, len)
+     const char *name;
+     int len;
+{
+  int encoded_len = unicode_mangling_length (name, len);
+  int needs_escapes = encoded_len > 0;
+  char buf[MANGLE_LENGTH_BUFSIZE];
+
+  sprintf (buf, "%d", (needs_escapes ? encoded_len : len));
+  obstack_grow (mangle_obstack, buf, strlen (buf));
+
+  if (needs_escapes)
+    append_unicode_mangled_name (name, len);
+  else
+    obstack_grow (mangle_obstack, name, len);
+}
+
+/* Assuming (NAME, LEN) is a Utf8-encoded string, emit the string
+   appropriately mangled (with Unicode escapes) to MANGLE_OBSTACK.
+   Characters needing an escape are encoded `__UNN_' to `__UNNNN_', in
+   which case `__U' will be mangled `__U_'.  `$' is mangled `$' or
+   __U24_ according to NO_DOLLAR_IN_LABEL.  Every character counted by
+   unicode_mangling_length must be emitted here, and vice versa, or
+   the length prefix would not match the name.  */
+
+static void
+append_unicode_mangled_name (name, len)
+     const char *name;
+     int len;
+{
+  const unsigned char *ptr = (const unsigned char *) name;
+  const unsigned char *limit = ptr + len;
+  int underscores = 0;  /* Length of the run of `_' just emitted.  */
+
+  while (ptr < limit)
+    {
+      int ch = UTF8_GET (ptr, limit);
+
+      /* unicode_mangling_length has already diagnosed the invalid
+         sequence and stopped counting here.  Formatting a negative
+         value below would also overflow BUF.  */
+      if (ch < 0)
+        break;
+
+      if (ch == '_')
+        {
+          underscores++;
+          obstack_1grow (mangle_obstack, ch);
+        }
+      else if (ch == 'U')
+        {
+          /* A literal `__U' is emitted as `__U_' so that it cannot
+             be mistaken for the start of an escape.  Any other `U'
+             is emitted unchanged.  */
+          if (underscores >= 2)
+            obstack_grow (mangle_obstack, "U_", 2);
+          else
+            obstack_1grow (mangle_obstack, ch);
+          underscores = 0;
+        }
+      else if (plain_mangled_char_p (ch))
+        {
+          obstack_1grow (mangle_obstack, ch);
+          underscores = 0;
+        }
+      else
+        {
+          /* `__U', hex digits, `_' and NUL.  At least two digits
+             are emitted, as unicode_mangling_length assumes.
+             NOTE(review): sized generously; UTF8_GET presumably
+             never yields more than 0xffff -- confirm in jcf.h.  */
+          char buf[16];
+
+          sprintf (buf, "__U%02x_", ch);
+          obstack_grow (mangle_obstack, buf, strlen (buf));
+          underscores = 0;
+        }
+    }
+}
+
+/* Assuming (NAME, LEN) is a Utf8-encoded string, calculate the
+   length of the string as mangled (a la g++) including Unicode
+   escapes.  If no escapes are needed, return 0.  The count must
+   mirror append_unicode_mangled_name character for character.  */
+
+static int
+unicode_mangling_length (name, len)
+     const char *name;
+     int len;
+{
+  const unsigned char *ptr = (const unsigned char *) name;
+  const unsigned char *limit = ptr + len;
+  int need_escapes = 0; /* Whether we need an escape or not.  */
+  int num_chars = 0;    /* Number of characters in the mangled name.  */
+  int underscores = 0;  /* Length of the current run of `_'.  */
+
+  while (ptr < limit)
+    {
+      int ch = UTF8_GET (ptr, limit);
+
+      if (ch < 0)
+        {
+          /* Stop here; append_unicode_mangled_name stops at the
+             same place.  */
+          error ("internal error - invalid Utf8 name");
+          break;
+        }
+
+      if (ch == '_')
+        {
+          underscores++;
+          num_chars++;
+        }
+      else if (ch == 'U')
+        {
+          if (underscores >= 2)
+            {
+              /* `__U' is emitted as `__U_'.  */
+              num_chars += 2;
+              need_escapes = 1;
+            }
+          else
+            num_chars++;
+          underscores = 0;
+        }
+      else if (plain_mangled_char_p (ch))
+        {
+          num_chars++;
+          underscores = 0;
+        }
+      else
+        {
+          /* `__U', two to four hex digits and a closing `_'.  */
+          int encoding_length = 2;
+
+          if (ch > 0xff)
+            encoding_length++;
+          if (ch > 0xfff)
+            encoding_length++;
+
+          num_chars += (4 + encoding_length);
+          need_escapes = 1;
+          underscores = 0;
+        }
+    }
+
+  return need_escapes ? num_chars : 0;
+}
+
+#else
+
+/* The assembler supports UTF8, we don't use escapes.  Mangling is
+   simply <N>NAME.  <N> is the number of UTF8 encoded characters that
+   are found in NAME.  Note that `java', `lang' and `Object' are used
+   so frequently that they could be cached.  */
+
+void
+append_gpp_mangled_name (name, len)
+     const char *name;
+     int len;
+{
+  const unsigned char *ptr;
+  const unsigned char *limit = (const unsigned char *) name + len;
+  int encoded_len;
+  char buf[MANGLE_LENGTH_BUFSIZE];
+
+  /* Compute the length of the string we wish to mangle.  */
+  for (encoded_len = 0, ptr = (const unsigned char *) name;
+       ptr < limit; encoded_len++)
+    {
+      int ch = UTF8_GET (ptr, limit);
+
+      if (ch < 0)
+        error ("internal error - invalid Utf8 name");
+    }
+
+  sprintf (buf, "%d", encoded_len);
+  obstack_grow (mangle_obstack, buf, strlen (buf));
+  obstack_grow (mangle_obstack, name, len);
+}
+
+#endif /* HAVE_AS_UTF8 */