Unicode mangling (was Re: [PATCH] Java: New C++ ABI compatibility changes.)
Alexandre Petit-Bianco
apbianco@cygnus.com
Tue Jan 16 16:08:00 GMT 2001
Jason Merrill writes:
> I meant _NNNN and _LNNNNNNNN, actually, with a literal _ encoded as __.
> Only the last would actually require a change in the Java frontend.
OK. I have a patch for this. We now mangle something like `<clinit>'
to `_ZN1f16_003cclinit_003eEv'. A hypothetical `<cl_nit>' would be
mangled `_ZN1f17_003ccl__nit_003eEv'.
> What was that used for?
I honestly don't know. Maybe Per remembers.
>>> With these fixes, I think the current scheme is OK. But for targets
>>> with 8-bit clean binutils, I think it makes a lot of sense to just
>>> use the UTF8 encoding in the symbol.
>>
>> That's fine too, but requires coordinated changes in binutils.
>
> Does it? Having output filters on nm and such to convert from UTF8
> to the current locale's encoding would be good, but not strictly
> necessary.
I'm not quite sure about that -- it's true we don't look at UTF8
encoded symbols that often (though that depends on which foreign
language you code in), but I would hate having garbled output. Maybe
it's just me.
Anyway, I'll check this patch in soon. We can always modify mangling
later to address the case of targets with 8-bit clean binutils (how do
we configure for that?).
./A
2001-01-16 Alexandre Petit-Bianco <apbianco@cygnus.com>
* mangle.c (mangle_member_name): Return void.
(mangle_field_decl): Don't append `U' in escaped names.
(mangle_method_decl): Likewise.
(mangle_member_name): Just use append_gpp_mangled_name.
(unicode_mangling_length): Fixed leading comment. Consider
mangling `$'. Account for possible mangling of `_'.
(append_unicode_mangled_name): Consider mangling `$'. Account for
possible mangling of `_'.
(append_gpp_mangled_name): Fixed leading comment. Don't prefix
length with `U' when escapes are necessary.
Index: mangle.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/java/mangle.c,v
retrieving revision 1.11
diff -u -p -r1.11 mangle.c
--- mangle.c 2001/01/15 08:01:22 1.11
+++ mangle.c 2001/01/17 00:04:57
@@ -59,7 +59,7 @@ static void compression_table_add PARAMS
static void append_unicode_mangled_name PARAMS ((const char *, int));
static void append_gpp_mangled_name PARAMS ((const char *, int));
static int unicode_mangling_length PARAMS ((const char *, int));
-static int mangle_member_name PARAMS ((tree));
+static void mangle_member_name PARAMS ((tree));
/* We use an incoming obstack, always to be provided to the interface
functions. */
@@ -123,19 +123,14 @@ static void
mangle_field_decl (decl)
tree decl;
{
- tree name = DECL_NAME (decl);
- int field_name_needs_escapes = 0;
-
/* Mangle the name of the this the field belongs to */
mangle_record_type (DECL_CONTEXT (decl), /* from_pointer = */ 0);
/* Mangle the name of the field */
- field_name_needs_escapes = mangle_member_name (name);
+ mangle_member_name (DECL_NAME (decl));
/* Terminate the mangled name */
obstack_1grow (mangle_obstack, 'E');
- if (field_name_needs_escapes)
- obstack_1grow (mangle_obstack, 'U');
}
/* This mangles a method decl, first mangling its name and then all
@@ -147,7 +142,6 @@ mangle_method_decl (mdecl)
{
tree method_name = DECL_NAME (mdecl);
tree arglist;
- int method_name_needs_escapes = 0;
/* Mangle the name of the type that contains mdecl */
mangle_record_type (DECL_CONTEXT (mdecl), /* from_pointer = */ 0);
@@ -172,7 +166,7 @@ mangle_method_decl (mdecl)
obstack_grow (mangle_obstack, "C1", 2);
}
else
- method_name_needs_escapes = mangle_member_name (method_name);
+ mangle_member_name (method_name);
obstack_1grow (mangle_obstack, 'E');
/* We mangled type.methodName. Now onto the arguments. */
@@ -189,31 +183,18 @@ mangle_method_decl (mdecl)
for (arg = arglist; arg != end_params_node; arg = TREE_CHAIN (arg))
mangle_type (TREE_VALUE (arg));
}
-
- /* Terminate the mangled name */
- if (method_name_needs_escapes)
- obstack_1grow (mangle_obstack, 'U');
}
/* This mangles a member name, like a function name or a field
name. Handle cases were `name' is a C++ keyword. Return a non zero
value if unicode encoding was required. */
-static int
+static void
mangle_member_name (name)
tree name;
{
- const char * name_string = IDENTIFIER_POINTER (name);
- int len = IDENTIFIER_LENGTH (name);
- int to_return = 0;
-
- if (unicode_mangling_length (name_string, len) > 0)
- {
- append_unicode_mangled_name (name_string, len);
- to_return = 1;
- }
- else
- append_gpp_mangled_name (name_string, len);
+ append_gpp_mangled_name (IDENTIFIER_POINTER (name),
+ IDENTIFIER_LENGTH (name));
/* If NAME happens to be a C++ keyword, add `$' or `.' or `_'. */
if (cxx_keyword_p (IDENTIFIER_POINTER (name), IDENTIFIER_LENGTH (name)))
@@ -228,13 +209,11 @@ mangle_member_name (name)
#endif /* NO_DOT_IN_LABEL */
#endif /* NO_DOLLAR_IN_LABEL */
}
-
- return to_return;
}
-/* Assuming (NAME, LEN) is a Utf8-encoding string, calculate
- the length of the string as mangled (a la g++) including Unicode escapes.
- If no escapes are needed, return 0. */
+/* Assuming (NAME, LEN) is a Utf8-encoding string, calculate the
+ length of the string as mangled (a la g++) including Unicode
+ escapes. If no escapes are needed, return 0. */
static int
unicode_mangling_length (name, len)
@@ -255,18 +234,29 @@ unicode_mangling_length (name, len)
need_escapes += num_chars == 0;
else if (ch == '_')
underscores++;
- else if (ch != '$' && (ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z'))
+ else if (
+#ifdef NO_DOLLAR_IN_LABEL
+ ch != '$' &&
+#endif
+ (ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z'))
need_escapes++;
num_chars++;
}
+ /* If we need escapes, compute the length of the mangled symbol
+ string. Mangling a character that needs an escape adds 4 ASCII
+ characters to the string, and in this case, processing a `_' adds
+ one more. */
if (need_escapes)
- return num_chars + 4 * (need_escapes + underscores);
+ return num_chars + 4 * need_escapes + underscores;
else
return 0;
}
/* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string
- appropriately mangled (with Unicode escapes) to OBSTACK. */
+ appropriately mangled (with Unicode escapes) to MANGLE_OBSTACK.
+ Characters needing an escape are encoded _UNNNN, in which case `_'
+ will be mangled `__'. `$' is mangled `$' or _U0024 according to
+ NO_DOLLAR_IN_LABEL. */
static void
append_unicode_mangled_name (name, len)
@@ -287,22 +277,31 @@ append_unicode_mangled_name (name, len)
if (ch >= '0' && ch <= '9')
emit_escape = (ptr == (const unsigned char *) name);
else
- emit_escape = (ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z');
+ emit_escape =
+ (
+#ifdef NO_DOLLAR_IN_LABEL
+ ch != '$' &&
+#endif
+ (ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z'));
if (emit_escape)
{
- char buf[6];
- sprintf (buf, "_%04x", ch);
- obstack_grow (mangle_obstack, buf, 5);
+ if (ch == '_')
+ obstack_grow (mangle_obstack, "__", 2);
+ else
+ {
+ char buf[6];
+ sprintf (buf, "_%04x", ch);
+ obstack_grow (mangle_obstack, buf, 5);
+ }
}
else
- {
- obstack_1grow (mangle_obstack, ch);
- }
+ obstack_1grow (mangle_obstack, ch);
}
}
/* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string
- appropriately mangled (with Unicode escapes if needed) to OBSTACK. */
+ appropriately mangled (with Unicode escapes if needed) to
+ MANGLE_OBSTACK. */
static void
append_gpp_mangled_name (name, len)
@@ -312,18 +311,14 @@ append_gpp_mangled_name (name, len)
int encoded_len = unicode_mangling_length (name, len);
int needs_escapes = encoded_len > 0;
char buf[6];
+
+ sprintf (buf, "%d", (needs_escapes ? encoded_len : len));
+ obstack_grow (mangle_obstack, buf, strlen (buf));
+
if (needs_escapes)
- {
- sprintf (buf, "U%d", encoded_len);
- obstack_grow (mangle_obstack, buf, strlen(buf));
- append_unicode_mangled_name (name, len);
- }
+ append_unicode_mangled_name (name, len);
else
- {
- sprintf (buf, "%d", len);
- obstack_grow (mangle_obstack, buf, strlen(buf));
- obstack_grow (mangle_obstack, name, len);
- }
+ obstack_grow (mangle_obstack, name, len);
}
/* Append the mangled name of TYPE onto OBSTACK. */
More information about the Java
mailing list