Unicode mangling (was Re: [PATCH] Java: New C++ ABI compatibility changes.)

Alexandre Petit-Bianco apbianco@cygnus.com
Tue Jan 16 16:08:00 GMT 2001


Jason Merrill writes:

> I meant _NNNN and _LNNNNNNNN, actually, with a literal _ encoded as __.
> Only the last would actually require a change in the Java frontend.

OK. I have a patch for this. We now mangle something like `<clinit>'
to _ZN1f16_003cclinit_003eEv. A hypothetical `<cl_init>' would be
mangled `_ZN1f18_003ccl__init_003eEv'.

> What was that used for?

I honestly don't know. Maybe Per remembers.

>>> With these fixes, I think the current scheme is OK.  But for targets
>>> with 8-bit clean binutils, I think it makes a lot of sense to just
>>> use the UTF8 encoding in the symbol.
>>
>> That's fine too, but requires coordinated changes in binutils.
>
> Does it?  Having output filters on nm and such to convert from UTF8
> to the current locale's encoding would be good, but not strictly
> necessary.

I'm not quite sure about that -- it's true we don't look at
UTF8-encoded symbols that often (though that depends on which foreign
language you code in), but I would hate having garbled output. Maybe
it's just me.

Anyway, I'll check this patch in soon. We can always modify the mangling
later to address the case of targets with 8-bit clean binutils (how do
we configure for that?).

./A

2001-01-16  Alexandre Petit-Bianco  <apbianco@cygnus.com>

	* mangle.c (mangle_member_name): Return void.
	(mangle_field_decl): Don't append `U' in escaped names.
	(mangle_method_decl): Likewise.
	(mangle_member_name): Just use append_gpp_mangled_name.
	(unicode_mangling_length): Fixed leading comment. Consider
	mangling `$'. Account for possible mangling of `_'.
	(append_unicode_mangled_name): Consider mangling `$'. Account for
	possible mangling of `_'.
	(append_gpp_mangled_name): Fixed leading comment. Don't prefix
	length with `U' when escapes are necessary.

Index: mangle.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/java/mangle.c,v
retrieving revision 1.11
diff -u -p -r1.11 mangle.c
--- mangle.c	2001/01/15 08:01:22	1.11
+++ mangle.c	2001/01/17 00:04:57
@@ -59,7 +59,7 @@ static void compression_table_add PARAMS
 static void append_unicode_mangled_name PARAMS ((const char *, int));
 static void append_gpp_mangled_name PARAMS ((const char *, int));
 static int  unicode_mangling_length PARAMS ((const char *, int));
-static int  mangle_member_name PARAMS ((tree));
+static void mangle_member_name PARAMS ((tree));
 
 /* We use an incoming obstack, always to be provided to the interface
    functions. */
@@ -123,19 +123,14 @@ static void
 mangle_field_decl (decl)
      tree decl;
 {
-  tree name = DECL_NAME (decl);
-  int field_name_needs_escapes = 0;
-
   /* Mangle the name of the this the field belongs to */
   mangle_record_type (DECL_CONTEXT (decl), /* from_pointer = */ 0);
   
   /* Mangle the name of the field */
-  field_name_needs_escapes = mangle_member_name (name);
+  mangle_member_name (DECL_NAME (decl));
 
   /* Terminate the mangled name */
   obstack_1grow (mangle_obstack, 'E');
-  if (field_name_needs_escapes)
-    obstack_1grow (mangle_obstack, 'U');
 }
 
 /* This mangles a method decl, first mangling its name and then all
@@ -147,7 +142,6 @@ mangle_method_decl (mdecl)
 {
   tree method_name = DECL_NAME (mdecl);
   tree arglist;
-  int method_name_needs_escapes = 0;
 
   /* Mangle the name of the type that contains mdecl */
   mangle_record_type (DECL_CONTEXT (mdecl), /* from_pointer = */ 0);
@@ -172,7 +166,7 @@ mangle_method_decl (mdecl)
 	obstack_grow (mangle_obstack, "C1", 2);
     }
   else
-    method_name_needs_escapes = mangle_member_name (method_name);
+    mangle_member_name (method_name);
   obstack_1grow (mangle_obstack, 'E');
 
   /* We mangled type.methodName. Now onto the arguments. */
@@ -189,31 +183,18 @@ mangle_method_decl (mdecl)
       for (arg = arglist; arg != end_params_node;  arg = TREE_CHAIN (arg))
 	mangle_type (TREE_VALUE (arg));
     }
-
-  /* Terminate the mangled name */
-  if (method_name_needs_escapes)
-    obstack_1grow (mangle_obstack, 'U');
 }
 
 /* This mangles a member name, like a function name or a field
    name. Handle cases were `name' is a C++ keyword.  Return a non zero
    value if unicode encoding was required.  */
 
-static int
+static void
 mangle_member_name (name)
      tree name;
 {
-  const char * name_string = IDENTIFIER_POINTER (name);
-  int len = IDENTIFIER_LENGTH (name);
-  int to_return = 0;
-
-  if (unicode_mangling_length (name_string, len) > 0)
-    {
-      append_unicode_mangled_name (name_string, len);
-      to_return = 1;
-    }
-  else
-    append_gpp_mangled_name (name_string, len);
+  append_gpp_mangled_name (IDENTIFIER_POINTER (name),
+			   IDENTIFIER_LENGTH (name));
 
   /* If NAME happens to be a C++ keyword, add `$' or `.' or `_'. */
   if (cxx_keyword_p (IDENTIFIER_POINTER (name), IDENTIFIER_LENGTH (name)))
@@ -228,13 +209,11 @@ mangle_member_name (name)
 #endif /* NO_DOT_IN_LABEL */
 #endif /* NO_DOLLAR_IN_LABEL */
     }
-
-  return to_return;
 }
 
-/* Assuming (NAME, LEN) is a Utf8-encoding string, calculate
-   the length of the string as mangled (a la g++) including Unicode escapes.
-   If no escapes are needed, return 0. */
+/* Assuming (NAME, LEN) is a Utf8-encoding string, calculate the
+   length of the string as mangled (a la g++) including Unicode
+   escapes.  If no escapes are needed, return 0.  */
 
 static int
 unicode_mangling_length (name, len)
@@ -255,18 +234,29 @@ unicode_mangling_length (name, len)
 	need_escapes += num_chars == 0;
       else if (ch == '_')
 	underscores++;
-      else if (ch != '$' && (ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z'))
+      else if (
+#ifdef NO_DOLLAR_IN_LABEL
+	       ch != '$' &&
+#endif
+	       (ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z'))
 	need_escapes++;
       num_chars++;
     }
+  /* If we need escapes, compute the length of the mangled symbol
+     string. Mangling a character that needs an escape adds 4 ASCII
+     characters to the string, and in this case, processing a `_' adds
+     one more. */
   if (need_escapes)
-    return num_chars + 4 * (need_escapes + underscores);
+    return num_chars + 4 * need_escapes + underscores;
   else
     return 0;
 }
 
 /* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string
-   appropriately mangled (with Unicode escapes) to OBSTACK. */
+   appropriately mangled (with Unicode escapes) to MANGLE_OBSTACK.
+   Characters needing an escape are encoded _UNNNN, in which case `_'
+   will be mangled `__'. `$' is mangled `$' or _U0024 according to
+   NO_DOLLAR_IN_LABEL.  */
 
 static void
 append_unicode_mangled_name (name, len)
@@ -287,22 +277,31 @@ append_unicode_mangled_name (name, len)
       if (ch >= '0' && ch <= '9')
 	emit_escape = (ptr == (const unsigned char *) name);
       else
-	emit_escape = (ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z');
+	emit_escape = 
+	  (
+#ifdef NO_DOLLAR_IN_LABEL
+	   ch != '$' &&
+#endif
+	   (ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z'));
       if (emit_escape)
 	{
-	  char buf[6];
-	  sprintf (buf, "_%04x", ch);
-	  obstack_grow (mangle_obstack, buf, 5);
+	  if (ch == '_')
+	    obstack_grow (mangle_obstack, "__", 2);
+	  else
+	    {
+	      char buf[6];
+	      sprintf (buf, "_%04x", ch);
+	      obstack_grow (mangle_obstack, buf, 5);
+	    }
 	}
       else
-	{
-	  obstack_1grow (mangle_obstack, ch);
-	}
+	obstack_1grow (mangle_obstack, ch);
     }
 }
 
 /* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string
-   appropriately mangled (with Unicode escapes if needed) to OBSTACK. */
+   appropriately mangled (with Unicode escapes if needed) to
+   MANGLE_OBSTACK.  */
 
 static void
 append_gpp_mangled_name (name, len)
@@ -312,18 +311,14 @@ append_gpp_mangled_name (name, len)
   int encoded_len = unicode_mangling_length (name, len);
   int needs_escapes = encoded_len > 0;
   char buf[6];
+  
+  sprintf (buf, "%d", (needs_escapes ? encoded_len : len));
+  obstack_grow (mangle_obstack, buf, strlen (buf));
+  
   if (needs_escapes)
-    {
-      sprintf (buf, "U%d", encoded_len);
-      obstack_grow (mangle_obstack, buf, strlen(buf));
-      append_unicode_mangled_name (name, len);
-    }
+    append_unicode_mangled_name (name, len);
   else
-    {
-      sprintf (buf, "%d", len);
-      obstack_grow (mangle_obstack, buf, strlen(buf));
-      obstack_grow (mangle_obstack, name, len);
-    }
+    obstack_grow (mangle_obstack, name, len);
 }
 
 /* Append the mangled name of TYPE onto OBSTACK.  */


More information about the Java mailing list