This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

[PATCH] Java: mangling of unicode characters.



I came up with this patch, which is still incomplete since it would
require support in the C++ compiler to mangle `$' when
NO_DOLLAR_IN_LABEL isn't defined (which appears to be the case on
x86/linux, for example). For now, we still mangle `class$' as `6class$'
instead of `11class__U24_'. Note that this patch defines a new file so
that we can link jvgenmain against a limited set of the mangling
functions (append_gpp_mangled_name and some other statics).

This complies with what was defined here:

  http://gcc.gnu.org/ml/gcc-patches/2001-01/msg01510.html

./A

2001-02-04  Alexandre Petit-Bianco  <apbianco@cygnus.com>

	* Make-lang.in (JAVA_OBJS): Added java/mangle_name.o
	(JVGENMAIN_OBJS): Likewise.
	* java-tree.h (append_gpp_mangled_name): New prototype.	
	* jcf-parse.c (ggc_mark_jcf): Argument now `void *.'
	Removed cast calling `ggc_add_root.'
	* jvgenmain.c (mangle_obstack): New global, initialized.
	(main): Use it.
	(do_mangle_classname): Constify local `ptr.'
	Removed macro `MANGLE_NAME.' Removed cast in `for.' Call
	append_gpp_mangled_name and update `count' if necessary.
	Use `mangle_obstack.'
	* mangle.c (append_unicode_mangled_name): Removed.
	(append_gpp_mangled_name): Likewise.
	(unicode_mangling_length): Likewise.
	(mangle_member_name): Return void.
	(mangle_field_decl): Don't append `U' in escaped names.
	(mangle_method_decl): Likewise.
	(mangle_member_name): Just use append_gpp_mangled_name.
	* mangle_name.c: New file.

Index: Make-lang.in
===================================================================
RCS file: /cvs/gcc/egcs/gcc/java/Make-lang.in,v
retrieving revision 1.49
diff -u -p -r1.49 Make-lang.in
--- Make-lang.in	2001/01/28 01:50:21	1.49
+++ Make-lang.in	2001/02/04 20:21:11
@@ -100,6 +100,7 @@ $(srcdir)/java/keyword.h: $(srcdir)/java
 JAVA_OBJS = java/parse.o java/class.o java/decl.o java/expr.o \
   java/constants.o java/lang.o java/typeck.o java/except.o java/verify.o \
   java/zextract.o java/jcf-io.o java/jcf-parse.o java/mangle.o \
+  java/mangle_name.o \
   java/jcf-write.o java/buffer.o java/check-init.o java/jcf-depend.o \
   java/jcf-path.o java/xref.o java/boehm.o mkdeps.o
 
@@ -111,7 +112,7 @@ JVSCAN_OBJS = java/parse-scan.o java/jv-
 JCFDUMP_OBJS = java/jcf-dump.o java/jcf-io.o java/jcf-depend.o java/jcf-path.o \
 		java/zextract.o errors.o version.o mkdeps.o
 
-JVGENMAIN_OBJS = java/jvgenmain.o
+JVGENMAIN_OBJS = java/jvgenmain.o java/mangle_name.o
 
 # Use loose warnings for this front end.
 java-warn =
Index: java-tree.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/java/java-tree.h,v
retrieving revision 1.95
diff -u -p -r1.95 java-tree.h
--- java-tree.h	2001/01/30 00:37:21	1.95
+++ java-tree.h	2001/02/04 20:21:15
@@ -1115,6 +1115,7 @@ extern tree java_mangle_class_field PARA
 extern tree java_mangle_class_field_from_string PARAMS ((struct obstack *, char *));
 extern tree java_mangle_vtable PARAMS ((struct obstack *, tree));
 extern const char *lang_printable_name_wls PARAMS ((tree, int));
+extern void append_gpp_mangled_name PARAMS ((const char *, int));
 
 /* We use ARGS_SIZE_RTX to indicate that gcc/expr.h has been included
    to declare `enum expand_modifier'. */
Index: jcf-parse.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/java/jcf-parse.c,v
retrieving revision 1.74
diff -u -p -r1.74 jcf-parse.c
--- jcf-parse.c	2001/02/01 10:35:50	1.74
+++ jcf-parse.c	2001/02/04 20:21:18
@@ -95,14 +95,14 @@ static int jcf_figure_file_type PARAMS (
 static void parse_class_file PARAMS ((void));
 static void set_source_filename PARAMS ((JCF *, int));
 static int predefined_filename_p PARAMS ((tree));
-static void ggc_mark_jcf PARAMS ((void**));
+static void ggc_mark_jcf PARAMS ((void*));
 
 /* Mark (for garbage collection) all the tree nodes that are
    referenced from JCF's constant pool table. */
 
 static void
 ggc_mark_jcf (elt)
-     void **elt;
+     void *elt;
 {
   JCF *jcf = *(JCF**) elt;
   if (jcf != NULL)
@@ -1135,5 +1135,5 @@ init_jcf_parse ()
   /* Register roots with the garbage collector.  */
   ggc_add_tree_root (parse_roots, sizeof (parse_roots) / sizeof(tree));
 
-  ggc_add_root (&current_jcf, 1, sizeof (JCF), (void (*)(void *))ggc_mark_jcf);
+  ggc_add_root (&current_jcf, 1, sizeof (JCF), ggc_mark_jcf);
 }
Index: jvgenmain.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/java/jvgenmain.c,v
retrieving revision 1.20
diff -u -p -r1.20 jvgenmain.c
--- jvgenmain.c	2001/02/02 00:15:43	1.20
+++ jvgenmain.c	2001/02/04 20:21:18
@@ -34,7 +34,8 @@ The Free Software Foundation is independ
 
 static char * do_mangle_classname PARAMS ((const char *string));
 
-struct obstack name_obstack;
+struct obstack  name_obstack;
+struct obstack *mangle_obstack = &name_obstack;
 
 void
 gcc_obstack_init (obstack)
@@ -92,7 +93,7 @@ main (int argc, const char **argv)
 
   classname = argv[i];
 
-  gcc_obstack_init (&name_obstack);
+  gcc_obstack_init (mangle_obstack);
   mangled_classname = do_mangle_classname (classname);
 
   if (i < argc - 1 && strcmp (argv[i + 1], "-") != 0)
@@ -150,30 +151,22 @@ static char *
 do_mangle_classname (string)
      const char *string;
 {
-  char *ptr;
+  const char *ptr;
   int count = 0;
 
-#define MANGLE_NAME()						\
-  {								\
-    char buffer [128];						\
-    sprintf (buffer, "%d", count);				\
-    obstack_grow (&name_obstack, buffer, strlen (buffer));	\
-    obstack_grow (&name_obstack, & ptr [-count], count);	\
-    count = 0;							\
-  }
-
   obstack_grow (&name_obstack, "_ZN", 3);
 
-  for (ptr = (char *)string; *ptr; ptr++ )
+  for (ptr = string; *ptr; ptr++ )
     {
       if (ptr[0] == '.')
 	{
-	  MANGLE_NAME ();
+	  append_gpp_mangled_name (&ptr [-count], count);
+	  count = 0;
 	}
       else
 	count++;
     }
-  MANGLE_NAME ();
-  obstack_grow0 (&name_obstack, "6class$E", 8);
-  return obstack_finish (&name_obstack);
+  append_gpp_mangled_name (&ptr [-count], count);
+  obstack_grow (mangle_obstack, "6class$E", 8);
+  return obstack_finish (mangle_obstack);
 }
Index: mangle.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/java/mangle.c,v
retrieving revision 1.12
diff -u -p -r1.12 mangle.c
--- mangle.c	2001/01/25 22:25:21	1.12
+++ mangle.c	2001/02/04 20:21:20
@@ -56,10 +56,7 @@ static void init_mangling PARAMS ((struc
 static tree finish_mangling PARAMS ((void));
 static void compression_table_add PARAMS ((tree));
 
-static void append_unicode_mangled_name PARAMS ((const char *, int));
-static void append_gpp_mangled_name PARAMS ((const char *, int));
-static int  unicode_mangling_length PARAMS ((const char *, int));
-static int  mangle_member_name PARAMS ((tree));
+static void mangle_member_name PARAMS ((tree));
 
 /* We use an incoming obstack, always to be provided to the interface
    functions. */
@@ -123,19 +120,14 @@ static void
 mangle_field_decl (decl)
      tree decl;
 {
-  tree name = DECL_NAME (decl);
-  int field_name_needs_escapes = 0;
-
   /* Mangle the name of the this the field belongs to */
   mangle_record_type (DECL_CONTEXT (decl), /* from_pointer = */ 0);
   
   /* Mangle the name of the field */
-  field_name_needs_escapes = mangle_member_name (name);
+  mangle_member_name (DECL_NAME (decl));
 
   /* Terminate the mangled name */
   obstack_1grow (mangle_obstack, 'E');
-  if (field_name_needs_escapes)
-    obstack_1grow (mangle_obstack, 'U');
 }
 
 /* This mangles a method decl, first mangling its name and then all
@@ -147,7 +139,6 @@ mangle_method_decl (mdecl)
 {
   tree method_name = DECL_NAME (mdecl);
   tree arglist;
-  int method_name_needs_escapes = 0;
 
   /* Mangle the name of the type that contains mdecl */
   mangle_record_type (DECL_CONTEXT (mdecl), /* from_pointer = */ 0);
@@ -167,7 +158,7 @@ mangle_method_decl (mdecl)
 	obstack_grow (mangle_obstack, "C1", 2);
     }
   else
-    method_name_needs_escapes = mangle_member_name (method_name);
+    mangle_member_name (method_name);
   obstack_1grow (mangle_obstack, 'E');
 
   /* We mangled type.methodName. Now onto the arguments. */
@@ -184,32 +175,19 @@ mangle_method_decl (mdecl)
       for (arg = arglist; arg != end_params_node;  arg = TREE_CHAIN (arg))
 	mangle_type (TREE_VALUE (arg));
     }
-
-  /* Terminate the mangled name */
-  if (method_name_needs_escapes)
-    obstack_1grow (mangle_obstack, 'U');
 }
 
 /* This mangles a member name, like a function name or a field
    name. Handle cases were `name' is a C++ keyword.  Return a non zero
    value if unicode encoding was required.  */
 
-static int
+static void
 mangle_member_name (name)
      tree name;
 {
-  const char * name_string = IDENTIFIER_POINTER (name);
-  int len = IDENTIFIER_LENGTH (name);
-  int to_return = 0;
+  append_gpp_mangled_name (IDENTIFIER_POINTER (name),
+			   IDENTIFIER_LENGTH (name));
 
-  if (unicode_mangling_length (name_string, len) > 0)
-    {
-      append_unicode_mangled_name (name_string, len);
-      to_return = 1;
-    }
-  else
-    append_gpp_mangled_name (name_string, len);
-
   /* If NAME happens to be a C++ keyword, add `$' or `.' or `_'. */
   if (cxx_keyword_p (IDENTIFIER_POINTER (name), IDENTIFIER_LENGTH (name)))
     {
@@ -222,102 +200,6 @@ mangle_member_name (name)
       obstack_1grow (mangle_obstack, '_');
 #endif /* NO_DOT_IN_LABEL */
 #endif /* NO_DOLLAR_IN_LABEL */
-    }
-
-  return to_return;
-}
-
-/* Assuming (NAME, LEN) is a Utf8-encoding string, calculate
-   the length of the string as mangled (a la g++) including Unicode escapes.
-   If no escapes are needed, return 0. */
-
-static int
-unicode_mangling_length (name, len)
-     const char *name; 
-     int len; 
-{
-  const unsigned char *ptr;
-  const unsigned char *limit = (const unsigned char *)name + len;
-  int need_escapes = 0;
-  int num_chars = 0;
-  int underscores = 0;
-  for (ptr = (const unsigned char *) name;  ptr < limit;  )
-    {
-      int ch = UTF8_GET(ptr, limit);
-      if (ch < 0)
-	error ("internal error - invalid Utf8 name");
-      if (ch >= '0' && ch <= '9')
-	need_escapes += num_chars == 0;
-      else if (ch == '_')
-	underscores++;
-      else if (ch != '$' && (ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z'))
-	need_escapes++;
-      num_chars++;
-    }
-  if (need_escapes)
-    return num_chars + 4 * (need_escapes + underscores);
-  else
-    return 0;
-}
-
-/* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string
-   appropriately mangled (with Unicode escapes) to OBSTACK. */
-
-static void
-append_unicode_mangled_name (name, len)
-     const char *name;
-     int len;
-{
-  const unsigned char *ptr;
-  const unsigned char *limit = (const unsigned char *)name + len;
-  for (ptr = (const unsigned char *) name;  ptr < limit;  )
-    {
-      int ch = UTF8_GET(ptr, limit);
-      int emit_escape;
-      if (ch < 0)
-	{
-	  error ("internal error - bad Utf8 string");
-	  break;
-	}
-      if (ch >= '0' && ch <= '9')
-	emit_escape = (ptr == (const unsigned char *) name);
-      else
-	emit_escape = (ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z');
-      if (emit_escape)
-	{
-	  char buf[6];
-	  sprintf (buf, "_%04x", ch);
-	  obstack_grow (mangle_obstack, buf, 5);
-	}
-      else
-	{
-	  obstack_1grow (mangle_obstack, ch);
-	}
-    }
-}
-
-/* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string
-   appropriately mangled (with Unicode escapes if needed) to OBSTACK. */
-
-static void
-append_gpp_mangled_name (name, len)
-     const char *name;
-     int len;
-{
-  int encoded_len = unicode_mangling_length (name, len);
-  int needs_escapes = encoded_len > 0;
-  char buf[6];
-  if (needs_escapes)
-    {
-      sprintf (buf, "U%d", encoded_len);
-      obstack_grow (mangle_obstack, buf, strlen(buf));
-      append_unicode_mangled_name (name, len);
-    }
-  else
-    {
-      sprintf (buf, "%d", len);
-      obstack_grow (mangle_obstack, buf, strlen(buf));
-      obstack_grow (mangle_obstack, name, len);
     }
 }
 
Index: mangle_name.c
===================================================================
RCS file: mangle_name.c
diff -N mangle_name.c
--- /dev/null	Tue May  5 13:32:27 1998
+++ mangle_name.c	Sun Feb  4 12:21:20 2001
@@ -0,0 +1,221 @@
+/* Shared functions related to mangling class names for the GNU compiler
+   for the Java(TM) language.
+   Copyright (C) 2001 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. 
+
+Java and all Java-based marks are trademarks or registered trademarks
+of Sun Microsystems, Inc. in the United States and other countries.
+The Free Software Foundation is independent of Sun Microsystems, Inc.  */
+
+/* Written by Alexandre Petit-Bianco <apbianco@cygnus.com> */
+
+#include "config.h"
+#include "system.h"
+#include "jcf.h"
+#include "tree.h"
+#include "java-tree.h"
+#include "obstack.h"
+#include "toplev.h"
+#include "obstack.h"
+
+static void append_unicode_mangled_name PARAMS ((const char *, int));
+#ifndef HAVE_AS_UTF8
+static int  unicode_mangling_length PARAMS ((const char *, int));
+#endif
+
+extern struct obstack *mangle_obstack;
+
+/* If the assembler doesn't support UTF8 in symbol names, some
+   characters might need to be escaped.  */
+
+#ifndef HAVE_AS_UTF8
+
+/* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string
+   appropriately mangled (with Unicode escapes if needed) to
+   MANGLE_OBSTACK.  */
+
+void
+append_gpp_mangled_name (name, len)
+     const char *name;
+     int len;
+{
+  int encoded_len = unicode_mangling_length (name, len);
+  int needs_escapes = encoded_len > 0;
+  char buf[6];
+
+  sprintf (buf, "%d", (needs_escapes ? encoded_len : len));
+  obstack_grow (mangle_obstack, buf, strlen (buf));
+
+  if (needs_escapes)
+    append_unicode_mangled_name (name, len);
+  else
+    obstack_grow (mangle_obstack, name, len);
+}
+
+/* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string
+   appropriately mangled (with Unicode escapes) to MANGLE_OBSTACK.
+   Characters needing an escape are encoded `__UNN_' to `__UNNNN_', in
+   which case `__U' will be mangled `__U55_'. `$' is mangled `$' or
+   __U24_ according to NO_DOLLAR_IN_LABEL.  */
+
+static void
+append_unicode_mangled_name (name, len)
+     const char *name;
+     int len;
+{
+  const unsigned char *ptr;
+  const unsigned char *limit = (const unsigned char *)name + len;
+  int uuU = 0;
+  for (ptr = (const unsigned char *) name;  ptr < limit;  )
+    {
+      int ch = UTF8_GET(ptr, limit);
+
+      if ((ch >= '0' && ch <= '9')
+#ifndef NO_DOLLAR_IN_LABEL
+	  || ch == '$'
+#endif
+	  || (ch >= 'a' && ch <= 'z')
+	  || (ch >= 'A' && ch <= 'Z' && ch != 'U'))
+	obstack_1grow (mangle_obstack, ch);
+      /* Everything else needs encoding */
+      else
+	{
+	  char buf [9];
+	  if (ch == '_' || ch == 'U')
+	    {
+	      /* Prepare to recognize __U */
+	      if (ch == '_' && (uuU < 3))
+		{
+		  uuU++;
+		  obstack_1grow (mangle_obstack, ch);
+		}
+	      /* We recognize __U that we wish to encode
+                 __U55_. Finish the encoding. */
+	      else if (ch == 'U' && (uuU == 2))
+		{
+		  uuU = 0;
+		  obstack_grow (mangle_obstack, "U55_", 4);
+		}
+	      continue;
+	    }
+	  sprintf (buf, "__U%x_", ch);
+	  obstack_grow (mangle_obstack, buf, strlen (buf));
+	  uuU = 0;
+	}
+    }
+}
+
+/* Assuming (NAME, LEN) is a Utf8-encoding string, calculate the
+   length of the string as mangled (a la g++) including Unicode
+   escapes.  If no escapes are needed, return 0.  */
+
+static int
+unicode_mangling_length (name, len)
+     const char *name; 
+     int len; 
+{
+  const unsigned char *ptr;
+  const unsigned char *limit = (const unsigned char *)name + len;
+  int need_escapes = 0;		/* Whether we need an escape or not */
+  int num_chars = 0;		/* Number of characters in the mangled name */
+  int uuU = 0;			/* Help us to find __U. 0: '_', 1: '__' */
+  for (ptr = (const unsigned char *) name;  ptr < limit;  )
+    {
+      int ch = UTF8_GET(ptr, limit);
+
+      if (ch < 0)
+	error ("internal error - invalid Utf8 name");
+      if ((ch >= '0' && ch <= '9')
+#ifndef NO_DOLLAR_IN_LABEL
+	  || ch == '$'
+#endif
+	  || (ch >= 'a' && ch <= 'z')
+	  || (ch >= 'A' && ch <= 'Z' && ch != 'U'))
+	num_chars++;
+      /* Everything else needs encoding */
+      else
+	{
+	  int encoding_length = 2;
+
+	  if (ch == '_' || ch == 'U')
+	    {
+	      /* Prepare to recognize __U */
+	      if (ch == '_' && (uuU < 3))
+		{
+		  num_chars++;
+		  uuU++;
+		}
+	      /* We recognize __U that we wish to encode __U55_ */
+	      else if (ch == 'U' && (uuU == 2))
+		{
+		  num_chars += 4;
+		  need_escapes = 1;
+		  uuU = 0;
+		}
+	      continue;
+	    }
+	  
+	  if (ch > 0xff)
+	    encoding_length++;
+	  if (ch > 0xfff)
+	    encoding_length++;
+	  
+	  num_chars += (4 + encoding_length);
+	  need_escapes = 1;
+	  uuU = 0;
+	}
+    }
+  if (need_escapes)
+    return num_chars;
+  else
+    return 0;
+}
+
+#else
+
+/* The assembler supports UTF8, we don't use escapes. Mangling is
+   simply <N>NAME. <N> is the number of UTF8 encoded characters that
+   are found in NAME.  */
+
+void
+append_gpp_mangled_name (name, len)
+     const char *name;
+     int len;
+{
+  const unsigned char *ptr;
+  const unsigned char *limit = (const unsigned char *)name + len;
+  int encoded_len;
+  char buf [6];
+  
+  /* Compute the length of the string we wish to mangle. */
+  for (encoded_len =  0, ptr = (const unsigned char *) name;
+       ptr < limit; encoded_len++)
+    {
+      int ch = UTF8_GET(ptr, limit);
+
+      if (ch < 0)
+	error ("internal error - invalid Utf8 name");
+    }
+
+  sprintf (buf, "%d", encoded_len);
+  obstack_grow (mangle_obstack, buf, strlen (buf));
+  obstack_grow (mangle_obstack, name, len);
+}
+
+#endif /* HAVE_AS_UTF8 */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]