This is the mail archive of the fortran@gcc.gnu.org mailing list for the GNU Fortran project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [Patch, Fortran, OOP] PR 46971: [4.6 Regression] ICE on long class names


Hi Thomas,

>> A stronger argument would be the lack of uniqueness, i.e. a hashed
>> name colliding with a plain-text name, but I think this is practically
>> impossible.
>
> I think we have had that discussion before, sort of :-)

at least a similar one ...


> Could you maybe insert a couple of leading underscores into the hashed
> class name so that a user would have to do something really illegal
> (i.e. make up a class name with two leading underscores) to get a
> collision?

I think it's easier to do the opposite: Make sure the plain-text
string contains a character which cannot appear in the hash string
(which is in hex: 0-9, A-F). Let's take the underscore (a single one
is sufficient), since we already have that in most cases. I have
modified the patch to make sure that it appears in *all* cases (cf.
get_unique_type_string). There you go, the improbable is now
impossible ;)

Another small change I made in 'gfc_hash_value' is to remove the
modulus. It only saved two digits in the mod file, but it increased
the probability of a hash collision.

Ok for trunk? (I'll do another regtest just to be on the safe side.)

Cheers,
Janus
Index: gcc/fortran/class.c
===================================================================
--- gcc/fortran/class.c	(revision 168345)
+++ gcc/fortran/class.c	(working copy)
@@ -1,7 +1,8 @@
 /* Implementation of Fortran 2003 Polymorphism.
    Copyright (C) 2009, 2010
    Free Software Foundation, Inc.
-   Contributed by Paul Richard Thomas & Janus Weil
+   Contributed by Paul Richard Thomas <pault@gcc.gnu.org>
+   and Janus Weil <janus@gcc.gnu.org>
 
 This file is part of GCC.
 
@@ -116,11 +117,52 @@ get_unique_type_string (char *string, gfc_symbol *
 {  
   if (derived->module)
     sprintf (string, "%s_%s", derived->module, derived->name);
+  else if (derived->ns->proc_name)
+    sprintf (string, "%s_%s", derived->ns->proc_name->name, derived->name);
   else
-    sprintf (string, "%s_%s", derived->ns->proc_name->name, derived->name);
+    sprintf (string, "_%s", derived->name);
 }
 
 
+/* A relative of 'get_unique_type_string' which makes sure the generated
+   string will not be too long (replacing it by a hash string if needed).  */
+
+static void
+get_unique_hashed_string (char *string, gfc_symbol *derived)
+{
+  char tmp[2*GFC_MAX_SYMBOL_LEN+2];
+  get_unique_type_string (&tmp[0], derived);
+  /* If string is too long, use hash value in hex representation
+     (allow for extra decoration, cf. gfc_build_class_symbol).  */
+  if (strlen (tmp) > GFC_MAX_SYMBOL_LEN - 10)
+    {
+      int h = gfc_hash_value (derived);
+      sprintf (string, "%X", h);
+    }
+  else
+    strcpy (string, tmp);
+}
+
+
+/* Assign a hash value for a derived type. The algorithm is that of SDBM.  */
+
+unsigned int
+gfc_hash_value (gfc_symbol *sym)
+{
+  unsigned int hash = 0;
+  char c[2*(GFC_MAX_SYMBOL_LEN+1)];
+  int i, len;
+  
+  get_unique_type_string (&c[0], sym);
+  len = strlen (c);
+  
+  for (i = 0; i < len; i++)
+    hash = (hash << 6) + (hash << 16) - hash + c[i];
+
+  return hash;
+}
+
+
 /* Build a polymorphic CLASS entity, using the symbol that comes from
    build_sym. A CLASS entity is represented by an encapsulating type,
    which contains the declared type as '_data' component, plus a pointer
@@ -130,13 +172,13 @@ gfc_try
 gfc_build_class_symbol (gfc_typespec *ts, symbol_attribute *attr,
 			gfc_array_spec **as, bool delayed_vtab)
 {
-  char name[GFC_MAX_SYMBOL_LEN], tname[GFC_MAX_SYMBOL_LEN];
+  char name[GFC_MAX_SYMBOL_LEN+1], tname[GFC_MAX_SYMBOL_LEN+1];
   gfc_symbol *fclass;
   gfc_symbol *vtab;
   gfc_component *c;
 
   /* Determine the name of the encapsulating type.  */
-  get_unique_type_string (tname, ts->u.derived);
+  get_unique_hashed_string (tname, ts->u.derived);
   if ((*as) && (*as)->rank && attr->allocatable)
     sprintf (name, "__class_%s_%d_a", tname, (*as)->rank);
   else if ((*as) && (*as)->rank)
@@ -343,9 +385,9 @@ gfc_find_derived_vtab (gfc_symbol *derived)
     
   if (ns)
     {
-      char name[GFC_MAX_SYMBOL_LEN], tname[GFC_MAX_SYMBOL_LEN];
+      char name[GFC_MAX_SYMBOL_LEN+1], tname[GFC_MAX_SYMBOL_LEN+1];
       
-      get_unique_type_string (tname, derived);
+      get_unique_hashed_string (tname, derived);
       sprintf (name, "__vtab_%s", tname);
 
       /* Look for the vtab symbol in various namespaces.  */
Index: gcc/fortran/decl.c
===================================================================
--- gcc/fortran/decl.c	(revision 168345)
+++ gcc/fortran/decl.c	(working copy)
@@ -7183,46 +7183,6 @@ gfc_get_type_attr_spec (symbol_attribute *attr, ch
 }
 
 
-/* Assign a hash value for a derived type. The algorithm is that of
-   SDBM. The hashed string is '[module_name #] derived_name'.  */
-static unsigned int
-hash_value (gfc_symbol *sym)
-{
-  unsigned int hash = 0;
-  const char *c;
-  int i, len;
-
-  /* Hash of the module or procedure name.  */
-  if (sym->module != NULL)
-    c = sym->module;
-  else if (sym->ns && sym->ns->proc_name
-	     && sym->ns->proc_name->attr.flavor == FL_MODULE)
-    c = sym->ns->proc_name->name;
-  else
-    c = NULL;
-
-  if (c)
-    { 
-      len = strlen (c);
-      for (i = 0; i < len; i++, c++)
-	hash =  (hash << 6) + (hash << 16) - hash + (*c);
-
-      /* Disambiguate between 'a' in 'aa' and 'aa' in 'a'.  */ 
-      hash =  (hash << 6) + (hash << 16) - hash + '#';
-    }
-
-  /* Hash of the derived type name.  */
-  len = strlen (sym->name);
-  c = sym->name;
-  for (i = 0; i < len; i++, c++)
-    hash = (hash << 6) + (hash << 16) - hash + (*c);
-
-  /* Return the hash but take the modulus for the sake of module read,
-     even though this slightly increases the chance of collision.  */
-  return (hash % 100000000);
-}
-
-
 /* Match the beginning of a derived type declaration.  If a type name
    was the result of a function, then it is possible to have a symbol
    already to be known as a derived type yet have no components.  */
@@ -7355,7 +7315,7 @@ gfc_match_derived_decl (void)
 
   if (!sym->hash_value)
     /* Set the hash for the compound name for this type.  */
-    sym->hash_value = hash_value (sym);
+    sym->hash_value = gfc_hash_value (sym);
 
   /* Take over the ABSTRACT attribute.  */
   sym->attr.abstract = attr.abstract;
Index: gcc/fortran/gfortran.h
===================================================================
--- gcc/fortran/gfortran.h	(revision 168345)
+++ gcc/fortran/gfortran.h	(working copy)
@@ -2868,6 +2868,7 @@ void gfc_add_component_ref (gfc_expr *, const char
 #define gfc_add_size_component(e)     gfc_add_component_ref(e,"_size")
 #define gfc_add_def_init_component(e) gfc_add_component_ref(e,"_def_init")
 gfc_expr *gfc_class_null_initializer (gfc_typespec *);
+unsigned int gfc_hash_value (gfc_symbol *);
 gfc_try gfc_build_class_symbol (gfc_typespec *, symbol_attribute *,
 				gfc_array_spec **, bool);
 gfc_symbol *gfc_find_derived_vtab (gfc_symbol *);

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]