[PATCH][LTO] Introduce alias types

Richard Guenther rguenther@suse.de
Tue Sep 7 16:12:00 GMT 2010


This makes TBAA across languages well-defined by using a canonical
type (aka alias type) for computing alias sets.  The alias types
are subject to more lax canonicalization to for example treat
struct { int; } and int the same (for C - Fortran interoperability)
and generally perform a structural equivalence comparison.

Apart from implementation details on what types form the equivalence
sets (and thus alias-sets) this separates TBAA handling (which
for LTO relies on types as we do not and cannot stream alias-sets)
from type merging and gimple canonical types used for IL type
compatibility.  This allows adjustments to either part without
affecting the other (which is good).

Bootstrapped and tested on x86_64-unknown-linux-gnu.

I've been sitting on this kind of patch for quite a while now
(waiting for a real problem to show up with the way we currently
handle things ... but I guess cross-language LTO isn't that common
for now).  Thus, regardless ok for trunk?

Thanks,
Richard.

2010-09-07  Richard Guenther  <rguenther@suse.de>

	* gimple.c: Include langhooks.h.
	(gimple_get_alias_set): Use gimple_register_alias_type to
	canonicalize types.
	(gimple_alias_types): New global hashtable.
	(alias_type_hash_cache): Likewise.
	(alias_gtc_visited): Likewise.
	(alias_gtc_ob): New global obstack.
	(single_aggr_field): New function.
	(gimple_alias_types_compatible_p): Likewise.
	(iterative_hash_alias_type): Likewise.
	(gimple_alias_type_hash): Likewise.
	(gimple_alias_type_eq): Likewise.
	(gimple_register_alias_type): Likewise.
	* gimple.h (gimple_register_alias_type): Declare.
	* alias.c (get_alias_set): Ask the frontend last for types.

Index: gcc/gimple.c
===================================================================
*** gcc/gimple.c.orig	2010-09-07 15:17:59.000000000 +0200
--- gcc/gimple.c	2010-09-07 15:27:23.000000000 +0200
*************** along with GCC; see the file COPYING3.
*** 36,41 ****
--- 36,42 ----
  #include "flags.h"
  #include "alias.h"
  #include "demangle.h"
+ #include "langhooks.h"  /* For type_for_mode.  */
  
  /* Global type table.  FIXME lto, it should be possible to re-use some
     of the type hashing routines in tree.c (type_hash_canon, type_hash_lookup,
*************** struct type_pair_d
*** 3168,3174 ****
  {
    unsigned int uid1;
    unsigned int uid2;
!   signed char same_p[2];
  };
  typedef struct type_pair_d *type_pair_t;
  
--- 3169,3175 ----
  {
    unsigned int uid1;
    unsigned int uid2;
!   signed char same_p[3];
  };
  typedef struct type_pair_d *type_pair_t;
  
*************** lookup_type_pair (tree t1, tree t2, htab
*** 3227,3232 ****
--- 3228,3234 ----
        p->uid2 = TYPE_UID (t2);
        p->same_p[0] = -2;
        p->same_p[1] = -2;
+       p->same_p[2] = -2;
        *slot = (void *) p;
      }
  
*************** gimple_signed_type (tree type)
*** 4670,4676 ****
  alias_set_type
  gimple_get_alias_set (tree t)
  {
!   tree u;
  
    /* Permit type-punning when accessing a union, provided the access
       is directly through the union.  For example, this code does not
--- 4672,4678 ----
  alias_set_type
  gimple_get_alias_set (tree t)
  {
!   tree u, tem;
  
    /* Permit type-punning when accessing a union, provided the access
       is directly through the union.  For example, this code does not
*************** gimple_get_alias_set (tree t)
*** 4697,4702 ****
--- 4699,4707 ----
        || t == unsigned_char_type_node)
      return 0;
  
+   if ((tem = gimple_register_alias_type (t)) != t)
+     return get_alias_set (tem);
+ 
    /* Allow aliasing between signed and unsigned variants of the same
       type.  We treat the signed variant as canonical.  */
    if (TREE_CODE (t) == INTEGER_TYPE && TYPE_UNSIGNED (t))
*************** gimple_get_alias_set (tree t)
*** 4712,4717 ****
--- 4717,5072 ----
  }
  
  
+ static GTY((if_marked ("ggc_marked_p"), param_is (union tree_node)))
+   htab_t gimple_alias_types;
+ static GTY((if_marked ("tree_int_map_marked_p"), param_is (struct tree_int_map)))
+   htab_t alias_type_hash_cache;
+ 
+ static hashval_t gimple_alias_type_hash (const void *);
+ 
+ /* Return the single FIELD_DECL of a record or union type at offset zero
+    or NULL_TREE if there is more than one field.  */
+ static tree
+ single_aggr_field (const_tree aggr)
+ {
+   tree f, single_field = NULL_TREE;
+ 
+   gcc_assert (RECORD_OR_UNION_TYPE_P (aggr));
+ 
+   for (f = TYPE_FIELDS (aggr); f; f = TREE_CHAIN (f))
+     {
+       if (TREE_CODE (f) != FIELD_DECL)
+ 	continue;
+       if (single_field)
+ 	return NULL_TREE;
+       single_field = f;
+     }
+ 
+   if (!single_field
+       || !DECL_FIELD_OFFSET (single_field)
+       || !integer_zerop (DECL_FIELD_OFFSET (single_field))
+       || !DECL_FIELD_BIT_OFFSET (single_field)
+       || !integer_zerop (DECL_FIELD_BIT_OFFSET (single_field)))
+     return NULL_TREE;
+ 
+   return single_field;
+ }
+ 
+ /* Return 1 iff T1 and T2 are structurally identical.
+    Otherwise, return 0.  */
+ 
+ static int
+ gimple_alias_types_compatible_p (tree t1, tree t2)
+ {
+   type_pair_t p = NULL;
+   tree f;
+ 
+   /* Check first for the obvious case of pointer identity.  */
+   if (t1 == t2)
+     return 1;
+ 
+   /* Check that we have two types to compare.  */
+   if (t1 == NULL_TREE || t2 == NULL_TREE)
+     return 0;
+ 
+   /* Can't be the same type if the types don't have the same mode.  */
+   if (TYPE_MODE (t1) != TYPE_MODE (t2))
+     return 0;
+ 
+   /* Void types are always the same.  */
+   if (TREE_CODE (t1) == VOID_TYPE)
+     return 1;
+ 
+   /* Strip simple wrappers.  */
+   while (RECORD_OR_UNION_TYPE_P (t1)
+ 	 && (f = single_aggr_field (t1)))
+     t1 = TREE_TYPE (f);
+   while (RECORD_OR_UNION_TYPE_P (t2)
+ 	 && (f = single_aggr_field (t2)))
+     t2 = TREE_TYPE (f);
+ 
+   /* We compare the following by mode only.  */
+   if (INTEGRAL_TYPE_P (t1)
+       || SCALAR_FLOAT_TYPE_P (t1)
+       || FIXED_POINT_TYPE_P (t1)
+       || POINTER_TYPE_P (t1))
+     return 1;
+ 
+   /* Perform cheap tail-recursion for vector and complex types.
+      Array types are treated same.  We do not care for the array extent
+      as we never get here directly but only via other aggregates which
+      already check for matching initial offset.
+      And we explicitly allow differences in the size of a trailing array
+      to be globbed to the same alias-set entry.  */
+   if (TREE_CODE (t1) == VECTOR_TYPE
+       || TREE_CODE (t1) == COMPLEX_TYPE
+       || TREE_CODE (t1) == ARRAY_TYPE)
+     return gimple_alias_types_compatible_p (TREE_TYPE (t1), TREE_TYPE (t2));
+ 
+   /* If the hash values of t1 and t2 are different the types can't
+      possibly be the same.  This helps keeping the type-pair hashtable
+      small, only tracking comparisons for hash collisions.  */
+   if (gimple_alias_type_hash (t1) != gimple_alias_type_hash (t2))
+     return 0;
+ 
+   /* If we've visited this type pair before (in the case of aggregates
+      with self-referential types), and we made a decision, return it.  */
+   p = lookup_type_pair (t1, t2, &gtc_visited, &gtc_ob);
+   if (p->same_p[GTC_ALIAS] == 0 || p->same_p[GTC_ALIAS] == 1)
+     {
+       /* We have already decided whether T1 and T2 are the
+ 	 same, return the cached result.  */
+       return p->same_p[GTC_ALIAS] == 1;
+     }
+ 
+   gcc_assert (p->same_p[GTC_ALIAS] == -2);
+ 
+   /* Do type-specific comparisons.  */
+   switch (TREE_CODE (t1))
+     {
+     case ARRAY_TYPE:
+       if (!gimple_alias_types_compatible_p (TREE_TYPE (t1), TREE_TYPE (t2)))
+ 	goto different_types;
+       goto same_types;
+ 
+     case RECORD_TYPE:
+     case UNION_TYPE:
+     case QUAL_UNION_TYPE:
+       {
+ 	tree f1, f2;
+ 
+ 	/* For aggregate types, all the fields must be the same.  */
+ 	for (f1 = TYPE_FIELDS (t1), f2 = TYPE_FIELDS (t2);
+ 	     f1 && f2;
+ 	     f1 = TREE_CHAIN (f1), f2 = TREE_CHAIN (f2))
+ 	  {
+ 	    if (TREE_CODE (f1) != FIELD_DECL
+ 		|| TREE_CODE (f2) != FIELD_DECL)
+ 	      continue;
+ 
+ 	    /* The fields must have the same offset and type.  */
+ 	    if (!gimple_compare_field_offset (f1, f2)
+ 		|| !gimple_alias_types_compatible_p (TREE_TYPE (f1),
+ 						     TREE_TYPE (f2)))
+ 	      goto different_types;
+ 	  }
+ 
+ 	/* If one aggregate has more fields than the other, they
+ 	   are not the same.  */
+ 	if (f1 || f2)
+ 	  goto different_types;
+ 
+ 	goto same_types;
+       }
+ 
+     case METHOD_TYPE:
+     case FUNCTION_TYPE:
+       /* We shouldn't ever get here and query the alias-set of a
+ 	 function or method type.  */
+ 
+     case OFFSET_TYPE:
+       /* Or of an offset type.  */
+ 
+     default:
+       gcc_unreachable ();
+     }
+ 
+   /* Common exit path for types that are not compatible.  */
+ different_types:
+   p->same_p[GTC_ALIAS] = 0;
+   return 0;
+ 
+   /* Common exit path for types that are compatible.  */
+ same_types:
+   p->same_p[GTC_ALIAS] = 1;
+   return 1;
+ }
+ 
+ /* Returning a hash value for gimple type TYPE combined with VAL.  */
+ 
+ static hashval_t
+ iterative_hash_alias_type (tree type, hashval_t val)
+ {
+   hashval_t v;
+   struct tree_int_map m;
+   void **slot;
+ 
+   /* For types with a mode all we can hash is that.  Do not bother
+      to populate the hashtable with those.  */
+   if (TYPE_MODE (type) != BLKmode
+       && TYPE_MODE (type) != VOIDmode
+       && !AGGREGATE_TYPE_P (type))
+     return iterative_hash_hashval_t (TYPE_MODE (type), val);
+ 
+   /* If there is a hash value recorded for this type simply mix in its hash.  */
+   m.base.from = type;
+   if ((slot = htab_find_slot (alias_type_hash_cache, &m, INSERT))
+       && *slot)
+     return iterative_hash_hashval_t (((struct tree_int_map *) *slot)->to, val);
+ 
+   /* There are not many things we can hash for non-aggregates.  */
+   v = 0;
+   if (TREE_CODE (type) == COMPLEX_TYPE
+       || TREE_CODE (type) == VECTOR_TYPE
+       || AGGREGATE_TYPE_P (type))
+     v = iterative_hash_hashval_t (TREE_CODE (type), v);
+ 
+   /* Recurse for aggregates with a single element type.  */
+   if (TREE_CODE (type) == ARRAY_TYPE
+       || TREE_CODE (type) == COMPLEX_TYPE
+       || TREE_CODE (type) == VECTOR_TYPE)
+     v = iterative_hash_alias_type (TREE_TYPE (type), v);
+ 
+   if (TREE_CODE (type) == RECORD_TYPE
+       || TREE_CODE (type) == UNION_TYPE
+       || TREE_CODE (type) == QUAL_UNION_TYPE)
+     {
+       unsigned nf;
+       tree f;
+ 
+       for (f = TYPE_FIELDS (type), nf = 0; f; f = TREE_CHAIN (f))
+ 	{
+ 	  if (TREE_CODE (f) != FIELD_DECL)
+ 	    continue;
+ 
+ 	  /* Hash a constant field offset.
+ 	     ???  Java produces fields w/o an offset.  */
+ 	  if (DECL_FIELD_OFFSET (f)
+ 	      && TREE_CODE (DECL_FIELD_OFFSET (f)) == INTEGER_CST
+ 	      /* ???  But not for bitfields, we'd want to only hash the
+ 	         offset of the first bit - see below.  */
+ 	      && !DECL_BIT_FIELD (f))
+ 	    {
+ 	      double_int byte_off = tree_to_double_int (DECL_FIELD_OFFSET (f));
+ 	      double_int bit_off = tree_to_double_int (DECL_FIELD_BIT_OFFSET (f));
+ 	      byte_off = double_int_lshift (byte_off,
+ 					    exact_log2 (BITS_PER_UNIT),
+ 					    2 * HOST_BITS_PER_WIDE_INT, 1);
+ 	      bit_off = double_int_add (bit_off, byte_off);
+ 	      v = iterative_hash_host_wide_int (bit_off.low, v);
+ 	      v = iterative_hash_host_wide_int (bit_off.high, v);
+ 	    }
+ 
+ 	  /* ???  We should glob bit-fields into a single field
+ 	     of their underlying type, not hash them multiple
+ 	     times.  */
+ 	  if (DECL_BIT_FIELD (f))
+ 	    v = iterative_hash_alias_type (DECL_BIT_FIELD_TYPE (f), v);
+ 	  else
+ 	    {
+ 	      tree tem, field_type = TREE_TYPE (f);
+ 
+ 	      /* Strip simple wrappers.  */
+ 	      while (RECORD_OR_UNION_TYPE_P (field_type)
+ 		     && (tem = single_aggr_field (field_type)))
+ 		field_type = TREE_TYPE (tem);
+ 
+ 	      v = iterative_hash_alias_type (field_type, v);
+ 	    }
+ 
+ 	  /* ???  We should also handle unions specially, not
+ 	     requiring the same number of fields?.  */
+ 	  nf++;
+ 	}
+ 
+       v = iterative_hash_hashval_t (nf, v);
+     }
+ 
+   /* Remember the hash-value for this type.  */
+   *slot = (void *) ggc_alloc_tree_int_map ();
+   ((struct tree_int_map *)*slot)->base.from = type;
+   ((struct tree_int_map *)*slot)->to = v;
+ 
+   return iterative_hash_hashval_t (v, val);
+ }
+ 
+ /* Returns a hash value for P (assumed to be a type).  The hash value
+    is computed using some distinguishing features of the type.
+ 
+    This function should produce the same hash value for two compatible
+    types according to gimple_alias_types_compatible_p.  */
+ 
+ static hashval_t
+ gimple_alias_type_hash (const void *p)
+ {
+   const_tree t = (const_tree) p;
+   tree f;
+ 
+   /* Strip simple wrappers.  */
+   while (RECORD_OR_UNION_TYPE_P (t)
+ 	 && (f = single_aggr_field (t)))
+     t = TREE_TYPE (f);
+ 
+   if (alias_type_hash_cache == NULL)
+     alias_type_hash_cache = htab_create_ggc (512, tree_int_map_hash,
+ 					     tree_int_map_eq, NULL);
+ 
+   return iterative_hash_alias_type (CONST_CAST_TREE (t), 0);
+ }
+ 
+ 
+ /* Returns nonzero if P1 and P2 are equal.  */
+ 
+ static int
+ gimple_alias_type_eq (const void *p1, const void *p2)
+ {
+   const_tree t1 = (const_tree) p1;
+   const_tree t2 = (const_tree) p2;
+   return gimple_alias_types_compatible_p (CONST_CAST_TREE (t1),
+ 					  CONST_CAST_TREE (t2));
+ }
+ 
+ 
+ /* Register type T in the global alias type table gimple_alias_types.
+    If another type T', alias-compatible with T, already existed in
+    gimple_alias_types then return T', otherwise return T.
+    This is used by TBAA to assign structurally equivalent types
+    the same alias-set which is very important for example for
+    cross-language LTO.  */
+ 
+ tree
+ gimple_register_alias_type (tree t)
+ {
+   void **slot;
+   tree tem;
+ 
+   gcc_assert (TYPE_P (t));
+ 
+   /* Strip simple wrappers.  */
+   while (RECORD_OR_UNION_TYPE_P (t)
+ 	 && (tem = single_aggr_field (t)))
+     t = TREE_TYPE (tem);
+ 
+   if (POINTER_TYPE_P (t))
+     return ptr_type_node;
+ 
+   else if (TYPE_MODE (t) != BLKmode
+ 	   && TYPE_MODE (t) != VOIDmode
+ 	   && !AGGREGATE_TYPE_P (t))
+     {
+       /* ???  We should simply provide and use a middle-end
+ 	 type_for_mode here for speed and completeness reasons.  */
+       tree tem = lang_hooks.types.type_for_mode (TYPE_MODE (t), 0);
+       if (tem)
+ 	return tem;
+     }
+ 
+   if (gimple_alias_types == NULL)
+     gimple_alias_types = htab_create_ggc (16381, gimple_alias_type_hash,
+ 					  gimple_alias_type_eq, 0);
+ 
+   slot = htab_find_slot (gimple_alias_types, t, INSERT);
+   if (*slot
+       && *(tree *)slot != t)
+     t = (tree) *((tree *) slot);
+   else
+     *slot = (void *) t;
+ 
+   return t;
+ }
+ 
+ 
+ 
  /* Data structure used to count the number of dereferences to PTR
     inside an expression.  */
  struct count_ptr_d
Index: gcc/gimple.h
===================================================================
*** gcc/gimple.h.orig	2010-09-07 15:17:59.000000000 +0200
--- gcc/gimple.h	2010-09-07 15:18:15.000000000 +0200
*************** extern tree get_call_expr_in (tree t);
*** 957,964 ****
  extern void recalculate_side_effects (tree);
  extern bool gimple_compare_field_offset (tree, tree);
  extern tree gimple_register_type (tree);
! enum gtc_mode { GTC_MERGE = 0, GTC_DIAG = 1 };
  extern bool gimple_types_compatible_p (tree, tree, enum gtc_mode);
  extern void print_gimple_types_stats (void);
  extern void free_gimple_type_tables (void);
  extern tree gimple_unsigned_type (tree);
--- 957,965 ----
  extern void recalculate_side_effects (tree);
  extern bool gimple_compare_field_offset (tree, tree);
  extern tree gimple_register_type (tree);
! enum gtc_mode { GTC_MERGE = 0, GTC_DIAG = 1, GTC_ALIAS = 2 };
  extern bool gimple_types_compatible_p (tree, tree, enum gtc_mode);
+ extern tree gimple_register_alias_type (tree);
  extern void print_gimple_types_stats (void);
  extern void free_gimple_type_tables (void);
  extern tree gimple_unsigned_type (tree);
Index: gcc/alias.c
===================================================================
*** gcc/alias.c.orig	2010-09-07 15:17:59.000000000 +0200
--- gcc/alias.c	2010-09-07 15:29:36.000000000 +0200
*************** get_alias_set (tree t)
*** 732,746 ****
        return 0;
      }
  
-   /* See if the language has special handling for this type.  */
-   set = lang_hooks.get_alias_set (t);
-   if (set != -1)
-     return set;
- 
    /* There are no objects of FUNCTION_TYPE, so there's no point in
       using up an alias set for them.  (There are, of course, pointers
       and references to functions, but that's different.)  */
!   else if (TREE_CODE (t) == FUNCTION_TYPE || TREE_CODE (t) == METHOD_TYPE)
      set = 0;
  
    /* Unless the language specifies otherwise, let vector types alias
--- 732,741 ----
        return 0;
      }
  
    /* There are no objects of FUNCTION_TYPE, so there's no point in
       using up an alias set for them.  (There are, of course, pointers
       and references to functions, but that's different.)  */
!   if (TREE_CODE (t) == FUNCTION_TYPE || TREE_CODE (t) == METHOD_TYPE)
      set = 0;
  
    /* Unless the language specifies otherwise, let vector types alias
*************** get_alias_set (tree t)
*** 817,822 ****
--- 812,821 ----
  	   && t != ptr_type_node)
      return get_alias_set (ptr_type_node);
  
+   /* See if the language has special handling for this type.  */
+   else if ((set = lang_hooks.get_alias_set (t)) != -1)
+     return set;
+ 
    /* Otherwise make a new alias set for this type.  */
    else
      set = new_alias_set ();



More information about the Gcc-patches mailing list