Use ODR for canonical types construction in LTO

Richard Biener rguenther@suse.de
Thu Jun 27 10:25:00 GMT 2019


On Thu, 27 Jun 2019, Jan Hubicka wrote:

> Hi,
> this is updated patch for ODR based canonical type calculation.  It makes use
> of TYPE_CXX_ODR_P instead of doing the guesswork and while thinking how to get
> rid of the quadratic behaviour of the hash I noticed that all the logic fits
> quite naturally into gimple_register_canonical_type_1.
> 
> We only need to arrange that all non-ODR types gets to hash before we has
> TYPE_CXX_ODR_P and since hash functions recursively populate the type hash
> we know that subtypes with linkage are processed before types.
> 
> From the WPA stats we now get relatively few non-ODR types building cc1plus
> [WPA] Compared 1062055 SCCs, 890518 collisions (0.838486)
> [WPA] Merged 1059104 SCCs
> [WPA] Merged 6025225 tree bodies
> [WPA] Merged 639655 types
> [WPA] 101515 types prevailed (177264 associated trees)
> [WPA] GIMPLE canonical type table: size 16381, 262 elements, 5685 searches, 48 collisions (ratio: 0.008443)
> [WPA] GIMPLE canonical type pointer-map: 3548 elements, 14261 searches
> 
> So 262 distinct types compared to 1590 which is an effect of Jason's patch.
> There are 3286 ODR types having type canonical set by name and 910 have
> conflict to non-ODR type.
> 
> I am still waiting for the statistics of alias oracle, but for tramp3d there
> are no significant changes compared to the first patch.
> 
> lto-bootstrapped/regtested x86_64-linux, OK?
> 
> 	* class.c (layout_class_type): Set TYPE_CXX_ODR_P for as-base
> 	type copy.
> 
> 	* ipa-devirt.c (odr_type_d): Add tbaa_enabled flag.
> 	(add_type_duplicate): When odr hash is not allocated, to nothing.
> 	(odr_based_tbaa_p): New function.
> 	(set_type_canonical_for_odr_type): New function.
> 	* ipa-utils.h (enable_odr_based_tbaa, odr_based_tbaa_p,
> 	set_type_canonical_for_odr_type): New.
> 	* lto-common.c: Include demangle.h and tree-pretty-print.h
> 	(type_streaming_finished): New static var.
> 	(gimple_register_canonical_type_1): Return updated hash; handle ODR
> 	types.
> 	(iterative_hash_canonical_type): Update use of
> 	gimple_register_canonical_type_1.
> 	* tree.c (gimple_canonical_types_compatible_p): ODR types with
> 	ODR based TBAA are not equivalent to non-ODR types.
> 
> 	* g++.dg/lto/alias-2_0.C: New testcase.
> 	* g++.dg/lto/alias-2_1.C: New testcase.
> Index: cp/class.c
> ===================================================================
> --- cp/class.c	(revision 272656)
> +++ cp/class.c	(working copy)
> @@ -6395,6 +6395,7 @@ layout_class_type (tree t, tree *virtual
>        SET_TYPE_ALIGN (base_t, rli->record_align);
>        TYPE_USER_ALIGN (base_t) = TYPE_USER_ALIGN (t);
>        TYPE_TYPELESS_STORAGE (base_t) = TYPE_TYPELESS_STORAGE (t);
> +      TYPE_CXX_ODR_P (base_t) = true;

 = TYPE_CXX_ODR_P (t);

would be much more obvious here.

>        /* Copy the non-static data members of T. This will include its
>  	 direct non-virtual bases & vtable.  */
> Index: ipa-devirt.c
> ===================================================================
> --- ipa-devirt.c	(revision 272656)
> +++ ipa-devirt.c	(working copy)
> @@ -213,6 +213,8 @@ struct GTY(()) odr_type_d
>    bool odr_violated;
>    /* Set when virtual table without RTTI previaled table with.  */
>    bool rtti_broken;
> +  /* Set when the canonical type is determined using the type name.  */
> +  bool tbaa_enabled;
>  };
>  
>  /* Return TRUE if all derived types of T are known and thus
> @@ -1610,6 +1612,9 @@ add_type_duplicate (odr_type val, tree t
>  
>    val->types_set->add (type);
>  
> +  if (!odr_hash)
> +    return NULL;
> +
>    gcc_checking_assert (can_be_name_hashed_p (type)
>  		       && can_be_name_hashed_p (val->type));
>  
> @@ -1989,6 +1994,46 @@ prevailing_odr_type (tree type)
>    return t->type;
>  }
>  
> +/* Set tbaa_enabled flag for TYPE.  */
> +
> +void
> +enable_odr_based_tbaa (tree type)
> +{
> +  odr_type t = get_odr_type (type, true);
> +  t->tbaa_enabled = true;
> +}
> +
> +/* True if canonical type of TYPE is determined using ODR name.  */
> +
> +bool
> +odr_based_tbaa_p (const_tree type)
> +{
> +  if (!RECORD_OR_UNION_TYPE_P (type))
> +    return false;
> +  odr_type t = get_odr_type (const_cast <tree> (type), false);
> +  if (!t || !t->tbaa_enabled)
> +    return false;
> +  return true;
> +}
> +
> +/* Set TYPE_CANONICAL of type and all its variants and duplicates
> +   to CANONICAL.  */
> +
> +void
> +set_type_canonical_for_odr_type (tree type, tree canonical)
> +{
> +  odr_type t = get_odr_type (type, false);
> +  unsigned int i;
> +  tree tt;
> +
> +  for (tree t2 = t->type; t2; t2 = TYPE_NEXT_VARIANT (t2))
> +    TYPE_CANONICAL (t2) = canonical;
> +  if (t->types)
> +    FOR_EACH_VEC_ELT (*t->types, i, tt)
> +      for (tree t2 = tt; t2; t2 = TYPE_NEXT_VARIANT (t2))
> +        TYPE_CANONICAL (t2) = canonical;
> +}
> +
>  /* Return true if we reported some ODR violation on TYPE.  */
>  
>  bool
> Index: ipa-utils.h
> ===================================================================
> --- ipa-utils.h	(revision 272656)
> +++ ipa-utils.h	(working copy)
> @@ -93,6 +93,9 @@ bool odr_or_derived_type_p (const_tree t
>  bool odr_types_equivalent_p (tree type1, tree type2);
>  bool odr_type_violation_reported_p (tree type);
>  tree prevailing_odr_type (tree type);
> +void enable_odr_based_tbaa (tree type);
> +bool odr_based_tbaa_p (const_tree type);
> +void set_type_canonical_for_odr_type (tree type, tree canonical);
>  
>  /* Return vector containing possible targets of polymorphic call E.
>     If COMPLETEP is non-NULL, store true if the list is complete. 
> Index: lto/lto-common.c
> ===================================================================
> --- lto/lto-common.c	(revision 272656)
> +++ lto/lto-common.c	(working copy)
> @@ -1,5 +1,5 @@
>  /* Top-level LTO routines.
> -   Copyright (C) 2009-2018 Free Software Foundation, Inc.
> +   Copyright (C) 2009-2019 Free Software Foundation, Inc.
>     Contributed by CodeSourcery, Inc.
>  
>  This file is part of GCC.
> @@ -56,6 +56,12 @@ along with GCC; see the file COPYING3.
>  #include "attribs.h"
>  #include "builtins.h"
>  #include "lto-common.h"
> +#include "tree-pretty-print.h"
> +#include "demangle.h"
> +
> +/* True when no new types are going to be streamd from the global stream.  */
> +
> +static bool type_streaming_finished = false;
>  
>  GTY(()) tree first_personality_decl;
>  
> @@ -217,9 +223,14 @@ static hash_map<const_tree, hashval_t> *
>  static unsigned long num_canonical_type_hash_entries;
>  static unsigned long num_canonical_type_hash_queries;
>  
> +/* Types postponed for registration to the canonical type table.
> +   During streaming we postpone all TYPE_CXX_ODR_P types so we can alter
> +   decide whether there is conflict with non-ODR type or not.  */
> +static GTY(()) vec<tree, va_gc> *types_to_register = NULL;
> +
>  static void iterative_hash_canonical_type (tree type, inchash::hash &hstate);
>  static hashval_t gimple_canonical_type_hash (const void *p);
> -static void gimple_register_canonical_type_1 (tree t, hashval_t hash);
> +static hashval_t gimple_register_canonical_type_1 (tree t, hashval_t hash);
>  
>  /* Returning a hash value for gimple type TYPE.
>  
> @@ -357,7 +368,7 @@ iterative_hash_canonical_type (tree type
>  	 optimal order.  To avoid quadratic behavior also register the
>  	 type here.  */
>        v = hash_canonical_type (type);
> -      gimple_register_canonical_type_1 (type, v);
> +      v = gimple_register_canonical_type_1 (type, v);
>      }
>    hstate.add_int (v);
>  }
> @@ -388,7 +399,7 @@ gimple_canonical_type_eq (const void *p1
>  
>  /* Main worker for gimple_register_canonical_type.  */
>  
> -static void
> +static hashval_t
>  gimple_register_canonical_type_1 (tree t, hashval_t hash)
>  {
>    void **slot;
> @@ -397,6 +408,77 @@ gimple_register_canonical_type_1 (tree t
>  		       && type_with_alias_set_p (t)
>  		       && canonical_type_used_p (t));
>  
> +  /* ODR types for which there is no ODR violation and we did not record
> +     structurally equivalent non-ODR type can be treated as unique by their
> +     name.
> +
> +     hash passed to gimple_register_canonical_type_1 is a structural hash
> +     that we can use to lookup structurally equivalent non-ODR type.
> +     In case we decide to treat type as unique ODR type we recompute hash based
> +     on name and let TBAA machinery know about our decision.  */
> +  if (RECORD_OR_UNION_TYPE_P (t)
> +      && odr_type_p (t) && !odr_type_violation_reported_p (t))
> +    {
> +      /* Here we rely on fact that all non-ODR types was inserted into
> +	 canonical type hash and thus we can safely detect conflicts between
> +	 ODR types and interoperable non-ODR types.  */
> +      gcc_checking_assert (type_streaming_finished
> +			   && TYPE_MAIN_VARIANT (t) == t);
> +      slot = htab_find_slot_with_hash (gimple_canonical_types, t, hash,
> +				       NO_INSERT);
> +      if (slot && !TYPE_CXX_ODR_P (*(tree *)slot))
> +	{
> +	  tree nonodr = *(tree *)slot;
> +	  if (symtab->dump_file)
> +	    {
> +	      char *name = cplus_demangle_v3
> +				 (IDENTIFIER_POINTER
> +				   (DECL_ASSEMBLER_NAME (TYPE_NAME (t))), 0);

Ugh - do we really want to demangle here?  I think not.  Surely
not without -details or with -slim.  Readers can c++filt this
easily.

> +	      fprintf (symtab->dump_file,
> +		       "ODR and non-ODR type conflict: ");
> +	      print_generic_expr (symtab->dump_file, t);
> +	      fprintf (symtab->dump_file, " and ");
> +	      print_generic_expr (symtab->dump_file, nonodr);
> +	      fprintf (symtab->dump_file, " demangled:%s\n", name);
> +	      free (name);
> +	    }
> +	  /* Set canonical for T and all other ODR equivalent duplicates
> +	     including incomplete structures.  */
> +	  set_type_canonical_for_odr_type (t, nonodr);
> +	}
> +      else
> +	{
> +	  tree prevail = prevailing_odr_type (t);
> +
> +	  if (symtab->dump_file)
> +	    {
> +	      char *name = cplus_demangle_v3
> +				 (IDENTIFIER_POINTER
> +				   (DECL_ASSEMBLER_NAME (TYPE_NAME (t))), 0);

Likewise.

> +				
> +	      fprintf (symtab->dump_file,
> +		       "New canonical ODR type: ");
> +	      print_generic_expr (symtab->dump_file, t);
> +	      fprintf (symtab->dump_file, " demangled:%s\n", name);
> +	      free (name);
> +	    }
> +	  /* Set canonical for T and all other ODR equivalent duplicates
> +	     including incomplete structures.  */
> +	  set_type_canonical_for_odr_type (t, prevail);
> +	  enable_odr_based_tbaa (t);

I suppose there never will be a set of ODR types with the same
prevailing type but some of them having a conflict with a nonodr
type and some not?

> +	  if (!type_in_anonymous_namespace_p (t))
> +	    hash = htab_hash_string (IDENTIFIER_POINTER
> +					   (DECL_ASSEMBLER_NAME
> +						   (TYPE_NAME (prevail))));
> +	  else
> +	    hash = TYPE_UID (TYPE_MAIN_VARIANT (t));
> +	  num_canonical_type_hash_entries++;
> +	  bool existed_p = canonical_type_hash_cache->put (prevail, hash);

but those hash differently?  I think you wanted to put t, not prevail
here.  And you want to use the TYPE_UID of prevail as well?

Otherwise looks good.

You can commit the C++ FE change with the adjustment in case it
fixes the reported verification ICEs.

Richard.

> +	  gcc_checking_assert (!existed_p);
> +	}
> +      return hash;
> +    }
> +
>    slot = htab_find_slot_with_hash (gimple_canonical_types, t, hash, INSERT);
>    if (*slot)
>      {
> @@ -413,6 +495,7 @@ gimple_register_canonical_type_1 (tree t
>        bool existed_p = canonical_type_hash_cache->put (t, hash);
>        gcc_assert (!existed_p);
>      }
> +  return hash;
>  }
>  
>  /* Register type T in the global type table gimple_types and set
> @@ -464,6 +547,34 @@ lto_register_canonical_types (tree node,
>      gimple_register_canonical_type (node);
>  }
>  
> +/* Finish canonical type calculation: after all units has been streamed in we
> +   can check if given ODR type structurally conflicts with a non-ODR type.  In
> +   the first case we set type canonical according to the canonical type hash.
> +   In the second case we use type names.  */
> +
> +static void
> +lto_register_canonical_types_for_odr_types ()
> +{
> +  tree t;
> +  unsigned int i;
> +
> +  if (!types_to_register)
> +    return;
> +
> +  type_streaming_finished = true;
> +
> +  /* Be sure that no types derived from ODR types was
> +     not inserted into the hash table.  */
> +  if (flag_checking)
> +    FOR_EACH_VEC_ELT (*types_to_register, i, t)
> +      gcc_assert (!TYPE_CANONICAL (t));
> +
> +  /* Register all remaining types.  */
> +  FOR_EACH_VEC_ELT (*types_to_register, i, t)
> +    if (!TYPE_CANONICAL (t))
> +      gimple_register_canonical_type (t);
> +}
> +
>  
>  /* Remember trees that contains references to declarations.  */
>  vec <tree, va_gc> *tree_with_vars;
> @@ -1657,6 +1768,7 @@ unify_scc (struct data_in *data_in, unsi
>  }
>  
>  
> +
>  /* Read all the symbols from buffer DATA, using descriptors in DECL_DATA.
>     RESOLUTIONS is the set of symbols picked by the linker (read from the
>     resolution file when the linker plugin is being used).  */
> @@ -1749,12 +1861,23 @@ lto_read_decls (struct lto_file_decl_dat
>  		  num_prevailing_types++;
>  		  lto_fixup_prevailing_type (t);
>  
> -		  /* Compute the canonical type of all types.
> +		  /* Compute the canonical type of all non-ODR types.
> +		     Delay ODR types for the end of merging process - the canonical
> +		     type for those can be computed using the (unique) name however
> +		     we want to do this only if units in other languages do not
> +		     contain structurally equivalent type.
> +
>  		     Because SCC components are streamed in random (hash) order
>  		     we may have encountered the type before while registering
>  		     type canonical of a derived type in the same SCC.  */
>  		  if (!TYPE_CANONICAL (t))
> -		    gimple_register_canonical_type (t);
> +		    {
> +		      if (!RECORD_OR_UNION_TYPE_P (t)
> +			  || !TYPE_CXX_ODR_P (t))
> +		        gimple_register_canonical_type (t);
> +		      else if (COMPLETE_TYPE_P (t))
> +			vec_safe_push (types_to_register, t);
> +		    }
>  		  if (TYPE_MAIN_VARIANT (t) == t && odr_type_p (t))
>  		    register_odr_type (t);
>  		}
> @@ -2605,6 +2728,8 @@ read_cgraph_and_symbols (unsigned nfiles
>    ggc_free(decl_data);
>    real_file_decl_data = NULL;
>  
> +  lto_register_canonical_types_for_odr_types ();
> +
>    if (resolution_file_name)
>      fclose (resolution);
>  
> Index: testsuite/g++.dg/lto/alias-2_0.C
> ===================================================================
> --- testsuite/g++.dg/lto/alias-2_0.C	(nonexistent)
> +++ testsuite/g++.dg/lto/alias-2_0.C	(working copy)
> @@ -0,0 +1,31 @@
> +/* { dg-lto-do run } */
> +/* { dg-lto-options { { -O2 -flto } } } */
> +
> +/* With LTO we consider all pointers to incomplete types to be possibly
> +   aliasing.  This makes *bptr to alias with aptr.
> +   For C++ ODR types we however can work out that they are actually
> +   different.  */
> +
> +#include <string.h>
> +
> +typedef int (*fnptr) ();
> +
> +__attribute__ ((used))
> +struct a *aptr;
> +
> +__attribute__ ((used))
> +struct b **bptr = (struct b**)&aptr;
> +extern void init ();
> +extern void inline_me_late (int);
> +
> +
> +int
> +main (int argc, char **argv)
> +{
> +  init ();
> +  aptr = 0;
> +  inline_me_late (argc);
> +  if (!__builtin_constant_p (aptr == 0))
> +    __builtin_abort ();
> +  return (size_t)aptr;
> +}
> Index: testsuite/g++.dg/lto/alias-2_1.C
> ===================================================================
> --- testsuite/g++.dg/lto/alias-2_1.C	(nonexistent)
> +++ testsuite/g++.dg/lto/alias-2_1.C	(working copy)
> @@ -0,0 +1,16 @@
> +#include <string.h>
> +struct a {int a;} a;
> +struct b {int b;} b;
> +extern struct b **bptr;
> +void
> +inline_me_late (int argc)
> +{
> +  if (argc == -1)
> +    *bptr = (struct b *)(size_t)1;
> +}
> +void
> +init()
> +{
> +  a.a=1;
> +  b.b=2;
> +}
> Index: tree.c
> ===================================================================
> --- tree.c	(revision 272656)
> +++ tree.c	(working copy)
> @@ -14103,6 +14103,7 @@ gimple_canonical_types_compatible_p (con
>  
>    gcc_assert (!trust_type_canonical
>  	      || (type_with_alias_set_p (t1) && type_with_alias_set_p (t2)));
> +
>    /* If the types have been previously registered and found equal
>       they still are.  */
>  
> @@ -14120,6 +14121,14 @@ gimple_canonical_types_compatible_p (con
>        return TYPE_CANONICAL (t1) == TYPE_CANONICAL (t2);
>      }
>  
> +  /* For types where we do ODR based TBAA the canonical type is always
> +     set correctly, so we know that types are different if their
> +     canonical types does not match.  */
> +  if (trust_type_canonical
> +      && (odr_type_p (t1) && odr_based_tbaa_p (t1))
> +	  != (odr_type_p (t2) && odr_based_tbaa_p (t2)))
> +    return false;
> +
>    /* Can't be the same type if the types don't have the same code.  */
>    enum tree_code code = tree_code_for_canonical_type_merging (TREE_CODE (t1));
>    if (code != tree_code_for_canonical_type_merging (TREE_CODE (t2)))
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Linux GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany;
GF: Felix Imendörffer, Mary Higgins, Sri Rasiah; HRB 21284 (AG NÌrnberg)


More information about the Gcc-patches mailing list