[PATCH] Improve gimplification of constructors with RANGE_EXPRs (PR c++/82294, PR c++/87436)

Richard Biener rguenther@suse.de
Fri Dec 14 11:22:00 GMT 2018


On Fri, 14 Dec 2018, Jakub Jelinek wrote:

> On Fri, Dec 14, 2018 at 10:40:19AM +0100, Richard Biener wrote:
> > This looks OK to me - the only comment I have is on the two magic
> > constants (64 and 8) which are used twice in the patch.  Can you
> > either see to hoist the common condition into sth like
> > 
> >  bool prefer_loop_initializer_p = ...
> > 
> > or add #defines for those constants?  I suppose the hoisting
> > might be tricky as int_size_in_bytes can return -1 and the
> > workarounds are different in both places right now (maybe that's
> > a bug as well...).  OTOH using (unsigned)int_size_in_bytes
> > looks reasonable in general.
> 
> So like this?

Yes.

Thanks,
Richard.

> Still need to wait for the FE patch if I want to commit the testcases, those
> depend on both patches.
> I've added size32plus effective target to the larger test, as 384MB is too
> much for 16 or 20 bit address targets.
> And, I'll gather statistics on how often this makes a difference during
> gimplification during my next bootstraps/regtests.
> 
> 2018-12-14  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR c++/82294
> 	PR c++/87436
> 	* expr.h (categorize_ctor_elements): Add p_unique_nz_elts argument.
> 	* expr.c (categorize_ctor_elements_1): Likewise.  Compute it like
> 	p_nz_elts, except don't multiply it by mult.  Adjust recursive call.
> 	Fix up COMPLEX_CST handling.
> 	(categorize_ctor_elements): Add p_unique_nz_elts argument, initialize
> 	it and pass it through to categorize_ctor_elements_1.
> 	(mostly_zeros_p, all_zeros_p): Adjust categorize_ctor_elements callers.
> 	* gimplify.c (gimplify_init_constructor): Likewise.  Don't force
> 	ctor into readonly data section if num_unique_nonzero_elements is
> 	smaller or equal to 1/8 of num_nonzero_elements and size is >= 64
> 	bytes.
> 
> 	* g++.dg/tree-ssa/pr82294.C: New test.
> 	* g++.dg/tree-ssa/pr87436.C: New test.
> 
> --- gcc/expr.h.jj	2018-12-13 18:00:10.527301479 +0100
> +++ gcc/expr.h	2018-12-14 11:52:31.941071185 +0100
> @@ -309,7 +309,8 @@ extern bool can_move_by_pieces (unsigned
>  extern unsigned HOST_WIDE_INT highest_pow2_factor (const_tree);
>  
>  extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *,
> -				      HOST_WIDE_INT *, bool *);
> +				      HOST_WIDE_INT *, HOST_WIDE_INT *,
> +				      bool *);
>  
>  extern void expand_operands (tree, tree, rtx, rtx*, rtx*,
>  			     enum expand_modifier);
> --- gcc/expr.c.jj	2018-12-13 18:00:10.426303121 +0100
> +++ gcc/expr.c	2018-12-14 11:52:31.945071118 +0100
> @@ -5945,10 +5945,11 @@ count_type_elements (const_tree type, bo
>  
>  static bool
>  categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
> +			    HOST_WIDE_INT *p_unique_nz_elts,
>  			    HOST_WIDE_INT *p_init_elts, bool *p_complete)
>  {
>    unsigned HOST_WIDE_INT idx;
> -  HOST_WIDE_INT nz_elts, init_elts, num_fields;
> +  HOST_WIDE_INT nz_elts, unique_nz_elts, init_elts, num_fields;
>    tree value, purpose, elt_type;
>  
>    /* Whether CTOR is a valid constant initializer, in accordance with what
> @@ -5958,6 +5959,7 @@ categorize_ctor_elements_1 (const_tree c
>    bool const_p = const_from_elts_p ? true : TREE_STATIC (ctor);
>  
>    nz_elts = 0;
> +  unique_nz_elts = 0;
>    init_elts = 0;
>    num_fields = 0;
>    elt_type = NULL_TREE;
> @@ -5982,12 +5984,13 @@ categorize_ctor_elements_1 (const_tree c
>  	{
>  	case CONSTRUCTOR:
>  	  {
> -	    HOST_WIDE_INT nz = 0, ic = 0;
> +	    HOST_WIDE_INT nz = 0, unz = 0, ic = 0;
>  
> -	    bool const_elt_p = categorize_ctor_elements_1 (value, &nz, &ic,
> -							   p_complete);
> +	    bool const_elt_p = categorize_ctor_elements_1 (value, &nz, &unz,
> +							   &ic, p_complete);
>  
>  	    nz_elts += mult * nz;
> +	    unique_nz_elts += unz;
>   	    init_elts += mult * ic;
>  
>  	    if (const_from_elts_p && const_p)
> @@ -5999,21 +6002,31 @@ categorize_ctor_elements_1 (const_tree c
>  	case REAL_CST:
>  	case FIXED_CST:
>  	  if (!initializer_zerop (value))
> -	    nz_elts += mult;
> +	    {
> +	      nz_elts += mult;
> +	      unique_nz_elts++;
> +	    }
>  	  init_elts += mult;
>  	  break;
>  
>  	case STRING_CST:
>  	  nz_elts += mult * TREE_STRING_LENGTH (value);
> +	  unique_nz_elts += TREE_STRING_LENGTH (value);
>  	  init_elts += mult * TREE_STRING_LENGTH (value);
>  	  break;
>  
>  	case COMPLEX_CST:
>  	  if (!initializer_zerop (TREE_REALPART (value)))
> -	    nz_elts += mult;
> +	    {
> +	      nz_elts += mult;
> +	      unique_nz_elts++;
> +	    }
>  	  if (!initializer_zerop (TREE_IMAGPART (value)))
> -	    nz_elts += mult;
> -	  init_elts += mult;
> +	    {
> +	      nz_elts += mult;
> +	      unique_nz_elts++;
> +	    }
> +	  init_elts += 2 * mult;
>  	  break;
>  
>  	case VECTOR_CST:
> @@ -6025,7 +6038,10 @@ categorize_ctor_elements_1 (const_tree c
>  	      {
>  		tree v = VECTOR_CST_ELT (value, i);
>  		if (!initializer_zerop (v))
> -		  nz_elts += mult;
> +		  {
> +		    nz_elts += mult;
> +		    unique_nz_elts++;
> +		  }
>  		init_elts += mult;
>  	      }
>  	  }
> @@ -6035,6 +6051,7 @@ categorize_ctor_elements_1 (const_tree c
>  	  {
>  	    HOST_WIDE_INT tc = count_type_elements (elt_type, false);
>  	    nz_elts += mult * tc;
> +	    unique_nz_elts += tc;
>  	    init_elts += mult * tc;
>  
>  	    if (const_from_elts_p && const_p)
> @@ -6054,6 +6071,7 @@ categorize_ctor_elements_1 (const_tree c
>      *p_complete = false;
>  
>    *p_nz_elts += nz_elts;
> +  *p_unique_nz_elts += unique_nz_elts;
>    *p_init_elts += init_elts;
>  
>    return const_p;
> @@ -6062,6 +6080,11 @@ categorize_ctor_elements_1 (const_tree c
>  /* Examine CTOR to discover:
>     * how many scalar fields are set to nonzero values,
>       and place it in *P_NZ_ELTS;
> +   * the same, but counting RANGE_EXPRs as multiplier of 1 instead of
> +     high - low + 1 (this can be useful for callers to determine ctors
> +     that could be cheaply initialized with - perhaps nested - loops
> +     compared to copied from huge read-only data),
> +     and place it in *P_UNIQUE_NZ_ELTS;
>     * how many scalar fields in total are in CTOR,
>       and place it in *P_ELT_COUNT.
>     * whether the constructor is complete -- in the sense that every
> @@ -6073,13 +6096,16 @@ categorize_ctor_elements_1 (const_tree c
>  
>  bool
>  categorize_ctor_elements (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
> +			  HOST_WIDE_INT *p_unique_nz_elts,
>  			  HOST_WIDE_INT *p_init_elts, bool *p_complete)
>  {
>    *p_nz_elts = 0;
> +  *p_unique_nz_elts = 0;
>    *p_init_elts = 0;
>    *p_complete = true;
>  
> -  return categorize_ctor_elements_1 (ctor, p_nz_elts, p_init_elts, p_complete);
> +  return categorize_ctor_elements_1 (ctor, p_nz_elts, p_unique_nz_elts,
> +				     p_init_elts, p_complete);
>  }
>  
>  /* TYPE is initialized by a constructor with NUM_ELTS elements, the last
> @@ -6110,17 +6136,18 @@ complete_ctor_at_level_p (const_tree typ
>    return count_type_elements (type, true) == num_elts;
>  }
>  
> -/* Return 1 if EXP contains mostly (3/4)  zeros.  */
> +/* Return 1 if EXP contains mostly (3/4) zeros.  */
>  
>  static int
>  mostly_zeros_p (const_tree exp)
>  {
>    if (TREE_CODE (exp) == CONSTRUCTOR)
>      {
> -      HOST_WIDE_INT nz_elts, init_elts;
> +      HOST_WIDE_INT nz_elts, unz_elts, init_elts;
>        bool complete_p;
>  
> -      categorize_ctor_elements (exp, &nz_elts, &init_elts, &complete_p);
> +      categorize_ctor_elements (exp, &nz_elts, &unz_elts, &init_elts,
> +				&complete_p);
>        return !complete_p || nz_elts < init_elts / 4;
>      }
>  
> @@ -6134,10 +6161,11 @@ all_zeros_p (const_tree exp)
>  {
>    if (TREE_CODE (exp) == CONSTRUCTOR)
>      {
> -      HOST_WIDE_INT nz_elts, init_elts;
> +      HOST_WIDE_INT nz_elts, unz_elts, init_elts;
>        bool complete_p;
>  
> -      categorize_ctor_elements (exp, &nz_elts, &init_elts, &complete_p);
> +      categorize_ctor_elements (exp, &nz_elts, &unz_elts, &init_elts,
> +				&complete_p);
>        return nz_elts == 0;
>      }
>  
> --- gcc/gimplify.c.jj	2018-12-13 18:00:10.597300341 +0100
> +++ gcc/gimplify.c	2018-12-14 12:01:17.491402703 +0100
> @@ -4778,7 +4778,15 @@ gimplify_init_constructor (tree *expr_p,
>        {
>  	struct gimplify_init_ctor_preeval_data preeval_data;
>  	HOST_WIDE_INT num_ctor_elements, num_nonzero_elements;
> +	HOST_WIDE_INT num_unique_nonzero_elements;
>  	bool cleared, complete_p, valid_const_initializer;
> +	/* Use readonly data for initializers of this or smaller size
> +	   regardless of the num_nonzero_elements / num_unique_nonzero_elements
> +	   ratio.  */
> +	const HOST_WIDE_INT min_unique_size = 64;
> +	/* If num_nonzero_elements / num_unique_nonzero_elements ratio
> +	   is smaller than this, use readonly data.  */
> +	const int unique_nonzero_ratio = 8;
>  
>  	/* Aggregate types must lower constructors to initialization of
>  	   individual elements.  The exception is that a CONSTRUCTOR node
> @@ -4795,6 +4803,7 @@ gimplify_init_constructor (tree *expr_p,
>  	   can only do so if it known to be a valid constant initializer.  */
>  	valid_const_initializer
>  	  = categorize_ctor_elements (ctor, &num_nonzero_elements,
> +				      &num_unique_nonzero_elements,
>  				      &num_ctor_elements, &complete_p);
>  
>  	/* If a const aggregate variable is being initialized, then it
> @@ -4803,7 +4812,15 @@ gimplify_init_constructor (tree *expr_p,
>  	    && num_nonzero_elements > 1
>  	    && TREE_READONLY (object)
>  	    && VAR_P (object)
> -	    && (flag_merge_constants >= 2 || !TREE_ADDRESSABLE (object)))
> +	    && (flag_merge_constants >= 2 || !TREE_ADDRESSABLE (object))
> +	    /* For ctors that have many repeated nonzero elements
> +	       represented through RANGE_EXPRs, prefer initializing
> +	       those through runtime loops over copies of large amounts
> +	       of data from readonly data section.  */
> +	    && (num_unique_nonzero_elements
> +		> num_nonzero_elements / unique_nonzero_ratio
> +		|| ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)
> +		    <= (unsigned HOST_WIDE_INT) min_unique_size)))
>  	  {
>  	    if (notify_temp_creation)
>  	      return GS_ERROR;
> @@ -4896,6 +4913,13 @@ gimplify_init_constructor (tree *expr_p,
>  	       is so large as to make individual moves inefficient.  */
>  	    if (size > 0
>  		&& num_nonzero_elements > 1
> +		/* For ctors that have many repeated nonzero elements
> +		   represented through RANGE_EXPRs, prefer initializing
> +		   those through runtime loops over copies of large amounts
> +		   of data from readonly data section.  */
> +		&& (num_unique_nonzero_elements
> +		    > num_nonzero_elements / unique_nonzero_ratio
> +		    || size < min_unique_size)
>  		&& (size < num_nonzero_elements
>  		    || !can_move_by_pieces (size, align)))
>  	      {
> --- gcc/testsuite/g++.dg/tree-ssa/pr82294.C.jj	2018-12-14 11:52:31.949071052 +0100
> +++ gcc/testsuite/g++.dg/tree-ssa/pr82294.C	2018-12-14 11:52:31.949071052 +0100
> @@ -0,0 +1,13 @@
> +// PR c++/82294
> +// { dg-do compile { target c++11 } }
> +// { dg-options "-O2 -fdump-tree-gimple" }
> +
> +// Verify we don't "optimize" the ctor as copying a 1KB .rodata
> +// object into the variable.  It is better to initialize it through
> +// a loop.
> +// { dg-final { scan-tree-dump-not "this->arr = " "gimple" } }
> +
> +struct S { int x; explicit constexpr S (); };
> +constexpr S::S () : x{7} {}
> +struct T { S arr[256]; explicit T (); };
> +T::T () {}
> --- gcc/testsuite/g++.dg/tree-ssa/pr87436.C.jj	2018-12-14 11:52:31.949071052 +0100
> +++ gcc/testsuite/g++.dg/tree-ssa/pr87436.C	2018-12-14 11:52:31.949071052 +0100
> @@ -0,0 +1,25 @@
> +// PR c++/87436
> +// { dg-do compile { target { c++11 && size32plus } } }
> +// { dg-options "-O2 -fdump-tree-gimple" }
> +
> +// Verify we don't "optimize" the ctor as copying a 384MB .rodata
> +// object into the variable.  It is better to initialize it through
> +// two nested loops.
> +// { dg-final { scan-tree-dump-not "this->arr = " "gimple" } }
> +
> +struct S {
> +  int a = -1;
> +  short b = 3;
> +  int x = 0;
> +  int y = 1;
> +  int z = 42;
> +  float f = 0.123f;
> +};
> +
> +struct T { S arr[4096][4096]; };
> +
> +T *
> +foo ()
> +{
> +  return new T;
> +}
> 
> 
> 	Jakub
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)



More information about the Gcc-patches mailing list