This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.



Re: [PATCH 4/4] Make total scalarization also copy padding (PR 92486)


On Tue, 17 Dec 2019, Martin Jambor wrote:

> Hi,
> 
> PR 92486 shows that DSE, when seeing a "normal" gimple aggregate
> assignment coming from a C struct assignment and another representing
> a folded memcpy, can kill the latter and keep in place only the
> former, which does not copy padding - at least when SRA decides to
> totally scalarize at least one of the aggregates (when not doing
> total scalarization, SRA cares about padding).
> 
> Mind you, SRA would not totally scalarize an aggregate if it saw that
> it takes part in a gimple assignment which is a folded memcpy (see how
> type_changing_p is set in contains_vce_or_bfcref_p), but here it does
> not see one, precisely because of the DSE decision.
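> 
> For illustration, a sketch of the kind of code involved (not the
> exact testcase from the PR; assume 3 bytes of padding after 'a'):
> 
>   struct S { char a; int b; };
> 
>   struct S g;
> 
>   void
>   foo (const struct S *s)
>   {
>     struct S l;
>     __builtin_memcpy (&l, s, sizeof l); /* folded into "l = *s",
>                                            which copies padding */
>     l = *s;                             /* C struct assignment; DSE
>                                            kills the memcpy above */
>     g = l;
>   }
> 
> If SRA then totally scalarizes 'l', the surviving assignments copy
> only the fields, so the padding bytes are never copied at all.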
> 
> I was asked to modify SRA to take padding into account - and to copy
> it around - when totally scalarizing, which is what the patch below
> does.
> 
> I believe the patch is correct in the sense that it will not cause
> miscompilations, but after seeing inlining propagate the useless
> (and small and ugly and certainly damaging) accesses far and wide, I
> am more convinced than before that this is not the correct approach,
> that DSE should simply be able to discern between the two kinds of
> assignment, and that the semantics of a "normal" gimple assignment
> should not include copying of padding.
> 
> But if the decision is to make gimple aggregate assignments always a
> solid block copy, the patch can do it; it has passed bootstrap and
> testing on x86_64-linux, and a very similar one has on aarch64-linux
> and i686-linux.  I suppose that at least the way it figures out the
> type for copying will need to change, but even then I'd rather not
> commit it.

I think both GENERIC and GIMPLE have always had assignments be
block copies.  I know we recently changed memcpy folding to be more
explicit to avoid this very same issue, but clearly having a GIMPLE
stmt like

 *p = *q;

decomposing into loads and stores of multiple discontiguous memory
ranges looks very wrong and would be quite awkward to represent
correctly throughout the compiler (and in the alias infrastructure).
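
To make this concrete, compare the two interpretations in plain C
(just a sketch; assume struct S has 3 bytes of padding after 'a'):

  struct S { char a; int b; };

  /* Padding-skipping interpretation: two discontiguous copies;
     bytes 1..3 of *p are not touched.  */
  void copy_members (struct S *p, const struct S *q)
  {
    p->a = q->a;   /* byte 0     */
    p->b = q->b;   /* bytes 4..7 */
  }

  /* Block-copy interpretation: one contiguous move of
     sizeof (struct S) bytes, padding included.  */
  void copy_block (struct S *p, const struct S *q)
  {
    __builtin_memcpy (p, q, sizeof *p);
  }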

So even if the C standard says that for aggregates the elements are
copied and the contents of padding are unspecified, the actual GIMPLE
primitive should always copy everything unless we can prove the
padding is not used.  The frontends could always choose to make not
copying the padding explicit (but usually copying it _is_ more
efficient, unless the padding is artificially very large).

From an SRA analysis perspective, I wonder if you can produce a
fix that doesn't depend on the earlier patches in this series?  It
might be as "simple" as, in completely_scalarize, artificially
enlarging the field for FIELD_DECLs with following padding (I wonder
if we can do without the 'ref' arg to scalarize_elem for that - we'd
build a MEM_REF then?).
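
Something along these lines for the extent computation, maybe (just a
sketch; the helper is made up, the accessors are the usual ones from
tree.h, and everything is assumed to have constant sizes/positions):

  /* Return the size in bits of FLD enlarged to cover any padding up
     to the next field of RECORD_TYPE, or up to the end of RECORD_TYPE
     for the last field.  */
  static HOST_WIDE_INT
  field_size_with_padding (tree record_type, tree fld)
  {
    HOST_WIDE_INT start = int_bit_position (fld);
    tree next = DECL_CHAIN (fld);
    while (next && TREE_CODE (next) != FIELD_DECL)
      next = DECL_CHAIN (next);
    return (next ? int_bit_position (next)
	    : tree_to_shwi (TYPE_SIZE (record_type))) - start;
  }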

Thanks,
Richard.

> Thanks,
> 
> Martin
> 
> 2019-12-13  Martin Jambor  <mjambor@suse.cz>
> 
> 	PR tree-optimization/92486
> 	* tree-sra.c: Include langhooks.h.
> 	(total_scalarization_fill_padding): New function.
> 	(total_skip_all_accesses_until_pos): Also create accesses for padding.
> 	(total_should_skip_creating_access): Pass new parameters to
> 	total_skip_all_accesses_until_pos, update how much area is already
> 	covered in cases of success.
> 	(totally_scalarize_subtree): Track how much of an aggregate is
> 	covered, create accesses for trailing padding.
> ---
>  gcc/tree-sra.c | 102 ++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 92 insertions(+), 10 deletions(-)
> 
> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
> index 9f087e5c27a..753bf63c33c 100644
> --- a/gcc/tree-sra.c
> +++ b/gcc/tree-sra.c
> @@ -99,7 +99,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "dbgcnt.h"
>  #include "builtins.h"
>  #include "tree-sra.h"
> -
> +#include "langhooks.h"
>  
>  /* Enumeration of all aggregate reductions we can do.  */
>  enum sra_mode { SRA_MODE_EARLY_IPA,   /* early call regularization */
> @@ -2983,6 +2983,54 @@ create_total_scalarization_access (struct access *parent, HOST_WIDE_INT from,
>    return access;
>  }
>  
> +/* Create accesses to cover padding within PARENT, spanning from FROM to TO,
> +   linking them among PARENT's children between LAST_PTR and NEXT_SIBLING.  */
> +
> +static struct access **
> +total_scalarization_fill_padding (struct access *parent, HOST_WIDE_INT from,
> +				  HOST_WIDE_INT to, struct access **last_ptr,
> +				  struct access *next_sibling)
> +{
> +  do
> +    {
> +      /* Diff cannot be bigger than max_scalarization_size in
> +	 analyze_all_variable_accesses.  */
> +      HOST_WIDE_INT diff = to - from;
> +      gcc_assert (diff >= BITS_PER_UNIT);
> +      HOST_WIDE_INT stsz = 1 << floor_log2 (diff);
> +      tree type;
> +      scalar_int_mode mode;
> +
> +      while (true)
> +	{
> +	  type = lang_hooks.types.type_for_size (stsz, 1);
> +	  if (type
> +	      && is_a <scalar_int_mode> (TYPE_MODE (type), &mode)
> +	      && GET_MODE_BITSIZE (mode) == stsz)
> +	    break;
> +	  stsz /= 2;
> +	  gcc_checking_assert (stsz >= BITS_PER_UNIT);
> +	}
> +
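> +      /* Illustrative example: for 3 bytes of padding (diff == 24),
> +	 stsz is 16, so the loop below emits one 16-bit access and the
> +	 enclosing loop then adds a final 8-bit access.  */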
> +      do {
> +	tree expr = build_ref_for_offset (UNKNOWN_LOCATION, parent->base,
> +					  from, parent->reverse, type, NULL,
> +					  false);
> +	struct access *access
> +	  = create_total_scalarization_access (parent, from, stsz, type,
> +					       expr, last_ptr, next_sibling);
> +	access->grp_no_warning = 1;
> +	last_ptr = &access->next_sibling;
> +
> +	from += stsz;
> +      }
> +      while (to - from >= stsz);
> +      gcc_assert (from <= to);
> +    }
> +  while (from < to);
> +  return last_ptr;
> +}
> +
>  static bool totally_scalarize_subtree (struct access *root);
>  
>  /* Move **LAST_PTR along the chain of siblings until it points to an access
> @@ -2991,16 +3039,35 @@ static bool totally_scalarize_subtree (struct access *root);
>     false.  */
>  
>  static bool
> -total_skip_all_accesses_until_pos (HOST_WIDE_INT pos, struct access ***last_ptr)
> +total_skip_all_accesses_until_pos (struct access *root, HOST_WIDE_INT pos,
> +				   HOST_WIDE_INT *covered,
> +				   struct access ***last_ptr)
>  {
>    struct access *next_child = **last_ptr;
>    while (next_child && next_child->offset < pos)
>      {
>        if (next_child->offset + next_child->size > pos)
>  	return true;
> +
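> +      /* Fill the gap between the area covered so far and this child
> +	 with artificial padding accesses.  */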
> +      if (*covered < next_child->offset)
> +	{
> +	  *last_ptr = total_scalarization_fill_padding (root, *covered,
> +							next_child->offset,
> +							*last_ptr, next_child);
> +	  gcc_assert (next_child == **last_ptr);
> +	}
> +      *covered = next_child->offset + next_child->size;
>        *last_ptr = &next_child->next_sibling;
>        next_child = **last_ptr;
>      }
> +
> +  if (*covered < pos)
> +    {
> +      *last_ptr = total_scalarization_fill_padding (root, *covered, pos,
> +						   *last_ptr, next_child);
> +      *covered = pos;
> +    }
> +
>    return false;
>  }
>  
> @@ -3112,18 +3179,22 @@ total_skip_children_over_scalar_type (HOST_WIDE_INT pos, HOST_WIDE_INT size,
>     create an artificial access for the type at this position.  */
>  
>  static bool
> -total_should_skip_creating_access (tree type, HOST_WIDE_INT pos,
> -				   HOST_WIDE_INT size,
> +total_should_skip_creating_access (struct access *root, tree type,
> +				   HOST_WIDE_INT pos, HOST_WIDE_INT size,
> +				   HOST_WIDE_INT *covered,
>  				   struct access ***last_ptr)
>  {
> -  if (total_skip_all_accesses_until_pos (pos, last_ptr))
> +  if (total_skip_all_accesses_until_pos (root, pos, covered, last_ptr))
>      {
>        *last_ptr = NULL;
>        return false;
>      }
>  
>    if (total_handle_child_matching_pos_size (pos, size, type, last_ptr))
> -    return true;
> +    {
> +      *covered = pos + size;
> +      return true;
> +    }
>    if (!*last_ptr)
>      return false;
>  
> @@ -3138,7 +3209,10 @@ total_should_skip_creating_access (tree type, HOST_WIDE_INT pos,
>  
>    if (is_gimple_reg_type (type)
>        && total_skip_children_over_scalar_type (pos, size, last_ptr))
> -    return true;
> +    {
> +      *covered = pos + size;
> +      return true;
> +    }
>  
>    return false;
>  }
> @@ -3166,6 +3240,7 @@ totally_scalarize_subtree (struct access *root)
>    gcc_checking_assert (!is_gimple_reg_type (root->type));
>  
>   struct access **last_ptr = &root->first_child;
> + HOST_WIDE_INT covered = root->offset;
>  
>    switch (TREE_CODE (root->type))
>      {
> @@ -3182,7 +3257,8 @@ totally_scalarize_subtree (struct access *root)
>  
>  	    HOST_WIDE_INT pos = root->offset + int_bit_position (fld);
>  
> -	    if (total_should_skip_creating_access (ft, pos, fsize, &last_ptr))
> +	    if (total_should_skip_creating_access (root, ft, pos, fsize,
> +						   &covered, &last_ptr))
>  	      continue;
>  	    if (!last_ptr)
>  	      return false;
> @@ -3204,6 +3280,7 @@ totally_scalarize_subtree (struct access *root)
>  	    if (!is_gimple_reg_type (ft)
>  		&& !totally_scalarize_subtree (new_child))
>  	      return false;
> +	    covered = pos + fsize;
>  	  }
>        break;
>      case ARRAY_TYPE:
> @@ -3236,8 +3313,8 @@ totally_scalarize_subtree (struct access *root)
>  	     pos += el_size, ++idx)
>  	  {
>  	    gcc_checking_assert (last_ptr);
> -	    if (total_should_skip_creating_access (elemtype, pos, el_size,
> -						   &last_ptr))
> +	    if (total_should_skip_creating_access (root, elemtype, pos, el_size,
> +						   &covered, &last_ptr))
>  	      continue;
>  	    if (!last_ptr)
>  	      return false;
> @@ -3261,6 +3338,7 @@ totally_scalarize_subtree (struct access *root)
>  	      return false;
>  
>  	    last_ptr = &new_child->next_sibling;
> +	    covered = pos + el_size;
>  	  }
>        }
>        break;
> @@ -3269,6 +3347,10 @@ totally_scalarize_subtree (struct access *root)
>      }
>  
>   out:
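> +  /* Finally, create accesses covering any trailing padding between the
> +     last child and the end of the aggregate.  */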
> +  HOST_WIDE_INT limit = root->offset + root->size;
> +  if (covered < limit)
> +    total_scalarization_fill_padding (root, covered, limit, last_ptr, NULL);
> +
>    return true;
>  }
>  
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
