[PATCH] Fix PR47059

Martin Jambor mjambor@suse.cz
Tue Jan 18 14:35:00 GMT 2011


Hi,

On Mon, Jan 17, 2011 at 10:17:45AM -0000, Rahul Kharche wrote:
> This fixes aggressive scalarization of aggregate members smaller than
> MOVE_MAX_PIECES and not read or written to directly, which results in
> extra loads and stores.
> 

I cannot approve anything, you need an ACK of a middle-end maintainer.
Nevertheless, I have a few comments and suggestions too.

I am a bit afraid that if this gets applied, we'll get complaints
about unnecessary stack frames being set up and a lot of unnecessary
copying of stuff because of aggregates that were not completely
scalarized away just like in PR 42585.  I admit that I do not know how
to simply reconcile the techniques to avoid the two issues or which
one is worse.  For 4.7 I'd be content to just wait and see what will
come up, if anything.  For 4.6 or even 4.5, I am not so sure.

More comments inline below:

> Bootstrapped and tested on i686-pc-linux-gnu against trunk.
> 
> Is this OK?
> 
> 
> 
> 2011-01-17  Rahul Kharche  <rahul@icerasemi.com>
> 
> 	PR tree-optimization/47059
> 	* tree-sra.c (struct access): Add new member can_merge.
> 	(dump_access): Also output flag can_merge.
> 	(analyze_merge_subtree): New function to analyze merges.
> 	(analyze_access_subtree): Call above.
> 
> 
> 	* gcc.dg/tree-ssa/pr47059.c: New.
> 
> 
> Index:
> /home/rahul/src/GNU/gcc/trunk/gcc/testsuite/gcc.dg/tree-ssa/pr47059.c
> ===================================================================
> ---
> /home/rahul/src/GNU/gcc/trunk/gcc/testsuite/gcc.dg/tree-ssa/pr47059.c
> +++
> /home/rahul/src/GNU/gcc/trunk/gcc/testsuite/gcc.dg/tree-ssa/pr47059.c
> @@ -0,0 +1,40 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Os -fdump-tree-esra" } */
> +
> +/* Test for SRA. */
> +
> +struct struct1
> +{
> +  void *data;
> +  unsigned short f1;
> +  unsigned short f2;
> +};
> +typedef struct struct1 S1;
> +
> +struct struct2
> +{
> +  int f3;
> +  S1 f4;
> +};
> +typedef struct struct2 S2;
> +
> +extern void foo (S1 *ptr);
> +extern S2 gstruct2_var;
> +extern S1 gstruct1_var;
> +
> +int
> +main ()
> +{
> +  S2 *ps_var;
> +  S1 *pls_var = &gstruct1_var;
> +  S1 ls_var = *pls_var;
> +
> +  ps_var = &gstruct2_var;
> +  ps_var->f4 = ls_var;
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "ls_var\$f1" 0 "esra" } } */
> +/* { dg-final { scan-tree-dump-times "ls_var\$f2" 0 "esra" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
> +
> Index: /home/rahul/src/GNU/gcc/trunk/gcc/tree-sra.c
> ===================================================================
> --- /home/rahul/src/GNU/gcc/trunk/gcc/tree-sra.c	(revision
> 168655)
> +++ /home/rahul/src/GNU/gcc/trunk/gcc/tree-sra.c	(working copy)
> @@ -237,6 +237,9 @@ struct access
>    /* Set when we discover that this pointer is not safe to dereference
> in the
>       caller.  */
>    unsigned grp_not_necessarilly_dereferenced : 1;
> +
> +  /* Set if this access can be merged with an immediate neighbour */
> +  unsigned can_merge : 1;

I don't think we need this flag (see below).  Nevertheless, if we
eventually decide to keep it, please add a grp_ prefix to its name
because it describes the whole group and not an individual access.

>  };
>  
>  typedef struct access *access_p;
> @@ -374,14 +377,14 @@ dump_access (FILE *f, struct access *acc
>  	     "grp_unscalarizable_region = %d, grp_unscalarized_data =
> %d, "
>  	     "grp_partial_lhs = %d, grp_to_be_replaced = %d, "
>  	     "grp_maybe_modified = %d, "
> -	     "grp_not_necessarilly_dereferenced = %d\n",
> +	     "grp_not_necessarilly_dereferenced = %d, can_merge = %d\n",
>  	     access->grp_write, access->total_scalarization,
>  	     access->grp_read, access->grp_hint,
> access->grp_assignment_read,
>  	     access->grp_assignment_write, access->grp_covered,
>  	     access->grp_unscalarizable_region,
> access->grp_unscalarized_data,
>  	     access->grp_partial_lhs, access->grp_to_be_replaced,
>  	     access->grp_maybe_modified,
> -	     access->grp_not_necessarilly_dereferenced);
> +	     access->grp_not_necessarilly_dereferenced,
> access->can_merge);
>    else
>      fprintf (f, ", write = %d, total_scalarization = %d, "
>  	     "grp_partial_lhs = %d\n",
> @@ -1838,6 +1841,64 @@ expr_with_var_bounded_array_refs_p (tree
>    return false;
>  }
>  
> +/* Analyze sccess subtree and detect neighbouring accesses that could
> be
> +   merged and set can_merge flag accordingly. Return if all accesses in
> +   this subtree can be merged. */
> +
> +static bool
> +analyze_merge_subtree (struct access *access)
> +{
> +  struct access *child, *ac1;
> +  HOST_WIDE_INT spill, offset;
> +  HOST_WIDE_INT merge_size, num_merged;
> +  bool merge_access_tree = true;
> +  bool direct_access;
> +  const HOST_WIDE_INT max_bits = MOVE_MAX_PIECES * BITS_PER_UNIT;
> +
> +  if (!access->first_child)
> +    return access->can_merge;
> +
> +  child = access->first_child;
> +  while (child)
> +    {
> +      spill = 0;
> +      offset = child->offset;
> +      merge_size = 0;

I cannot see any difference in between spill and merge_size so I guess
the former can be replaced by the latter?

> +      num_merged = 0;
> +
> +      /* Find adjacent accesses that are less than MOVE_MAX_PIECES
> +         bytes in size. Ensure these accesses are not read or written
> +	 to directly. */
> +      ac1 = child;
> +      while (ac1 && ac1->size < max_bits
> +	     && (ac1->size + spill) <= max_bits
> +	     && offset == ac1->offset
> +	     && !(direct_access = ac1->grp_assignment_read
> +		   		  || ac1->grp_assignment_write))
> +	{
> +	  spill += ac1->size;
> +	  offset += child->size;
> +	  merge_size += ac1->size;
> +	  num_merged++;
> +	  ac1 = ac1->next_sibling;
> +	}
> +
> +      /* Merge at least two adjacent accesses which have no direct
> +         read/writes or that will result in less reads. */
> +      if (num_merged > 1
> +	  && (merge_size > (max_bits - merge_size)
> +	      || !direct_access))

if direct access is true, the while loop above is stopped immediately
and the directly accessed access will not be part of the merge group
at all.  So I don't really understand this part of the condition, can
you please explain what it is supposed to do?

> +	  for (; child != ac1; child = child->next_sibling)
> +	    child->can_merge = 1;
> +      else
> +	{
> +	  merge_access_tree = false;
> +	  child = child->next_sibling;
> +	}
> +    }
> +  return merge_access_tree;
> +}

In addition to the above, I think we don't really need an extra
can_merge flag just to say don't scalarize, we already have
grp_unscalarizable_region for it (though we might want to rename it to
grp_noscalarize_region or something like that if we also use it in this
way).

So if I can take the liberty of rewriting your code slightly, I would
remove can_merge altogether and change the above function to something
like:

/* Analyze sccess subtree and detect neighbouring sub-accesses that could be
   merged and set grp_unscalarizable_region for them.  */

static void
disallow_mergable_subaccesses (struct access *access)
{
  const HOST_WIDE_INT max_bits = MOVE_MAX_PIECES * BITS_PER_UNIT;
  struct access *child;

  child = access->first_child;
  while (child)
    {
      struct access *ac1;
      HOST_WIDE_INT offset;
      HOST_WIDE_INT merge_size;
      bool direct_access;
      int num_merged;

      offset = child->offset;
      merge_size = 0;
      num_merged = 0;

      /* Find adjacent accesses that are less than MOVE_MAX_PIECES
         bytes in size. Ensure these accesses are not read or written
	 to directly. */
      ac1 = child;
      while (ac1 && ac1->size < max_bits
	     && (ac1->size + merge_size) <= max_bits
	     && offset == ac1->offset
	     && !(direct_access = ac1->grp_assignment_read
		  || ac1->grp_assignment_write))
	{
	  offset += child->size;
	  merge_size += ac1->size;
	  num_merged++;
	  ac1 = ac1->next_sibling;
	}

      /* Merge at least two adjacent accesses which have no direct
         read/writes or that will result in less reads. */
      if (num_merged > 1
	  && (merge_size > (max_bits - merge_size)
	      || !direct_access)) /* This is still weird.  */
	  for (; child != ac1; child = child->next_sibling)
	    child->grp_unscalarizable_region = 1;
      else
	child = child->next_sibling;
    }
}

and then in analyze_access_subtree do

  if (allow_replacements)
    {
      if (expr_with_var_bounded_array_refs_p (root->expr))
	allow_replacements = false;
      else
	disallow_mergable_subaccesses (root);
    }

Can you please have a look if that works for you, amend it at you find
necessary and explain the intent of direct_access?

Thanks,

Martin



More information about the Gcc-patches mailing list