[PATCH] Make loop invariant and store motion stronger on lp64 targets

Richard Guenther rguenther@suse.de
Wed Apr 16 12:50:00 GMT 2008


This patch teaches aff_combination_expand to look through some conversions
like those that appear from sizetype conversion on lp64 targets.  It tries
to (but fails to) address the xfail from the gcc.dg/tree-ssa/loop-33.c
testcase that has

void test4(unsigned b)
{
  unsigned i;

  /* And here.  */
  for (i = 0; i < 100; i++)
    {
      arr[b+8].X += i;
      arr[b+9].X += i;
    }
}

where get_inner_reference for the array access generates
((sizetype)(b+8)) * 8 on targets with sizetype != unsigned.  This
patch teaches aff_combination_expand to handle the case of

void test4(int b)
{ 
  unsigned i;
  
  /* And here.  */
  for (i = 0; i < 100; i++)
    {
      arr[b+8].X += i;
      arr[b+9].X += i;
    }
} 

by noting that in ((sizetype)(b+8)) * 8 overflow of b + 8 is
undefined and so the expression can be folded to
(((sizetype)b) + 8) * 8 which allows proper affine expansion.

As this is computing an expression in a wider type this cannot be
justified as a general thing fold_unary should do, so this transformation
is localized in aff_combination_expand.

As tree-affine is used more widely than just for alias disambiguation
we cannot do anything for the plain unsigned case as if b + 8 wraps
the conversion is value changing.  (I thought of testing
flag_unsafe_loop_optimization, but the code is also excercised for
non-loop expansions during TARGET_MEM_REF creation)

Still those widening conversions are now expanded (but only one
level), so some more equalities should be spotted as well.

Nevertheless, bootstrapped and tested on x86_64-unknown-linux-gnu
and tested on i686-pc-linux-gnu as well.

Zdenek, does this version look ok?

Thanks,
Richard.

2008-04-16  Richard Guenther  <rguenther@suse.de>

	* Makefile.in (tree-affine.o): Add $(FLAGS_H) dependency.
	* tree-affine.c (aff_combination_expand): Look through some
	conversions.

	* gcc.dg/tree-ssa/loop-35.c: New testcase.

Index: tree-affine.c
===================================================================
*** tree-affine.c	(revision 134344)
--- tree-affine.c	(working copy)
*************** along with GCC; see the file COPYING3.  
*** 31,36 ****
--- 31,37 ----
  #include "pointer-set.h"
  #include "tree-affine.h"
  #include "tree-gimple.h"
+ #include "flags.h"
  
  /* Extends CST as appropriate for the affine combinations COMB.  */
  
*************** aff_combination_expand (aff_tree *comb, 
*** 578,589 ****
    aff_combination_zero (&to_add, comb->type);
    for (i = 0; i < comb->n; i++)
      {
        e = comb->elts[i].val;
!       if (TREE_CODE (e) != SSA_NAME)
  	continue;
!       def = SSA_NAME_DEF_STMT (e);
        if (TREE_CODE (def) != GIMPLE_MODIFY_STMT
! 	  || GIMPLE_STMT_OPERAND (def, 0) != e)
  	continue;
  
        rhs = GIMPLE_STMT_OPERAND (def, 1);
--- 579,598 ----
    aff_combination_zero (&to_add, comb->type);
    for (i = 0; i < comb->n; i++)
      {
+       tree type, name;
        e = comb->elts[i].val;
!       type = TREE_TYPE (e);
!       name = e;
!       /* Look through some conversions.  */
!       if (TREE_CODE (e) == NOP_EXPR
!           && (TYPE_PRECISION (type)
! 	      >= TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (e, 0)))))
! 	name = TREE_OPERAND (e, 0);
!       if (TREE_CODE (name) != SSA_NAME)
  	continue;
!       def = SSA_NAME_DEF_STMT (name);
        if (TREE_CODE (def) != GIMPLE_MODIFY_STMT
! 	  || GIMPLE_STMT_OPERAND (def, 0) != name)
  	continue;
  
        rhs = GIMPLE_STMT_OPERAND (def, 1);
*************** aff_combination_expand (aff_tree *comb, 
*** 607,612 ****
--- 616,645 ----
  	  exp = XNEW (struct name_expansion);
  	  exp->in_progress = 1;
  	  *slot = exp;
+ 	  if (e != name)
+ 	    {
+ 	      /* In principle this is a generally valid folding, but
+ 		 it is not unconditionally an optimization, so do it
+ 		 here and not in fold_unary.  */
+ 	      /* Convert (T1)(X *+- CST) into (T1)X *+- (T1)CST if T1 is wider
+ 		 than the type of X and overflow for the type of X is
+ 		 undefined.  */
+ 	      if (INTEGRAL_TYPE_P (type)
+ 	          && INTEGRAL_TYPE_P (TREE_TYPE (rhs))
+ 	          && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (rhs))
+ 	          && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (rhs))
+ 	          && (TREE_CODE (rhs) == PLUS_EXPR
+ 	              || TREE_CODE (rhs) == MINUS_EXPR
+ 	              || TREE_CODE (rhs) == MULT_EXPR)
+ 	          && TREE_CODE (TREE_OPERAND (rhs, 1)) == INTEGER_CST)
+ 	        {
+ 	          rhs = fold_build2 (TREE_CODE (rhs), type,
+ 	                             fold_convert (type, TREE_OPERAND (rhs, 0)),
+ 	                             fold_convert (type, TREE_OPERAND (rhs, 1)));
+ 	        }
+ 	      else
+ 	        rhs = fold_convert (type, rhs);
+ 	    }
  	  tree_to_aff_combination_expand (rhs, comb->type, &current, cache);
  	  exp->expansion = current;
  	  exp->in_progress = 0;
Index: testsuite/gcc.dg/tree-ssa/loop-35.c
===================================================================
*** testsuite/gcc.dg/tree-ssa/loop-35.c	(revision 0)
--- testsuite/gcc.dg/tree-ssa/loop-35.c	(revision 0)
***************
*** 0 ****
--- 1,64 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -fdump-tree-lim-details" } */
+ 
+ int x;
+ int a[100];
+ 
+ struct a
+ {
+   int X;
+   int Y;
+ };
+ 
+ struct a arr[100];
+ 
+ void test1(int b)
+ {
+   unsigned i;
+ 
+   /* And here.  */
+   for (i = 0; i < 100; i++)
+     {
+       arr[b+8].X += i;
+       arr[b+9].X += i;
+     }
+ }
+ 
+ void test2(struct a *A, int b)
+ {
+   unsigned i;
+ 
+   /* And here as well.  */
+   for (i = 0; i < 100; i++)
+     {
+       A[b].X += i;
+       A[b+1].Y += i;
+     }
+ }
+ 
+ void test3(unsigned long b)
+ {
+   unsigned i;
+ 
+   /* And here.  */
+   for (i = 0; i < 100; i++)
+     {
+       arr[b+8].X += i;
+       arr[b+9].X += i;
+     }
+ }
+ 
+ void test4(struct a *A, unsigned long b)
+ {
+   unsigned i;
+ 
+   /* And here as well.  */
+   for (i = 0; i < 100; i++)
+     {
+       A[b].X += i;
+       A[b+1].Y += i;
+     }
+ }
+ 
+ /* { dg-final { scan-tree-dump-times "Executing store motion of" 8 "lim" } } */
+ /* { dg-final { cleanup-tree-dump "lim" } } */
Index: Makefile.in
===================================================================
*** Makefile.in	(revision 134344)
--- Makefile.in	(working copy)
*************** tree-ssa-loop-ivopts.o : tree-ssa-loop-i
*** 2191,2197 ****
     tree-chrec.h $(VARRAY_H) tree-affine.h pointer-set.h $(TARGET_H)
  tree-affine.o : tree-affine.c tree-affine.h $(CONFIG_H) pointer-set.h \
     $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(TREE_GIMPLE_H) \
!    output.h $(DIAGNOSTIC_H) $(TM_H) coretypes.h $(TREE_DUMP_H)
  tree-ssa-loop-manip.o : tree-ssa-loop-manip.c $(TREE_FLOW_H) $(CONFIG_H) \
     $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) \
     output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
--- 2191,2197 ----
     tree-chrec.h $(VARRAY_H) tree-affine.h pointer-set.h $(TARGET_H)
  tree-affine.o : tree-affine.c tree-affine.h $(CONFIG_H) pointer-set.h \
     $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(TREE_GIMPLE_H) \
!    output.h $(DIAGNOSTIC_H) $(TM_H) coretypes.h $(TREE_DUMP_H) $(FLAGS_H)
  tree-ssa-loop-manip.o : tree-ssa-loop-manip.c $(TREE_FLOW_H) $(CONFIG_H) \
     $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) \
     output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \



More information about the Gcc-patches mailing list