[PATCH] Make loop invariant and store motion stronger on lp64 targets
Richard Guenther
rguenther@suse.de
Wed Apr 16 12:50:00 GMT 2008
This patch teaches aff_combination_expand to look through some conversions
like those that appear from sizetype conversion on lp64 targets. It tries
to (but fails to) address the xfail from the gcc.dg/tree-ssa/loop-33.c
testcase that has
void test4(unsigned b)
{
unsigned i;
/* And here. */
for (i = 0; i < 100; i++)
{
arr[b+8].X += i;
arr[b+9].X += i;
}
}
where get_inner_reference for the array access generates
((sizetype)(b+8)) * 8 on targets with sizetype != unsigned. This
patch teaches aff_combination_expand to handle the case of
void test4(int b)
{
unsigned i;
/* And here. */
for (i = 0; i < 100; i++)
{
arr[b+8].X += i;
arr[b+9].X += i;
}
}
by noting that in ((sizetype)(b+8)) * 8 overflow of b + 8 is
undefined and so the expression can be folded to
(((sizetype)b) + 8) * 8 which allows proper affine expansion.
As this is computing an expression in a wider type this cannot be
justified as a general thing fold_unary should do, so this transformation
is localized in aff_combination_expand.
As tree-affine is used more widely than just for alias disambiguation
we cannot do anything for the plain unsigned case as if b + 8 wraps
the conversion is value changing. (I thought of testing
flag_unsafe_loop_optimization, but the code is also excercised for
non-loop expansions during TARGET_MEM_REF creation)
Still those widening conversions are now expanded (but only one
level), so some more equalities should be spotted as well.
Nevertheless, bootstrapped and tested on x86_64-unknown-linux-gnu
and tested on i686-pc-linux-gnu as well.
Zdenek, does this version look ok?
Thanks,
Richard.
2008-04-16 Richard Guenther <rguenther@suse.de>
* Makefile.in (tree-affine.o): Add $(FLAGS_H) dependency.
* tree-affine.c (aff_combination_expand): Look through some
conversions.
* gcc.dg/tree-ssa/loop-35.c: New testcase.
Index: tree-affine.c
===================================================================
*** tree-affine.c (revision 134344)
--- tree-affine.c (working copy)
*************** along with GCC; see the file COPYING3.
*** 31,36 ****
--- 31,37 ----
#include "pointer-set.h"
#include "tree-affine.h"
#include "tree-gimple.h"
+ #include "flags.h"
/* Extends CST as appropriate for the affine combinations COMB. */
*************** aff_combination_expand (aff_tree *comb,
*** 578,589 ****
aff_combination_zero (&to_add, comb->type);
for (i = 0; i < comb->n; i++)
{
e = comb->elts[i].val;
! if (TREE_CODE (e) != SSA_NAME)
continue;
! def = SSA_NAME_DEF_STMT (e);
if (TREE_CODE (def) != GIMPLE_MODIFY_STMT
! || GIMPLE_STMT_OPERAND (def, 0) != e)
continue;
rhs = GIMPLE_STMT_OPERAND (def, 1);
--- 579,598 ----
aff_combination_zero (&to_add, comb->type);
for (i = 0; i < comb->n; i++)
{
+ tree type, name;
e = comb->elts[i].val;
! type = TREE_TYPE (e);
! name = e;
! /* Look through some conversions. */
! if (TREE_CODE (e) == NOP_EXPR
! && (TYPE_PRECISION (type)
! >= TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (e, 0)))))
! name = TREE_OPERAND (e, 0);
! if (TREE_CODE (name) != SSA_NAME)
continue;
! def = SSA_NAME_DEF_STMT (name);
if (TREE_CODE (def) != GIMPLE_MODIFY_STMT
! || GIMPLE_STMT_OPERAND (def, 0) != name)
continue;
rhs = GIMPLE_STMT_OPERAND (def, 1);
*************** aff_combination_expand (aff_tree *comb,
*** 607,612 ****
--- 616,645 ----
exp = XNEW (struct name_expansion);
exp->in_progress = 1;
*slot = exp;
+ if (e != name)
+ {
+ /* In principle this is a generally valid folding, but
+ it is not unconditionally an optimization, so do it
+ here and not in fold_unary. */
+ /* Convert (T1)(X *+- CST) into (T1)X *+- (T1)CST if T1 is wider
+ than the type of X and overflow for the type of X is
+ undefined. */
+ if (INTEGRAL_TYPE_P (type)
+ && INTEGRAL_TYPE_P (TREE_TYPE (rhs))
+ && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (rhs))
+ && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (rhs))
+ && (TREE_CODE (rhs) == PLUS_EXPR
+ || TREE_CODE (rhs) == MINUS_EXPR
+ || TREE_CODE (rhs) == MULT_EXPR)
+ && TREE_CODE (TREE_OPERAND (rhs, 1)) == INTEGER_CST)
+ {
+ rhs = fold_build2 (TREE_CODE (rhs), type,
+ fold_convert (type, TREE_OPERAND (rhs, 0)),
+ fold_convert (type, TREE_OPERAND (rhs, 1)));
+ }
+ else
+ rhs = fold_convert (type, rhs);
+ }
tree_to_aff_combination_expand (rhs, comb->type, ¤t, cache);
exp->expansion = current;
exp->in_progress = 0;
Index: testsuite/gcc.dg/tree-ssa/loop-35.c
===================================================================
*** testsuite/gcc.dg/tree-ssa/loop-35.c (revision 0)
--- testsuite/gcc.dg/tree-ssa/loop-35.c (revision 0)
***************
*** 0 ****
--- 1,64 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -fdump-tree-lim-details" } */
+
+ int x;
+ int a[100];
+
+ struct a
+ {
+ int X;
+ int Y;
+ };
+
+ struct a arr[100];
+
+ void test1(int b)
+ {
+ unsigned i;
+
+ /* And here. */
+ for (i = 0; i < 100; i++)
+ {
+ arr[b+8].X += i;
+ arr[b+9].X += i;
+ }
+ }
+
+ void test2(struct a *A, int b)
+ {
+ unsigned i;
+
+ /* And here as well. */
+ for (i = 0; i < 100; i++)
+ {
+ A[b].X += i;
+ A[b+1].Y += i;
+ }
+ }
+
+ void test3(unsigned long b)
+ {
+ unsigned i;
+
+ /* And here. */
+ for (i = 0; i < 100; i++)
+ {
+ arr[b+8].X += i;
+ arr[b+9].X += i;
+ }
+ }
+
+ void test4(struct a *A, unsigned long b)
+ {
+ unsigned i;
+
+ /* And here as well. */
+ for (i = 0; i < 100; i++)
+ {
+ A[b].X += i;
+ A[b+1].Y += i;
+ }
+ }
+
+ /* { dg-final { scan-tree-dump-times "Executing store motion of" 8 "lim" } } */
+ /* { dg-final { cleanup-tree-dump "lim" } } */
Index: Makefile.in
===================================================================
*** Makefile.in (revision 134344)
--- Makefile.in (working copy)
*************** tree-ssa-loop-ivopts.o : tree-ssa-loop-i
*** 2191,2197 ****
tree-chrec.h $(VARRAY_H) tree-affine.h pointer-set.h $(TARGET_H)
tree-affine.o : tree-affine.c tree-affine.h $(CONFIG_H) pointer-set.h \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(TREE_GIMPLE_H) \
! output.h $(DIAGNOSTIC_H) $(TM_H) coretypes.h $(TREE_DUMP_H)
tree-ssa-loop-manip.o : tree-ssa-loop-manip.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
--- 2191,2197 ----
tree-chrec.h $(VARRAY_H) tree-affine.h pointer-set.h $(TARGET_H)
tree-affine.o : tree-affine.c tree-affine.h $(CONFIG_H) pointer-set.h \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(TREE_GIMPLE_H) \
! output.h $(DIAGNOSTIC_H) $(TM_H) coretypes.h $(TREE_DUMP_H) $(FLAGS_H)
tree-ssa-loop-manip.o : tree-ssa-loop-manip.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
More information about the Gcc-patches
mailing list