This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Fix part of PR48037
- From: Richard Guenther <rguenther at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Tue, 15 Mar 2011 13:19:29 +0100 (CET)
- Subject: [PATCH] Fix part of PR48037
This avoids spilling SSE registers to memory merely because vector
components are accessed C-array style (e.g. y[0], y[1] through a pointer
to the element type). The trick is to simply rewrite those accesses into
proper vector element selects (BIT_FIELD_REFs) at the tree level and
promote the vector to SSA form.
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
Richard.
2011-03-15 Richard Guenther <rguenther@suse.de>
PR tree-optimization/48037
* tree-ssa.c (maybe_rewrite_mem_ref_base): Rewrite vector
selects into BIT_FIELD_REFs.
(non_rewritable_mem_ref_base): Check if a MEM_REF is a
vector select.
* gcc.target/i386/pr48037-1.c: New testcase.
Index: gcc/tree-ssa.c
===================================================================
*** gcc/tree-ssa.c (revision 170776)
--- gcc/tree-ssa.c (working copy)
*************** maybe_rewrite_mem_ref_base (tree *tp)
*** 1838,1855 ****
tp = &TREE_OPERAND (*tp, 0);
if (TREE_CODE (*tp) == MEM_REF
&& TREE_CODE (TREE_OPERAND (*tp, 0)) == ADDR_EXPR
- && integer_zerop (TREE_OPERAND (*tp, 1))
&& (sym = TREE_OPERAND (TREE_OPERAND (*tp, 0), 0))
&& DECL_P (sym)
&& !TREE_ADDRESSABLE (sym)
&& symbol_marked_for_renaming (sym))
{
! if (!useless_type_conversion_p (TREE_TYPE (*tp),
! TREE_TYPE (sym)))
! *tp = build1 (VIEW_CONVERT_EXPR,
! TREE_TYPE (*tp), sym);
! else
! *tp = sym;
}
}
--- 1838,1869 ----
tp = &TREE_OPERAND (*tp, 0);
if (TREE_CODE (*tp) == MEM_REF
&& TREE_CODE (TREE_OPERAND (*tp, 0)) == ADDR_EXPR
&& (sym = TREE_OPERAND (TREE_OPERAND (*tp, 0), 0))
&& DECL_P (sym)
&& !TREE_ADDRESSABLE (sym)
&& symbol_marked_for_renaming (sym))
{
! if (TREE_CODE (TREE_TYPE (sym)) == VECTOR_TYPE
! && useless_type_conversion_p (TREE_TYPE (*tp),
! TREE_TYPE (TREE_TYPE (sym)))
! && multiple_of_p (sizetype, TREE_OPERAND (*tp, 1),
! TYPE_SIZE_UNIT (TREE_TYPE (*tp))))
! {
! *tp = build3 (BIT_FIELD_REF, TREE_TYPE (*tp), sym,
! TYPE_SIZE (TREE_TYPE (*tp)),
! int_const_binop (MULT_EXPR,
! bitsize_int (BITS_PER_UNIT),
! TREE_OPERAND (*tp, 1), 0));
! }
! else if (integer_zerop (TREE_OPERAND (*tp, 1)))
! {
! if (!useless_type_conversion_p (TREE_TYPE (*tp),
! TREE_TYPE (sym)))
! *tp = build1 (VIEW_CONVERT_EXPR,
! TREE_TYPE (*tp), sym);
! else
! *tp = sym;
! }
}
}
*************** non_rewritable_mem_ref_base (tree ref)
*** 1869,1879 ****
base = TREE_OPERAND (base, 0);
/* But watch out for MEM_REFs we cannot lower to a
! VIEW_CONVERT_EXPR. */
if (TREE_CODE (base) == MEM_REF
&& TREE_CODE (TREE_OPERAND (base, 0)) == ADDR_EXPR)
{
tree decl = TREE_OPERAND (TREE_OPERAND (base, 0), 0);
if (DECL_P (decl)
&& (!integer_zerop (TREE_OPERAND (base, 1))
|| (DECL_SIZE (decl)
--- 1883,1900 ----
base = TREE_OPERAND (base, 0);
/* But watch out for MEM_REFs we cannot lower to a
! VIEW_CONVERT_EXPR or a BIT_FIELD_REF. */
if (TREE_CODE (base) == MEM_REF
&& TREE_CODE (TREE_OPERAND (base, 0)) == ADDR_EXPR)
{
tree decl = TREE_OPERAND (TREE_OPERAND (base, 0), 0);
+ if (TREE_CODE (TREE_TYPE (decl)) == VECTOR_TYPE
+ && useless_type_conversion_p (TREE_TYPE (base),
+ TREE_TYPE (TREE_TYPE (decl)))
+ && double_int_fits_in_uhwi_p (mem_ref_offset (base))
+ && multiple_of_p (sizetype, TREE_OPERAND (base, 1),
+ TYPE_SIZE_UNIT (TREE_TYPE (base))))
+ return NULL_TREE;
if (DECL_P (decl)
&& (!integer_zerop (TREE_OPERAND (base, 1))
|| (DECL_SIZE (decl)
Index: gcc/testsuite/gcc.target/i386/pr48037-1.c
===================================================================
*** gcc/testsuite/gcc.target/i386/pr48037-1.c (revision 0)
--- gcc/testsuite/gcc.target/i386/pr48037-1.c (revision 0)
***************
*** 0 ****
--- 1,15 ----
+ /* { dg-do compile } */
+ /* { dg-require-effective-target lp64 } */
+ /* { dg-options "-O -fno-math-errno" } */
+
+ typedef double __m128d __attribute__((vector_size(16)));
+ __m128d vsqrt1 (__m128d const x)
+ {
+ double const* __restrict__ const y = (double const*)&x;
+ double const a = __builtin_sqrt(y[0]);
+ double const b = __builtin_sqrt(y[1]);
+ return (__m128d) { a, b };
+ }
+
+ /* Verify we do not spill x to the stack. */
+ /* { dg-final { scan-assembler-not "%rsp" } } */