This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] Fix PR46556 (poor address generation)
On Sat, 8 Oct 2011, William J. Schmidt wrote:
> Greetings,
>
> Here are the revised changes for the tree portions of the patch. I've
> attempted to resolve all comments to date on those portions. Per
> Steven's comment, I moved copy_ref_info into tree-ssa-address.c; let me
> know if there's a better place, or whether you'd prefer to leave it
> where it was.
>
> I looked into changing the second reassoc pass to use a different
> pass_late_reassoc entry, but this impacted the test suite. There are
> about 20 tests that rely on -fdump-tree-reassoc being associated with
> two dump files named reassoc1 and reassoc2. Rather than change all
> these test cases for a temporary solution, I chose to use the deprecated
> first_pass_instance boolean to distinguish between the two passes. I
> marked this as a Bad Thing and it will be removed once I have time to
> work on the straight-line strength reducer.
>
> I looked into adding a test case with a negative offset, but was unable
> to come up with a construct that would have a negative offset on the
> base MEM_REF and still be recognized by this particular pattern matcher.
> In any case, the use of double_ints throughout should remove that
> concern.
Comments below.
> Thanks,
> Bill
>
>
> 2011-10-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
>
> gcc:
>
> PR rtl-optimization/46556
> * tree.h (copy_ref_info): Expose existing function.
> * tree-ssa-loop-ivopts.c (copy_ref_info): Move this function to...
> * tree-ssa-address.c (copy_ref_info): ...here, and remove static token.
> * tree-ssa-reassoc.c (restructure_base_and_offset): New function.
> (restructure_mem_ref): Likewise.
> (reassociate_bb): Look for opportunities to call restructure_mem_ref.
>
> gcc/testsuite:
>
> PR rtl-optimization/46556
> * gcc.dg/tree-ssa/pr46556-1.c: New testcase.
> * gcc.dg/tree-ssa/pr46556-2.c: Likewise.
> * gcc.dg/tree-ssa/pr46556-3.c: Likewise.
>
>
> Index: gcc/tree.h
> ===================================================================
> --- gcc/tree.h (revision 179708)
> +++ gcc/tree.h (working copy)
> @@ -5777,6 +5777,7 @@ tree target_for_debug_bind (tree);
> /* In tree-ssa-address.c. */
> extern tree tree_mem_ref_addr (tree, tree);
> extern void copy_mem_ref_info (tree, tree);
> +extern void copy_ref_info (tree, tree);
>
> /* In tree-vrp.c */
> extern bool ssa_name_nonnegative_p (const_tree);
> Index: gcc/testsuite/gcc.dg/tree-ssa/pr46556-1.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/pr46556-1.c (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr46556-1.c (revision 0)
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-dom2" } */
> +
> +struct x
> +{
> + int a[16];
> + int b[16];
> + int c[16];
> +};
> +
> +extern void foo (int, int, int);
> +
> +void
> +f (struct x *p, unsigned int n)
> +{
> + foo (p->a[n], p->c[n], p->b[n]);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\* 4;" 1 "dom2" } } */
> +/* { dg-final { scan-tree-dump-times "p_1\\(D\\) \\+ D" 1 "dom2" } } */
> +/* { dg-final { scan-tree-dump-times "MEM\\\[\\(struct x \\*\\)D" 2 "dom2" } } */
> +/* { dg-final { cleanup-tree-dump "dom2" } } */
> Index: gcc/testsuite/gcc.dg/tree-ssa/pr46556-2.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/pr46556-2.c (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr46556-2.c (revision 0)
> @@ -0,0 +1,26 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-dom2" } */
> +
> +struct x
> +{
> + int a[16];
> + int b[16];
> + int c[16];
> +};
> +
> +extern void foo (int, int, int);
> +
> +void
> +f (struct x *p, unsigned int n)
> +{
> + foo (p->a[n], p->c[n], p->b[n]);
> + if (n > 12)
> + foo (p->a[n], p->c[n], p->b[n]);
> + else if (n > 3)
> + foo (p->b[n], p->a[n], p->c[n]);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\* 4;" 1 "dom2" } } */
> +/* { dg-final { scan-tree-dump-times "p_1\\(D\\) \\+ D" 1 "dom2" } } */
> +/* { dg-final { scan-tree-dump-times "MEM\\\[\\(struct x \\*\\)D" 6 "dom2" } } */
> +/* { dg-final { cleanup-tree-dump "dom2" } } */
> Index: gcc/testsuite/gcc.dg/tree-ssa/pr46556-3.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/pr46556-3.c (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr46556-3.c (revision 0)
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-dom2" } */
> +
> +struct x
> +{
> + int a[16];
> + int b[16];
> + int c[16];
> +};
> +
> +extern void foo (int, int, int);
> +
> +void
> +f (struct x *p, unsigned int n)
> +{
> + foo (p->a[n], p->c[n], p->b[n]);
> + if (n > 3)
> + {
> + foo (p->a[n], p->c[n], p->b[n]);
> + if (n > 12)
> + foo (p->b[n], p->a[n], p->c[n]);
> + }
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\* 4;" 1 "dom2" } } */
> +/* { dg-final { scan-tree-dump-times "p_1\\(D\\) \\+ D" 1 "dom2" } } */
> +/* { dg-final { scan-tree-dump-times "MEM\\\[\\(struct x \\*\\)D" 6 "dom2" } } */
> +/* { dg-final { cleanup-tree-dump "dom2" } } */
> Index: gcc/tree-ssa-loop-ivopts.c
> ===================================================================
> --- gcc/tree-ssa-loop-ivopts.c (revision 179708)
> +++ gcc/tree-ssa-loop-ivopts.c (working copy)
> @@ -6278,64 +6278,6 @@ rewrite_use_nonlinear_expr (struct ivopts_data *da
> }
> }
>
> -/* Copies the reference information from OLD_REF to NEW_REF. */
> -
> -static void
> -copy_ref_info (tree new_ref, tree old_ref)
> -{
> - tree new_ptr_base = NULL_TREE;
> -
> - TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
> - TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
> -
> - new_ptr_base = TREE_OPERAND (new_ref, 0);
> -
> - /* We can transfer points-to information from an old pointer
> - or decl base to the new one. */
> - if (new_ptr_base
> - && TREE_CODE (new_ptr_base) == SSA_NAME
> - && !SSA_NAME_PTR_INFO (new_ptr_base))
> - {
> - tree base = get_base_address (old_ref);
> - if (!base)
> - ;
> - else if ((TREE_CODE (base) == MEM_REF
> - || TREE_CODE (base) == TARGET_MEM_REF)
> - && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
> - && SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)))
> - {
> - struct ptr_info_def *new_pi;
> - duplicate_ssa_name_ptr_info
> - (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
> - new_pi = SSA_NAME_PTR_INFO (new_ptr_base);
> - /* We have to be careful about transfering alignment information. */
> - if (TREE_CODE (old_ref) == MEM_REF
> - && !(TREE_CODE (new_ref) == TARGET_MEM_REF
> - && (TMR_INDEX2 (new_ref)
> - || (TMR_STEP (new_ref)
> - && (TREE_INT_CST_LOW (TMR_STEP (new_ref))
> - < new_pi->align)))))
> - {
> - new_pi->misalign += double_int_sub (mem_ref_offset (old_ref),
> - mem_ref_offset (new_ref)).low;
> - new_pi->misalign &= (new_pi->align - 1);
> - }
> - else
> - {
> - new_pi->align = 1;
> - new_pi->misalign = 0;
> - }
> - }
> - else if (TREE_CODE (base) == VAR_DECL
> - || TREE_CODE (base) == PARM_DECL
> - || TREE_CODE (base) == RESULT_DECL)
> - {
> - struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
> - pt_solution_set_var (&pi->pt, base);
> - }
> - }
> -}
> -
> /* Performs a peephole optimization to reorder the iv update statement with
> a mem ref to enable instruction combining in later phases. The mem ref uses
> the iv value before the update, so the reordering transformation requires
> Index: gcc/tree-ssa-address.c
> ===================================================================
> --- gcc/tree-ssa-address.c (revision 179708)
> +++ gcc/tree-ssa-address.c (working copy)
> @@ -832,6 +832,64 @@ copy_mem_ref_info (tree to, tree from)
> TREE_THIS_VOLATILE (to) = TREE_THIS_VOLATILE (from);
> }
>
> +/* Copies the reference information from OLD_REF to NEW_REF. */
Please add something like "NEW_REF is supposed to be either a MEM_REF
or a TARGET_MEM_REF.". You can check in the patch moving this
function separately.
> +
> +void
> +copy_ref_info (tree new_ref, tree old_ref)
> +{
> + tree new_ptr_base = NULL_TREE;
> +
> + TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
> + TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
this function fails to transfer TREE_THIS_NOTRAP, which is supposed
to be set on the base of old_ref or any contained ARRAY[_RANGE]_REF.
If you make the function generic, please adjust it to at least do ...
> + new_ptr_base = TREE_OPERAND (new_ref, 0);
> +
> + /* We can transfer points-to information from an old pointer
> + or decl base to the new one. */
> + if (new_ptr_base
> + && TREE_CODE (new_ptr_base) == SSA_NAME
> + && !SSA_NAME_PTR_INFO (new_ptr_base))
> + {
> + tree base = get_base_address (old_ref);
> + if (!base)
> + ;
> + else if ((TREE_CODE (base) == MEM_REF
> + || TREE_CODE (base) == TARGET_MEM_REF)
> + && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
> + && SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)))
> + {
> + struct ptr_info_def *new_pi;
> + duplicate_ssa_name_ptr_info
> + (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
> + new_pi = SSA_NAME_PTR_INFO (new_ptr_base);
> + /* We have to be careful about transfering alignment information. */
> + if (TREE_CODE (old_ref) == MEM_REF
> + && !(TREE_CODE (new_ref) == TARGET_MEM_REF
> + && (TMR_INDEX2 (new_ref)
> + || (TMR_STEP (new_ref)
> + && (TREE_INT_CST_LOW (TMR_STEP (new_ref))
> + < new_pi->align)))))
> + {
> + new_pi->misalign += double_int_sub (mem_ref_offset (old_ref),
> + mem_ref_offset (new_ref)).low;
> + new_pi->misalign &= (new_pi->align - 1);
> + }
> + else
> + {
> + new_pi->align = 1;
> + new_pi->misalign = 0;
> + }
...
TREE_THIS_NOTRAP (new_ref) = TREE_THIS_NOTRAP (base);
> + }
> + else if (TREE_CODE (base) == VAR_DECL
> + || TREE_CODE (base) == PARM_DECL
> + || TREE_CODE (base) == RESULT_DECL)
> + {
> + struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
> + pt_solution_set_var (&pi->pt, base);
> + }
> + }
> +}
> +
> /* Move constants in target_mem_ref REF to offset. Returns the new target
> mem ref if anything changes, NULL_TREE otherwise. */
>
> Index: gcc/tree-ssa-reassoc.c
> ===================================================================
> --- gcc/tree-ssa-reassoc.c (revision 179708)
> +++ gcc/tree-ssa-reassoc.c (working copy)
> @@ -2815,6 +2815,121 @@ break_up_subtract_bb (basic_block bb)
> break_up_subtract_bb (son);
> }
>
> +/* Given BASE and OFFSET derived from *EXPR occurring in stmt *GSI,
> + determine whether there is a profitable opportunity to restructure
> + address arithmetic within BASE and OFFSET. If so, produce such
> + a restructuring and return it. */
> +
> +static tree
> +restructure_base_and_offset (tree *expr, gimple_stmt_iterator *gsi,
You are not changing *expr, so don't pass it as reference.
> + tree base, tree offset, HOST_WIDE_INT bitpos)
> +{
> + tree mult_op0, mult_op1, t1, t2, offset_type, mult_expr, add_expr, mem_ref;
> + double_int c, c1, c2, c3, c4;
> +
> + /* Look for the following pattern:
> +
> + base = MEM_REF (T1, C1);
> + offset = MULT_EXPR (PLUS_EXPR (T2, C2), C3)
> + bitpos = C4 * BITS_PER_UNIT
> +
> + If found, convert it to:
> +
> + MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
> + C1 + (C2 * C3) + C4)
> + */
> + if (!base
> + || !offset
> + || TREE_CODE (base) != MEM_REF
> + || TREE_CODE (TREE_OPERAND (base, 1)) != INTEGER_CST
operand 1 of a MEM_REF is always an INTEGER_CST.
> + || TREE_CODE (offset) != MULT_EXPR)
> + return NULL_TREE;
> +
> + mult_op0 = TREE_OPERAND (offset, 0);
> + mult_op1 = TREE_OPERAND (offset, 1);
> +
> + if (TREE_CODE (mult_op0) != PLUS_EXPR
> + || TREE_CODE (mult_op1) != INTEGER_CST
> + || TREE_CODE (TREE_OPERAND (mult_op0, 1)) != INTEGER_CST)
> + return NULL_TREE;
> +
> + t1 = TREE_OPERAND (base, 0);
> + t2 = TREE_OPERAND (mult_op0, 0);
> + c1 = TREE_INT_CST (TREE_OPERAND (base, 1));
mem_ref_offset (base)
> + c2 = TREE_INT_CST (TREE_OPERAND (mult_op0, 1));
> + c3 = TREE_INT_CST (mult_op1);
tree_to_double_int ()
> + c4 = uhwi_to_double_int (bitpos / BITS_PER_UNIT);
You don't verify that bitpos % BITS_PER_UNIT is zero anywhere.
> + c = double_int_add (double_int_add (c1, double_int_mul (c2, c3)), c4);
> +
> + if (!double_int_fits_in_shwi_p (c)
> + || !double_int_fits_in_shwi_p (c3))
> + return NULL_TREE;
I think those tests are not necessary if you use ...
> + offset_type = TREE_TYPE (TREE_OPERAND (base, 1));
> +
> + mult_expr = fold_build2 (MULT_EXPR, sizetype, t2,
> + build_int_cst (sizetype, double_int_to_shwi (c3)));
double_int_to_tree (sizetype, c3)
> + mult_expr = force_gimple_operand_gsi (gsi, mult_expr, true, NULL,
> + true, GSI_SAME_STMT);
> + add_expr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (t1), t1, mult_expr);
> + add_expr = force_gimple_operand_gsi (gsi, add_expr, true, NULL,
> + true, GSI_SAME_STMT);
> + mem_ref = fold_build2 (MEM_REF, TREE_TYPE (*expr), add_expr,
> + build_int_cst (offset_type, double_int_to_shwi (c)));
double_int_to_tree (offset_type, c)
Please delay gimplification to the caller; that way this function
solely operates on the trees returned from get_inner_reference.
Or are you concerned that fold might undo your association?
> + return mem_ref;
> +}
> +
> +/* If *EXPR contains a memory reference, determine whether there is an
> + opportunity to restructure the base and offset expressions for
> + improved performance. */
> +
> +static bool
> +restructure_mem_ref (tree *expr, gimple_stmt_iterator *gsi)
> +{
> + enum tree_code code = TREE_CODE (*expr);
> + tree base, offset, mem_ref;
> + HOST_WIDE_INT bitsize, bitpos;
> + enum machine_mode mode;
> + int unsignedp, volatilep;
> +
> + /* Only expressions that reference memory are of interest. Bitfield
> + references should eventually be lowered prior to this point and
> + are not dealt with. Skip BLKmode aggregates as well. */
> + if (! handled_component_p (*expr)
> + || code == BIT_FIELD_REF
> + || (code == COMPONENT_REF && DECL_BIT_FIELD (TREE_OPERAND (*expr, 1)))
> + || TYPE_MODE (TREE_TYPE (*expr)) == BLKmode)
> + return false;
> +
> + /* Determine whether the expression can be represented with base and
> + offset components. */
> + base = get_inner_reference (*expr, &bitsize, &bitpos, &offset, &mode,
> + &unsignedp, &volatilep, false);
> + if (!base || !offset)
> + return false;
> +
> + /* Look for a restructuring opportunity. */
> + if ((mem_ref = restructure_base_and_offset (expr, gsi, base,
> + offset, bitpos)) == NULL_TREE)
> + return false;
> +
> + /* Found one. Record the replacement in the dump file and complete
> + the replacement. */
> + if (dump_file && (dump_flags & TDF_DETAILS))
> + {
> + fprintf (dump_file, "\nOriginal_expr = ");
> + print_generic_expr (dump_file, *expr, TDF_VOPS|TDF_MEMSYMS);
> + fprintf (dump_file, "\nmem_ref = ");
> + print_generic_expr (dump_file, mem_ref, TDF_VOPS|TDF_MEMSYMS);
> + fprintf (dump_file, "\n\n");
> + }
Thus gimplify and add the statements here.
> + copy_ref_info (mem_ref, *expr);
> + *expr = mem_ref;
> +
> + return true;
> +}
> +
> /* Reassociate expressions in basic block BB and its post-dominator as
> children. */
>
> @@ -2823,14 +2938,43 @@ reassociate_bb (basic_block bb)
> {
> gimple_stmt_iterator gsi;
> basic_block son;
> + bool in_loop = loop_depth (bb->loop_father) != 0;
>
> for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
> {
> gimple stmt = gsi_stmt (gsi);
>
> - if (is_gimple_assign (stmt)
> - && !stmt_could_throw_p (stmt))
> + /* During late reassociation only, look for restructuring
> + opportunities within an expression that references memory.
> + We only do this for blocks not contained in loops, since the
> + ivopts machinery does a good job on loop expressions, and we
> + don't want to interfere with other loop optimizations. */
I'm not sure I buy this. IVOPTs would have produced [TARGET_]MEM_REFs,
which you don't handle. Did you do any measurements of what happens if
you enable it generally?
> + /* TODO: This belongs more properly in a separate pass that
> + performs general strength reduction on straight-line code.
> + Eventually move this there. */
> + if (!first_pass_instance /* TODO: deprecated */
> + && !in_loop
> + && gimple_vuse (stmt)
> + && gimple_assign_single_p (stmt))
> {
> + tree *lhs, *rhs;
> + if (gimple_vdef (stmt))
> + {
> + lhs = gimple_assign_lhs_ptr (stmt);
> + if (restructure_mem_ref (lhs, &gsi))
> + update_stmt (stmt);
> + }
> + else if (gimple_vuse (stmt))
That will always be true.
> + {
> + rhs = gimple_assign_rhs1_ptr (stmt);
> + if (restructure_mem_ref (rhs, &gsi))
> + update_stmt (stmt);
> + }
> + }
> +
> + else if (is_gimple_assign (stmt)
> + && !stmt_could_throw_p (stmt))
> + {
> tree lhs, rhs1, rhs2;
> enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
You verified that the patch has no performance degradations on ppc64
for SPEC CPU; did you see any improvements?
The pattern matching is still very ad-hoc and doesn't consider
statements that feed the base address. There is conceptually
no difference between p->a[n] and *(p + n * 4). You placed this
lowering in reassoc to catch CSE opportunities with DOM, right?
Does RTL CSE not do its job, or is the transform undone by
fwprop before it gets a chance to do it?
Thanks,
Richard.