This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: RFA: Fix tree-optimization/55524
- From: Richard Biener <richard dot guenther at gmail dot com>
- To: Joern Rennecke <joern dot rennecke at embecosm dot com>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Tue, 9 Apr 2013 10:47:20 +0200
- Subject: Re: RFA: Fix tree-optimization/55524
- References: <20130408111056 dot gvvtnyc8w04cww8g-nzlynne at webmail dot spamcop dot net>
On Mon, Apr 8, 2013 at 5:10 PM, Joern Rennecke
<joern.rennecke@embecosm.com> wrote:
> This is basically the same patch as attached to the PR, except that I
> have changed the goto-loop into a do-while loop with a new comment;
> this caused the need for a lot of reformatting.
Can you please include a testcase that shows the effect of this?
> bootstrapped & regtested on i686-pc-linux-gnu.
>
> 2013-04-08 Joern Rennecke <joern.rennecke@embecosm.com>
>
> * tree-ssa-math-opts.c (mult_to_fma_pass): New file static struct.
> (convert_mult_to_fma): In first pass, don't use an fms construct
> when we don't have an fms operation, but fmna.
it's fnma I believe.
> (execute_optimize_widening_mul): Add a second pass if
> convert_mult_to_fma requests it.
>
> Index: gcc/tree-ssa-math-opts.c
> ===================================================================
> --- gcc/tree-ssa-math-opts.c (revision 197578)
> +++ gcc/tree-ssa-math-opts.c (working copy)
> @@ -2461,6 +2461,12 @@ convert_plusminus_to_widen (gimple_stmt_
> return true;
> }
>
> +static struct
> +{
> + bool second_pass;
> + bool retry_request;
> +} mult_to_fma_pass;
> +
> /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
> with uses in additions and subtractions to form fused multiply-add
> operations. Returns true if successful and MUL_STMT should be removed.
> */
> @@ -2570,6 +2576,22 @@ convert_mult_to_fma (gimple mul_stmt, tr
> return false;
> }
>
> + /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
> + by a MULT_EXPR that we'll visit later, we might be able to
> + get a more profitable match with fnma.
> + OTOH, if we don't, a negate / fma pair has likely lower latency
> + than a mult / subtract pair. */
This makes it sound as if this is purely an ordering issue, thus for
x = a * b;
y = c * d;
z = x - y;
instead of
y = c * d;
z = a * b + (-y);
you want to generate
x = a * b;
z = -(c * d + (-x));
I fail to see why you need two passes for this rather than considering the
case that the immediate use stmt of the multiplication we start from
combines another multiplication with a MINUS_EXPR. That is ...
> + if (use_code == MINUS_EXPR && !negate_p
> + && gimple_assign_rhs1 (use_stmt) == result
> + && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
> + && optab_handler (fnma_optab, TYPE_MODE (type)) !=
> CODE_FOR_nothing
> + && mult_to_fma_pass.second_pass == false)
see if that is the case here and simply not do the transform. A second pass
will not recover from that without destroying the fnma pattern (testcase?)
Richard.
> + {
> + /* ??? Could make setting of retry_request dependent on some
> + rtx_cost measure we evaluate beforehand. */
> + mult_to_fma_pass.retry_request = true;
> + return false;
> + }
> /* We can't handle a * b + a * b. */
> if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
> return false;
> @@ -2657,76 +2679,89 @@ execute_optimize_widening_mul (void)
>
> memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
>
> - FOR_EACH_BB (bb)
> - {
> - gimple_stmt_iterator gsi;
>
> - for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
> - {
> - gimple stmt = gsi_stmt (gsi);
> - enum tree_code code;
> + /* We may run one or two passes. In the first pass, if we have fnma,
> + but not fms, we don't synthesize fms so that we can get the maximum
> + matches for fnma. If we have therefore skipped opportunities to
> + synthesize fms, we'll run a second pass where we use any such
> + opportunities that still remain. */
> + mult_to_fma_pass.retry_request = false;
> + do
> + {
> + mult_to_fma_pass.second_pass = mult_to_fma_pass.retry_request;
> + FOR_EACH_BB (bb)
> + {
> + gimple_stmt_iterator gsi;
>
> - if (is_gimple_assign (stmt))
> + for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
> {
> - code = gimple_assign_rhs_code (stmt);
> - switch (code)
> + gimple stmt = gsi_stmt (gsi);
> + enum tree_code code;
> +
> + if (is_gimple_assign (stmt))
> {
> - case MULT_EXPR:
> - if (!convert_mult_to_widen (stmt, &gsi)
> - && convert_mult_to_fma (stmt,
> - gimple_assign_rhs1 (stmt),
> - gimple_assign_rhs2 (stmt)))
> + code = gimple_assign_rhs_code (stmt);
> + switch (code)
> {
> - gsi_remove (&gsi, true);
> - release_defs (stmt);
> - continue;
> - }
> - break;
> -
> - case PLUS_EXPR:
> - case MINUS_EXPR:
> - convert_plusminus_to_widen (&gsi, stmt, code);
> - break;
> + case MULT_EXPR:
> + if (!convert_mult_to_widen (stmt, &gsi)
> + && convert_mult_to_fma (stmt,
> + gimple_assign_rhs1 (stmt),
> + gimple_assign_rhs2
> (stmt)))
> + {
> + gsi_remove (&gsi, true);
> + release_defs (stmt);
> + continue;
> + }
> + break;
> +
> + case PLUS_EXPR:
> + case MINUS_EXPR:
> + convert_plusminus_to_widen (&gsi, stmt, code);
> + break;
>
> - default:;
> + default:;
> + }
> }
> - }
> - else if (is_gimple_call (stmt)
> - && gimple_call_lhs (stmt))
> - {
> - tree fndecl = gimple_call_fndecl (stmt);
> - if (fndecl
> - && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
> + else if (is_gimple_call (stmt)
> + && gimple_call_lhs (stmt))
> {
> - switch (DECL_FUNCTION_CODE (fndecl))
> + tree fndecl = gimple_call_fndecl (stmt);
> + if (fndecl
> + && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
> {
> - case BUILT_IN_POWF:
> - case BUILT_IN_POW:
> - case BUILT_IN_POWL:
> - if (TREE_CODE (gimple_call_arg (stmt, 1)) ==
> REAL_CST
> - && REAL_VALUES_EQUAL
> - (TREE_REAL_CST (gimple_call_arg (stmt, 1)),
> - dconst2)
> - && convert_mult_to_fma (stmt,
> - gimple_call_arg (stmt,
> 0),
> - gimple_call_arg (stmt,
> 0)))
> - {
> - unlink_stmt_vdef (stmt);
> - if (gsi_remove (&gsi, true)
> - && gimple_purge_dead_eh_edges (bb))
> - cfg_changed = true;
> - release_defs (stmt);
> - continue;
> - }
> + switch (DECL_FUNCTION_CODE (fndecl))
> + {
> + case BUILT_IN_POWF:
> + case BUILT_IN_POW:
> + case BUILT_IN_POWL:
> + if ((TREE_CODE (gimple_call_arg (stmt, 1))
> + == REAL_CST)
> + && (REAL_VALUES_EQUAL
> + (TREE_REAL_CST (gimple_call_arg (stmt,
> 1)),
> + dconst2))
> + && (convert_mult_to_fma
> + (stmt, gimple_call_arg (stmt, 0),
> + gimple_call_arg (stmt, 0))))
> + {
> + unlink_stmt_vdef (stmt);
> + if (gsi_remove (&gsi, true)
> + && gimple_purge_dead_eh_edges (bb))
> + cfg_changed = true;
> + release_defs (stmt);
> + continue;
> + }
> break;
>
> - default:;
> + default:;
> + }
> }
> }
> + gsi_next (&gsi);
> }
> - gsi_next (&gsi);
> }
> }
> + while (!mult_to_fma_pass.second_pass && mult_to_fma_pass.retry_request);
>
> statistics_counter_event (cfun, "widening multiplications inserted",
> widen_mul_stats.widen_mults_inserted);
>