RFA: Fix tree-optimization/55524
- From: Joern Rennecke <joern.rennecke@embecosm.com>
- To: gcc-patches@gcc.gnu.org
- Date: Mon, 08 Apr 2013 11:10:56 -0400
- Subject: RFA: Fix tree-optimization/55524
This is basically the same patch as the one attached to the PR, except that
I have changed the goto loop into a do-while loop with a new comment; that
change made a lot of reformatting necessary.
Bootstrapped & regtested on i686-pc-linux-gnu.
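To make the tradeoff the patch exploits concrete, here is a small
stand-alone C sketch of the algebra involved. The helper names are mine
and purely illustrative (the patch works on GIMPLE FMA_EXPR forms, not on
C calls); only C99 fma () from <math.h> is assumed:

#include <math.h>

/* Without an fms instruction, x*y - z takes a negate plus an fma.  */
double
synth_fms (double x, double y, double z)
{
  return fma (x, y, -z);	/* x*y - z */
}

/* If the subtrahend is itself a product, a single fnma covers the
   whole subtraction: a*b - c*d == -(c*d) + a*b.  */
double
via_fnma (double a, double b, double c, double d)
{
  double t = a * b;		/* the first product stays a plain mult */
  return fma (-c, d, t);	/* models fnma: -(c*d) + t */
}

This is why the first pass defers: if the subtrahend's defining mult is
still to be visited, the fnma form saves the extra negate.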
2013-04-08 Joern Rennecke <joern.rennecke@embecosm.com>
* tree-ssa-math-opts.c (mult_to_fma_pass): New file-static struct.
(convert_mult_to_fma): In the first pass, don't use an fms construct
when we don't have an fms operation, but do have fnma.
(execute_optimize_widening_mul): Add a second pass if
convert_mult_to_fma requests it.
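For reference, the control flow added around the statement walk reduces to
the following at-most-two-pass retry loop. This is a minimal compilable
sketch; process_all () is a hypothetical stand-in for the FOR_EACH_BB walk
in execute_optimize_widening_mul:

#include <stdbool.h>

static struct
{
  bool second_pass;
  bool retry_request;
} mult_to_fma_pass;

/* Hypothetical stand-in for the per-BB statement walk; it sets
   retry_request when the first pass skips an fms opportunity in the
   hope of a later, cheaper fnma match.  */
extern void process_all (void);

void
run_passes (void)
{
  mult_to_fma_pass.retry_request = false;
  do
    {
      mult_to_fma_pass.second_pass = mult_to_fma_pass.retry_request;
      process_all ();
    }
  while (!mult_to_fma_pass.second_pass && mult_to_fma_pass.retry_request);
}

The exit condition guarantees at most one retry: the second iteration runs
with second_pass set, so the deferring branch in convert_mult_to_fma is
disabled and the loop terminates.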
Index: gcc/tree-ssa-math-opts.c
===================================================================
--- gcc/tree-ssa-math-opts.c (revision 197578)
+++ gcc/tree-ssa-math-opts.c (working copy)
@@ -2461,6 +2461,12 @@ convert_plusminus_to_widen (gimple_stmt_
return true;
}
+static struct
+{
+ bool second_pass;
+ bool retry_request;
+} mult_to_fma_pass;
+
/* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
with uses in additions and subtractions to form fused multiply-add
operations. Returns true if successful and MUL_STMT should be removed. */
@@ -2570,6 +2576,22 @@ convert_mult_to_fma (gimple mul_stmt, tr
return false;
}
+ /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
+ by a MULT_EXPR that we'll visit later, we might be able to
+ get a more profitable match with fnma.
+ OTOH, if we don't, a negate / fma pair likely has lower latency
+ than a mult / subtract pair. */
+ if (use_code == MINUS_EXPR && !negate_p
+ && gimple_assign_rhs1 (use_stmt) == result
+ && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
+ && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing
+ && mult_to_fma_pass.second_pass == false)
+ {
+ /* ??? Could make setting of retry_request dependent on some
+ rtx_cost measure we evaluate beforehand. */
+ mult_to_fma_pass.retry_request = true;
+ return false;
+ }
/* We can't handle a * b + a * b. */
if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
return false;
@@ -2657,76 +2679,89 @@ execute_optimize_widening_mul (void)
memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
- FOR_EACH_BB (bb)
- {
- gimple_stmt_iterator gsi;
- for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
- {
- gimple stmt = gsi_stmt (gsi);
- enum tree_code code;
+ /* We may run one or two passes. In the first pass, if we have fnma
+ but not fms, we don't synthesize fms so that we can get the maximum
+ number of matches for fnma. If we have thereby skipped opportunities to
+ synthesize fms, we'll run a second pass where we use any such
+ opportunities that still remain. */
+ mult_to_fma_pass.retry_request = false;
+ do
+ {
+ mult_to_fma_pass.second_pass = mult_to_fma_pass.retry_request;
+ FOR_EACH_BB (bb)
+ {
+ gimple_stmt_iterator gsi;
- if (is_gimple_assign (stmt))
+ for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
{
- code = gimple_assign_rhs_code (stmt);
- switch (code)
+ gimple stmt = gsi_stmt (gsi);
+ enum tree_code code;
+
+ if (is_gimple_assign (stmt))
{
- case MULT_EXPR:
- if (!convert_mult_to_widen (stmt, &gsi)
- && convert_mult_to_fma (stmt,
- gimple_assign_rhs1 (stmt),
- gimple_assign_rhs2 (stmt)))
+ code = gimple_assign_rhs_code (stmt);
+ switch (code)
{
- gsi_remove (&gsi, true);
- release_defs (stmt);
- continue;
- }
- break;
-
- case PLUS_EXPR:
- case MINUS_EXPR:
- convert_plusminus_to_widen (&gsi, stmt, code);
- break;
+ case MULT_EXPR:
+ if (!convert_mult_to_widen (stmt, &gsi)
+ && convert_mult_to_fma (stmt,
+ gimple_assign_rhs1 (stmt),
+ gimple_assign_rhs2 (stmt)))
+ {
+ gsi_remove (&gsi, true);
+ release_defs (stmt);
+ continue;
+ }
+ break;
+
+ case PLUS_EXPR:
+ case MINUS_EXPR:
+ convert_plusminus_to_widen (&gsi, stmt, code);
+ break;
- default:;
+ default:;
+ }
}
- }
- else if (is_gimple_call (stmt)
- && gimple_call_lhs (stmt))
- {
- tree fndecl = gimple_call_fndecl (stmt);
- if (fndecl
- && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
+ else if (is_gimple_call (stmt)
+ && gimple_call_lhs (stmt))
{
- switch (DECL_FUNCTION_CODE (fndecl))
+ tree fndecl = gimple_call_fndecl (stmt);
+ if (fndecl
+ && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
{
- case BUILT_IN_POWF:
- case BUILT_IN_POW:
- case BUILT_IN_POWL:
- if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
- && REAL_VALUES_EQUAL
- (TREE_REAL_CST (gimple_call_arg (stmt, 1)),
- dconst2)
- && convert_mult_to_fma (stmt,
- gimple_call_arg (stmt, 0),
- gimple_call_arg (stmt, 0)))
- {
- unlink_stmt_vdef (stmt);
- if (gsi_remove (&gsi, true)
- && gimple_purge_dead_eh_edges (bb))
- cfg_changed = true;
- release_defs (stmt);
- continue;
- }
+ switch (DECL_FUNCTION_CODE (fndecl))
+ {
+ case BUILT_IN_POWF:
+ case BUILT_IN_POW:
+ case BUILT_IN_POWL:
+ if ((TREE_CODE (gimple_call_arg (stmt, 1))
+ == REAL_CST)
+ && (REAL_VALUES_EQUAL
+ (TREE_REAL_CST (gimple_call_arg (stmt, 1)),
+ dconst2))
+ && (convert_mult_to_fma
+ (stmt, gimple_call_arg (stmt, 0),
+ gimple_call_arg (stmt, 0))))
+ {
+ unlink_stmt_vdef (stmt);
+ if (gsi_remove (&gsi, true)
+ && gimple_purge_dead_eh_edges (bb))
+ cfg_changed = true;
+ release_defs (stmt);
+ continue;
+ }
break;
- default:;
+ default:;
+ }
}
}
+ gsi_next (&gsi);
}
- gsi_next (&gsi);
}
}
+ while (!mult_to_fma_pass.second_pass && mult_to_fma_pass.retry_request);
statistics_counter_event (cfun, "widening multiplications inserted",
widen_mul_stats.widen_mults_inserted);