This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] widening_mul: Do cost check when propagating mult into plus/minus expressions
- From: "Andreas Krebbel" <krebbel at linux dot vnet dot ibm dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 14 Jul 2011 15:09:36 +0200
- Subject: Re: [PATCH] widening_mul: Do cost check when propagating mult into plus/minus expressions
- References: <20110713131305.GA5348@bart> <4E1DCEA0.5080007@redhat.com>
On Wed, Jul 13, 2011 at 09:58:08AM -0700, Richard Henderson wrote:
> Why the force_operand? You've got register inputs. Either the target
> is going to support the operation or it isn't.
I agree that it doesn't seem to be necessary. I had used force_operand
only because ivopts (add_cost) does it, without seeing a clear reason
for it. So I've removed it now.
> Saving cost data dependent on speed, which is non-constant.
> You probably need to make this a two dimensional array.
Fixed.
Here is an updated version.
Bye,
-Andreas-
2011-07-14 Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
* tree-ssa-math-opts.c (compute_costs): New function.
(convert_mult_to_fma): Take costs into account when propagating
multiplications into several additions.
* config/s390/s390.c (z196_cost): Adjust costs for madbr and
maebr.
Index: gcc/tree-ssa-math-opts.c
===================================================================
*** gcc/tree-ssa-math-opts.c.orig
--- gcc/tree-ssa-math-opts.c
*************** convert_plusminus_to_widen (gimple_stmt_
*** 2185,2190 ****
--- 2185,2236 ----
return true;
}
+ /* Return the cost that rtx_cost reports for an rtx with code CODE in
+    MODE.  The operands are raw registers numbered just above
+    LAST_VIRTUAL_REGISTER; GET_RTX_LENGTH (CODE) must be 2 or 3.  SPEED
+    is passed through to rtx_cost.  The result is never zero, so a
+    zero entry in a cost cache can mean "not computed yet".  */
+
+ static unsigned
+ compute_costs (enum machine_mode mode, enum rtx_code code, bool speed)
+ {
+   rtx insn;
+   unsigned cost;
+
+   /* Build a throw-away expression of the requested shape.  */
+   switch (GET_RTX_LENGTH (code))
+     {
+     case 2:
+       insn = gen_rtx_fmt_ee (code, mode,
+                              gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
+                              gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2));
+       break;
+     case 3:
+       insn = gen_rtx_fmt_eee (code, mode,
+                               gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
+                               gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2),
+                               gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 3));
+       break;
+     default:
+       gcc_unreachable ();
+     }
+
+   if (dump_file && (dump_flags & TDF_DETAILS))
+     {
+       fprintf (dump_file, "Calculating costs of %s in %s mode. RTX is:\n",
+                GET_RTX_NAME (code), GET_MODE_NAME (mode));
+       print_rtl (dump_file, insn);
+     }
+
+   cost = rtx_cost (insn, SET, speed);
+
+   /* If the backend returns a cost of zero it is most certainly lying.
+      Set this to one in order to notice that we already calculated it
+      once.  */
+   cost = cost ? cost : 1;
+
+   /* COST is unsigned, so print it with %u rather than %d.  */
+   if (dump_file && (dump_flags & TDF_DETAILS))
+     fprintf (dump_file, "\n%s in %s costs %u\n\n",
+              GET_RTX_NAME (code), GET_MODE_NAME (mode), cost);
+
+   return cost;
+ }
+
/* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
with uses in additions and subtractions to form fused multiply-add
operations. Returns true if successful and MUL_STMT should be removed. */
*************** convert_mult_to_fma (gimple mul_stmt, tr
*** 2197,2202 ****
--- 2243,2254 ----
gimple use_stmt, neguse_stmt, fma_stmt;
use_operand_p use_p;
imm_use_iterator imm_iter;
+ enum machine_mode mode;
+ int uses = 0;
+ bool speed = optimize_bb_for_speed_p (gimple_bb (mul_stmt));
+ static unsigned mul_cost[2][NUM_MACHINE_MODES];
+ static unsigned add_cost[2][NUM_MACHINE_MODES];
+ static unsigned fma_cost[2][NUM_MACHINE_MODES];
if (FLOAT_TYPE_P (type)
&& flag_fp_contract_mode == FP_CONTRACT_OFF)
*************** convert_mult_to_fma (gimple mul_stmt, tr
*** 2213,2222 ****
if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
return false;
/* Make sure that the multiplication statement becomes dead after
! the transformation, thus that all uses are transformed to FMAs.
! This means we assume that an FMA operation has the same cost
! as an addition. */
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
{
enum tree_code use_code;
--- 2265,2281 ----
if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
return false;
+ mode = TYPE_MODE (type);
+
+ if (!fma_cost[speed][mode])
+ {
+ fma_cost[speed][mode] = compute_costs (mode, FMA, speed);
+ add_cost[speed][mode] = compute_costs (mode, PLUS, speed);
+ mul_cost[speed][mode] = compute_costs (mode, MULT, speed);
+ }
+
/* Make sure that the multiplication statement becomes dead after
! the transformation, thus that all uses are transformed to FMAs. */
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
{
enum tree_code use_code;
*************** convert_mult_to_fma (gimple mul_stmt, tr
*** 2292,2297 ****
--- 2351,2357 ----
if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
return false;
+ uses++;
/* While it is possible to validate whether or not the exact form
that we've recognized is available in the backend, the assumption
is that the transformation is never a loss. For instance, suppose
*************** convert_mult_to_fma (gimple mul_stmt, tr
*** 2302,2307 ****
--- 2362,2374 ----
independant and could be run in parallel. */
}
+ /* Calculate the costs of moving the multiplication into all the
+ minus/plus expressions. */
+
+ if (uses * fma_cost[speed][mode] >
+ uses * add_cost[speed][mode] + mul_cost[speed][mode])
+ return false;
+
FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
{
gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
Index: gcc/config/s390/s390.c
===================================================================
*** gcc/config/s390/s390.c.orig
--- gcc/config/s390/s390.c
*************** struct processor_costs z196_cost =
*** 242,249 ****
COSTS_N_INSNS (100), /* SQXBR B+100 */
COSTS_N_INSNS (42), /* SQDBR B+42 */
COSTS_N_INSNS (28), /* SQEBR B+28 */
! COSTS_N_INSNS (1), /* MADBR B */
! COSTS_N_INSNS (1), /* MAEBR B */
COSTS_N_INSNS (101), /* DXBR B+101 */
COSTS_N_INSNS (29), /* DDBR */
COSTS_N_INSNS (22), /* DEBR */
--- 242,250 ----
COSTS_N_INSNS (100), /* SQXBR B+100 */
COSTS_N_INSNS (42), /* SQDBR B+42 */
COSTS_N_INSNS (28), /* SQEBR B+28 */
! /* Cheaper than a mul+add but more expensive than a single mul/add. */
! COSTS_N_INSNS (1) + COSTS_N_INSNS (1) / 2, /* MADBR B */
! COSTS_N_INSNS (1) + COSTS_N_INSNS (1) / 2, /* MAEBR B */
COSTS_N_INSNS (101), /* DXBR B+101 */
COSTS_N_INSNS (29), /* DDBR */
COSTS_N_INSNS (22), /* DEBR */