This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Index add_cost/neg_cost by machine mode in expmed.c
- From: Roger Sayle <roger at eyesopen dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Sat, 5 Jun 2004 21:41:33 -0600 (MDT)
- Subject: [PATCH] Index add_cost/neg_cost by machine mode in expmed.c
The following patch improves the middle-end's model of target insn
rtx costs, by transforming add_cost, neg_cost, sdiv_pow2_cheap and
smod_pow2_cheap from scalar values into arrays indexed by machine mode.
Currently, RTL expansion implicitly assumes that the costs of addition
and negation are independent of the mode of the operation, and uses the
values for word_mode universally. This patch improves things by using
values appropriate for each mode.
My longer term objective is to teach GCC's RTL expanders that on
some targets, such as the Pentium4, its beneficial to implement
LSHIFT by small constants using additions instead of the architecture's
shift instructions. However, I'd be uncomfortable making that change
with the simplistic cost model currently used by the middle-end.
If this patch is approved, the next step would be to transform
shift_cost (and perhaps shift_addcost and shift_subcost) into two
dimensional arrays, indexed by machine mode and bit count.
This then leads to the question whether its best to continue as at
present, with init_expmed fully initializing these tables at start-up,
or whether we should switch to lazy initialization. The costs are
kept as integer values, hence -1 could be used as a suitable rogue
value to indicate uninitialized.
The other potential enhancement is to tweak "synth_mult" to accept
a machine mode argument, so that it can be used rather than just
"word_mode" when determining the cheapest shift/add sequence. The
patch below currently always uses "word_mode" in synth_mult, which
should preserve the current behaviour/trade-offs.
The following patch has been tested on i686-pc-linux-gnu with a full
"make bootstrap", all default languages, and regression tested with a
top-level "make -k check" with no new failures.
Ok for mainline?
2004-06-05 Roger Sayle <roger@eyesopen.com>
* expmed.c (add_cost, neg_cost, sdiv_pow2_cheap, smod_pow2_cheap):
Make arrays indexed by machine mode. Rename negate_cost to neg_cost.
(init_expmed): Initialize these cost arrays as appropriate.
(store_bit_field, extract_bit_field): Correct whitespace.
(synth_mult, choose_mult_variant, expand_mult, expand_mult_highpart,
expand_mult_highpart_optab, expand_divmod): Update uses of add_cost,
neg_cost, sdiv_pow2_cheap, smod_pow2_cheap to index with mode,
word_mode or compute_mode as appropriate.
Index: expmed.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/expmed.c,v
retrieving revision 1.160
diff -c -3 -p -r1.160 expmed.c
*** expmed.c 30 May 2004 18:32:24 -0000 1.160
--- expmed.c 5 Jun 2004 22:32:25 -0000
*************** static void do_cmp_and_jump (rtx, rtx, e
*** 57,63 ****
Usually, this will mean that the MD file will emit non-branch
sequences. */
! static int sdiv_pow2_cheap, smod_pow2_cheap;
#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
--- 57,64 ----
Usually, this will mean that the MD file will emit non-branch
sequences. */
! static int sdiv_pow2_cheap[NUM_MACHINE_MODES];
! static int smod_pow2_cheap[NUM_MACHINE_MODES];
#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
*************** static int sdiv_pow2_cheap, smod_pow2_ch
*** 90,96 ****
/* Cost of various pieces of RTL. Note that some of these are indexed by
shift count and some by mode. */
! static int add_cost, negate_cost, zero_cost;
static int shift_cost[MAX_BITS_PER_WORD];
static int shiftadd_cost[MAX_BITS_PER_WORD];
static int shiftsub_cost[MAX_BITS_PER_WORD];
--- 91,99 ----
/* Cost of various pieces of RTL. Note that some of these are indexed by
shift count and some by mode. */
! static int zero_cost;
! static int add_cost[NUM_MACHINE_MODES];
! static int neg_cost[NUM_MACHINE_MODES];
static int shift_cost[MAX_BITS_PER_WORD];
static int shiftadd_cost[MAX_BITS_PER_WORD];
static int shiftsub_cost[MAX_BITS_PER_WORD];
*************** init_expmed (void)
*** 114,120 ****
reg = gen_rtx_REG (word_mode, 10000);
zero_cost = rtx_cost (const0_rtx, 0);
- add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);
shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
gen_rtx_ASHIFT (word_mode, reg,
--- 117,122 ----
*************** init_expmed (void)
*** 136,178 ****
init_recog ();
- shift_cost[0] = 0;
- shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
-
- for (m = 1; m < MAX_BITS_PER_WORD; m++)
- {
- rtx c_int = GEN_INT ((HOST_WIDE_INT) 1 << m);
- shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
-
- XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
- if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
- shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
-
- XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1) = c_int;
- if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
- shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
-
- XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1) = c_int;
- if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
- shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
- }
-
- negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);
-
- sdiv_pow2_cheap
- = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)
- <= 2 * add_cost);
- smod_pow2_cheap
- = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)
- <= 2 * add_cost);
for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
mode != VOIDmode;
mode = GET_MODE_WIDER_MODE (mode))
{
reg = gen_rtx_REG (mode, 10000);
div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
wider_mode = GET_MODE_WIDER_MODE (mode);
if (wider_mode != VOIDmode)
{
--- 138,161 ----
init_recog ();
for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
mode != VOIDmode;
mode = GET_MODE_WIDER_MODE (mode))
{
reg = gen_rtx_REG (mode, 10000);
+ add_cost[(int) mode] = rtx_cost (gen_rtx_PLUS (mode, reg, reg), SET);
+ neg_cost[(int) mode] = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);
div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
+
+ sdiv_pow2_cheap[(int) mode]
+ = (rtx_cost (gen_rtx_DIV (mode, reg, GEN_INT (32)), SET)
+ <= 2 * add_cost[(int) mode]);
+ smod_pow2_cheap[(int) mode]
+ = (rtx_cost (gen_rtx_MOD (mode, reg, GEN_INT (32)), SET)
+ <= 2 * add_cost[(int) mode]);
+
wider_mode = GET_MODE_WIDER_MODE (mode);
if (wider_mode != VOIDmode)
{
*************** init_expmed (void)
*** 195,200 ****
--- 178,204 ----
}
}
+ shift_cost[0] = 0;
+ shiftadd_cost[0] = shiftsub_cost[0] = add_cost[(int) word_mode];
+
+ for (m = 1; m < MAX_BITS_PER_WORD; m++)
+ {
+ rtx c_int = GEN_INT ((HOST_WIDE_INT) 1 << m);
+ shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
+
+ XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
+ if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
+ shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
+
+ XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1) = c_int;
+ if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
+ shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
+
+ XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1) = c_int;
+ if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
+ shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
+ }
+
end_sequence ();
}
*************** store_bit_field (rtx str_rtx, unsigned H
*** 317,323 ****
available. */
if (VECTOR_MODE_P (GET_MODE (op0))
&& GET_CODE (op0) != MEM
! && (vec_set_optab->handlers[(int)GET_MODE (op0)].insn_code
!= CODE_FOR_nothing)
&& fieldmode == GET_MODE_INNER (GET_MODE (op0))
&& bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
--- 321,327 ----
available. */
if (VECTOR_MODE_P (GET_MODE (op0))
&& GET_CODE (op0) != MEM
! && (vec_set_optab->handlers[(int) GET_MODE (op0)].insn_code
!= CODE_FOR_nothing)
&& fieldmode == GET_MODE_INNER (GET_MODE (op0))
&& bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
*************** extract_bit_field (rtx str_rtx, unsigned
*** 1086,1092 ****
available. */
if (VECTOR_MODE_P (GET_MODE (op0))
&& GET_CODE (op0) != MEM
! && (vec_extract_optab->handlers[(int)GET_MODE (op0)].insn_code
!= CODE_FOR_nothing)
&& ((bitsize + bitnum) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
== bitsize / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
--- 1090,1096 ----
available. */
if (VECTOR_MODE_P (GET_MODE (op0))
&& GET_CODE (op0) != MEM
! && (vec_extract_optab->handlers[(int) GET_MODE (op0)].insn_code
!= CODE_FOR_nothing)
&& ((bitsize + bitnum) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
== bitsize / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
*************** synth_mult (struct algorithm *alg_out, u
*** 2256,2262 ****
{
/* T ends with ...111. Multiply by (T + 1) and subtract 1. */
! cost = add_cost;
synth_mult (alg_in, t + 1, cost_limit - cost);
cost += alg_in->cost;
--- 2260,2266 ----
{
/* T ends with ...111. Multiply by (T + 1) and subtract 1. */
! cost = add_cost[(int) word_mode];
synth_mult (alg_in, t + 1, cost_limit - cost);
cost += alg_in->cost;
*************** synth_mult (struct algorithm *alg_out, u
*** 2273,2279 ****
{
/* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
! cost = add_cost;
synth_mult (alg_in, t - 1, cost_limit - cost);
cost += alg_in->cost;
--- 2277,2283 ----
{
/* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
! cost = add_cost[(int) word_mode];
synth_mult (alg_in, t - 1, cost_limit - cost);
cost += alg_in->cost;
*************** synth_mult (struct algorithm *alg_out, u
*** 2305,2311 ****
d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
if (t % d == 0 && t > d && m < BITS_PER_WORD)
{
! cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
synth_mult (alg_in, t / d, cost_limit - cost);
cost += alg_in->cost;
--- 2309,2317 ----
d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
if (t % d == 0 && t > d && m < BITS_PER_WORD)
{
! cost = add_cost[(int) word_mode] + shift_cost[m];
! if (shiftadd_cost[m] < cost)
! cost = shiftadd_cost[m];
synth_mult (alg_in, t / d, cost_limit - cost);
cost += alg_in->cost;
*************** synth_mult (struct algorithm *alg_out, u
*** 2324,2330 ****
d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
if (t % d == 0 && t > d && m < BITS_PER_WORD)
{
! cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
synth_mult (alg_in, t / d, cost_limit - cost);
cost += alg_in->cost;
--- 2330,2338 ----
d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
if (t % d == 0 && t > d && m < BITS_PER_WORD)
{
! cost = add_cost[(int) word_mode] + shift_cost[m];
! if (shiftsub_cost[m] < cost)
! cost = shiftsub_cost[m];
synth_mult (alg_in, t / d, cost_limit - cost);
cost += alg_in->cost;
*************** choose_mult_variant (enum machine_mode m
*** 2428,2442 ****
`unsigned int' */
if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
{
! synth_mult (&alg2, -val, MIN (alg->cost, mult_cost) - negate_cost);
! alg2.cost += negate_cost;
if (alg2.cost < alg->cost)
*alg = alg2, *variant = negate_variant;
}
/* This proves very useful for division-by-constant. */
! synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost) - add_cost);
! alg2.cost += add_cost;
if (alg2.cost < alg->cost)
*alg = alg2, *variant = add_variant;
--- 2436,2452 ----
`unsigned int' */
if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
{
! synth_mult (&alg2, -val, MIN (alg->cost, mult_cost)
! - neg_cost[(int) mode]);
! alg2.cost += neg_cost[(int) mode];
if (alg2.cost < alg->cost)
*alg = alg2, *variant = negate_variant;
}
/* This proves very useful for division-by-constant. */
! synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost)
! - add_cost[(int) mode]);
! alg2.cost += add_cost[(int) mode];
if (alg2.cost < alg->cost)
*alg = alg2, *variant = add_variant;
*************** expand_mult (enum machine_mode mode, rtx
*** 2634,2640 ****
&& (unsignedp || !flag_trapv))
{
int mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
! mult_cost = MIN (12 * add_cost, mult_cost);
if (choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant,
mult_cost))
--- 2644,2650 ----
&& (unsignedp || !flag_trapv))
{
int mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
! mult_cost = MIN (12 * add_cost[(int) mode], mult_cost);
if (choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant,
mult_cost))
*************** expand_mult_highpart_optab (enum machine
*** 2900,2907 ****
/* Secondly, same as above, but use sign flavor opposite of unsignedp.
Need to adjust the result after the multiplication. */
if (size - 1 < BITS_PER_WORD
! && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
! < max_cost))
{
moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
tem = expand_binop (mode, moptab, op0, narrow_op1, target,
--- 2910,2917 ----
/* Secondly, same as above, but use sign flavor opposite of unsignedp.
Need to adjust the result after the multiplication. */
if (size - 1 < BITS_PER_WORD
! && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1]
! + 4 * add_cost[(int) mode] < max_cost))
{
moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
tem = expand_binop (mode, moptab, op0, narrow_op1, target,
*************** expand_mult_highpart_optab (enum machine
*** 2939,2946 ****
moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
&& size - 1 < BITS_PER_WORD
! && (mul_widen_cost[(int) wider_mode]
! + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
{
tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
NULL_RTX, ! unsignedp, OPTAB_WIDEN);
--- 2949,2956 ----
moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
&& size - 1 < BITS_PER_WORD
! && (mul_widen_cost[(int) wider_mode] + 2 * shift_cost[size-1]
! + 4 * add_cost[(int) mode] < max_cost))
{
tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
NULL_RTX, ! unsignedp, OPTAB_WIDEN);
*************** expand_mult_highpart (enum machine_mode
*** 2999,3005 ****
if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
{
sign_adjust = true;
! extra_cost += add_cost;
}
/* See whether shift/add multiplication is cheap enough. */
--- 3009,3015 ----
if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
{
sign_adjust = true;
! extra_cost += add_cost[(int) mode];
}
/* See whether shift/add multiplication is cheap enough. */
*************** expand_divmod (int rem_flag, enum tree_c
*** 3215,3221 ****
max_cost = div_cost[(int) compute_mode]
- (rem_flag && ! (last_div_const != 0 && op1_is_constant
&& INTVAL (op1) == last_div_const)
! ? mul_cost[(int) compute_mode] + add_cost : 0);
last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
--- 3225,3232 ----
max_cost = div_cost[(int) compute_mode]
- (rem_flag && ! (last_div_const != 0 && op1_is_constant
&& INTVAL (op1) == last_div_const)
! ? mul_cost[(int) compute_mode] + add_cost[(int) compute_mode]
! : 0);
last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
*************** expand_divmod (int rem_flag, enum tree_c
*** 3333,3339 ****
goto fail1;
extra_cost = (shift_cost[post_shift - 1]
! + shift_cost[1] + 2 * add_cost);
t1 = expand_mult_highpart (compute_mode, op0, ml,
NULL_RTX, 1,
max_cost - extra_cost);
--- 3344,3351 ----
goto fail1;
extra_cost = (shift_cost[post_shift - 1]
! + shift_cost[1]
! + 2 * add_cost(int) compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, ml,
NULL_RTX, 1,
max_cost - extra_cost);
*************** expand_divmod (int rem_flag, enum tree_c
*** 3416,3422 ****
goto fail1;
}
else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
! && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap)
/* ??? The cheap metric is computed only for
word_mode. If this operation is wider, this may
not be so. Assume true if the optab has an
--- 3428,3435 ----
goto fail1;
}
else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
! && (rem_flag ? smod_pow2_cheap[(int) compute_mode]
! : sdiv_pow2_cheap[(int) compute_mode])
/* ??? The cheap metric is computed only for
word_mode. If this operation is wider, this may
not be so. Assume true if the optab has an
*************** expand_divmod (int rem_flag, enum tree_c
*** 3498,3504 ****
goto fail1;
extra_cost = (shift_cost[post_shift]
! + shift_cost[size - 1] + add_cost);
t1 = expand_mult_highpart (compute_mode, op0, ml,
NULL_RTX, 0,
max_cost - extra_cost);
--- 3511,3518 ----
goto fail1;
extra_cost = (shift_cost[post_shift]
! + shift_cost[size - 1]
! + add_cost[(int) compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, ml,
NULL_RTX, 0,
max_cost - extra_cost);
*************** expand_divmod (int rem_flag, enum tree_c
*** 3529,3535 ****
ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
extra_cost = (shift_cost[post_shift]
! + shift_cost[size - 1] + 2 * add_cost);
t1 = expand_mult_highpart (compute_mode, op0, ml,
NULL_RTX, 0,
max_cost - extra_cost);
--- 3543,3550 ----
ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
extra_cost = (shift_cost[post_shift]
! + shift_cost[size - 1]
! + 2 * add_cost[(int) compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, ml,
NULL_RTX, 0,
max_cost - extra_cost);
*************** expand_divmod (int rem_flag, enum tree_c
*** 3619,3625 ****
t2 = expand_binop (compute_mode, xor_optab, op0, t1,
NULL_RTX, 0, OPTAB_WIDEN);
extra_cost = (shift_cost[post_shift]
! + shift_cost[size - 1] + 2 * add_cost);
t3 = expand_mult_highpart (compute_mode, t2, ml,
NULL_RTX, 1,
max_cost - extra_cost);
--- 3634,3641 ----
t2 = expand_binop (compute_mode, xor_optab, op0, t1,
NULL_RTX, 0, OPTAB_WIDEN);
extra_cost = (shift_cost[post_shift]
! + shift_cost[size - 1]
! + 2 * add_cost[(int) compute_mode]);
t3 = expand_mult_highpart (compute_mode, t2, ml,
NULL_RTX, 1,
max_cost - extra_cost);
Roger
--
Roger Sayle, E-mail: roger@eyesopen.com
OpenEye Scientific Software, WWW: http://www.eyesopen.com/
Suite 1107, 3600 Cerrillos Road, Tel: (+1) 505-473-7385
Santa Fe, New Mexico, 87507. Fax: (+1) 505-473-0833