This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Index expmed.c's shift_cost by machine mode
- From: Roger Sayle <roger at eyesopen dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Sat, 12 Jun 2004 12:20:50 -0600 (MDT)
- Subject: [PATCH] Index expmed.c's shift_cost by machine mode
The following patch is the next step in my efforts to improve code
generation on targets where shifts are several times more expensive
than additions (including the Pentium4). This change adds an extra
machine mode index to the shift_cost, shiftadd_cost and shiftsub_cost
arrays used by expmed.c. The rest of expmed.c is then updated to
use the operation's mode when determining the cost of right shifts,
allowing improved synthetic multiplcation sequences for suitably
parameterized backends.
There was no response to my question of whether it would be
preferrable to initialize these tables dynamically. I decided that
typically there will be at most six integer modes (BI, QI, HI, SI,
DI and TI) and the additional start-up overhead should be negligible
compared to the more complex logic required for lazy initialization.
I also intend to export and make use of these tables in the RTL
optimizers (e.g. simplify_rtx), which will improve that cost/benefit
ratio further.
The following patch has been tested on i686-pc-linux-gnu with a full
"make bootstrap", all default languages, and regression tested with
a top-level "make -k check" with no new failures.
Ok for mainline?
2004-06-12 Roger Sayle <roger@eyesopen.com>
* expmed.c (shift_cost, shiftadd_cost, shiftsub_cost): Additionally
index by machine mode.
(init_expmed): Initialize shift_cost, shiftadd_cost and shiftsub_cost
tables inside the loop over machine modes.
(synth_mult, expand_mult_highpart_optab, expand_mult_highpart,
expand_divmod): Index shift*_cost by the appropriate machine mode.
Index: expmed.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/expmed.c,v
retrieving revision 1.164
diff -c -3 -p -r1.164 expmed.c
*** expmed.c 11 Jun 2004 21:34:23 -0000 1.164
--- expmed.c 12 Jun 2004 03:54:05 -0000
*************** static int smod_pow2_cheap[NUM_MACHINE_M
*** 94,102 ****
static int zero_cost;
static int add_cost[NUM_MACHINE_MODES];
static int neg_cost[NUM_MACHINE_MODES];
! static int shift_cost[MAX_BITS_PER_WORD];
! static int shiftadd_cost[MAX_BITS_PER_WORD];
! static int shiftsub_cost[MAX_BITS_PER_WORD];
static int mul_cost[NUM_MACHINE_MODES];
static int div_cost[NUM_MACHINE_MODES];
static int mul_widen_cost[NUM_MACHINE_MODES];
--- 94,102 ----
static int zero_cost;
static int add_cost[NUM_MACHINE_MODES];
static int neg_cost[NUM_MACHINE_MODES];
! static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
! static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
! static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int mul_cost[NUM_MACHINE_MODES];
static int div_cost[NUM_MACHINE_MODES];
static int mul_widen_cost[NUM_MACHINE_MODES];
*************** void
*** 106,143 ****
init_expmed (void)
{
rtx reg, shift_insn, shiftadd_insn, shiftsub_insn;
int dummy;
! int m;
enum machine_mode mode, wider_mode;
start_sequence ();
- /* This is "some random pseudo register" for purposes of calling recog
- to see what insns exist. */
- reg = gen_rtx_REG (word_mode, 10000);
-
zero_cost = rtx_cost (const0_rtx, 0);
- shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
- gen_rtx_ASHIFT (word_mode, reg,
- const0_rtx)));
-
- shiftadd_insn
- = emit_insn (gen_rtx_SET (VOIDmode, reg,
- gen_rtx_PLUS (word_mode,
- gen_rtx_MULT (word_mode,
- reg, const0_rtx),
- reg)));
-
- shiftsub_insn
- = emit_insn (gen_rtx_SET (VOIDmode, reg,
- gen_rtx_MINUS (word_mode,
- gen_rtx_MULT (word_mode,
- reg, const0_rtx),
- reg)));
-
init_recog ();
for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
mode != VOIDmode;
--- 106,129 ----
init_expmed (void)
{
rtx reg, shift_insn, shiftadd_insn, shiftsub_insn;
+ rtx shift_pat, shiftadd_pat, shiftsub_pat;
+ rtx pow2[MAX_BITS_PER_WORD];
+ rtx cint[MAX_BITS_PER_WORD];
int dummy;
! int m, n;
enum machine_mode mode, wider_mode;
start_sequence ();
zero_cost = rtx_cost (const0_rtx, 0);
init_recog ();
+ for (m = 1; m < MAX_BITS_PER_WORD; m++)
+ {
+ pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
+ cint[m] = GEN_INT (m);
+ }
for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
mode != VOIDmode;
*************** init_expmed (void)
*** 176,202 ****
GEN_INT (GET_MODE_BITSIZE (mode)))),
SET);
}
- }
! shift_cost[0] = 0;
! shiftadd_cost[0] = shiftsub_cost[0] = add_cost[word_mode];
! for (m = 1; m < MAX_BITS_PER_WORD; m++)
! {
! rtx c_int = GEN_INT ((HOST_WIDE_INT) 1 << m);
! shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
! XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
! if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
! shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
!
! XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1) = c_int;
! if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
! shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
!
! XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1) = c_int;
! if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
! shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
}
end_sequence ();
--- 162,213 ----
GEN_INT (GET_MODE_BITSIZE (mode)))),
SET);
}
! shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
! gen_rtx_ASHIFT (mode, reg,
! const0_rtx)));
!
! shiftadd_insn
! = emit_insn (gen_rtx_SET (VOIDmode, reg,
! gen_rtx_PLUS (mode,
! gen_rtx_MULT (mode,
! reg,
! const0_rtx),
! reg)));
!
! shiftsub_insn
! = emit_insn (gen_rtx_SET (VOIDmode, reg,
! gen_rtx_MINUS (mode,
! gen_rtx_MULT (mode,
! reg,
! const0_rtx),
! reg)));
!
! shift_pat = PATTERN (shift_insn);
! shiftadd_pat = PATTERN (shiftadd_insn);
! shiftsub_pat = PATTERN (shiftsub_insn);
! shift_cost[mode][0] = 0;
! shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
! n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
! for (m = 1; m < n; m++)
! {
! shift_cost[mode][m] = 32000;
! XEXP (SET_SRC (shift_pat), 1) = cint[m];
! if (recog (shift_pat, shift_insn, &dummy) >= 0)
! shift_cost[mode][m] = rtx_cost (SET_SRC (shift_pat), SET);
!
! shiftadd_cost[mode][m] = 32000;
! XEXP (XEXP (SET_SRC (shiftadd_pat), 0), 1) = pow2[m];
! if (recog (shiftadd_pat, shiftadd_insn, &dummy) >= 0)
! shiftadd_cost[mode][m] = rtx_cost (SET_SRC (shiftadd_pat), SET);
!
! shiftsub_cost[mode][m] = 32000;
! XEXP (XEXP (SET_SRC (shiftsub_pat), 0), 1) = pow2[m];
! if (recog (shiftsub_pat, shiftsub_insn, &dummy) >= 0)
! shiftsub_cost[mode][m] = rtx_cost (SET_SRC (shiftsub_pat), SET);
! }
}
end_sequence ();
*************** synth_mult (struct algorithm *alg_out, u
*** 2226,2232 ****
if (m < BITS_PER_WORD)
{
q = t >> m;
! cost = shift_cost[m];
synth_mult (alg_in, q, cost_limit - cost, mode);
cost += alg_in->cost;
--- 2237,2243 ----
if (m < BITS_PER_WORD)
{
q = t >> m;
! cost = shift_cost[mode][m];
synth_mult (alg_in, q, cost_limit - cost, mode);
cost += alg_in->cost;
*************** synth_mult (struct algorithm *alg_out, u
*** 2310,2318 ****
d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
if (t % d == 0 && t > d && m < BITS_PER_WORD)
{
! cost = add_cost[mode] + shift_cost[m];
! if (shiftadd_cost[m] < cost)
! cost = shiftadd_cost[m];
synth_mult (alg_in, t / d, cost_limit - cost, mode);
cost += alg_in->cost;
--- 2321,2329 ----
d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
if (t % d == 0 && t > d && m < BITS_PER_WORD)
{
! cost = add_cost[mode] + shift_cost[mode][m];
! if (shiftadd_cost[mode][m] < cost)
! cost = shiftadd_cost[mode][m];
synth_mult (alg_in, t / d, cost_limit - cost, mode);
cost += alg_in->cost;
*************** synth_mult (struct algorithm *alg_out, u
*** 2331,2339 ****
d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
if (t % d == 0 && t > d && m < BITS_PER_WORD)
{
! cost = add_cost[mode] + shift_cost[m];
! if (shiftsub_cost[m] < cost)
! cost = shiftsub_cost[m];
synth_mult (alg_in, t / d, cost_limit - cost, mode);
cost += alg_in->cost;
--- 2342,2350 ----
d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
if (t % d == 0 && t > d && m < BITS_PER_WORD)
{
! cost = add_cost[mode] + shift_cost[mode][m];
! if (shiftsub_cost[mode][m] < cost)
! cost = shiftsub_cost[mode][m];
synth_mult (alg_in, t / d, cost_limit - cost, mode);
cost += alg_in->cost;
*************** synth_mult (struct algorithm *alg_out, u
*** 2358,2364 ****
m = exact_log2 (q);
if (m >= 0 && m < BITS_PER_WORD)
{
! cost = shiftadd_cost[m];
synth_mult (alg_in, (t - 1) >> m, cost_limit - cost, mode);
cost += alg_in->cost;
--- 2369,2375 ----
m = exact_log2 (q);
if (m >= 0 && m < BITS_PER_WORD)
{
! cost = shiftadd_cost[mode][m];
synth_mult (alg_in, (t - 1) >> m, cost_limit - cost, mode);
cost += alg_in->cost;
*************** synth_mult (struct algorithm *alg_out, u
*** 2377,2383 ****
m = exact_log2 (q);
if (m >= 0 && m < BITS_PER_WORD)
{
! cost = shiftsub_cost[m];
synth_mult (alg_in, (t + 1) >> m, cost_limit - cost, mode);
cost += alg_in->cost;
--- 2388,2394 ----
m = exact_log2 (q);
if (m >= 0 && m < BITS_PER_WORD)
{
! cost = shiftsub_cost[mode][m];
synth_mult (alg_in, (t + 1) >> m, cost_limit - cost, mode);
cost += alg_in->cost;
*************** expand_mult_highpart_optab (enum machine
*** 2911,2917 ****
/* Secondly, same as above, but use sign flavor opposite of unsignedp.
Need to adjust the result after the multiplication. */
if (size - 1 < BITS_PER_WORD
! && (mul_highpart_cost[mode] + 2 * shift_cost[size-1]
+ 4 * add_cost[mode] < max_cost))
{
moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
--- 2922,2928 ----
/* Secondly, same as above, but use sign flavor opposite of unsignedp.
Need to adjust the result after the multiplication. */
if (size - 1 < BITS_PER_WORD
! && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
+ 4 * add_cost[mode] < max_cost))
{
moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
*************** expand_mult_highpart_optab (enum machine
*** 2938,2944 ****
moptab = smul_optab;
if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
&& size - 1 < BITS_PER_WORD
! && mul_cost[wider_mode] + shift_cost[size-1] < max_cost)
{
tem = expand_binop (wider_mode, moptab, op0, op1, 0,
unsignedp, OPTAB_WIDEN);
--- 2949,2955 ----
moptab = smul_optab;
if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
&& size - 1 < BITS_PER_WORD
! && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
{
tem = expand_binop (wider_mode, moptab, op0, op1, 0,
unsignedp, OPTAB_WIDEN);
*************** expand_mult_highpart_optab (enum machine
*** 2950,2956 ****
moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
&& size - 1 < BITS_PER_WORD
! && (mul_widen_cost[wider_mode] + 2 * shift_cost[size-1]
+ 4 * add_cost[mode] < max_cost))
{
tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
--- 2961,2967 ----
moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
&& size - 1 < BITS_PER_WORD
! && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
+ 4 * add_cost[mode] < max_cost))
{
tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
*************** expand_mult_highpart (enum machine_mode
*** 3004,3010 ****
return expand_mult_highpart_optab (mode, op0, op1, target,
unsignedp, max_cost);
! extra_cost = shift_cost[GET_MODE_BITSIZE (mode) - 1];
/* Check whether we try to multiply by a negative constant. */
if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
--- 3015,3021 ----
return expand_mult_highpart_optab (mode, op0, op1, target,
unsignedp, max_cost);
! extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
/* Check whether we try to multiply by a negative constant. */
if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
*************** expand_divmod (int rem_flag, enum tree_c
*** 3344,3352 ****
if (post_shift - 1 >= BITS_PER_WORD)
goto fail1;
! extra_cost = (shift_cost[post_shift - 1]
! + shift_cost[1]
! + 2 * add_cost[compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, ml,
NULL_RTX, 1,
max_cost - extra_cost);
--- 3355,3364 ----
if (post_shift - 1 >= BITS_PER_WORD)
goto fail1;
! extra_cost
! = (shift_cost[compute_mode][post_shift - 1]
! + shift_cost[compute_mode][1]
! + 2 * add_cost[compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, ml,
NULL_RTX, 1,
max_cost - extra_cost);
*************** expand_divmod (int rem_flag, enum tree_c
*** 3376,3383 ****
t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
build_int_2 (pre_shift, 0),
NULL_RTX, 1);
! extra_cost = (shift_cost[pre_shift]
! + shift_cost[post_shift]);
t2 = expand_mult_highpart (compute_mode, t1, ml,
NULL_RTX, 1,
max_cost - extra_cost);
--- 3388,3396 ----
t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
build_int_2 (pre_shift, 0),
NULL_RTX, 1);
! extra_cost
! = (shift_cost[compute_mode][pre_shift]
! + shift_cost[compute_mode][post_shift]);
t2 = expand_mult_highpart (compute_mode, t1, ml,
NULL_RTX, 1,
max_cost - extra_cost);
*************** expand_divmod (int rem_flag, enum tree_c
*** 3511,3518 ****
|| size - 1 >= BITS_PER_WORD)
goto fail1;
! extra_cost = (shift_cost[post_shift]
! + shift_cost[size - 1]
+ add_cost[compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, ml,
NULL_RTX, 0,
--- 3524,3531 ----
|| size - 1 >= BITS_PER_WORD)
goto fail1;
! extra_cost = (shift_cost[compute_mode][post_shift]
! + shift_cost[compute_mode][size - 1]
+ add_cost[compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, ml,
NULL_RTX, 0,
*************** expand_divmod (int rem_flag, enum tree_c
*** 3543,3550 ****
goto fail1;
ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
! extra_cost = (shift_cost[post_shift]
! + shift_cost[size - 1]
+ 2 * add_cost[compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, ml,
NULL_RTX, 0,
--- 3556,3563 ----
goto fail1;
ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
! extra_cost = (shift_cost[compute_mode][post_shift]
! + shift_cost[compute_mode][size - 1]
+ 2 * add_cost[compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, ml,
NULL_RTX, 0,
*************** expand_divmod (int rem_flag, enum tree_c
*** 3634,3641 ****
NULL_RTX, 0);
t2 = expand_binop (compute_mode, xor_optab, op0, t1,
NULL_RTX, 0, OPTAB_WIDEN);
! extra_cost = (shift_cost[post_shift]
! + shift_cost[size - 1]
+ 2 * add_cost[compute_mode]);
t3 = expand_mult_highpart (compute_mode, t2, ml,
NULL_RTX, 1,
--- 3647,3654 ----
NULL_RTX, 0);
t2 = expand_binop (compute_mode, xor_optab, op0, t1,
NULL_RTX, 0, OPTAB_WIDEN);
! extra_cost = (shift_cost[compute_mode][post_shift]
! + shift_cost[compute_mode][size - 1]
+ 2 * add_cost[compute_mode]);
t3 = expand_mult_highpart (compute_mode, t2, ml,
NULL_RTX, 1,
Roger
--
Roger Sayle, E-mail: roger@eyesopen.com
OpenEye Scientific Software, WWW: http://www.eyesopen.com/
Suite 1107, 3600 Cerrillos Road, Tel: (+1) 505-473-7385
Santa Fe, New Mexico, 87507. Fax: (+1) 505-473-0833