This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] expand_sdiv_pow2 improvements for cmove targets (take 2)
- From: Roger Sayle <roger at eyesopen dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Richard Henderson <rth at redhat dot com>
- Date: Mon, 2 Aug 2004 18:06:02 -0600 (MDT)
- Subject: [PATCH] expand_sdiv_pow2 improvements for cmove targets (take 2)
On Sun, 25 Jul 2004, Richard Henderson wrote:
> On Sun, Jul 25, 2004 at 07:31:59AM -0600, Roger Sayle wrote:
> > + /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
> > + cmov = gen_rtx_LT (VOIDmode, temp2, const0_rtx);
> > + cmov = gen_rtx_IF_THEN_ELSE (mode, cmov, temp, temp2);
> > + cmov = gen_rtx_SET (mode, temp2, cmov);
> > + emit_insn (cmov);
>
> Why are you building this by hand instead of using emit_conditional_move,
> and/or leaving this for ifcvt to handle? With the latermost option, I'd
> simply emit the branch if HAVE_conditional_move is defined.
It serves me right for not reading the comments above the code in ifcvt.c,
where I'd copied this idiom from. Indeed, just a few lines further down
in that function, noce_emit_cmove, it calls emit_conditional_move as you
suggest.
I believe that it's better to handle this directly via a call to
emit_conditional_move rather than leave it for ifcvt.c for several
reasons. The biggest motivation is that ifcvt.c can only handle simple
if-then-else blocks where the then or the else contains only a single
instruction. For constants that require more than a single instruction
to add, ifcvt.c would be unable to synthesize a conditional move, and the
resulting conditional branch sequence would be significantly worse.
For example, on alphaev67-dec-osf5.1, we now generate the following for
division by 65536 (which matches the Compaq C compiler output).
div64k: ldah $1,1($16)
bis $31,$16,$0
lda $1,-1($1)
cmovlt $16,$1,$0
sra $0,16,$0
ret $31,($26),1
Other good reasons include better code generation at -O0, and better
memory usage/compile-time performance.
The following patch has been tested on both i686-pc-linux-gnu and
alphaev67-dec-osf5.1, all default languages, and regression tested
with a top-level "make -k check" with no new failures.
Ok for mainline?
2004-08-02 Roger Sayle <roger@eyesopen.com>
* expmed.c (sdiv_pow2_cheap, smod_pow2_cheap): Change type to bool.
(have_cmovlt_p): New global array indexed by machine mode.
(init_expmed): Add new reg2, lt, ite, set and insn fields to "all".
Use "recog" to initialize "have_cmovlt_p" for each integer mode.
(expand_sdiv_pow2): Add an alternate implementation for signed
division, if the target provides a suitable conditional move insn.
Index: expmed.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/expmed.c,v
retrieving revision 1.186
diff -c -3 -p -r1.186 expmed.c
*** expmed.c 25 Jul 2004 19:15:43 -0000 1.186
--- expmed.c 1 Aug 2004 01:44:55 -0000
*************** static rtx expand_sdiv_pow2 (enum machin
*** 59,66 ****
Usually, this will mean that the MD file will emit non-branch
sequences. */
! static int sdiv_pow2_cheap[NUM_MACHINE_MODES];
! static int smod_pow2_cheap[NUM_MACHINE_MODES];
#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
--- 59,73 ----
Usually, this will mean that the MD file will emit non-branch
sequences. */
! static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
! static bool smod_pow2_cheap[NUM_MACHINE_MODES];
!
! /* Nonzero means this target has a "conditional move if less than zero"
! instruction, such as the alpha's cmovlt. */
!
! #ifdef HAVE_conditional_move
! static bool have_cmovlt_p[NUM_MACHINE_MODES];
! #endif
#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
*************** init_expmed (void)
*** 109,115 ****
{
struct
{
! struct rtx_def reg;
struct rtx_def plus; rtunion plus_fld1;
struct rtx_def neg;
struct rtx_def udiv; rtunion udiv_fld1;
--- 116,123 ----
{
struct
{
! struct rtx_def reg; rtunion reg_fld[2];
! struct rtx_def reg2; rtunion reg2_fld[2];
struct rtx_def plus; rtunion plus_fld1;
struct rtx_def neg;
struct rtx_def udiv; rtunion udiv_fld1;
*************** init_expmed (void)
*** 124,136 ****
struct rtx_def shift_mult; rtunion shift_mult_fld1;
struct rtx_def shift_add; rtunion shift_add_fld1;
struct rtx_def shift_sub; rtunion shift_sub_fld1;
} all;
rtx pow2[MAX_BITS_PER_WORD];
rtx cint[MAX_BITS_PER_WORD];
! int m, n;
enum machine_mode mode, wider_mode;
zero_cost = rtx_cost (const0_rtx, 0);
for (m = 1; m < MAX_BITS_PER_WORD; m++)
--- 132,150 ----
struct rtx_def shift_mult; rtunion shift_mult_fld1;
struct rtx_def shift_add; rtunion shift_add_fld1;
struct rtx_def shift_sub; rtunion shift_sub_fld1;
+ struct rtx_def lt; rtunion lt_fld1;
+ struct rtx_def ite; rtunion ite_fld[2];
+ struct rtx_def set; rtunion set_fld1;
+ struct rtx_def insn; rtunion insn_fld[8];
} all;
rtx pow2[MAX_BITS_PER_WORD];
rtx cint[MAX_BITS_PER_WORD];
! int m, n, dummy;
enum machine_mode mode, wider_mode;
+ init_recog ();
+
zero_cost = rtx_cost (const0_rtx, 0);
for (m = 1; m < MAX_BITS_PER_WORD; m++)
*************** init_expmed (void)
*** 144,149 ****
--- 158,166 ----
PUT_CODE (&all.reg, REG);
REGNO (&all.reg) = 10000;
+ PUT_CODE (&all.reg2, REG);
+ REGNO (&all.reg2) = 10001;
+
PUT_CODE (&all.plus, PLUS);
XEXP (&all.plus, 0) = &all.reg;
XEXP (&all.plus, 1) = &all.reg;
*************** init_expmed (void)
*** 194,204 ****
--- 211,239 ----
XEXP (&all.shift_sub, 0) = &all.shift_mult;
XEXP (&all.shift_sub, 1) = &all.reg;
+ PUT_CODE (&all.lt, LT);
+ XEXP (&all.lt, 0) = &all.reg;
+ XEXP (&all.lt, 1) = const0_rtx;
+
+ PUT_CODE (&all.ite, IF_THEN_ELSE);
+ XEXP (&all.ite, 0) = &all.lt;
+ XEXP (&all.ite, 1) = &all.reg2;
+ XEXP (&all.ite, 2) = &all.reg;
+
+ PUT_CODE (&all.set, SET);
+ XEXP (&all.set, 0) = &all.reg;
+ XEXP (&all.set, 1) = &all.ite;
+
+ PUT_CODE (&all.insn, INSN);
+ XEXP (&all.insn, 5) = &all.set;
+ XINT (&all.insn, 6) = -1;
+
for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
mode != VOIDmode;
mode = GET_MODE_WIDER_MODE (mode))
{
PUT_MODE (&all.reg, mode);
+ PUT_MODE (&all.reg2, mode);
PUT_MODE (&all.plus, mode);
PUT_MODE (&all.neg, mode);
PUT_MODE (&all.udiv, mode);
*************** init_expmed (void)
*** 210,215 ****
--- 245,252 ----
PUT_MODE (&all.shift_mult, mode);
PUT_MODE (&all.shift_add, mode);
PUT_MODE (&all.shift_sub, mode);
+ PUT_MODE (&all.ite, mode);
+ PUT_MODE (&all.set, mode);
add_cost[mode] = rtx_cost (&all.plus, SET);
neg_cost[mode] = rtx_cost (&all.neg, SET);
*************** init_expmed (void)
*** 244,249 ****
--- 281,291 ----
shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
}
+
+ #ifdef HAVE_conditional_move
+ INSN_CODE (&all.insn) = -1;
+ have_cmovlt_p[mode] = recog (&all.set, &all.insn, &dummy) >= 0;
+ #endif
}
}
*************** expand_sdiv_pow2 (enum machine_mode mode
*** 3194,3199 ****
--- 3236,3258 ----
return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
}
+ #ifdef HAVE_conditional_move
+ if (have_cmovlt_p[mode] && BRANCH_COST >= 2)
+ {
+ rtx temp2;
+
+ temp2 = copy_to_mode_reg (mode, op0);
+ temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
+ NULL_RTX, 0, OPTAB_LIB_WIDEN);
+ temp = force_reg (mode, temp);
+
+ /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
+ temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
+ mode, temp, temp2, mode, 0);
+ return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
+ }
+ #endif
+
if (BRANCH_COST >= 2)
{
int ushift = GET_MODE_BITSIZE (mode) - logd;
Roger
--