This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[arm-csl-branch] Updated rtx_costs.
- From: Paul Brook <paul at codesourcery dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Richard Earnshaw <rearnsha at arm dot com>
- Date: Mon, 15 Dec 2003 11:44:33 +0000
- Subject: [arm-csl-branch] Updated rtx_costs.
Attached is a patch with updated rtx_costs for arm926ejs and arm1026ejs cores.
The only real difference between these cores and the existing costs seems
to be that multiplies are cheaper. Both cores look similar from a relative
insn cost POV.
Basically rtx_cost returns 1 for a single-cycle reg op (e.g. add). A constant
pool operand adds a penalty of 4.
Existing costs for multiplies are comparatively high (8) and depend on the
actual value multiplied.
The 9e/10e cores do SI multiplies in 2/3 cycles and SI*SI->DI mul in 3/4.
I've used the smaller of these values on the assumption that the scheduler
will usually be able to avoid the 1 cycle interlock with the following
insn. DI*DI->DI multiplies have a cost of 7 (3+2+2).
For the Thumb variant I've used a cost of 4 insns, as only the MULS variants
are available. The Thumb costs seem to use a different costing scale, just
to confuse things.
Ok?
Paul
2003-12-15 Paul Brook <paul@codesourcery.com>
* config/arm/arm.c (arm_rtx_costs_1): Add MUL costs for arm9e/10e cores.
Index: gcc/config/arm/arm.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/arm/arm.c,v
retrieving revision 1.303.2.4
diff -u -p -r1.303.2.4 arm.c
--- gcc/config/arm/arm.c 10 Dec 2003 12:16:04 -0000 1.303.2.4
+++ gcc/config/arm/arm.c 12 Dec 2003 16:11:19 -0000
@@ -3152,6 +3152,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code co
enum machine_mode mode = GET_MODE (x);
enum rtx_code subcode;
int extra_cost;
+ int cheap_mul;
if (TARGET_THUMB)
{
@@ -3169,20 +3170,25 @@ arm_rtx_costs_1 (rtx x, enum rtx_code co
return COSTS_N_INSNS (1);
case MULT:
- if (GET_CODE (XEXP (x, 1)) == CONST_INT)
- {
- int cycles = 0;
- unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
-
- while (i)
- {
- i >>= 2;
- cycles++;
- }
- return COSTS_N_INSNS (2) + cycles;
+ if (arm_tune == arm926ejs
+ || arm_tune == arm1026ejs)
+ return COSTS_N_INSNS (4);
+ else
+ {
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ int cycles = 0;
+ unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
+
+ while (i)
+ {
+ i >>= 2;
+ cycles++;
+ }
+ return COSTS_N_INSNS (2) + cycles;
+ }
+ return COSTS_N_INSNS (1) + 16;
}
- return COSTS_N_INSNS (1) + 16;
-
case SET:
return (COSTS_N_INSNS (1)
+ 4 * ((GET_CODE (SET_SRC (x)) == MEM)
@@ -3396,19 +3402,26 @@ arm_rtx_costs_1 (rtx x, enum rtx_code co
return 8;
case MULT:
+ if (arm_tune == arm9e
+ || arm_tune == arm926ejs
+ || arm_tune == arm1026ejs)
+ cheap_mul = 1;
+ else
+ cheap_mul = 0;
/* There is no point basing this on the tuning, since it is always the
fast variant if it exists at all. */
if (arm_fast_multiply && mode == DImode
&& (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
&& (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
|| GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
- return 8;
+ return cheap_mul ? 3 : 8;
if (GET_MODE_CLASS (mode) == MODE_FLOAT
|| mode == DImode)
- return 30;
+ return (mode == DImode && cheap_mul) ? 7 : 30;
- if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && !cheap_mul)
{
unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
& (unsigned HOST_WIDE_INT) 0xffffffff);
@@ -3427,7 +3440,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code co
return add_cost;
}
- return (((tune_flags & FL_FAST_MULT) ? 8 : 30)
+ return (((tune_flags & FL_FAST_MULT) ? (cheap_mul ? 2 : 8) : 30)
+ (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
+ (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4));