This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

[arm-csl-branch] Updated rtx_costs.

From: Paul Brook <paul at codesourcery dot com>
To: gcc-patches at gcc dot gnu dot org
Cc: Richard Earnshaw <rearnsha at arm dot com>
Date: Mon, 15 Dec 2003 11:44:33 +0000
Subject: [arm-csl-branch] Updated rtx_costs.

Attached is a patch with updated rtx_costs for the arm926ejs and arm1026ejs cores. 
The only real difference between these cores and the existing costs seems 
to be that multiplies are cheaper. Both cores look similar from a relative 
insn cost POV.

Basically rtx_cost returns 1 for a single-cycle reg op (e.g. add). A constant 
pool operand adds a penalty of 4.

Existing costs for multiplies are comparatively high (8) and depend on the 
actual value multiplied.
The 9e/10e cores do SI multiplies in 2/3 cycles and SI*SI->DI mul in 3/4. 
I've used the smaller of these values on the assumption that the scheduler 
will usually be able to avoid the 1 cycle interlock with the following 
insn. DI*DI->DI multiplies have a cost of 7 (3+2+2).

For the thumb variant I've given a cost of 4 insns, as only the MULS variants 
are available. The thumb variants seem to use a different costing scale, just 
to confuse things.

Ok?

Paul

2003-12-15  Paul Brook  <paul@codesourcery.com>

	* config/arm/arm.c (arm_rtx_costs_1): Add MUL costs for arm9e/10e cores.

Index: gcc/config/arm/arm.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/arm/arm.c,v
retrieving revision 1.303.2.4
diff -u -p -r1.303.2.4 arm.c
--- gcc/config/arm/arm.c	10 Dec 2003 12:16:04 -0000	1.303.2.4
+++ gcc/config/arm/arm.c	12 Dec 2003 16:11:19 -0000
@@ -3152,6 +3152,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code co
   enum machine_mode mode = GET_MODE (x);
   enum rtx_code subcode;
   int extra_cost;
+  int cheap_mul;
 
   if (TARGET_THUMB)
     {
@@ -3169,20 +3170,25 @@ arm_rtx_costs_1 (rtx x, enum rtx_code co
 	  return COSTS_N_INSNS (1);
 	  
 	case MULT:							
-	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)			
-	    {								
-	      int cycles = 0;						
-	      unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
-	      
-	      while (i)						
-		{							
-		  i >>= 2;						
-		  cycles++;						
-		}							
-	      return COSTS_N_INSNS (2) + cycles;			
+	  if (arm_tune == arm926ejs
+	      || arm_tune == arm1026ejs)
+	    return COSTS_N_INSNS (4);
+	  else
+	    {
+	      if (GET_CODE (XEXP (x, 1)) == CONST_INT)			
+		{								
+		  int cycles = 0;						
+		  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
+		  
+		  while (i)						
+		    {							
+		      i >>= 2;						
+		      cycles++;						
+		    }							
+		  return COSTS_N_INSNS (2) + cycles;			
+		}
+	      return COSTS_N_INSNS (1) + 16;
 	    }
-	  return COSTS_N_INSNS (1) + 16;
-	  
 	case SET:							
 	  return (COSTS_N_INSNS (1)					
 		  + 4 * ((GET_CODE (SET_SRC (x)) == MEM)		
@@ -3396,19 +3402,26 @@ arm_rtx_costs_1 (rtx x, enum rtx_code co
       return 8;
 
     case MULT:
+      if (arm_tune == arm9e
+	  || arm_tune == arm926ejs
+	  || arm_tune == arm1026ejs)
+	cheap_mul = 1;
+      else
+	cheap_mul = 0;
       /* There is no point basing this on the tuning, since it is always the
 	 fast variant if it exists at all.  */
       if (arm_fast_multiply && mode == DImode
 	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
 	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
 	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
-	return 8;
+	return cheap_mul ? 3 : 8;
 
       if (GET_MODE_CLASS (mode) == MODE_FLOAT
 	  || mode == DImode)
-	return 30;
+	return (mode == DImode && cheap_mul) ? 7 : 30;
 
-      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+      if (GET_CODE (XEXP (x, 1)) == CONST_INT
+	  && !cheap_mul)
 	{
 	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
 				      & (unsigned HOST_WIDE_INT) 0xffffffff);
@@ -3427,7 +3440,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code co
 	  return add_cost;
 	}
 
-      return (((tune_flags & FL_FAST_MULT) ? 8 : 30)
+      return (((tune_flags & FL_FAST_MULT) ? (cheap_mul ? 2 : 8) : 30)
 	      + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
 	      + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4));

Follow-Ups:
- Re: [arm-csl-branch] Updated rtx_costs.
  - From: Richard Earnshaw

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]