This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Add floating point timings to rs6000_rtx_costs


On Tue, 6 Jul 2004, David Edelsohn wrote:
> 	On POWER4/POWER5, the latency of fast, simple instructions is 2
> cycles.  The cost of those instructions is COSTS_N_INSNS(1).  Your earlier
> patch did not change that and there is no intention of enumerating the
> cost of each trivial operation as COSTS_N_INSNS(2) for those processors.
> You need to divide all of your FP costs for POWER4/POWER5 by 2.

Ahhh!  Thanks, I didn't appreciate that simple integer operations took
two cycles on POWER4/POWER5.

<humour>
There may be some interesting optimizations using the "pc" to perform
arithmetic operations, as increments of the PC complete in one cycle,
which is faster than the equivalent POWER4 instruction on a GPR :>
</humour>


The following patch incorporates both of your suggestions: it scales
power4_cost's new FP costs by 0.5 to match the cost relative to an
integer addition, and it fixes the ppc640_cost typo in the ChangeLog entry.

The following patch has been tested on powerpc-apple-darwin7.4.0 with a
full "make bootstrap", all default languages, and regression tested with
a top-level "make -k check" with no new failures.

Ok for mainline?


2004-07-06  Roger Sayle  <roger@eyesopen.com>

	* config/rs6000/rs6000.c (struct processor_costs): Add new fields
	for simple floating point operations "fp", double precision
	multiplication "dmul", and single and double precision division
	"sdiv" and "ddiv".  Update all CPU variant tables as appropriate.
	(ppc630_cost): New table split from ppc620_cost, to distinguish
	differences in floating point latencies.
	(rs6000_override_options): Use ppc630_cost for PROCESSOR_PPC630.
	(rs6000_rtx_costs): Add support for single and double precision
	floating point addition, subtraction, multiplication, division,
	fused-multiply-add, fused-multiply-sub, negation, absolute value
	and negative absolute value.  Tweak MEM case to use preferred idiom.


Index: config/rs6000/rs6000.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000.c,v
retrieving revision 1.660
diff -c -3 -p -r1.660 rs6000.c
*** config/rs6000/rs6000.c	2 Jul 2004 18:00:46 -0000	1.660
--- config/rs6000/rs6000.c	6 Jul 2004 22:13:26 -0000
*************** struct processor_costs {
*** 292,297 ****
--- 292,301 ----
    const int muldi;        /* cost of DImode multiplication.  */
    const int divsi;        /* cost of SImode division.  */
    const int divdi;        /* cost of DImode division.  */
+   const int fp;           /* cost of simple SFmode and DFmode insns.  */
+   const int dmul;         /* cost of DFmode multiplication (and fmadd).  */
+   const int sdiv;         /* cost of SFmode division (fdivs).  */
+   const int ddiv;         /* cost of DFmode division (fdiv).  */
  };

  const struct processor_costs *rs6000_cost;
*************** struct processor_costs size32_cost = {
*** 307,312 ****
--- 311,320 ----
    1,    /* muldi */
    1,    /* divsi */
    1,    /* divdi */
+   1,    /* fp */
+   1,    /* dmul */
+   1,    /* sdiv */
+   1,    /* ddiv */
  };

  /* Instruction size costs on 64bit processors.  */
*************** struct processor_costs size64_cost = {
*** 318,323 ****
--- 326,335 ----
    1,    /* muldi */
    1,    /* divsi */
    1,    /* divdi */
+   1,    /* fp */
+   1,    /* dmul */
+   1,    /* sdiv */
+   1,    /* ddiv */
  };

  /* Instruction costs on RIOS1 processors.  */
*************** struct processor_costs rios1_cost = {
*** 329,334 ****
--- 341,350 ----
    5,    /* muldi */
    19,   /* divsi */
    19,   /* divdi */
+   2,    /* fp */
+   2,    /* dmul */
+   19,   /* sdiv */
+   19,   /* ddiv */
  };

  /* Instruction costs on RIOS2 processors.  */
*************** struct processor_costs rios2_cost = {
*** 340,345 ****
--- 356,365 ----
    2,    /* muldi */
    13,   /* divsi */
    13,   /* divdi */
+   2,    /* fp */
+   2,    /* dmul */
+   17,   /* sdiv */
+   17,   /* ddiv */
  };

  /* Instruction costs on RS64A processors.  */
*************** struct processor_costs rs64a_cost = {
*** 351,356 ****
--- 371,380 ----
    34,   /* muldi */
    65,   /* divsi */
    67,   /* divdi */
+   4,    /* fp */
+   4,    /* dmul */
+   31,   /* sdiv */
+   31,   /* ddiv */
  };

  /* Instruction costs on MPCCORE processors.  */
*************** struct processor_costs mpccore_cost = {
*** 362,367 ****
--- 386,395 ----
    2,    /* muldi */
    6,    /* divsi */
    6,    /* divdi */
+   4,    /* fp */
+   5,    /* dmul */
+   10,   /* sdiv */
+   17,   /* ddiv */
  };

  /* Instruction costs on PPC403 processors.  */
*************** struct processor_costs ppc403_cost = {
*** 373,378 ****
--- 401,410 ----
    4,    /* muldi */
    33,   /* divsi */
    33,   /* divdi */
+   11,   /* fp */
+   11,   /* dmul */
+   11,   /* sdiv */
+   11,   /* ddiv */
  };

  /* Instruction costs on PPC405 processors.  */
*************** struct processor_costs ppc405_cost = {
*** 384,389 ****
--- 416,425 ----
    5,    /* muldi */
    35,   /* divsi */
    35,   /* divdi */
+   11,   /* fp */
+   11,   /* dmul */
+   11,   /* sdiv */
+   11,   /* ddiv */
  };

  /* Instruction costs on PPC440 processors.  */
*************** struct processor_costs ppc440_cost = {
*** 395,400 ****
--- 431,440 ----
    3,    /* muldi */
    34,   /* divsi */
    34,   /* divdi */
+   5,    /* fp */
+   5,    /* dmul */
+   19,   /* sdiv */
+   33,   /* ddiv */
  };

  /* Instruction costs on PPC601 processors.  */
*************** struct processor_costs ppc601_cost = {
*** 406,411 ****
--- 446,455 ----
    5,    /* muldi */
    36,   /* divsi */
    36,   /* divdi */
+   4,    /* fp */
+   5,    /* dmul */
+   17,   /* sdiv */
+   31,   /* ddiv */
  };

  /* Instruction costs on PPC603 processors.  */
*************** struct processor_costs ppc603_cost = {
*** 417,422 ****
--- 461,470 ----
    5,    /* muldi */
    37,   /* divsi */
    37,   /* divdi */
+   3,    /* fp */
+   4,    /* dmul */
+   18,   /* sdiv */
+   33,   /* ddiv */
  };

  /* Instruction costs on PPC604 processors.  */
*************** struct processor_costs ppc604_cost = {
*** 428,433 ****
--- 476,485 ----
    4,    /* muldi */
    20,   /* divsi */
    20,   /* divdi */
+   3,    /* fp */
+   3,    /* dmul */
+   18,   /* sdiv */
+   32,   /* ddiv */
  };

  /* Instruction costs on PPC604e processors.  */
*************** struct processor_costs ppc604e_cost = {
*** 439,447 ****
    2,    /* muldi */
    20,   /* divsi */
    20,   /* divdi */
  };

! /* Instruction costs on PPC620 and PPC630 processors.  */
  static const
  struct processor_costs ppc620_cost = {
    5,    /* mulsi */
--- 491,503 ----
    2,    /* muldi */
    20,   /* divsi */
    20,   /* divdi */
+   3,    /* fp */
+   3,    /* dmul */
+   18,   /* sdiv */
+   32,   /* ddiv */
  };

! /* Instruction costs on PPC620 processors.  */
  static const
  struct processor_costs ppc620_cost = {
    5,    /* mulsi */
*************** struct processor_costs ppc620_cost = {
*** 450,455 ****
--- 506,530 ----
    7,    /* muldi */
    21,   /* divsi */
    37,   /* divdi */
+   3,    /* fp */
+   3,    /* dmul */
+   18,   /* sdiv */
+   32,   /* ddiv */
+ };
+
+ /* Instruction costs on PPC630 processors.  */
+ static const
+ struct processor_costs ppc630_cost = {
+   5,    /* mulsi */
+   4,    /* mulsi_const */
+   3,    /* mulsi_const9 */
+   7,    /* muldi */
+   21,   /* divsi */
+   37,   /* divdi */
+   3,    /* fp */
+   3,    /* dmul */
+   17,   /* sdiv */
+   21,   /* ddiv */
  };

  /* Instruction costs on PPC750 and PPC7400 processors.  */
*************** struct processor_costs ppc750_cost = {
*** 461,466 ****
--- 536,545 ----
    5,    /* muldi */
    17,   /* divsi */
    17,   /* divdi */
+   3,    /* fp */
+   3,    /* dmul */
+   17,   /* sdiv */
+   31,   /* ddiv */
  };

  /* Instruction costs on PPC7450 processors.  */
*************** struct processor_costs ppc7450_cost = {
*** 472,477 ****
--- 551,560 ----
    4,    /* muldi */
    23,   /* divsi */
    23,   /* divdi */
+   5,    /* fp */
+   5,    /* dmul */
+   21,   /* sdiv */
+   35,   /* ddiv */
  };

  /* Instruction costs on PPC8540 processors.  */
*************** struct processor_costs ppc8540_cost = {
*** 483,488 ****
--- 566,575 ----
    4,    /* muldi */
    19,   /* divsi */
    19,   /* divdi */
+   4,    /* fp */
+   4,    /* dmul */
+   29,   /* sdiv */
+   29,   /* ddiv */
  };

  /* Instruction costs on POWER4 and POWER5 processors.  */
*************** struct processor_costs power4_cost = {
*** 494,499 ****
--- 581,590 ----
    4,    /* muldi */
    18,   /* divsi */
    34,   /* divdi */
+   3,    /* fp */
+   3,    /* dmul */
+   17,   /* sdiv */
+   17,   /* ddiv */
  };


*************** rs6000_override_options (const char *def
*** 1370,1379 ****
  	break;

        case PROCESSOR_PPC620:
-       case PROCESSOR_PPC630:
  	rs6000_cost = &ppc620_cost;
  	break;

        case PROCESSOR_PPC750:
        case PROCESSOR_PPC7400:
  	rs6000_cost = &ppc750_cost;
--- 1461,1473 ----
  	break;

        case PROCESSOR_PPC620:
  	rs6000_cost = &ppc620_cost;
  	break;

+       case PROCESSOR_PPC630:
+ 	rs6000_cost = &ppc630_cost;
+ 	break;
+
        case PROCESSOR_PPC750:
        case PROCESSOR_PPC7400:
  	rs6000_cost = &ppc750_cost;
*************** static bool
*** 16419,16424 ****
--- 16513,16520 ----
  rs6000_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
  		  int *total)
  {
+   enum machine_mode mode = GET_MODE (x);
+
    switch (code)
      {
        /* On the RS/6000, if it is valid in the insn, it is free.
*************** rs6000_rtx_costs (rtx x, int code, int o
*** 16433,16448 ****
        return true;

      case PLUS:
!       *total = ((GET_CODE (XEXP (x, 1)) == CONST_INT
! 		 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1))
! 					       + 0x8000) >= 0x10000)
! 		 && ((INTVAL (XEXP (x, 1)) & 0xffff) != 0))
! 		? COSTS_N_INSNS (2)
! 		: COSTS_N_INSNS (1));
        return true;

      case MINUS:
!       *total = COSTS_N_INSNS (1);
        return true;

      case AND:
--- 16529,16558 ----
        return true;

      case PLUS:
!       if (mode == DFmode)
! 	*total = GET_CODE (XEXP (x, 0)) == MULT
! 		 ? COSTS_N_INSNS (rs6000_cost->dmul)
! 		 : COSTS_N_INSNS (rs6000_cost->fp);
!       else if (mode == SFmode)
! 	*total = COSTS_N_INSNS (rs6000_cost->fp);
!       else
! 	*total = ((GET_CODE (XEXP (x, 1)) == CONST_INT
! 		  && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1))
! 						+ 0x8000) >= 0x10000)
! 		  && ((INTVAL (XEXP (x, 1)) & 0xffff) != 0))
! 		 ? COSTS_N_INSNS (2)
! 		 : COSTS_N_INSNS (1));
        return true;

      case MINUS:
!       if (mode == DFmode)
! 	*total = GET_CODE (XEXP (x, 0)) == MULT
! 		 ? COSTS_N_INSNS (rs6000_cost->dmul)
! 		 : COSTS_N_INSNS (rs6000_cost->fp);
!       else if (mode == SFmode)
! 	*total = COSTS_N_INSNS (rs6000_cost->fp);
!       else
!         *total = COSTS_N_INSNS (1);
        return true;

      case AND:
*************** rs6000_rtx_costs (rtx x, int code, int o
*** 16464,16470 ****
  	  else
  	    *total = COSTS_N_INSNS (rs6000_cost->mulsi_const);
  	}
!       else if (GET_MODE (XEXP (x, 1)) == DImode)
  	*total = COSTS_N_INSNS (rs6000_cost->muldi);
        else
  	*total = COSTS_N_INSNS (rs6000_cost->mulsi);
--- 16574,16584 ----
  	  else
  	    *total = COSTS_N_INSNS (rs6000_cost->mulsi_const);
  	}
!       else if (mode == DFmode)
! 	*total = COSTS_N_INSNS (rs6000_cost->dmul);
!       else if (mode == SFmode)
! 	*total = COSTS_N_INSNS (rs6000_cost->fp);
!       else if (mode == DImode)
  	*total = COSTS_N_INSNS (rs6000_cost->muldi);
        else
  	*total = COSTS_N_INSNS (rs6000_cost->mulsi);
*************** rs6000_rtx_costs (rtx x, int code, int o
*** 16472,16477 ****
--- 16586,16597 ----

      case DIV:
      case MOD:
+       if (FLOAT_MODE_P (mode))
+ 	{
+ 	  *total = mode == DFmode ? COSTS_N_INSNS (rs6000_cost->ddiv)
+ 				  : COSTS_N_INSNS (rs6000_cost->sdiv);
+ 	  return true;
+ 	}
        if (GET_CODE (XEXP (x, 1)) == CONST_INT
  	  && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
  	{
*************** rs6000_rtx_costs (rtx x, int code, int o
*** 16492,16500 ****
        *total = COSTS_N_INSNS (4);
        return true;

      case MEM:
        /* MEM should be slightly more expensive than (plus (reg) (const)).  */
!       *total = 5;
        return true;

      default:
--- 16612,16628 ----
        *total = COSTS_N_INSNS (4);
        return true;

+     case NEG:
+     case ABS:
+       if (FLOAT_MODE_P (mode))
+ 	*total = COSTS_N_INSNS (rs6000_cost->fp);
+       else
+ 	*total = COSTS_N_INSNS (1);
+       return true;
+
      case MEM:
        /* MEM should be slightly more expensive than (plus (reg) (const)).  */
!       *total = COSTS_N_INSNS (1) + 1;
        return true;

      default:


Roger
--


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]