This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
Other format: | [Raw text] |
On Tue, 6 Jul 2004, David Edelsohn wrote: > On POWER4/POWER5, the latency of fast, simple instructions is 2 > cycles. The cost of those instructions is COSTS_N_INSNS(1). Your earlier > patch did not change that and there is no intention of enumerating the > cost of each trivial operation as COSTS_N_INSNS(2) for those processors. > You need to divide all of your FP costs for POWER4/POWER5 by 2. Ahhh! Thanks, I didn't appreciate that simple integer operations took two cycles on POWER4/POWER5. <humour> There may be some interesting optimizations using the "pc" to perform arithmetic operations, as increments of the PC complete in one cycle, which is faster than the equivalent POWER4 instruction on a GPR :> </humour> The following patch incorporates both of your suggestions: the scaling of power4_cost's new FP costs by 0.5 to match the cost relative to an integer addition, and the fix for the ppc640_cost typo in the ChangeLog entry. The following patch has been tested on powerpc-apple-darwin7.4.0 with a full "make bootstrap", all default languages, and regression tested with a top-level "make -k check" with no new failures. Ok for mainline? 2004-07-06 Roger Sayle <roger@eyesopen.com> * config/rs6000/rs6000.c (struct processor_costs): Add new fields for simple floating point operations "fp", double precision multiplication "dmul", and single and double precision division "sdiv" and "ddiv". Update all CPU variant tables as appropriate. (ppc630_cost): New table split from ppc620_cost, to distinguish differences in floating point latencies. (rs6000_override_options): Use ppc630_cost for PROCESSOR_PPC630. (rs6000_rtx_costs): Add support for single and double precision floating point addition, subtraction, multiplication, division, fused-multiply-add, fused-multiply-sub, negation, absolute value and negative absolute value. Tweak MEM case to use preferred idiom. 
Index: config/rs6000/rs6000.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000.c,v retrieving revision 1.660 diff -c -3 -p -r1.660 rs6000.c *** config/rs6000/rs6000.c 2 Jul 2004 18:00:46 -0000 1.660 --- config/rs6000/rs6000.c 6 Jul 2004 22:13:26 -0000 *************** struct processor_costs { *** 292,297 **** --- 292,301 ---- const int muldi; /* cost of DImode multiplication. */ const int divsi; /* cost of SImode division. */ const int divdi; /* cost of DImode division. */ + const int fp; /* cost of simple SFmode and DFmode insns. */ + const int dmul; /* cost of DFmode multiplication (and fmadd). */ + const int sdiv; /* cost of SFmode division (fdivs). */ + const int ddiv; /* cost of DFmode division (fdiv). */ }; const struct processor_costs *rs6000_cost; *************** struct processor_costs size32_cost = { *** 307,312 **** --- 311,320 ---- 1, /* muldi */ 1, /* divsi */ 1, /* divdi */ + 1, /* fp */ + 1, /* dmul */ + 1, /* sdiv */ + 1, /* ddiv */ }; /* Instruction size costs on 64bit processors. */ *************** struct processor_costs size64_cost = { *** 318,323 **** --- 326,335 ---- 1, /* muldi */ 1, /* divsi */ 1, /* divdi */ + 1, /* fp */ + 1, /* dmul */ + 1, /* sdiv */ + 1, /* ddiv */ }; /* Instruction costs on RIOS1 processors. */ *************** struct processor_costs rios1_cost = { *** 329,334 **** --- 341,350 ---- 5, /* muldi */ 19, /* divsi */ 19, /* divdi */ + 2, /* fp */ + 2, /* dmul */ + 19, /* sdiv */ + 19, /* ddiv */ }; /* Instruction costs on RIOS2 processors. */ *************** struct processor_costs rios2_cost = { *** 340,345 **** --- 356,365 ---- 2, /* muldi */ 13, /* divsi */ 13, /* divdi */ + 2, /* fp */ + 2, /* dmul */ + 17, /* sdiv */ + 17, /* ddiv */ }; /* Instruction costs on RS64A processors. 
*/ *************** struct processor_costs rs64a_cost = { *** 351,356 **** --- 371,380 ---- 34, /* muldi */ 65, /* divsi */ 67, /* divdi */ + 4, /* fp */ + 4, /* dmul */ + 31, /* sdiv */ + 31, /* ddiv */ }; /* Instruction costs on MPCCORE processors. */ *************** struct processor_costs mpccore_cost = { *** 362,367 **** --- 386,395 ---- 2, /* muldi */ 6, /* divsi */ 6, /* divdi */ + 4, /* fp */ + 5, /* dmul */ + 10, /* sdiv */ + 17, /* ddiv */ }; /* Instruction costs on PPC403 processors. */ *************** struct processor_costs ppc403_cost = { *** 373,378 **** --- 401,410 ---- 4, /* muldi */ 33, /* divsi */ 33, /* divdi */ + 11, /* fp */ + 11, /* dmul */ + 11, /* sdiv */ + 11, /* ddiv */ }; /* Instruction costs on PPC405 processors. */ *************** struct processor_costs ppc405_cost = { *** 384,389 **** --- 416,425 ---- 5, /* muldi */ 35, /* divsi */ 35, /* divdi */ + 11, /* fp */ + 11, /* dmul */ + 11, /* sdiv */ + 11, /* ddiv */ }; /* Instruction costs on PPC440 processors. */ *************** struct processor_costs ppc440_cost = { *** 395,400 **** --- 431,440 ---- 3, /* muldi */ 34, /* divsi */ 34, /* divdi */ + 5, /* fp */ + 5, /* dmul */ + 19, /* sdiv */ + 33, /* ddiv */ }; /* Instruction costs on PPC601 processors. */ *************** struct processor_costs ppc601_cost = { *** 406,411 **** --- 446,455 ---- 5, /* muldi */ 36, /* divsi */ 36, /* divdi */ + 4, /* fp */ + 5, /* dmul */ + 17, /* sdiv */ + 31, /* ddiv */ }; /* Instruction costs on PPC603 processors. */ *************** struct processor_costs ppc603_cost = { *** 417,422 **** --- 461,470 ---- 5, /* muldi */ 37, /* divsi */ 37, /* divdi */ + 3, /* fp */ + 4, /* dmul */ + 18, /* sdiv */ + 33, /* ddiv */ }; /* Instruction costs on PPC604 processors. */ *************** struct processor_costs ppc604_cost = { *** 428,433 **** --- 476,485 ---- 4, /* muldi */ 20, /* divsi */ 20, /* divdi */ + 3, /* fp */ + 3, /* dmul */ + 18, /* sdiv */ + 32, /* ddiv */ }; /* Instruction costs on PPC604e processors. 
*/ *************** struct processor_costs ppc604e_cost = { *** 439,447 **** 2, /* muldi */ 20, /* divsi */ 20, /* divdi */ }; ! /* Instruction costs on PPC620 and PPC630 processors. */ static const struct processor_costs ppc620_cost = { 5, /* mulsi */ --- 491,503 ---- 2, /* muldi */ 20, /* divsi */ 20, /* divdi */ + 3, /* fp */ + 3, /* dmul */ + 18, /* sdiv */ + 32, /* ddiv */ }; ! /* Instruction costs on PPC620 processors. */ static const struct processor_costs ppc620_cost = { 5, /* mulsi */ *************** struct processor_costs ppc620_cost = { *** 450,455 **** --- 506,530 ---- 7, /* muldi */ 21, /* divsi */ 37, /* divdi */ + 3, /* fp */ + 3, /* dmul */ + 18, /* sdiv */ + 32, /* ddiv */ + }; + + /* Instruction costs on PPC630 processors. */ + static const + struct processor_costs ppc630_cost = { + 5, /* mulsi */ + 4, /* mulsi_const */ + 3, /* mulsi_const9 */ + 7, /* muldi */ + 21, /* divsi */ + 37, /* divdi */ + 3, /* fp */ + 3, /* dmul */ + 17, /* sdiv */ + 21, /* ddiv */ }; /* Instruction costs on PPC750 and PPC7400 processors. */ *************** struct processor_costs ppc750_cost = { *** 461,466 **** --- 536,545 ---- 5, /* muldi */ 17, /* divsi */ 17, /* divdi */ + 3, /* fp */ + 3, /* dmul */ + 17, /* sdiv */ + 31, /* ddiv */ }; /* Instruction costs on PPC7450 processors. */ *************** struct processor_costs ppc7450_cost = { *** 472,477 **** --- 551,560 ---- 4, /* muldi */ 23, /* divsi */ 23, /* divdi */ + 5, /* fp */ + 5, /* dmul */ + 21, /* sdiv */ + 35, /* ddiv */ }; /* Instruction costs on PPC8540 processors. */ *************** struct processor_costs ppc8540_cost = { *** 483,488 **** --- 566,575 ---- 4, /* muldi */ 19, /* divsi */ 19, /* divdi */ + 4, /* fp */ + 4, /* dmul */ + 29, /* sdiv */ + 29, /* ddiv */ }; /* Instruction costs on POWER4 and POWER5 processors. 
*/ *************** struct processor_costs power4_cost = { *** 494,499 **** --- 581,590 ---- 4, /* muldi */ 18, /* divsi */ 34, /* divdi */ + 3, /* fp */ + 3, /* dmul */ + 17, /* sdiv */ + 17, /* ddiv */ }; *************** rs6000_override_options (const char *def *** 1370,1379 **** break; case PROCESSOR_PPC620: - case PROCESSOR_PPC630: rs6000_cost = &ppc620_cost; break; case PROCESSOR_PPC750: case PROCESSOR_PPC7400: rs6000_cost = &ppc750_cost; --- 1461,1473 ---- break; case PROCESSOR_PPC620: rs6000_cost = &ppc620_cost; break; + case PROCESSOR_PPC630: + rs6000_cost = &ppc630_cost; + break; + case PROCESSOR_PPC750: case PROCESSOR_PPC7400: rs6000_cost = &ppc750_cost; *************** static bool *** 16419,16424 **** --- 16513,16520 ---- rs6000_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) { + enum machine_mode mode = GET_MODE (x); + switch (code) { /* On the RS/6000, if it is valid in the insn, it is free. *************** rs6000_rtx_costs (rtx x, int code, int o *** 16433,16448 **** return true; case PLUS: ! *total = ((GET_CODE (XEXP (x, 1)) == CONST_INT ! && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) ! + 0x8000) >= 0x10000) ! && ((INTVAL (XEXP (x, 1)) & 0xffff) != 0)) ! ? COSTS_N_INSNS (2) ! : COSTS_N_INSNS (1)); return true; case MINUS: ! *total = COSTS_N_INSNS (1); return true; case AND: --- 16529,16558 ---- return true; case PLUS: ! if (mode == DFmode) ! *total = GET_CODE (XEXP (x, 0)) == MULT ! ? COSTS_N_INSNS (rs6000_cost->dmul) ! : COSTS_N_INSNS (rs6000_cost->fp); ! else if (mode == SFmode) ! *total = COSTS_N_INSNS (rs6000_cost->fp); ! else ! *total = ((GET_CODE (XEXP (x, 1)) == CONST_INT ! && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) ! + 0x8000) >= 0x10000) ! && ((INTVAL (XEXP (x, 1)) & 0xffff) != 0)) ! ? COSTS_N_INSNS (2) ! : COSTS_N_INSNS (1)); return true; case MINUS: ! if (mode == DFmode) ! *total = GET_CODE (XEXP (x, 0)) == MULT ! ? COSTS_N_INSNS (rs6000_cost->dmul) ! : COSTS_N_INSNS (rs6000_cost->fp); ! 
else if (mode == SFmode) ! *total = COSTS_N_INSNS (rs6000_cost->fp); ! else ! *total = COSTS_N_INSNS (1); return true; case AND: *************** rs6000_rtx_costs (rtx x, int code, int o *** 16464,16470 **** else *total = COSTS_N_INSNS (rs6000_cost->mulsi_const); } ! else if (GET_MODE (XEXP (x, 1)) == DImode) *total = COSTS_N_INSNS (rs6000_cost->muldi); else *total = COSTS_N_INSNS (rs6000_cost->mulsi); --- 16574,16584 ---- else *total = COSTS_N_INSNS (rs6000_cost->mulsi_const); } ! else if (mode == DFmode) ! *total = COSTS_N_INSNS (rs6000_cost->dmul); ! else if (mode == SFmode) ! *total = COSTS_N_INSNS (rs6000_cost->fp); ! else if (mode == DImode) *total = COSTS_N_INSNS (rs6000_cost->muldi); else *total = COSTS_N_INSNS (rs6000_cost->mulsi); *************** rs6000_rtx_costs (rtx x, int code, int o *** 16472,16477 **** --- 16586,16597 ---- case DIV: case MOD: + if (FLOAT_MODE_P (mode)) + { + *total = mode == DFmode ? COSTS_N_INSNS (rs6000_cost->ddiv) + : COSTS_N_INSNS (rs6000_cost->sdiv); + return true; + } if (GET_CODE (XEXP (x, 1)) == CONST_INT && exact_log2 (INTVAL (XEXP (x, 1))) >= 0) { *************** rs6000_rtx_costs (rtx x, int code, int o *** 16492,16500 **** *total = COSTS_N_INSNS (4); return true; case MEM: /* MEM should be slightly more expensive than (plus (reg) (const)). */ ! *total = 5; return true; default: --- 16612,16628 ---- *total = COSTS_N_INSNS (4); return true; + case NEG: + case ABS: + if (FLOAT_MODE_P (mode)) + *total = COSTS_N_INSNS (rs6000_cost->fp); + else + *total = COSTS_N_INSNS (1); + return true; + case MEM: /* MEM should be slightly more expensive than (plus (reg) (const)). */ ! *total = COSTS_N_INSNS (1) + 1; return true; default: Roger --
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |