From ebde32fd2478df9909e07843451956ce89a02b46 Mon Sep 17 00:00:00 2001 From: Ben Elliston Date: Tue, 6 Oct 2009 19:37:40 +0000 Subject: [PATCH] config.gcc (powerpc*-*-*): Handle a2. * config.gcc (powerpc*-*-*): Handle a2. * config/rs6000/rs6000.md (cpu): Add ppca2. Include "a2.md". * config/rs6000/a2.md: New file. * config/rs6000/rs6000.opt (mno-update): New. (mupdate): Return to using a mask, not a var. * config/rs6000/rs6000.h (ASM_CPU_SPEC): Add support for a2. (enum processor_type): Add PROCESSOR_PPCA2. * config/rs6000/rs6000.c (ppca2_cost): New costs. (rs6000_override_options): Add "a2" to processor_target_table. Update rs6000_always_hint logic. Correctly set rs6000_cost for a2. * doc/invoke.texi (RS/6000 and PowerPC Options): Document -mcpu=a2. From-SVN: r152499 --- gcc/ChangeLog | 16 +++ gcc/config.gcc | 4 +- gcc/config/rs6000/a2.md | 238 +++++++++++++++++++++++++++++++++++ gcc/config/rs6000/rs6000.c | 29 ++++- gcc/config/rs6000/rs6000.h | 4 +- gcc/config/rs6000/rs6000.md | 3 +- gcc/config/rs6000/rs6000.opt | 6 +- gcc/doc/invoke.texi | 4 +- 8 files changed, 296 insertions(+), 8 deletions(-) create mode 100644 gcc/config/rs6000/a2.md diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 684352fbb60c..b7c4f5412371 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2009-10-07 Ben Elliston + + * config.gcc (powerpc*-*-*): Handle a2. + * config/rs6000/rs6000.md (cpu): Add ppca2. Include "a2.md". + * config/rs6000/a2.md: New file. + * config/rs6000/rs6000.opt (mno-update): New. + (mupdate): Return to using a mask, not a var. + * config/rs6000/rs6000.h (ASM_CPU_SPEC): Add support for a2. + (enum processor_type): Add PROCESSOR_PPCA2. + * config/rs6000/rs6000.c (ppca2_cost): New costs. + (rs6000_override_options): Add "a2" to processor_target_table. + Update rs6000_always_hint logic. Correctly set rs6000_cost for + a2. + * doc/invoke.texi (RS/6000 and PowerPC Options): Document + -mcpu=a2. 
+ 2009-10-06 Uros Bizjak * config/i386/i386.md (float2): diff --git a/gcc/config.gcc b/gcc/config.gcc index 1b0194b0589a..9e3ada7fb649 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -327,7 +327,7 @@ powerpc*-*-*) extra_headers="ppc-asm.h altivec.h spe.h ppu_intrinsics.h paired.h spu2vmx.h vec_types.h si2vmx.h" need_64bit_hwint=yes case x$with_cpu in - xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[34567]|xpower6x|xrs64a|xcell) + xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[34567]|xpower6x|xrs64a|xcell|xa2) cpu_is_64bit=yes ;; esac @@ -3070,7 +3070,7 @@ case "${target}" in | 401 | 403 | 405 | 405fp | 440 | 440fp | 464 | 464fp \ | 505 | 601 | 602 | 603 | 603e | ec603e | 604 \ | 604e | 620 | 630 | 740 | 750 | 7400 | 7450 \ - | e300c[23] | 854[08] | e500mc \ + | a2 | e300c[23] | 854[08] | e500mc \ | 801 | 821 | 823 | 860 | 970 | G3 | G4 | G5 | cell) # OK ;; diff --git a/gcc/config/rs6000/a2.md b/gcc/config/rs6000/a2.md new file mode 100644 index 000000000000..570753663b26 --- /dev/null +++ b/gcc/config/rs6000/a2.md @@ -0,0 +1,238 @@ +;; Scheduling description for PowerPC A2 processors. +;; Copyright (C) 2009 Free Software Foundation, Inc. +;; +;; Contributed by Ben Elliston (bje@au.ibm.com). + +(define_automaton "ppca2") + +;; CPU units + +;; The multiplier pipeline. +(define_cpu_unit "mult" "ppca2") + +;; The auxiliary processor unit (FP/vector unit). 
+(define_cpu_unit "axu" "ppca2") + +;; D.4.6 +;; Some peculiarities for certain SPRs + +(define_insn_reservation "ppca2-mfcr" 1 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "ppca2")) + "nothing") + +(define_insn_reservation "ppca2-mfjmpr" 5 + (and (eq_attr "type" "mfjmpr") + (eq_attr "cpu" "ppca2")) + "nothing") + +(define_insn_reservation "ppca2-mtjmpr" 5 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "ppca2")) + "nothing") + +;; D.4.8 +(define_insn_reservation "ppca2-imul" 1 + (and (eq_attr "type" "imul,imul2,imul3,imul_compare") + (eq_attr "cpu" "ppca2")) + "nothing") + +;; FIXME: latency and multiplier reservation for 64-bit multiply? +(define_insn_reservation "ppca2-lmul" 6 + (and (eq_attr "type" "lmul,lmul_compare") + (eq_attr "cpu" "ppca2")) + "mult*3") + +;; D.4.9 +(define_insn_reservation "ppca2-idiv" 32 + (and (eq_attr "type" "idiv") + (eq_attr "cpu" "ppca2")) + "mult*32") + +(define_insn_reservation "ppca2-ldiv" 65 + (and (eq_attr "type" "ldiv") + (eq_attr "cpu" "ppca2")) + "mult*65") + +;; D.4.13 +(define_insn_reservation "ppca2-load" 5 + (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u") + (eq_attr "cpu" "ppca2")) + "nothing") + +;; D.8.1 +(define_insn_reservation "ppca2-fp" 6 + (and (eq_attr "type" "fp") ;; Ignore fpsimple insn types (SPE only). + (eq_attr "cpu" "ppca2")) + "axu") + +;; D.8.4 +(define_insn_reservation "ppca2-fp-load" 6 + (and (eq_attr "type" "fpload,fpload_u,fpload_ux") + (eq_attr "cpu" "ppca2")) + "axu") + +;; D.8.5 +(define_insn_reservation "ppca2-fp-store" 2 + (and (eq_attr "type" "fpstore,fpstore_u,fpstore_ux") + (eq_attr "cpu" "ppca2")) + "axu") + +;; D.8.6 +(define_insn_reservation "ppca2-fpcompare" 5 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "ppca2")) + "axu") + +;; D.8.7 +;; +;; Instructions from the same thread succeeding the floating-point +;; divide cannot be executed until the floating-point divide has +;; completed. 
Since there is nothing else we can do, this thread will +;; just have to stall. + +(define_insn_reservation "ppca2-ddiv" 72 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "ppca2")) + "axu") + +(define_insn_reservation "ppca2-sdiv" 59 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "ppca2")) + "axu") + +;; D.8.8 +;; +;; Instructions from the same thread succeeding the floating-point +;; divide cannot be executed until the floating-point divide has +;; completed. Since there is nothing else we can do, this thread will +;; just have to stall. + +(define_insn_reservation "ppca2-dsqrt" 69 + (and (eq_attr "type" "dsqrt") + (eq_attr "cpu" "ppca2")) + "axu") + +(define_insn_reservation "ppca2-ssqrt" 65 + (and (eq_attr "type" "ssqrt") + (eq_attr "cpu" "ppca2")) + "axu") +;; Scheduling description for PowerPC A2 processors. +;; Copyright (C) 2008 Free Software Foundation, Inc. +;; +;; Contributed by Ben Elliston (bje@au.ibm.com). + +(define_automaton "a2") + +;; CPU units + +;; The multiplier pipeline. +(define_cpu_unit "mult" "a2") + +;; The auxiliary processor unit (FP/vector unit). +(define_cpu_unit "axu" "a2") + +;; D.4.6 +;; Some peculiarities for certain SPRs + +(define_insn_reservation "ppca2-mfcr" 1 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "ppca2")) + "nothing") + +(define_insn_reservation "ppca2-mfjmpr" 5 + (and (eq_attr "type" "mfjmpr") + (eq_attr "cpu" "ppca2")) + "nothing") + +(define_insn_reservation "ppca2-mtjmpr" 5 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "ppca2")) + "nothing") + +;; D.4.8 +(define_insn_reservation "ppca2-imul" 1 + (and (eq_attr "type" "imul,imul2,imul3,imul_compare") + (eq_attr "cpu" "ppca2")) + "nothing") + +;; FIXME: latency and multiplier reservation for 64-bit multiply? 
+(define_insn_reservation "ppca2-lmul" 6 + (and (eq_attr "type" "lmul,lmul_compare") + (eq_attr "cpu" "ppca2")) + "mult*3") + +;; D.4.9 +(define_insn_reservation "ppca2-idiv" 32 + (and (eq_attr "type" "idiv") + (eq_attr "cpu" "ppca2")) + "mult*32") + +(define_insn_reservation "ppca2-ldiv" 65 + (and (eq_attr "type" "ldiv") + (eq_attr "cpu" "ppca2")) + "mult*65") + +;; D.4.13 +(define_insn_reservation "pcca2-load" 5 + (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u") + (eq_attr "cpu" "ppca2")) + "nothing") + +;; D.8.1 +(define_insn_reservation "ppca2-fp" 6 + (and (eq_attr "type" "fp") ;; Ignore fpsimple insn types (SPE only). + (eq_attr "cpu" "ppca2")) + "axu") + +;; D.8.4 +(define_insn_reservation "ppca2-fp-load" 6 + (and (eq_attr "type" "fpload,fpload_u,fpload_ux") + (eq_attr "cpu" "ppca2")) + "axu") + +;; D.8.5 +(define_insn_reservation "ppca2-fp-store" 2 + (and (eq_attr "type" "fpstore,fpstore_u,fpstore_ux") + (eq_attr "cpu" "ppca2")) + "axu") + +;; D.8.6 +(define_insn_reservation "ppca2-fpcompare" 5 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "ppca2")) + "axu") + +;; D.8.7 +;; +;; Instructions from the same thread succeeding the floating-point +;; divide cannot be executed until the floating-point divide has +;; completed. Since there is nothing else we can do, this thread will +;; just have to stall. + +(define_insn_reservation "ppca2-ddiv" 72 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "ppca2")) + "axu") + +(define_insn_reservation "ppca2-sdiv" 59 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "ppca2")) + "axu") + +;; D.8.8 +;; +;; Instructions from the same thread succeeding the floating-point +;; divide cannot be executed until the floating-point divide has +;; completed. Since there is nothing else we can do, this thread will +;; just have to stall. 
+ +(define_insn_reservation "ppca2-dsqrt" 69 + (and (eq_attr "type" "dsqrt") + (eq_attr "cpu" "ppca2")) + "axu") + +(define_insn_reservation "ppca2-ssqrt" 65 + (and (eq_attr "type" "ssqrt") + (eq_attr "cpu" "ppca2")) + "axu") diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index f44bc992a481..1215a9f2da81 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -797,6 +797,25 @@ struct processor_costs power7_cost = { 12, /* prefetch streams */ }; +/* Instruction costs on POWER A2 processors. */ +static const +struct processor_costs ppca2_cost = { + COSTS_N_INSNS (16), /* mulsi */ + COSTS_N_INSNS (16), /* mulsi_const */ + COSTS_N_INSNS (16), /* mulsi_const9 */ + COSTS_N_INSNS (16), /* muldi */ + COSTS_N_INSNS (22), /* divsi */ + COSTS_N_INSNS (28), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (59), /* sdiv */ + COSTS_N_INSNS (72), /* ddiv */ + 64, + 16, /* l1 cache */ + 2048, /* l2 cache */ + 16, /* prefetch streams */ +}; + static bool rs6000_function_ok_for_sibcall (tree, tree); static const char *rs6000_invalid_within_doloop (const_rtx); @@ -2149,6 +2168,9 @@ rs6000_override_options (const char *default_cpu) /* 8548 has a dummy entry for now. 
*/ {"8548", PROCESSOR_PPC8540, POWERPC_BASE_MASK | MASK_STRICT_ALIGN | MASK_ISEL}, + {"a2", PROCESSOR_PPCA2, + POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_POWERPC64 | MASK_POPCNTB + | MASK_CMPB | MASK_NO_UPDATE }, {"e300c2", PROCESSOR_PPCE300C2, POWERPC_BASE_MASK | MASK_SOFT_FLOAT}, {"e300c3", PROCESSOR_PPCE300C3, POWERPC_BASE_MASK}, {"e500mc", PROCESSOR_PPCE500MC, POWERPC_BASE_MASK | MASK_PPC_GFXOPT @@ -2216,7 +2238,7 @@ rs6000_override_options (const char *default_cpu) | MASK_PPC_GFXOPT | MASK_POWERPC64 | MASK_ALTIVEC | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_MULHW | MASK_DLMZB | MASK_CMPB | MASK_MFPGPR | MASK_DFP - | MASK_POPCNTD | MASK_VSX | MASK_ISEL) + | MASK_POPCNTD | MASK_VSX | MASK_ISEL | MASK_NO_UPDATE) }; /* Set the pointer size. */ @@ -2495,6 +2517,7 @@ rs6000_override_options (const char *default_cpu) && rs6000_cpu != PROCESSOR_POWER5 && rs6000_cpu != PROCESSOR_POWER6 && rs6000_cpu != PROCESSOR_POWER7 + && rs6000_cpu != PROCESSOR_PPCA2 && rs6000_cpu != PROCESSOR_CELL); rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5 @@ -2713,6 +2736,10 @@ rs6000_override_options (const char *default_cpu) rs6000_cost = &power7_cost; break; + case PROCESSOR_PPCA2: + rs6000_cost = &ppca2_cost; + break; + default: gcc_unreachable (); } diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 52d9a594be22..a5ce9dd0f7f6 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -107,6 +107,7 @@ %{mcpu=power6: %(asm_cpu_power6) -maltivec} \ %{mcpu=power6x: %(asm_cpu_power6) -maltivec} \ %{mcpu=power7: %(asm_cpu_power7)} \ +%{mcpu=a2: -ma2} \ %{mcpu=powerpc: -mppc} \ %{mcpu=rios: -mpwr} \ %{mcpu=rios1: -mpwr} \ @@ -334,7 +335,8 @@ enum processor_type PROCESSOR_POWER5, PROCESSOR_POWER6, PROCESSOR_POWER7, - PROCESSOR_CELL + PROCESSOR_CELL, + PROCESSOR_PPCA2 }; /* FPU operations supported. 
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index ba51f1cebc74..7b3de2ad230a 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -139,7 +139,7 @@ ;; Processor type -- this attribute must exactly match the processor_type ;; enumeration in rs6000.h. -(define_attr "cpu" "rios1,rios2,rs64a,mpccore,ppc403,ppc405,ppc440,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppce300c2,ppce300c3,ppce500mc,power4,power5,power6,power7,cell" +(define_attr "cpu" "rios1,rios2,rs64a,mpccore,ppc403,ppc405,ppc440,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppce300c2,ppce300c3,ppce500mc,power4,power5,power6,power7,cell,ppca2" (const (symbol_ref "rs6000_cpu_attr"))) @@ -171,6 +171,7 @@ (include "power7.md") (include "cell.md") (include "xfpu.md") +(include "a2.md") (include "predicates.md") (include "constraints.md") diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 90af9dce47b2..63f0f8c15824 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -155,8 +155,12 @@ mvectorize-builtins Target Undocumented Report Var(TARGET_VECTORIZE_BUILTINS) Init(-1) ; Explicitly control whether we vectorize the builtins or not. 
+mno-update +Target Report RejectNegative Mask(NO_UPDATE) +Do not generate load/store with update instructions + mupdate -Target Report Var(TARGET_UPDATE) Init(1) +Target Report RejectNegative InverseMask(NO_UPDATE, UPDATE) Generate load/store with update instructions mavoid-indexed-addresses diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 75622b3d19b6..a805f2b59cd1 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -14607,8 +14607,8 @@ Supported values for @var{cpu_type} are @samp{401}, @samp{403}, @samp{505}, @samp{601}, @samp{602}, @samp{603}, @samp{603e}, @samp{604}, @samp{604e}, @samp{620}, @samp{630}, @samp{740}, @samp{7400}, @samp{7450}, @samp{750}, @samp{801}, @samp{821}, @samp{823}, -@samp{860}, @samp{970}, @samp{8540}, @samp{e300c2}, @samp{e300c3}, -@samp{e500mc}, @samp{ec603e}, @samp{G3}, @samp{G4}, @samp{G5}, +@samp{860}, @samp{970}, @samp{8540}, @samp{a2}, @samp{e300c2}, +@samp{e300c3}, @samp{e500mc}, @samp{ec603e}, @samp{G3}, @samp{G4}, @samp{G5}, @samp{power}, @samp{power2}, @samp{power3}, @samp{power4}, @samp{power5}, @samp{power5+}, @samp{power6}, @samp{power6x}, @samp{power7}, @samp{common}, @samp{powerpc}, @samp{powerpc64}, @samp{rios}, -- 2.43.5