This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: PATCH, rs6000: add PPCA2 support
- From: Peter Bergner <bergner at vnet dot ibm dot com>
- To: Ben Elliston <bje at au1 dot ibm dot com>
- Cc: gcc-patches at gcc dot gnu dot org, dje dot gcc at gmail dot com
- Date: Wed, 30 Sep 2009 12:56:55 -0500
- Subject: Re: PATCH, rs6000: add PPCA2 support
- References: <1253850801.25070.11.camel@helios>
On Fri, 2009-09-25 at 13:53 +1000, Ben Elliston wrote:
> The following patch adds support for the PowerPC A2 processor. It
> contains only rs6000-specific changes and does not touch any target
> independent code. One change worth noting is:
>
> * config/rs6000/rs6000.opt (mno-update): New.
> (mupdate): Return to using a mask, not a var.
>
> This was changed by Mike Meissner, Pat Haugen and Revital on June 23rd
> from a mask to a var to recover target mask bits. I have reverted this
> to a mask because (a) there are now sufficient mask bits available and
> (b) because I want this mask to be explicitly cleared in the "ppca2"
> entry of the processor_target_table.
>
> Tested with a `make check' on powerpc-linux and powerpc64-linux and a
> bootstrap on x86-64-linux. The documentation changes were tested with a
> `make info'. OK for the trunk?
Offline, David asked why we didn't follow the convention of using "a2" as
the option name (similar to "440", etc.) rather than "ppca2". Ben is on
vacation, so I refreshed the patch with that request. Is this ok for
trunk, assuming my powerpc64-linux bootstrap and regtesting don't show
any problems?
Peter
2009-09-30 Ben Elliston <bje@au.ibm.com>
* config.gcc (powerpc*-*-*): Handle a2.
* config/rs6000/rs6000.md (cpu): Add ppca2. Include "a2.md".
* config/rs6000/a2.md: New file.
* config/rs6000/rs6000.opt (mno-update): New.
(mupdate): Return to using a mask, not a var.
* config/rs6000/rs6000.h (ASM_CPU_SPEC): Add support for a2.
(enum processor_type): Add PROCESSOR_PPCA2.
* config/rs6000/rs6000.c (ppca2_cost): New costs.
(rs6000_override_options): Add "a2" to processor_target_table.
Update rs6000_always_hint logic. Correctly set rs6000_cost for
a2.
* doc/invoke.texi (RS/6000 and PowerPC Options): Document
-mcpu=a2.
Index: doc/invoke.texi
===================================================================
--- doc/invoke.texi (revision 152331)
+++ doc/invoke.texi (working copy)
@@ -14386,7 +14386,7 @@ Supported values for @var{cpu_type} are
@samp{e500mc}, @samp{ec603e}, @samp{G3}, @samp{G4}, @samp{G5},
@samp{power}, @samp{power2}, @samp{power3}, @samp{power4},
@samp{power5}, @samp{power5+}, @samp{power6}, @samp{power6x}, @samp{power7},
-@samp{common}, @samp{powerpc}, @samp{powerpc64}, @samp{rios},
+@samp{common}, @samp{powerpc}, @samp{powerpc64}, @samp{a2}, @samp{rios},
@samp{rios1}, @samp{rios2}, @samp{rsc}, and @samp{rs64}.
@option{-mcpu=common} selects a completely generic processor. Code
Index: config.gcc
===================================================================
--- config.gcc (revision 152331)
+++ config.gcc (working copy)
@@ -327,7 +327,7 @@ powerpc*-*-*)
extra_headers="ppc-asm.h altivec.h spe.h ppu_intrinsics.h paired.h spu2vmx.h vec_types.h si2vmx.h"
need_64bit_hwint=yes
case x$with_cpu in
- xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[34567]|xpower6x|xrs64a|xcell)
+ xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[34567]|xpower6x|xrs64a|xcell|xa2)
cpu_is_64bit=yes
;;
esac
@@ -3065,7 +3065,7 @@ case "${target}" in
eval "with_$which=405"
;;
"" | common \
- | power | power[234567] | power6x | powerpc | powerpc64 \
+ | power | power[234567] | power6x | powerpc | powerpc64 | a2 \
| rios | rios1 | rios2 | rsc | rsc1 | rs64a \
| 401 | 403 | 405 | 405fp | 440 | 440fp | 464 | 464fp \
| 505 | 601 | 602 | 603 | 603e | ec603e | 604 \
Index: config/rs6000/rs6000.opt
===================================================================
--- config/rs6000/rs6000.opt (revision 152331)
+++ config/rs6000/rs6000.opt (working copy)
@@ -155,8 +155,12 @@ mvectorize-builtins
Target Undocumented Report Var(TARGET_VECTORIZE_BUILTINS) Init(-1)
; Explicitly control whether we vectorize the builtins or not.
+mno-update
+Target Report RejectNegative Mask(NO_UPDATE)
+Do not generate load/store with update instructions
+
mupdate
-Target Report Var(TARGET_UPDATE) Init(1)
+Target Report RejectNegative InverseMask(NO_UPDATE, UPDATE)
Generate load/store with update instructions
mavoid-indexed-addresses
Index: config/rs6000/rs6000.c
===================================================================
--- config/rs6000/rs6000.c (revision 152331)
+++ config/rs6000/rs6000.c (working copy)
@@ -797,6 +797,25 @@ struct processor_costs power7_cost = {
12, /* prefetch streams */
};
+/* Instruction costs on POWER A2 processors. */
+static const
+struct processor_costs ppca2_cost = {
+ COSTS_N_INSNS (16), /* mulsi */
+ COSTS_N_INSNS (16), /* mulsi_const */
+ COSTS_N_INSNS (16), /* mulsi_const9 */
+ COSTS_N_INSNS (16), /* muldi */
+ COSTS_N_INSNS (22), /* divsi */
+ COSTS_N_INSNS (28), /* divdi */
+ COSTS_N_INSNS (3), /* fp */
+ COSTS_N_INSNS (3), /* dmul */
+ COSTS_N_INSNS (59), /* sdiv */
+ COSTS_N_INSNS (72), /* ddiv */
+ 64,
+ 16, /* l1 cache */
+ 2048, /* l2 cache */
+ 16, /* prefetch streams */
+};
+
static bool rs6000_function_ok_for_sibcall (tree, tree);
static const char *rs6000_invalid_within_doloop (const_rtx);
@@ -2189,6 +2208,9 @@ rs6000_override_options (const char *def
POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF
| MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD
| MASK_VSX}, /* Don't add MASK_ISEL by default */
+ {"a2", PROCESSOR_PPCA2,
+ POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_POWERPC64 | MASK_POPCNTB
+ | MASK_CMPB | MASK_NO_UPDATE },
{"powerpc", PROCESSOR_POWERPC, POWERPC_BASE_MASK},
{"powerpc64", PROCESSOR_POWERPC64,
POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_POWERPC64},
@@ -2216,7 +2238,7 @@ rs6000_override_options (const char *def
| MASK_PPC_GFXOPT | MASK_POWERPC64 | MASK_ALTIVEC
| MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_MULHW
| MASK_DLMZB | MASK_CMPB | MASK_MFPGPR | MASK_DFP
- | MASK_POPCNTD | MASK_VSX | MASK_ISEL)
+ | MASK_POPCNTD | MASK_VSX | MASK_ISEL | MASK_NO_UPDATE)
};
/* Set the pointer size. */
@@ -2495,6 +2517,7 @@ rs6000_override_options (const char *def
&& rs6000_cpu != PROCESSOR_POWER5
&& rs6000_cpu != PROCESSOR_POWER6
&& rs6000_cpu != PROCESSOR_POWER7
+ && rs6000_cpu != PROCESSOR_PPCA2
&& rs6000_cpu != PROCESSOR_CELL);
rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
|| rs6000_cpu == PROCESSOR_POWER5
@@ -2713,6 +2736,10 @@ rs6000_override_options (const char *def
rs6000_cost = &power7_cost;
break;
+ case PROCESSOR_PPCA2:
+ rs6000_cost = &ppca2_cost;
+ break;
+
default:
gcc_unreachable ();
}
Index: config/rs6000/rs6000.h
===================================================================
--- config/rs6000/rs6000.h (revision 152331)
+++ config/rs6000/rs6000.h (working copy)
@@ -107,6 +107,7 @@
%{mcpu=power6: %(asm_cpu_power6) -maltivec} \
%{mcpu=power6x: %(asm_cpu_power6) -maltivec} \
%{mcpu=power7: %(asm_cpu_power7)} \
+%{mcpu=a2: -ma2} \
%{mcpu=powerpc: -mppc} \
%{mcpu=rios: -mpwr} \
%{mcpu=rios1: -mpwr} \
@@ -334,7 +335,8 @@ enum processor_type
PROCESSOR_POWER5,
PROCESSOR_POWER6,
PROCESSOR_POWER7,
- PROCESSOR_CELL
+ PROCESSOR_CELL,
+ PROCESSOR_PPCA2
};
/* FPU operations supported.
Index: config/rs6000/rs6000.md
===================================================================
--- config/rs6000/rs6000.md (revision 152331)
+++ config/rs6000/rs6000.md (working copy)
@@ -139,7 +139,7 @@ (define_attr "length" ""
;; Processor type -- this attribute must exactly match the processor_type
;; enumeration in rs6000.h.
-(define_attr "cpu" "rios1,rios2,rs64a,mpccore,ppc403,ppc405,ppc440,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppce300c2,ppce300c3,ppce500mc,power4,power5,power6,power7,cell"
+(define_attr "cpu" "rios1,rios2,rs64a,mpccore,ppc403,ppc405,ppc440,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppce300c2,ppce300c3,ppce500mc,power4,power5,power6,power7,cell,ppca2"
(const (symbol_ref "rs6000_cpu_attr")))
@@ -171,6 +171,7 @@ (define_attr "cell_micro" "not,condition
(include "power7.md")
(include "cell.md")
(include "xfpu.md")
+(include "a2.md")
(include "predicates.md")
(include "constraints.md")
Index: config/rs6000/a2.md
===================================================================
--- config/rs6000/a2.md (revision 0)
+++ config/rs6000/a2.md (revision 0)
@@ -0,0 +1,119 @@
+;; Scheduling description for PowerPC A2 processors.
+;; Copyright (C) 2009 Free Software Foundation, Inc.
+;;
+;; Contributed by Ben Elliston (bje@au.ibm.com).
+
+(define_automaton "ppca2")
+
+;; CPU units
+
+;; The multiplier pipeline.
+(define_cpu_unit "mult" "ppca2")
+
+;; The auxillary processor unit (FP/vector unit).
+(define_cpu_unit "axu" "ppca2")
+
+;; D.4.6
+;; Some peculiarities for certain SPRs
+
+(define_insn_reservation "ppca2-mfcr" 1
+ (and (eq_attr "type" "mfcr")
+ (eq_attr "cpu" "ppca2"))
+ "nothing")
+
+(define_insn_reservation "ppca2-mfjmpr" 5
+ (and (eq_attr "type" "mfjmpr")
+ (eq_attr "cpu" "ppca2"))
+ "nothing")
+
+(define_insn_reservation "ppca2-mtjmpr" 5
+ (and (eq_attr "type" "mtjmpr")
+ (eq_attr "cpu" "ppca2"))
+ "nothing")
+
+;; D.4.8
+(define_insn_reservation "ppca2-imul" 1
+ (and (eq_attr "type" "imul,imul2,imul3,imul_compare")
+ (eq_attr "cpu" "ppca2"))
+ "nothing")
+
+;; FIXME: latency and multiplier reservation for 64-bit multiply?
+(define_insn_reservation "ppca2-lmul" 6
+ (and (eq_attr "type" "lmul,lmul_compare")
+ (eq_attr "cpu" "ppca2"))
+ "mult*3")
+
+;; D.4.9
+(define_insn_reservation "ppca2-idiv" 32
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "ppca2"))
+ "mult*32")
+
+(define_insn_reservation "ppca2-ldiv" 65
+ (and (eq_attr "type" "ldiv")
+ (eq_attr "cpu" "ppca2"))
+ "mult*65")
+
+;; D.4.13
+(define_insn_reservation "ppca2-load" 5
+ (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u")
+ (eq_attr "cpu" "ppca2"))
+ "nothing")
+
+;; D.8.1
+(define_insn_reservation "ppca2-fp" 6
+ (and (eq_attr "type" "fp") ;; Ignore fpsimple insn types (SPE only).
+ (eq_attr "cpu" "ppca2"))
+ "axu")
+
+;; D.8.4
+(define_insn_reservation "ppca2-fp-load" 6
+ (and (eq_attr "type" "fpload,fpload_u,fpload_ux")
+ (eq_attr "cpu" "ppca2"))
+ "axu")
+
+;; D.8.5
+(define_insn_reservation "ppca2-fp-store" 2
+ (and (eq_attr "type" "fpstore,fpstore_u,fpstore_ux")
+ (eq_attr "cpu" "ppca2"))
+ "axu")
+
+;; D.8.6
+(define_insn_reservation "ppca2-fpcompare" 5
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "ppca2"))
+ "axu")
+
+;; D.8.7
+;;
+;; Instructions from the same thread succeeding the floating-point
+;; divide cannot be executed until the floating-point divide has
+;; completed. Since there is nothing else we can do, this thread will
+;; just have to stall.
+
+(define_insn_reservation "ppca2-ddiv" 72
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "ppca2"))
+ "axu")
+
+(define_insn_reservation "ppca2-sdiv" 59
+ (and (eq_attr "type" "sdiv")
+ (eq_attr "cpu" "ppca2"))
+ "axu")
+
+;; D.8.8
+;;
+;; Instructions from the same thread succeeding the floating-point
+;; divide cannot be executed until the floating-point divide has
+;; completed. Since there is nothing else we can do, this thread will
+;; just have to stall.
+
+(define_insn_reservation "ppca2-dsqrt" 69
+ (and (eq_attr "type" "dsqrt")
+ (eq_attr "cpu" "ppca2"))
+ "axu")
+
+(define_insn_reservation "ppca2-ssqrt" 65
+ (and (eq_attr "type" "ssqrt")
+ (eq_attr "cpu" "ppca2"))
+ "axu")