This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Add pa6t PowerPC machine description


Greetings,

I'm hoping to get this in under the wire for 4.3.
This is the core patch to support P.A. Semi's PWRficient processor.

Subsequent patches will add:
* support for newer instructions, such as use of single-field m[tf]ocrf in
  prologue/epilogue, since the multi-field case is microcoded on pa6t
* embedded option -mcfe to support CFE firmware
* avoidance of load-signextend-update insns, which are microcoded on pa6t


Bootstrapped and checked for powerpc64-linux on power5 with no regressions. OK?

Greg

2008-01-16  Greg McGary  <greg@mcgary.org>

	* config.gcc (powerpc*-*-*): Accept --with-cpu=pa6t --with-tune=pa6t.
	* config/rs6000/rs6000.c (pa6t_cost): New costs.
	(rs6000_override_options, rs6000_adjust_cost, is_microcoded_insn,
	is_cracked_insn, rs6000_issue_rate, insn_must_be_first_in_group,
	rs6000_sched_finish): Support pa6t.
	(strip_extend, separate_dependent_loads_from_stores): New functions.
	* config/rs6000/rs6000.h (ASM_CPU_SPEC): Add mcpu=pa6t.
	(enum processor_type) [PROCESSOR_PA6T]: New constant.
	* config/rs6000/rs6000.md (cpu): Add pa6t.  Include "pa6t.md".
	* config/rs6000/pa6t.md: New file.
	* doc/invoke.texi: Add pa6t to the list of -mcpu values.

Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi	(revision 131585)
+++ gcc/doc/invoke.texi	(working copy)
@@ -1,5 +1,5 @@
 @c Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-@c 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
+@c 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
 @c Free Software Foundation, Inc.
 @c This is part of the GCC manual.
 @c For copying conditions, see the file gcc.texi.
@@ -12,7 +12,7 @@
 @c man begin COPYRIGHT
 Copyright @copyright{} 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
-2007 Free Software Foundation, Inc.
+2007, 2008 Free Software Foundation, Inc.
 
 Permission is granted to copy, distribute and/or modify this document
 under the terms of the GNU Free Documentation License, Version 1.2 or
@@ -12798,10 +12798,11 @@
 @samp{604e}, @samp{620}, @samp{630}, @samp{740}, @samp{7400},
 @samp{7450}, @samp{750}, @samp{801}, @samp{821}, @samp{823},
 @samp{860}, @samp{970}, @samp{8540}, @samp{ec603e}, @samp{G3},
-@samp{G4}, @samp{G5}, @samp{power}, @samp{power2}, @samp{power3},
-@samp{power4}, @samp{power5}, @samp{power5+}, @samp{power6},
-@samp{power6x}, @samp{common}, @samp{powerpc}, @samp{powerpc64},
-@samp{rios}, @samp{rios1}, @samp{rios2}, @samp{rsc}, and @samp{rs64}.
+@samp{G4}, @samp{G5}, @samp{pa6t}, @samp{power}, @samp{power2},
+@samp{power3}, @samp{power4}, @samp{power5}, @samp{power5+},
+@samp{power6}, @samp{power6x}, @samp{common}, @samp{powerpc},
+@samp{powerpc64}, @samp{rios}, @samp{rios1}, @samp{rios2}, @samp{rsc},
+and @samp{rs64}.
 
 @option{-mcpu=common} selects a completely generic processor.  Code
 generated under this option will run on any POWER or PowerPC processor.
Index: gcc/config.gcc
===================================================================
--- gcc/config.gcc	(revision 131585)
+++ gcc/config.gcc	(working copy)
@@ -1,5 +1,5 @@
 # GCC target-specific configuration file.
-# Copyright 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
+# Copyright 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
 # Free Software Foundation, Inc.
 
 #This file is part of GCC.
@@ -320,7 +320,7 @@
 	extra_headers="ppc-asm.h altivec.h spe.h ppu_intrinsics.h paired.h spu2vmx.h vec_types.h si2vmx.h"
 	need_64bit_hwint=yes
 	case x$with_cpu in
-	    xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456]|xpower6x|xrs64a|xcell)
+	    xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456]|xpower6x|xrs64a|xcell|xpa6t)
 		cpu_is_64bit=yes
 		;;
 	esac
@@ -3131,7 +3131,8 @@
 			| 401 | 403 | 405 | 405fp | 440 | 440fp | 505 \
 			| 601 | 602 | 603 | 603e | ec603e | 604 \
 			| 604e | 620 | 630 | 740 | 750 | 7400 | 7450 \
-			| 854[08] | 801 | 821 | 823 | 860 | 970 | G3 | G4 | G5 | cell)
+			| 854[08] | 801 | 821 | 823 | 860 | 970 | G3 | G4 | G5 \
+			| cell | pa6t)
 				# OK
 				;;
 			*)
Index: gcc/config/rs6000/pa6t.md
===================================================================
--- gcc/config/rs6000/pa6t.md	(revision 0)
+++ gcc/config/rs6000/pa6t.md	(revision 0)
@@ -0,0 +1,291 @@
+;; -*- Mode: Scheme -*-
+;; Scheduling description for P.A.Semi PA6T processors.
+;; Copyright (C) 2008
+;; Free Software Foundation, Inc.
+;; Contributed by Greg McGary <greg@mcgary.org>
+
+;; This file is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3 of the License, or (at your option)
+;; any later version.
+
+;; This file is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+;; for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Sources: P.A.Semi internal documentation
+
+;; PA6T has three pipes per core:
+;; 1: Integer (shift,rotate,add,sub,mul,div) or Vector or Float
+;; 2: Integer (shift,rotate,add,sub) or Vector or Branch
+;; 3: Load/Store
+;;
+;; The dynamic scheduler has a 64 uop queue and 64 speculative regs.
+;; Instructions that update more than one register are cracked into two
+;; or more internal ops.  The chip can fetch 4 and issue up to 3
+;; internal ops per cycle.
+
+(define_automaton "pa6t_misc,pa6t_int,pa6t_fp,pa6t_vmx")
+
+(define_cpu_unit "fg0_pa6t,fg1_pa6t,fg2_pa6t,fg3_pa6t" "pa6t_misc")
+(define_cpu_unit "pick0_pa6t,pick1_pa6t,pick2_pa6t" "pa6t_misc")
+;;; (define_cpu_unit "lsu_pa6t" "pa6t_misc")
+(define_cpu_unit "iu0_pa6t,iu1_pa6t,imul_pa6t,idiv_pa6t" "pa6t_int")
+(define_cpu_unit "bpu_pa6t" "pa6t_int")
+(define_cpu_unit "fpmadd_pa6t,fpdiv_pa6t" "pa6t_fp")
+(define_cpu_unit "vmx_simple_permute_pa6t,vmx_float_complex_pa6t" "pa6t_vmx")
+;;; fgN=Fetch Group N
+
+; Fetch-group slots are allocated in program order.
+(absence_set "fg0_pa6t" "fg1_pa6t,fg2_pa6t,fg3_pa6t")
+(absence_set "fg1_pa6t" "fg2_pa6t,fg3_pa6t")
+(absence_set "fg2_pa6t" "fg3_pa6t")
+
+(define_reservation "pick_int_pa6t"
+		    "(fg0_pa6t,pick0_pa6t)\
+		    |(fg1_pa6t,pick1_pa6t)\
+		    |(fg2_pa6t,pick0_pa6t)\
+		    |(fg3_pa6t,pick1_pa6t)")
+
+(define_reservation "pick_int_iuX_pa6t"
+		    "(fg0_pa6t,pick0_pa6t,iu0_pa6t)\
+		    |(fg1_pa6t,pick1_pa6t,iu1_pa6t)\
+		    |(fg2_pa6t,pick0_pa6t,iu0_pa6t)\
+		    |(fg3_pa6t,pick1_pa6t,iu1_pa6t)")
+
+;;; ??? Does this "wrapping" of fg3+fg0 work with absence_set
+;;; that enforces monotonic sequence of fgN?
+
+(define_reservation "pick_2int_pa6t"
+		    "(fg0_pa6t+fg1_pa6t,pick0_pa6t+pick1_pa6t)\
+		    |(fg1_pa6t+fg2_pa6t,pick1_pa6t+pick0_pa6t)\
+		    |(fg2_pa6t+fg3_pa6t,pick0_pa6t+pick1_pa6t)\
+		    |(fg3_pa6t+fg0_pa6t,pick1_pa6t+pick0_pa6t)")
+
+;;; The FGs below are not strictly required, just good
+;;; for avoiding overload of a particular insn picker:
+
+(define_reservation "pick_imul_idiv_pa6t"
+		    "(fg0_pa6t,pick0_pa6t)\
+		    |(fg2_pa6t,pick0_pa6t)")
+
+(define_reservation "pick_fpu_pa6t"
+		    "(fg0_pa6t,pick0_pa6t)\
+		    |(fg2_pa6t,pick0_pa6t)")
+
+(define_reservation "pick_vmx_simple_permute_pa6t"
+		    "(fg0_pa6t,pick0_pa6t)\
+		    |(fg2_pa6t,pick0_pa6t)")
+
+(define_reservation "pick_vmx_float_complex_pa6t"
+		    "(fg1_pa6t,pick1_pa6t)\
+		    |(fg3_pa6t,pick1_pa6t)")
+
+(define_reservation "pick_branch_pa6t"
+		    "(fg1_pa6t,pick1_pa6t)\
+		    |(fg3_pa6t,pick1_pa6t)")
+
+(define_reservation "pick_ldst_pa6t"
+		    "(fg0_pa6t,pick2_pa6t)\
+		    |(fg1_pa6t,pick2_pa6t)\
+		    |(fg2_pa6t,pick2_pa6t)\
+		    |(fg3_pa6t,pick2_pa6t)")
+
+(define_reservation "pick_ldst_crack_pa6t"
+		    "(fg0_pa6t,pick2_pa6t)")
+
+;;;;;;;;;;;;;;;; Integer
+
+;;; ??? "2" is the latency in CPU cycles of an ALU op,
+;;; but we don't seem to account for the 10ish cycles
+;;; of fetch/decode/issue that precede the ALU op.
+
+(define_insn_reservation "pa6t-integer" 4
+  (and (eq_attr "type" "integer,insert_word,cmp,fast_compare")
+       (eq_attr "cpu" "pa6t"))
+  "pick_int_iuX_pa6t,nothing")
+
+;;; Why does power4 have much hairier REGEXPs for "two" & "three" ???
+;;; "compare" and "delayed_compare" are reserved by "pa6t-compare" below.
+(define_insn_reservation "pa6t-two" 3
+  (and (eq_attr "type" "two")
+       (eq_attr "cpu" "pa6t"))
+  "pick_2int_pa6t")
+
+;;; ??? rs6000.md has some "compare" as multi-insn sequences
+;;; while others are single-insns, but the assumption is that they
+;;; crack to two uops.  This might be true for the 970, but is often
+;;; wrong for PA6T.
+
+(define_insn_reservation "pa6t-compare" 3
+  (and (eq_attr "type" "compare,delayed_compare")
+       (eq_attr "cpu" "pa6t"))
+  "pick_2int_pa6t,nothing*2")
+
+(define_insn_reservation "pa6t-three" 4
+  (and (eq_attr "type" "three")
+       (eq_attr "cpu" "pa6t"))
+  "pick_2int_pa6t,pick_int_pa6t,nothing*2")
+
+(define_insn_reservation "pa6t-imul" 8
+  (and (eq_attr "type" "imul,imul2,imul3,imul_compare")
+       (eq_attr "cpu" "pa6t"))
+  "pick_imul_idiv_pa6t,iu0_pa6t,imul_pa6t*4,nothing*2")
+
+(define_insn_reservation "pa6t-lmul" 10
+  (and (eq_attr "type" "lmul,lmul_compare")
+       (eq_attr "cpu" "pa6t"))
+  "pick_imul_idiv_pa6t,iu0_pa6t,imul_pa6t*4,nothing*4")
+
+(define_insn_reservation "pa6t-idiv" 38
+  (and (eq_attr "type" "idiv")
+       (eq_attr "cpu" "pa6t"))
+  "pick_imul_idiv_pa6t,iu0_pa6t,idiv_pa6t*36")
+
+(define_insn_reservation "pa6t-ldiv" 70
+  (and (eq_attr "type" "ldiv")
+       (eq_attr "cpu" "pa6t"))
+  "pick_imul_idiv_pa6t,iu0_pa6t,idiv_pa6t*68")
+
+;;;;;;;;;;;;;;;; Floating Point
+
+; Basic FP latency is 6 cycles
+(define_insn_reservation "pa6t-fp" 9
+  (and (eq_attr "type" "fp,dmul")
+       (eq_attr "cpu" "pa6t"))
+  "pick_fpu_pa6t,fpmadd_pa6t,nothing*7")
+
+(define_insn_reservation "pa6t-fpcompare" 5
+  (and (eq_attr "type" "fpcompare")
+       (eq_attr "cpu" "pa6t"))
+  "pick_fpu_pa6t,fpmadd_pa6t,nothing*3")
+
+(define_insn_reservation "pa6t-sdiv" 18
+  (and (eq_attr "type" "sdiv,ssqrt")
+       (eq_attr "cpu" "pa6t"))
+  "pick_fpu_pa6t,fpdiv_pa6t*17")
+
+(define_insn_reservation "pa6t-ddiv" 33
+  (and (eq_attr "type" "ddiv,dsqrt")
+       (eq_attr "cpu" "pa6t"))
+  "pick_fpu_pa6t,fpdiv_pa6t*32")
+
+;;;;;;;;;;;;;;;; Branch
+
+(define_insn_reservation "pa6t-mtjmpr" 2
+  (and (eq_attr "type" "mtjmpr,mfjmpr")
+       (eq_attr "cpu" "pa6t"))
+  "pick_branch_pa6t,bpu_pa6t")
+
+(define_insn_reservation "pa6t-branch" 2
+  (and (eq_attr "type" "jmpreg,branch")
+       (eq_attr "cpu" "pa6t"))
+  "pick_branch_pa6t,bpu_pa6t")
+
+;;;;;;;;;;;;;;;; Condition Register
+
+(define_insn_reservation "pa6t-cr" 4
+  (and (eq_attr "type" "cr_logical,delayed_cr")
+       (eq_attr "cpu" "pa6t"))
+  "pick_int_iuX_pa6t,nothing")
+
+;; ??? 1 cycle for each CR field
+(define_insn_reservation "pa6t-mfcr" 11
+  (and (eq_attr "type" "mfcr,mfcrf,mtcr")
+       (eq_attr "cpu" "pa6t"))
+  "fg0_pa6t,pick0_pa6t,(iu0_pa6t,iu1_pa6t)*4,nothing")
+
+;;;;;;;;;;;;;;;; Vector
+
+(define_insn_reservation "pa6t-vmx-simple" 6
+  (and (eq_attr "type" "vecsimple")
+       (eq_attr "cpu" "pa6t"))
+  "pick_vmx_simple_permute_pa6t,vmx_simple_permute_pa6t,nothing*3")
+
+(define_insn_reservation "pa6t-vmx-complex" 10
+  (and (eq_attr "type" "veccomplex")
+       (eq_attr "cpu" "pa6t"))
+  "pick_vmx_float_complex_pa6t,vmx_float_complex_pa6t,nothing*7")
+
+; vecfp compare
+(define_insn_reservation "pa6t-vmx-compare" 6
+  (and (eq_attr "type" "veccmp")
+       (eq_attr "cpu" "pa6t"))
+  "pick_vmx_simple_permute_pa6t,vmx_simple_permute_pa6t,nothing*3")
+
+(define_insn_reservation "pa6t-vmx-float" 10
+  (and (eq_attr "type" "vecfloat")
+       (eq_attr "cpu" "pa6t"))
+  "pick_vmx_float_complex_pa6t,vmx_float_complex_pa6t,nothing*7")
+
+(define_insn_reservation "pa6t-vmx-permute" 4
+  (and (eq_attr "type" "vecperm")
+       (eq_attr "cpu" "pa6t"))
+  "pick_vmx_simple_permute_pa6t,vmx_simple_permute_pa6t,nothing")
+
+;;;;;;;;;;;;;;;; Load/Store
+
+(define_insn_reservation "pa6t-load-int" 2
+  (and (eq_attr "type" "load")
+       (eq_attr "cpu" "pa6t"))
+  "pick_ldst_pa6t") ;,lsu_pa6t
+
+(define_insn_reservation "pa6t-load-int-ext" 2
+  (and (eq_attr "type" "load_ext")
+       (eq_attr "cpu" "pa6t"))
+  "pick_ldst_crack_pa6t") ;,lsu_pa6t
+
+(define_insn_reservation "pa6t-load-int-ext-update" 2
+  (and (eq_attr "type" "load_ext_u,load_ext_ux")
+       (eq_attr "cpu" "pa6t"))
+  "pick_ldst_crack_pa6t") ;,lsu_pa6t
+
+(define_insn_reservation "pa6t-load-update" 2
+  (and (eq_attr "type" "load_u,load_ux")
+       (eq_attr "cpu" "pa6t"))
+  "pick_ldst_crack_pa6t") ;,lsu_pa6t
+
+(define_insn_reservation "pa6t-load-fp" 2
+  (and (eq_attr "type" "fpload")
+       (eq_attr "cpu" "pa6t"))
+  "pick_ldst_pa6t") ;,lsu_pa6t
+
+(define_insn_reservation "pa6t-load-fp-update" 2
+  (and (eq_attr "type" "fpload_u,fpload_ux")
+       (eq_attr "cpu" "pa6t"))
+  "pick_ldst_crack_pa6t") ;,lsu_pa6t
+
+(define_insn_reservation "pa6t-load-vmx" 2
+  (and (eq_attr "type" "vecload")
+       (eq_attr "cpu" "pa6t"))
+  "pick_ldst_pa6t") ;,lsu_pa6t
+
+(define_insn_reservation "pa6t-store-int" 2
+  (and (eq_attr "type" "store")
+       (eq_attr "cpu" "pa6t"))
+  "pick_ldst_pa6t") ;,lsu_pa6t
+
+(define_insn_reservation "pa6t-store-int-update" 2
+  (and (eq_attr "type" "store_u,store_ux")
+       (eq_attr "cpu" "pa6t"))
+  "pick_ldst_crack_pa6t") ;,lsu_pa6t
+
+(define_insn_reservation "pa6t-store-fp" 2
+  (and (eq_attr "type" "fpstore")
+       (eq_attr "cpu" "pa6t"))
+  "pick_ldst_pa6t") ;,lsu_pa6t
+
+(define_insn_reservation "pa6t-store-fp-update" 2
+  (and (eq_attr "type" "fpstore_u,fpstore_ux")
+       (eq_attr "cpu" "pa6t"))
+  "pick_ldst_crack_pa6t") ;,lsu_pa6t
+
+(define_insn_reservation "pa6t-store-vmx" 2
+  (and (eq_attr "type" "vecstore")
+       (eq_attr "cpu" "pa6t"))
+  "pick_ldst_pa6t") ;,lsu_pa6t
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 131585)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -1,6 +1,6 @@
 /* Subroutines used for code generation on IBM RS/6000.
    Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
+   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
    Free Software Foundation, Inc.
    Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
 
@@ -700,6 +700,25 @@
   16,			/* prefetch streams */
 };
 
+/* Instruction costs on PA6T processors.  */
+static const
+struct processor_costs pa6t_cost = {
+  COSTS_N_INSNS (6),    /* mulsi */
+  COSTS_N_INSNS (6),    /* mulsi_const */
+  COSTS_N_INSNS (6),    /* mulsi_const9 */
+  COSTS_N_INSNS (8),    /* muldi */
+  COSTS_N_INSNS (36),   /* divsi */
+  COSTS_N_INSNS (68),   /* divdi */
+  COSTS_N_INSNS (8),    /* fp */
+  COSTS_N_INSNS (8),    /* dmul */
+  COSTS_N_INSNS (17),   /* sdiv */
+  COSTS_N_INSNS (32),   /* ddiv */
+  64,			/* cache line size */
+  64,			/* l1 cache */
+  2048,			/* l2 cache */
+  12,			/* prefetch streams */
+};
+
 
 static bool rs6000_function_ok_for_sibcall (tree, tree);
 static const char *rs6000_invalid_within_doloop (const_rtx);
@@ -794,6 +813,8 @@
 static int force_new_group (int, FILE *, rtx *, rtx, bool *, int, int *);
 static int redefine_groups (FILE *, int, rtx, rtx);
 static int pad_groups (FILE *, int, rtx, rtx);
+static rtx strip_extend (rtx);
+static void separate_dependent_loads_from_stores (rtx, rtx);
 static void rs6000_sched_finish (FILE *, int);
 static int rs6000_sched_reorder (FILE *, int, rtx *, int *, int);
 static int rs6000_sched_reorder2 (FILE *, int, rtx *, int *, int);
@@ -1417,6 +1438,9 @@
 	 {"G4",  PROCESSOR_PPC7450, POWERPC_7400_MASK},
 	 {"G5", PROCESSOR_POWER4,
 	  POWERPC_7400_MASK | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64},
+	 {"pa6t", PROCESSOR_PA6T,
+	  POWERPC_7400_MASK | MASK_POWERPC64 | MASK_MFCRF | MASK_POPCNTB
+	  | MASK_FPRND | MASK_CMPB | MASK_MFPGPR | MASK_PPC_GPOPT},
 	 {"power", PROCESSOR_POWER, MASK_POWER | MASK_MULTIPLE | MASK_STRING},
 	 {"power2", PROCESSOR_POWER,
 	  MASK_POWER | MASK_POWER2 | MASK_MULTIPLE | MASK_STRING},
@@ -1642,12 +1666,15 @@
   rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
 			&& rs6000_cpu != PROCESSOR_POWER5
                         && rs6000_cpu != PROCESSOR_POWER6
-			&& rs6000_cpu != PROCESSOR_CELL);
+			&& rs6000_cpu != PROCESSOR_CELL
+			&& rs6000_cpu != PROCESSOR_PA6T);
   rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
-			 || rs6000_cpu == PROCESSOR_POWER5);
+			 || rs6000_cpu == PROCESSOR_POWER5
+			 || rs6000_cpu == PROCESSOR_PA6T);
   rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
                                  || rs6000_cpu == PROCESSOR_POWER5
-                                 || rs6000_cpu == PROCESSOR_POWER6);
+                                 || rs6000_cpu == PROCESSOR_POWER6
+				 || rs6000_cpu == PROCESSOR_PA6T);
 
   rs6000_sched_restricted_insns_priority
     = (rs6000_sched_groups ? 1 : 0);
@@ -1833,6 +1860,10 @@
 	rs6000_cost = &power6_cost;
 	break;
 
+      case PROCESSOR_PA6T:
+	rs6000_cost = &pa6t_cost;
+	break;
+
       default:
 	gcc_unreachable ();
       }
@@ -17786,15 +17817,21 @@
         /* Data dependency; DEP_INSN writes a register that INSN reads
 	   some cycles later.  */
 
-        /* Separate a load from a narrower, dependent store.  */
         if (rs6000_sched_groups
             && GET_CODE (PATTERN (insn)) == SET
             && GET_CODE (PATTERN (dep_insn)) == SET
             && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
-            && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
-            && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
-                > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
-          return cost + 14;
+	    && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM)
+	  {
+	    /* Separate a load from a narrower, dependent store.  */
+	    if (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
+		> GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0))))
+	      return cost + 14;
+	    if (rs6000_cpu == PROCESSOR_PA6T
+		&& rtx_equal_p (XEXP (PATTERN (insn), 1),
+				XEXP (PATTERN (dep_insn), 0)))
+	      return 100;
+	  }
 
         attr_type = get_attr_type (insn);
 
@@ -17820,7 +17857,8 @@
                  || rs6000_cpu_attr == CPU_PPC7450
                  || rs6000_cpu_attr == CPU_POWER4
                  || rs6000_cpu_attr == CPU_POWER5
-                 || rs6000_cpu_attr == CPU_CELL)
+                 || rs6000_cpu_attr == CPU_CELL
+		 || rs6000_cpu_attr == CPU_PA6T)
                 && recog_memoized (dep_insn)
                 && (INSN_CODE (dep_insn) >= 0))
 
@@ -18078,10 +18116,18 @@
       enum attr_type type = get_attr_type (insn);
       if (type == TYPE_LOAD_EXT_U
 	  || type == TYPE_LOAD_EXT_UX
-	  || type == TYPE_LOAD_UX
-	  || type == TYPE_STORE_UX
 	  || type == TYPE_MFCR)
 	return true;
+      if (rs6000_cpu == PROCESSOR_PA6T)
+	{
+	  if (type == TYPE_MFCRF || type == TYPE_MTCR)
+	    return true;
+	}
+      else
+	{
+	  if (type == TYPE_LOAD_UX || type == TYPE_STORE_UX)
+	    return true;
+	}
     }
 
   return false;
@@ -18104,12 +18150,22 @@
       if (type == TYPE_LOAD_U || type == TYPE_STORE_U
 	  || type == TYPE_FPLOAD_U || type == TYPE_FPSTORE_U
 	  || type == TYPE_FPLOAD_UX || type == TYPE_FPSTORE_UX
-	  || type == TYPE_LOAD_EXT || type == TYPE_DELAYED_CR
-	  || type == TYPE_COMPARE || type == TYPE_DELAYED_COMPARE
-	  || type == TYPE_IMUL_COMPARE || type == TYPE_LMUL_COMPARE
-	  || type == TYPE_IDIV || type == TYPE_LDIV
-	  || type == TYPE_INSERT_WORD)
+	  || type == TYPE_LOAD_EXT || type == TYPE_INSERT_WORD
+	  || type == TYPE_DELAYED_CR || type == TYPE_DELAYED_COMPARE)
 	return true;
+
+      if (rs6000_cpu == PROCESSOR_PA6T)
+	{
+	  if (type == TYPE_VAR_DELAYED_COMPARE)
+	    return true;
+	}
+      else
+	{
+	  if (type == TYPE_COMPARE
+	      || type == TYPE_IMUL_COMPARE || type == TYPE_LMUL_COMPARE
+	      || type == TYPE_IDIV || type == TYPE_LDIV)
+	    return true;
+	}
     }
 
   return false;
@@ -18320,6 +18376,7 @@
   case CPU_RS64A:
   case CPU_PPC601: /* ? */
   case CPU_PPC7450:
+  case CPU_PA6T:
     return 3;
   case CPU_PPC440:
   case CPU_PPC603:
@@ -18862,6 +18919,7 @@
       if (is_cracked_insn (insn))
         return true;
     case PROCESSOR_POWER4:
+    case PROCESSOR_PA6T:
       if (is_microcoded_insn (insn))
         return true;
 
@@ -19332,6 +19390,86 @@
   load_store_pendulum = 0;
 }
 
+/* Strip any SIGN_EXTEND, ZERO_EXTEND or FLOAT_EXTEND wrappers from X.
+   Return the MEM underneath, or NULL_RTX if some other code is found.  */
+static rtx
+strip_extend (rtx x)
+{
+  for (;;)
+    switch (GET_CODE (x))
+      {
+      case MEM:
+	return x;
+
+      case SIGN_EXTEND:
+      case ZERO_EXTEND:
+      case FLOAT_EXTEND:
+	x = XEXP (x, 0);
+	break;
+
+      default:
+	return NULL_RTX;
+      }
+}
+
+/* PA6T suffers long replays when a load that hits a store goes out of
+   order.  The hardware scheduler needs at least two fetch-groups of
+   separation between the store and load in order to recognize the
+   dependency.  Walk the active insns after PREV_HEAD_INSN, stopping
+   at TAIL, and for each store insert NOPs ahead of its first matching
+   dependent load to provide the necessary gap.  The code to match
+   producers and consumers of MEMs is simplistic and doesn't catch
+   pointer aliases or overlaps; even so, it beats doing nothing.
+
+   FIXME: For PA6T this invalidates the n_groups calculation of the
+   caller, but that number is only used in debug-trace reports.  */
+
+static void
+separate_dependent_loads_from_stores (rtx prev_head_insn, rtx tail)
+{
+  rtx producer = get_next_active_insn (prev_head_insn, tail);
+
+  while (producer && producer != tail)
+    {
+      rtx producer_pat = PATTERN (producer);
+      if (is_store_insn (producer) && GET_CODE (producer_pat) == SET)
+	{
+	  sd_iterator_def sd_it;
+	  dep_t dep;
+	  FOR_EACH_DEP (producer, SD_LIST_RES_FORW, sd_it, dep)
+	    {
+	      rtx consumer = DEP_CON (dep);
+	      rtx consumer_pat = PATTERN (consumer);
+	      if (DEP_TYPE (dep) == REG_DEP_TRUE
+		  && is_load_insn (consumer)
+		  && GET_CODE (consumer_pat) == SET
+		  && rtx_equal_p (strip_extend (XEXP (consumer_pat, 1)),
+				  XEXP (producer_pat, 0)))
+		{
+		  rtx next;
+		  int insn_count = 0;
+		  for (next = producer; next != consumer; next = NEXT_INSN (next))
+		    if (++insn_count >= 8)
+		      break;
+		  /* 8 insns = two fetch groups (pa6t fetch width is
+		     4).  FIXME: this is non-optimal, since we might
+		     insert more nops than necessary to separate the
+		     load from the store by two fetch groups.  We
+		     don't have enough information about fetch-group
+		     boundaries here.  The scheduler is issue-group
+		     centered (pa6t issue width is 3), whereas this
+		     bit is fetch-group centered.  */
+		  while (insn_count++ < 8)
+		    emit_insn_before (gen_nop (), consumer);
+		  break;
+		}
+	    }
+	}
+      /* Skip notes and other non-insns, on which PATTERN is invalid.  */
+      producer = get_next_active_insn (producer, tail);
+    }
+}
+
 /* The following function is called at the end of scheduling BB.
    After reload, it inserts nops at insn group bundling.  */
 
@@ -19357,6 +19495,10 @@
 				    current_sched_info->prev_head,
 				    current_sched_info->next_tail);
 
+      if (rs6000_cpu == PROCESSOR_PA6T)
+	separate_dependent_loads_from_stores (current_sched_info->prev_head,
+					      current_sched_info->next_tail);
+
       if (sched_verbose >= 6)
 	{
     	  fprintf (dump, "ngroups = %d\n", n_groups);
Index: gcc/config/rs6000/rs6000.h
===================================================================
--- gcc/config/rs6000/rs6000.h	(revision 131585)
+++ gcc/config/rs6000/rs6000.h	(working copy)
@@ -1,6 +1,6 @@
 /* Definitions of target machine for GNU compiler, for IBM RS/6000.
    Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
+   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
    Free Software Foundation, Inc.
    Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
 
@@ -117,6 +117,7 @@
 %{mcpu=G5: -mpower4 -maltivec} \
 %{mcpu=8540: -me500} \
 %{mcpu=8548: -me500} \
+%{mcpu=pa6t: -mpa6t} \
 %{maltivec: -maltivec} \
 -many"
 
@@ -265,7 +266,8 @@
    PROCESSOR_POWER4,
    PROCESSOR_POWER5,
    PROCESSOR_POWER6,
-   PROCESSOR_CELL
+   PROCESSOR_CELL,
+   PROCESSOR_PA6T
 };
 
 extern enum processor_type rs6000_cpu;
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md	(revision 131585)
+++ gcc/config/rs6000/rs6000.md	(working copy)
@@ -1,6 +1,6 @@
 ;; Machine description for IBM RISC System 6000 (POWER) for GNU C compiler
 ;; Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
-;; 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
+;; 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
 ;; Free Software Foundation, Inc.
 ;; Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
 
@@ -133,7 +133,7 @@
 ;; Processor type -- this attribute must exactly match the processor_type
 ;; enumeration in rs6000.h.
 
-(define_attr "cpu" "rios1,rios2,rs64a,mpccore,ppc403,ppc405,ppc440,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,power4,power5,power6,cell"
+(define_attr "cpu" "rios1,rios2,rs64a,mpccore,ppc403,ppc405,ppc440,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,power4,power5,power6,cell,pa6t"
   (const (symbol_ref "rs6000_cpu_attr")))
 
 
@@ -170,6 +170,7 @@
 (include "power5.md")
 (include "power6.md")
 (include "cell.md")
+(include "pa6t.md")
 
 (include "predicates.md")
 (include "constraints.md")

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]