This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH]: RFC: Add power7 support to the rs6000 (part 3 of 12)


2009-06-04  Michael Meissner  <meissner@linux.vnet.ibm.com>
	    Pat Haugen  <pthaugen@us.ibm.com>
	    Revital1 Eres <ERES@il.ibm.com>

	* config/rs6000/constraints.md (wd constraint): New constraint for
	preferred register class to hold V2DF data if -mvsx.
	(wf constraint): New constraint for preferred register class to
	hold V4SF data if -mvsx.
	(ws constraint): New constraint for preferred register class to
	hold scalar DF data if -mvsx.
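	(wa constraint): New constraint to target either the traditional
	floating point or Altivec registers if -mvsx.
	(wZ constraint): New memory constraint for Altivec style indexed or
	indirect memory references that ignore the bottom 4 bits of the
	address.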
	(j constraint): New constraint to match a vector constant of all
	zeroes.

	* config/rs6000/predicates.md (vsx_register_operand): New
	predicate for vsx registers.
	(vfloat_operand): New predicate for moving the vector support from
	altivec.md to vector.md to abstract it for both altivec and vsx.
	(vint_operand): Ditto.
	(vlogical_operand): Ditto.
	(easy_fp_constant): Vector of all zeroes or scalar DF 0.0 are easy
	constants under VSX.
	(indexed_or_indirect_operand): Add VSX support.
	(altivec_indexed_or_indirect_operand): New predicate to match the
	Altivec memory reference with the implicit AND -16.
	(vec_init_operand): New predicate for rejecting a vector
	initialization consisting of two double memory references, unless
	they are the same item, where we can use the load and splat
	instruction.

	* config/rs6000/spe.md (spe_fixuns_truncdfsi2): Rename from
	fixuns_truncdfsi2; the fixuns_truncdfsi2 expander is moved to
	rs6000.md.

	* config/rs6000/power7.md: New file to give power7 tuning.

Index: gcc/config/rs6000/constraints.md
===================================================================
--- gcc/config/rs6000/constraints.md	(.../svn+ssh://meissner@gcc.gnu.org/svn/gcc/trunk)	(revision 148152)
+++ gcc/config/rs6000/constraints.md	(working copy)
@@ -17,6 +17,8 @@
 ;; along with GCC; see the file COPYING3.  If not see
 ;; <http://www.gnu.org/licenses/>.
 
+;; Available constraint letters: "e", "k", "u", "A", "B", "C", "D"
+
 ;; Register constraints
 
 (define_register_constraint "f" "TARGET_HARD_FLOAT && TARGET_FPRS
@@ -50,6 +52,28 @@ (define_register_constraint "y" "CR_REGS
 (define_register_constraint "z" "XER_REGS"
   "@internal")
 
+;; Use w as a prefix to add VSX modes
+;; vector double (V2DF)
+(define_register_constraint "wd" "rs6000_vector_reg_class[V2DFmode]"
+  "@internal")
+
+;; vector float (V4SF)
+(define_register_constraint "wf" "rs6000_vector_reg_class[V4SFmode]"
+  "@internal")
+
+;; scalar double (DF)
+(define_register_constraint "ws" "rs6000_vector_reg_class[DFmode]"
+  "@internal")
+
+;; any VSX register
+(define_register_constraint "wa" "rs6000_vsx_reg_class"
+  "@internal")
+
+;; Altivec style load/store that ignores the bottom bits of the address
+(define_memory_constraint "wZ"
+  "Indexed or indirect memory operand, ignoring the bottom 4 bits"
+  (match_operand 0 "altivec_indexed_or_indirect_operand"))
+
 ;; Integer constraints
 
 (define_constraint "I"
@@ -159,3 +183,7 @@ (define_constraint "t"
 (define_constraint "W"
   "vector constant that does not require memory"
   (match_operand 0 "easy_vector_constant"))
+
+(define_constraint "j"
+  "Zero vector constant"
+  (match_test "(op == const0_rtx || op == CONST0_RTX (GET_MODE (op)))"))
Index: gcc/config/rs6000/predicates.md
===================================================================
--- gcc/config/rs6000/predicates.md	(.../svn+ssh://meissner@gcc.gnu.org/svn/gcc/trunk)	(revision 148152)
+++ gcc/config/rs6000/predicates.md	(working copy)
@@ -38,6 +38,37 @@ (define_predicate "altivec_register_oper
 		     || ALTIVEC_REGNO_P (REGNO (op))
 		     || REGNO (op) > LAST_VIRTUAL_REGISTER")))
 
+;; Return 1 if op is a VSX register.
+(define_predicate "vsx_register_operand"
+   (and (match_operand 0 "register_operand")
+	(match_test "GET_CODE (op) != REG
+		     || VSX_REGNO_P (REGNO (op))
+		     || REGNO (op) > LAST_VIRTUAL_REGISTER")))
+
+;; Return 1 if op is a vector register that operates on floating point vectors
+;; (either altivec or VSX).
+(define_predicate "vfloat_operand"
+   (and (match_operand 0 "register_operand")
+	(match_test "GET_CODE (op) != REG
+		     || VFLOAT_REGNO_P (REGNO (op))
+		     || REGNO (op) > LAST_VIRTUAL_REGISTER")))
+
+;; Return 1 if op is a vector register that operates on integer vectors
+;; (only altivec, VSX doesn't support integer vectors)
+(define_predicate "vint_operand"
+   (and (match_operand 0 "register_operand")
+	(match_test "GET_CODE (op) != REG
+		     || VINT_REGNO_P (REGNO (op))
+		     || REGNO (op) > LAST_VIRTUAL_REGISTER")))
+
+;; Return 1 if op is a vector register to do logical operations on (and, or,
+;; xor, etc.)
+(define_predicate "vlogical_operand"
+   (and (match_operand 0 "register_operand")
+	(match_test "GET_CODE (op) != REG
+		     || VLOGICAL_REGNO_P (REGNO (op))
+		     || REGNO (op) > LAST_VIRTUAL_REGISTER")))
+
 ;; Return 1 if op is XER register.
 (define_predicate "xer_operand"
   (and (match_code "reg")
@@ -234,6 +265,10 @@ (define_predicate "easy_fp_constant"
 	      && num_insns_constant_wide ((HOST_WIDE_INT) k[3]) == 1);
 
     case DFmode:
+      /* The constant 0.0 is easy under VSX.  */
+      if (op == CONST0_RTX (DFmode) && VECTOR_UNIT_VSX_P (DFmode))
+	return 1;
+
       /* Force constants to memory before reload to utilize
 	 compress_float_constant.
 	 Avoid this when flag_unsafe_math_optimizations is enabled
@@ -292,6 +327,9 @@ (define_predicate "easy_vector_constant"
   if (TARGET_PAIRED_FLOAT)
     return false;
 
+  if ((VSX_VECTOR_MODE (mode) || mode == TImode) && zero_constant (op, mode))
+    return true;
+
   if (ALTIVEC_VECTOR_MODE (mode))
     {
       if (zero_constant (op, mode))
@@ -396,16 +434,36 @@ (define_predicate "indexed_or_indirect_o
   (match_code "mem")
 {
   op = XEXP (op, 0);
-  if (TARGET_ALTIVEC
-      && ALTIVEC_VECTOR_MODE (mode)
+  if (VECTOR_MEM_ALTIVEC_P (mode)
       && GET_CODE (op) == AND
       && GET_CODE (XEXP (op, 1)) == CONST_INT
       && INTVAL (XEXP (op, 1)) == -16)
     op = XEXP (op, 0);
 
+  else if (VECTOR_MEM_VSX_P (mode)
+	   && GET_CODE (op) == PRE_MODIFY)
+    op = XEXP (op, 1);
+
   return indexed_or_indirect_address (op, mode);
 })
 
+;; Return 1 if the operand is an indexed or indirect memory operand with an
+;; AND -16 in it, used to recognize when we need to switch to Altivec loads
+;; to realign loops instead of VSX (altivec silently ignores the bottom bits,
+;; while VSX uses the full address and traps)
+(define_predicate "altivec_indexed_or_indirect_operand"
+  (match_code "mem")
+{
+  op = XEXP (op, 0);
+  if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
+      && GET_CODE (op) == AND
+      && GET_CODE (XEXP (op, 1)) == CONST_INT
+      && INTVAL (XEXP (op, 1)) == -16)
+    return indexed_or_indirect_address (XEXP (op, 0), mode);
+
+  return 0;
+})
+
 ;; Return 1 if the operand is an indexed or indirect address.
 (define_special_predicate "indexed_or_indirect_address"
   (and (match_test "REG_P (op)
@@ -1336,3 +1394,19 @@ (define_predicate "stmw_operation"
 
   return 1;
 })
+
+;; Return true if the operand is a legitimate parallel for vec_init
+(define_predicate "vec_init_operand"
+  (match_code "parallel")
+{
+  /* Disallow V2DF mode with MEM's unless both are the same under VSX.  */
+  if (mode == V2DFmode && VECTOR_UNIT_VSX_P (mode))
+    {
+      rtx op0 = XVECEXP (op, 0, 0);
+      rtx op1 = XVECEXP (op, 0, 1);
+      if ((MEM_P (op0) || MEM_P (op1)) && !rtx_equal_p (op0, op1))
+	return 0;
+    }
+
+  return 1;
+})
Index: gcc/config/rs6000/spe.md
===================================================================
--- gcc/config/rs6000/spe.md	(.../svn+ssh://meissner@gcc.gnu.org/svn/gcc/trunk)	(revision 148152)
+++ gcc/config/rs6000/spe.md	(working copy)
@@ -99,7 +99,7 @@ (define_insn "*divsf3_gpr"
 
 ;; Floating point conversion instructions.
 
-(define_insn "fixuns_truncdfsi2"
+(define_insn "spe_fixuns_truncdfsi2"
   [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
 	(unsigned_fix:SI (match_operand:DF 1 "gpc_reg_operand" "r")))]
   "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE"
Index: gcc/config/rs6000/power7.md
===================================================================
--- gcc/config/rs6000/power7.md	(.../svn+ssh://meissner@gcc.gnu.org/svn/gcc/trunk)	(revision 0)
+++ gcc/config/rs6000/power7.md	(revision 148152)
@@ -0,0 +1,318 @@
+;; Scheduling description for IBM POWER7 processor.
+;; Copyright (C) 2009 Free Software Foundation, Inc.
+;;
+;; Contributed by Pat Haugen (pthaugen@us.ibm.com).
+
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "power7iu,power7lsu,power7vsu,power7misc")
+
+(define_cpu_unit "iu1_power7,iu2_power7" "power7iu")
+(define_cpu_unit "lsu1_power7,lsu2_power7" "power7lsu")
+(define_cpu_unit "vsu1_power7,vsu2_power7" "power7vsu")
+(define_cpu_unit "bpu_power7,cru_power7" "power7misc")
+(define_cpu_unit "du1_power7,du2_power7,du3_power7,du4_power7,du5_power7"
+                 "power7misc")
+
+
+(define_reservation "DU_power7"
+		    "du1_power7|du2_power7|du3_power7|du4_power7")
+
+(define_reservation "DU2F_power7"
+		    "du1_power7+du2_power7")
+
+(define_reservation "DU4_power7"
+		    "du1_power7+du2_power7+du3_power7+du4_power7")
+
+(define_reservation "FXU_power7"
+                    "iu1_power7|iu2_power7")
+
+(define_reservation "VSU_power7"
+                    "vsu1_power7|vsu2_power7")
+
+(define_reservation "LSU_power7"
+                    "lsu1_power7|lsu2_power7")
+
+
+; Dispatch slots are allocated in order conforming to program order.
+(absence_set "du1_power7" "du2_power7,du3_power7,du4_power7,du5_power7")
+(absence_set "du2_power7" "du3_power7,du4_power7,du5_power7")
+(absence_set "du3_power7" "du4_power7,du5_power7")
+(absence_set "du4_power7" "du5_power7")
+
+
+; LS Unit
+(define_insn_reservation "power7-load" 2
+  (and (eq_attr "type" "load")
+       (eq_attr "cpu" "power7"))
+  "DU_power7,LSU_power7")
+
+(define_insn_reservation "power7-load-ext" 3
+  (and (eq_attr "type" "load_ext")
+       (eq_attr "cpu" "power7"))
+  "DU2F_power7,LSU_power7,FXU_power7")
+
+(define_insn_reservation "power7-load-update" 2
+  (and (eq_attr "type" "load_u")
+       (eq_attr "cpu" "power7"))
+  "DU2F_power7,LSU_power7+FXU_power7")
+
+(define_insn_reservation "power7-load-update-indexed" 3
+  (and (eq_attr "type" "load_ux")
+       (eq_attr "cpu" "power7"))
+  "DU4_power7,FXU_power7,LSU_power7+FXU_power7")
+
+(define_insn_reservation "power7-load-ext-update" 4
+  (and (eq_attr "type" "load_ext_u")
+       (eq_attr "cpu" "power7"))
+  "DU2F_power7,LSU_power7+FXU_power7,FXU_power7")
+
+(define_insn_reservation "power7-load-ext-update-indexed" 4
+  (and (eq_attr "type" "load_ext_ux")
+       (eq_attr "cpu" "power7"))
+  "DU4_power7,FXU_power7,LSU_power7+FXU_power7,FXU_power7")
+
+(define_insn_reservation "power7-fpload" 3
+  (and (eq_attr "type" "fpload")
+       (eq_attr "cpu" "power7"))
+  "DU_power7,LSU_power7")
+
+(define_insn_reservation "power7-fpload-update" 3
+  (and (eq_attr "type" "fpload_u,fpload_ux")
+       (eq_attr "cpu" "power7"))
+  "DU2F_power7,LSU_power7+FXU_power7")
+
+(define_insn_reservation "power7-store" 6 ; store-forwarding latency
+  (and (eq_attr "type" "store")
+       (eq_attr "cpu" "power7"))
+  "DU_power7,LSU_power7+FXU_power7")
+
+(define_insn_reservation "power7-store-update" 6
+  (and (eq_attr "type" "store_u")
+       (eq_attr "cpu" "power7"))
+  "DU2F_power7,LSU_power7+FXU_power7,FXU_power7")
+
+(define_insn_reservation "power7-store-update-indexed" 6
+  (and (eq_attr "type" "store_ux")
+       (eq_attr "cpu" "power7"))
+  "DU4_power7,LSU_power7+FXU_power7,FXU_power7")
+
+(define_insn_reservation "power7-fpstore" 6
+  (and (eq_attr "type" "fpstore")
+       (eq_attr "cpu" "power7"))
+  "DU_power7,LSU_power7+VSU_power7")
+
+(define_insn_reservation "power7-fpstore-update" 6
+  (and (eq_attr "type" "fpstore_u,fpstore_ux")
+       (eq_attr "cpu" "power7"))
+  "DU_power7,LSU_power7+VSU_power7+FXU_power7")
+
+(define_insn_reservation "power7-larx" 3
+  (and (eq_attr "type" "load_l")
+       (eq_attr "cpu" "power7"))
+  "DU4_power7,LSU_power7")
+
+(define_insn_reservation "power7-stcx" 10
+  (and (eq_attr "type" "store_c")
+       (eq_attr "cpu" "power7"))
+  "DU4_power7,LSU_power7")
+
+(define_insn_reservation "power7-vecload" 3
+  (and (eq_attr "type" "vecload")
+       (eq_attr "cpu" "power7"))
+  "DU_power7,LSU_power7")
+
+(define_insn_reservation "power7-vecstore" 6
+  (and (eq_attr "type" "vecstore")
+       (eq_attr "cpu" "power7"))
+  "DU_power7,LSU_power7+VSU_power7")
+
+(define_insn_reservation "power7-sync" 11
+  (and (eq_attr "type" "sync")
+       (eq_attr "cpu" "power7"))
+  "DU4_power7,LSU_power7")
+
+
+; FX Unit
+(define_insn_reservation "power7-integer" 1
+  (and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\
+                        var_shift_rotate,exts")
+       (eq_attr "cpu" "power7"))
+  "DU_power7,FXU_power7")
+
+(define_insn_reservation "power7-cntlz" 2
+  (and (eq_attr "type" "cntlz")
+       (eq_attr "cpu" "power7"))
+  "DU_power7,FXU_power7")
+
+(define_insn_reservation "power7-two" 2
+  (and (eq_attr "type" "two")
+       (eq_attr "cpu" "power7"))
+  "DU_power7+DU_power7,FXU_power7,FXU_power7")
+
+(define_insn_reservation "power7-three" 3
+  (and (eq_attr "type" "three")
+       (eq_attr "cpu" "power7"))
+  "DU_power7+DU_power7+DU_power7,FXU_power7,FXU_power7,FXU_power7")
+
+(define_insn_reservation "power7-cmp" 1
+  (and (eq_attr "type" "cmp,fast_compare")
+       (eq_attr "cpu" "power7"))
+  "DU_power7,FXU_power7")
+
+(define_insn_reservation "power7-compare" 2
+  (and (eq_attr "type" "compare,delayed_compare,var_delayed_compare")
+       (eq_attr "cpu" "power7"))
+  "DU2F_power7,FXU_power7,FXU_power7")
+
+(define_bypass 3 "power7-cmp,power7-compare" "power7-crlogical,power7-delayedcr")
+
+(define_insn_reservation "power7-mul" 4
+  (and (eq_attr "type" "imul,imul2,imul3,lmul")
+       (eq_attr "cpu" "power7"))
+  "DU_power7,FXU_power7")
+
+(define_insn_reservation "power7-mul-compare" 5
+  (and (eq_attr "type" "imul_compare,lmul_compare")
+       (eq_attr "cpu" "power7"))
+  "DU2F_power7,FXU_power7,nothing*3,FXU_power7")
+
+(define_insn_reservation "power7-idiv" 36
+  (and (eq_attr "type" "idiv")
+       (eq_attr "cpu" "power7"))
+  "DU2F_power7,iu1_power7*36|iu2_power7*36")
+
+(define_insn_reservation "power7-ldiv" 68
+  (and (eq_attr "type" "ldiv")
+       (eq_attr "cpu" "power7"))
+  "DU2F_power7,iu1_power7*68|iu2_power7*68")
+
+(define_insn_reservation "power7-isync" 1 ;
+  (and (eq_attr "type" "isync")
+       (eq_attr "cpu" "power7"))
+  "DU4_power7,FXU_power7")
+
+
+; CR Unit
+(define_insn_reservation "power7-mtjmpr" 4
+  (and (eq_attr "type" "mtjmpr")
+       (eq_attr "cpu" "power7"))
+  "du1_power7,FXU_power7")
+
+(define_insn_reservation "power7-mfjmpr" 5
+  (and (eq_attr "type" "mfjmpr")
+       (eq_attr "cpu" "power7"))
+  "du1_power7,cru_power7+FXU_power7")
+
+(define_insn_reservation "power7-crlogical" 3
+  (and (eq_attr "type" "cr_logical")
+       (eq_attr "cpu" "power7"))
+  "du1_power7,cru_power7")
+
+(define_insn_reservation "power7-delayedcr" 3
+  (and (eq_attr "type" "delayed_cr")
+       (eq_attr "cpu" "power7"))
+  "du1_power7,cru_power7")
+
+(define_insn_reservation "power7-mfcr" 6
+  (and (eq_attr "type" "mfcr")
+       (eq_attr "cpu" "power7"))
+  "du1_power7,cru_power7")
+
+(define_insn_reservation "power7-mfcrf" 3
+  (and (eq_attr "type" "mfcrf")
+       (eq_attr "cpu" "power7"))
+  "du1_power7,cru_power7")
+
+(define_insn_reservation "power7-mtcr" 3
+  (and (eq_attr "type" "mtcr")
+       (eq_attr "cpu" "power7"))
+  "DU4_power7,cru_power7+FXU_power7")
+
+
+; BR Unit
+; Branches take dispatch slot 4 (du5_power7).  The absence_sets prevent other
+; insns from grabbing earlier dispatch slots once this is assigned.
+(define_insn_reservation "power7-branch" 3
+  (and (eq_attr "type" "jmpreg,branch")
+       (eq_attr "cpu" "power7"))
+  "(du5_power7\
+   |du4_power7+du5_power7\
+   |du3_power7+du4_power7+du5_power7\
+   |du2_power7+du3_power7+du4_power7+du5_power7\
+   |du1_power7+du2_power7+du3_power7+du4_power7+du5_power7),bpu_power7")
+
+
+; VS Unit (includes FP/VSX/VMX/DFP)
+(define_insn_reservation "power7-fp" 6
+  (and (eq_attr "type" "fp,dmul")
+       (eq_attr "cpu" "power7"))
+  "DU_power7,VSU_power7")
+
+(define_bypass 8 "power7-fp" "power7-branch")
+
+(define_insn_reservation "power7-fpcompare" 4
+  (and (eq_attr "type" "fpcompare")
+       (eq_attr "cpu" "power7"))
+  "DU_power7,VSU_power7")
+
+(define_insn_reservation "power7-sdiv" 26
+  (and (eq_attr "type" "sdiv")
+       (eq_attr "cpu" "power7"))
+  "DU_power7,VSU_power7")
+
+(define_insn_reservation "power7-ddiv" 32
+  (and (eq_attr "type" "ddiv")
+       (eq_attr "cpu" "power7"))
+  "DU_power7,VSU_power7")
+
+(define_insn_reservation "power7-sqrt" 31
+  (and (eq_attr "type" "ssqrt")
+       (eq_attr "cpu" "power7"))
+  "DU_power7,VSU_power7")
+
+(define_insn_reservation "power7-dsqrt" 43
+  (and (eq_attr "type" "dsqrt")
+       (eq_attr "cpu" "power7"))
+  "DU_power7,VSU_power7")
+
+(define_insn_reservation "power7-vecsimple" 2
+  (and (eq_attr "type" "vecsimple")
+       (eq_attr "cpu" "power7"))
+  "du1_power7,VSU_power7")
+
+(define_insn_reservation "power7-veccmp" 7
+  (and (eq_attr "type" "veccmp")
+       (eq_attr "cpu" "power7"))
+  "du1_power7,VSU_power7")
+
+(define_insn_reservation "power7-vecfloat" 7
+  (and (eq_attr "type" "vecfloat")
+       (eq_attr "cpu" "power7"))
+  "du1_power7,VSU_power7")
+
+(define_bypass 6 "power7-vecfloat" "power7-vecfloat")
+
+(define_insn_reservation "power7-veccomplex" 7
+  (and (eq_attr "type" "veccomplex")
+       (eq_attr "cpu" "power7"))
+  "du1_power7,VSU_power7")
+
+(define_insn_reservation "power7-vecperm" 3
+  (and (eq_attr "type" "vecperm")
+       (eq_attr "cpu" "power7"))
+  "du2_power7,VSU_power7")

-- 
Michael Meissner, IBM
4 Technology Place Drive, MS 2203A, Westford, MA, 01886, USA
meissner@linux.vnet.ibm.com


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]