This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

S/390: Add z990 instruction scheduling


Hello,

this patch adds instruction scheduling support for the new
z990 processor (which uses an in-order superscalar pipeline
structure).  This was implemented by Hartmut Penner.

Bootstrapped/regtested on s390-ibm-linux, s390x-ibm-linux,
and s390x-ibm-linux with --with-arch=z990 (together with
the following z990 long-displacement patch).

Bye,
Ulrich

ChangeLog:

	* config/s390/2084.md: New file.
	* config/s390/s390.md: Include it.
	* config/s390/s390.c (s390_adjust_priority): New function.
	(TARGET_SCHED_ADJUST_PRIORITY): Define.
	(s390_first_cycle_multipass_dfa_lookahead): New function.
	(TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD): Define.
	(s390_sched_reorder2): New function.
	(TARGET_SCHED_REORDER2): Define.
	(s390_adjust_cost): Support PROCESSOR_2084_Z990 cpu type.
	(s390_issue_rate): Likewise.


Index: gcc/config/s390/s390.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/s390/s390.c,v
retrieving revision 1.98
diff -c -p -r1.98 s390.c
*** gcc/config/s390/s390.c	30 Jun 2003 19:47:21 -0000	1.98
--- gcc/config/s390/s390.c	30 Jun 2003 19:57:03 -0000
*************** static void s390_output_mi_thunk PARAMS 
*** 68,79 ****
--- 68,83 ----
  static enum attr_type s390_safe_attr_type PARAMS ((rtx));
  
  static int s390_adjust_cost PARAMS ((rtx, rtx, rtx, int));
+ static int s390_adjust_priority PARAMS ((rtx, int));
  static int s390_issue_rate PARAMS ((void));
  static int s390_use_dfa_pipeline_interface PARAMS ((void));
+ static int s390_first_cycle_multipass_dfa_lookahead PARAMS ((void));
+ static int s390_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
  static bool s390_rtx_costs PARAMS ((rtx, int, int, int *));
  static int s390_address_cost PARAMS ((rtx));
  static void s390_reorg PARAMS ((void));
  
+ 
  #undef  TARGET_ASM_ALIGNED_HI_OP
  #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
  #undef  TARGET_ASM_ALIGNED_DI_OP
*************** static void s390_reorg PARAMS ((void));
*** 115,124 ****
--- 119,134 ----
  
  #undef  TARGET_SCHED_ADJUST_COST
  #define TARGET_SCHED_ADJUST_COST s390_adjust_cost
+ #undef  TARGET_SCHED_ADJUST_PRIORITY
+ #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
  #undef TARGET_SCHED_ISSUE_RATE
  #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
  #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
  #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE s390_use_dfa_pipeline_interface
+ #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+ #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
+ #undef TARGET_SCHED_REORDER2
+ #define TARGET_SCHED_REORDER2 s390_sched_reorder2
  
  #undef TARGET_RTX_COSTS
  #define TARGET_RTX_COSTS s390_rtx_costs
*************** addr_generation_dependency_p (dep_rtx, i
*** 3611,3617 ****
  
  /* Return 1, if dep_insn sets register used in insn in the agen unit.  */
  
- 
  int 
  s390_agen_dep_p(dep_insn, insn)
       rtx dep_insn;
--- 3621,3626 ----
*************** s390_agen_dep_p(dep_insn, insn)
*** 3634,3640 ****
    return 0;
  }
  
- 
  /* Return the modified cost of the dependency of instruction INSN
     on instruction DEP_INSN through the link LINK.  COST is the 
     default cost of that dependency.
--- 3643,3648 ----
*************** s390_adjust_cost (insn, link, dep_insn, 
*** 3669,3675 ****
  
    /* DFA based scheduling checks address dependency in md file.  */
    if (s390_use_dfa_pipeline_interface ())
!      return cost;
  
    dep_rtx = PATTERN (dep_insn);
  
--- 3677,3692 ----
  
    /* DFA based scheduling checks address dependency in md file.  */
    if (s390_use_dfa_pipeline_interface ())
!   {
!     /* Operand forwarding in case of lr, load and la.  */ 
!     if (s390_tune == PROCESSOR_2084_Z990
!         && cost == 1
! 	&& (s390_safe_attr_type (dep_insn) == TYPE_LA
! 	    || s390_safe_attr_type (dep_insn) == TYPE_LR
! 	    || s390_safe_attr_type (dep_insn) == TYPE_LOAD))
!       return 0;
!     return cost;
!   }
  
    dep_rtx = PATTERN (dep_insn);
  
*************** s390_adjust_cost (insn, link, dep_insn, 
*** 3687,3698 ****
--- 3704,3750 ----
  
    return cost;
  }
+ /* A C statement (sans semicolon) to update the integer scheduling priority
+    INSN_PRIORITY (INSN).  Increase the priority to execute the INSN earlier,
+    reduce the priority to execute INSN later.  Do not define this macro if
+    you do not need to adjust the scheduling priorities of insns. 
+ 
+    A STD instruction should be scheduled earlier, 
+    in order to use the bypass.  */
+ 
+ static int
+ s390_adjust_priority (insn, priority)
+      rtx insn ATTRIBUTE_UNUSED;
+      int priority;
+ {
+   if (! INSN_P (insn))
+     return priority;
+ 
+   if (s390_tune != PROCESSOR_2084_Z990)
+     return priority;
+ 
+   switch (s390_safe_attr_type (insn))
+     {
+       case TYPE_FSTORED:
+       case TYPE_FSTORES:
+ 	priority = priority << 3;
+ 	break;
+       case TYPE_STORE:
+ 	priority = priority << 1;
+ 	break;
+       default:
+         break;
+     }
+   return priority;
+ }
  
  /* The number of instructions that can be issued per cycle.  */
  
  static int
  s390_issue_rate ()
  {
+   if (s390_tune == PROCESSOR_2084_Z990) 
+     return 3;
    return 1;
  }
  
*************** s390_issue_rate ()
*** 3702,3712 ****
  static int
  s390_use_dfa_pipeline_interface ()
  {
!   if (s390_tune == PROCESSOR_2064_Z900)
      return 1;
    return 0;
  
  }
  
  /* Split all branches that exceed the maximum distance.  
     Returns true if this created a new literal pool entry.  
--- 3754,3786 ----
  static int
  s390_use_dfa_pipeline_interface ()
  {
!   if (s390_tune == PROCESSOR_2064_Z900
!       || s390_tune == PROCESSOR_2084_Z990)
      return 1;
+ 
    return 0;
+ }
+ 
+ static int
+ s390_first_cycle_multipass_dfa_lookahead ()
+ {
+   return s390_use_dfa_pipeline_interface () ? 4 : 0;
+ }
+ 
+ /* Called after issuing each insn.
+    Triggers default sort algorithm to better slot instructions.  */
  
+ static int
+ s390_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
+      FILE *dump ATTRIBUTE_UNUSED;
+      int sched_verbose ATTRIBUTE_UNUSED;
+      rtx *ready ATTRIBUTE_UNUSED;
+      int *pn_ready ATTRIBUTE_UNUSED;
+      int clock_var ATTRIBUTE_UNUSED;
+ {
+     return s390_issue_rate();
  }
+ 
  
  /* Split all branches that exceed the maximum distance.  
     Returns true if this created a new literal pool entry.  
Index: gcc/config/s390/s390.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/s390/s390.md,v
retrieving revision 1.57
diff -c -p -r1.57 s390.md
*** gcc/config/s390/s390.md	30 Jun 2003 19:47:22 -0000	1.57
--- gcc/config/s390/s390.md	30 Jun 2003 19:57:05 -0000
***************
*** 219,224 ****
--- 219,225 ----
  ;; Pipeline description for z900
  
  (include "2064.md")
+ (include "2084.md")
  
  ;; Length in bytes.
  
*** /dev/null	Fri Mar 21 21:55:32 2003
--- gcc/config/s390/2084.md	Mon Jun 30 21:56:09 2003
***************
*** 0 ****
--- 1,262 ----
+ ;; Scheduling description for z990 (cpu 2084).
+ ;;   Copyright (C) 2003 Free Software Foundation, Inc.
+ ;;   Contributed by Hartmut Penner (hpenner@de.ibm.com) and
+ ;;                  Ulrich Weigand (uweigand@de.ibm.com).
+ ;;
+ ;; This file is part of GNU CC.
+ ;;
+ ;; GNU CC is free software; you can redistribute it and/or modify
+ ;; it under the terms of the GNU General Public License as published by
+ ;; the Free Software Foundation; either version 2, or (at your option)
+ ;; any later version.
+ ;;
+ ;; GNU CC is distributed in the hope that it will be useful,
+ ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ ;; GNU General Public License for more details.
+ ;;
+ ;; You should have received a copy of the GNU General Public License
+ ;; along with GNU CC; see the file COPYING.  If not, write to
+ ;; the Free Software Foundation, 59 Temple Place - Suite 330,
+ ;; Boston, MA 02111-1307, USA.
+ 
+ (define_automaton "x_ipu")
+ 
+ (define_cpu_unit "x_e1_r,x_e1_s,x_e1_t"  "x_ipu")
+ (define_cpu_unit "x_wr_r,x_wr_s,x_wr_t,x_wr_fp" "x_ipu")
+ (define_cpu_unit "x_s1,x_s2,x_s3,x_s4"   "x_ipu")
+ (define_cpu_unit "x_t1,x_t2,x_t3,x_t4"   "x_ipu")
+ (define_cpu_unit "x_f1,x_f2,x_f3,x_f4,x_f5,x_f6"   "x_ipu")
+ (define_cpu_unit "x_store_tok"   "x_ipu")
+ (define_cpu_unit "x_ms,x_mt"   "x_ipu")
+ 
+ (define_reservation "x-e1-st" "(x_e1_s | x_e1_t)")
+ 
+ (define_reservation "x-e1-np" "(x_e1_r + x_e1_s + x_e1_t)")
+ 
+ (absence_set "x_e1_r" "x_e1_s,x_e1_t")
+ (absence_set "x_e1_s" "x_e1_t")
+ 
+ ;; Try to avoid int <-> fp transitions.
+ 
+ (define_reservation "x-x" "x_s1|x_t1,x_s2|x_t2,x_s3|x_t3,x_s4|x_t4")
+ (define_reservation "x-f" "x_f1,x_f2,x_f3,x_f4,x_f5,x_f6")
+ (define_reservation "x-wr-st" "((x_wr_s | x_wr_t),x-x)")
+ (define_reservation "x-wr-np" "((x_wr_r + x_wr_s + x_wr_t),x-x)")
+ (define_reservation "x-wr-fp" "x_wr_fp,x-f")
+ (define_reservation "x-mem"   "x_ms|x_mt")
+ 
+ (absence_set "x_wr_fp"
+              "x_s1,x_s2,x_s3,x_s4,x_t1,x_t2,x_t3,x_t4,x_wr_s,x_wr_t")
+ 
+ (absence_set "x_e1_r,x_wr_r,x_wr_s,x_wr_t"
+              "x_f1,x_f2,x_f3,x_f4,x_f5,x_f6,x_wr_fp")
+ 
+ ;; Don't have any load type insn in same group as store
+ 
+ (absence_set "x_ms,x_mt" "x_store_tok")
+ 
+ 
+ ;;
+ ;; Simple insns
+ ;;
+ 
+ (define_insn_reservation "x_lr" 1
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "type" "lr"))
+   "x-e1-st,x-wr-st") 
+ 
+ (define_insn_reservation "x_la" 1 
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "type" "la"))
+   "x-e1-st,x-wr-st") 
+ 
+ (define_insn_reservation "x_larl" 1 
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "type" "larl"))
+   "x-e1-st,x-wr-st") 
+ 
+ (define_insn_reservation "x_load" 1 
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "type" "load"))
+   "x-e1-st+x-mem,x-wr-st") 
+ 
+ (define_insn_reservation "x_store" 1 
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "type" "store"))
+   "x-e1-st+x_store_tok,x-wr-st") 
+ 
+ (define_insn_reservation "x_branch" 1 
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "type" "branch"))
+   "x_e1_r,x_wr_r") 
+ 
+ (define_insn_reservation "x_call" 5 
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "type" "jsr"))
+   "x-e1-np*5,x-wr-np") 
+ 
+ ;;
+ ;; Multicycle insns
+ ;;
+ 
+ (define_insn_reservation "x_ss" 1 
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "op_type" "SS"))
+   "x-e1-np,x-wr-np") 
+ 
+ (define_insn_reservation "x_stm" 1 
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "type" "stm"))
+   "(x-e1-np+x_store_tok)*10,x-wr-np") 
+ 
+ (define_insn_reservation "x_lm" 1 
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "type" "lm"))
+   "x-e1-np*10,x-wr-np") 
+ 
+ (define_insn_reservation "x_nn" 1 
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "op_type" "NN"))
+   "x-e1-np,x-wr-np") 
+ 
+ (define_insn_reservation "x_o2" 2 
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "type" "o2"))
+   "x-e1-np*2,x-wr-np") 
+ 
+ (define_insn_reservation "x_o3" 3 
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "type" "o3"))
+   "x-e1-np*3,x-wr-np") 
+ 
+ ;;
+ ;; Floating point insns
+ ;;
+ 
+ (define_insn_reservation "x_fsimpd" 6 
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "type" "fsimpd,fmuld"))
+   "x_e1_t,x-wr-fp") 
+ 
+ (define_insn_reservation "x_fsimps" 6 
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "type" "fsimps,fmuls"))
+   "x_e1_t,x-wr-fp") 
+ 
+ (define_insn_reservation "x_fdivd" 36
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "type" "fdivd"))
+   "x_e1_t*30,x-wr-fp") 
+ 
+ (define_insn_reservation "x_fdivs" 36 
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "type" "fdivs"))
+   "x_e1_t*30,x-wr-fp") 
+ 
+ (define_insn_reservation "x_floadd" 6 
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "type" "floadd"))
+   "x_e1_t,x-wr-fp") 
+ 
+ (define_insn_reservation "x_floads" 6 
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "type" "floads"))
+   "x_e1_t,x-wr-fp") 
+ 
+ (define_insn_reservation "x_fstored" 1 
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "type" "fstored"))
+   "x_e1_t,x-wr-fp") 
+ 
+ (define_insn_reservation "x_fstores" 1 
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "type" "fstores"))
+   "x_e1_t,x-wr-fp") 
+ 
+ (define_insn_reservation "x_ftoi" 1 
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "type" "ftoi"))
+   "x_e1_t*3,x-wr-fp") 
+ 
+ (define_insn_reservation "x_itof" 7 
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "type" "itof"))
+   "x_e1_t*3,x-wr-fp") 
+ 
+ (define_bypass 1 "x_fsimpd" "x_fstored")
+ 
+ (define_bypass 1 "x_fsimps" "x_fstores")
+ 
+ (define_bypass 1 "x_floadd" "x_fsimpd,x_fstored,x_floadd")
+ 	         
+ (define_bypass 1 "x_floads" "x_fsimps,x_fstores,x_floads")
+ 
+ ;;
+ ;; Insns still not mentioned are checked for
+ ;; the usage of the agen unit 
+ ;;
+ 
+ (define_insn_reservation "x_int" 1
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "atype" "reg"))
+   "x-e1-st,x-wr-st") 
+ 
+ (define_insn_reservation "x_agen" 1
+   (and (eq_attr "cpu" "z990")
+        (eq_attr "atype" "agen"))
+   "x-e1-st+x-mem,x-wr-st") 
+ 
+ ;;
+ ;; s390_agen_dep_p returns 1, if a register is set in the 
+ ;; first insn and used in the dependent insn to form an address.
+ ;;
+ 
+ ;;
+ ;; If an instruction uses a register to address memory, it needs
+ ;; to be set 5 cycles in advance.
+ ;; 
+ 
+ (define_bypass 5 "x_int,x_agen,x_lr" 
+                  "x_agen,x_la,x_call,x_load,x_store,x_ss,x_stm,x_lm"
+ 	         "s390_agen_dep_p")
+ 
+ (define_bypass 9 "x_int,x_agen,x_lr" 
+                  "x_floadd, x_floads, x_fstored, x_fstores,\
+ 		  x_fsimpd, x_fsimps, x_fdivd, x_fdivs"
+ 	         "s390_agen_dep_p")
+ ;;
+ ;; A load type instruction uses a bypass to feed the result back	
+ ;; to the address generation pipeline stage. 
+ ;;
+ 
+ (define_bypass 4 "x_load"    
+                  "x_agen,x_la,x_call,x_load,x_store,x_ss,x_stm,x_lm"
+ 	         "s390_agen_dep_p")
+ 
+ (define_bypass 5 "x_load"
+                  "x_floadd, x_floads, x_fstored, x_fstores,\
+ 		  x_fsimpd, x_fsimps, x_fdivd, x_fdivs"
+ 	         "s390_agen_dep_p")
+ 
+ ;;
+ ;; A load address type instruction uses a bypass to feed the 
+ ;; result back to the address generation pipeline stage. 
+ ;;
+ 
+ (define_bypass 3 "x_larl,x_la" 
+                  "x_agen,x_la,x_call,x_load,x_store,x_ss,x_stm,x_lm"
+ 	         "s390_agen_dep_p")
+ 
+ (define_bypass 5 "x_larl, x_la"
+                  "x_floadd, x_floads, x_fstored, x_fstores,\
+ 		  x_fsimpd, x_fsimps, x_fdivd, x_fdivs"
+ 	         "s390_agen_dep_p")
+ 
+ ;;
+ ;; Operand forwarding
+ ;;
+ 
+ (define_bypass 0 "x_lr,x_la,x_load" "x_int,x_lr")
+ 
+ 

-- 
  Dr. Ulrich Weigand
  weigand@informatik.uni-erlangen.de


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]