This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
S/390: Add z990 instruction scheduling
- From: Ulrich Weigand <weigand at i1 dot informatik dot uni-erlangen dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Tue, 1 Jul 2003 00:08:55 +0200 (MET DST)
- Subject: S/390: Add z990 instruction scheduling
Hello,
this patch adds instruction scheduling support for the new
z990 processor (which uses an in-order superscalar pipeline
structure). This was implemented by Hartmut Penner.
Bootstrapped/regtested on s390-ibm-linux, s390x-ibm-linux,
and s390x-ibm-linux with --with-arch=z990 (together with
the following z990 long-displacement patch).
Bye,
Ulrich
ChangeLog:
* config/s390/2084.md: New file.
* config/s390/s390.md: Include it.
* config/s390/s390.c (s390_adjust_priority): New function.
(TARGET_SCHED_ADJUST_PRIORITY): Define.
(s390_first_cycle_multipass_dfa_lookahead): New function.
(TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD): Define.
(s390_sched_reorder2): New function.
(TARGET_SCHED_REORDER2): Define.
(s390_adjust_cost): Support PROCESSOR_2084_Z990 cpu type.
(s390_issue_rate): Likewise.
Index: gcc/config/s390/s390.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/s390/s390.c,v
retrieving revision 1.98
diff -c -p -r1.98 s390.c
*** gcc/config/s390/s390.c 30 Jun 2003 19:47:21 -0000 1.98
--- gcc/config/s390/s390.c 30 Jun 2003 19:57:03 -0000
*************** static void s390_output_mi_thunk PARAMS
*** 68,79 ****
--- 68,83 ----
static enum attr_type s390_safe_attr_type PARAMS ((rtx));
static int s390_adjust_cost PARAMS ((rtx, rtx, rtx, int));
+ static int s390_adjust_priority PARAMS ((rtx, int));
static int s390_issue_rate PARAMS ((void));
static int s390_use_dfa_pipeline_interface PARAMS ((void));
+ static int s390_first_cycle_multipass_dfa_lookahead PARAMS ((void));
+ static int s390_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
static bool s390_rtx_costs PARAMS ((rtx, int, int, int *));
static int s390_address_cost PARAMS ((rtx));
static void s390_reorg PARAMS ((void));
+
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
*************** static void s390_reorg PARAMS ((void));
*** 115,124 ****
--- 119,134 ----
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST s390_adjust_cost
+ #undef TARGET_SCHED_ADJUST_PRIORITY
+ #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE s390_issue_rate
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE s390_use_dfa_pipeline_interface
+ #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+ #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
+ #undef TARGET_SCHED_REORDER2
+ #define TARGET_SCHED_REORDER2 s390_sched_reorder2
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS s390_rtx_costs
*************** addr_generation_dependency_p (dep_rtx, i
*** 3611,3617 ****
/* Return 1, if dep_insn sets register used in insn in the agen unit. */
-
int
s390_agen_dep_p(dep_insn, insn)
rtx dep_insn;
--- 3621,3626 ----
*************** s390_agen_dep_p(dep_insn, insn)
*** 3634,3640 ****
return 0;
}
-
/* Return the modified cost of the dependency of instruction INSN
on instruction DEP_INSN through the link LINK. COST is the
default cost of that dependency.
--- 3643,3648 ----
*************** s390_adjust_cost (insn, link, dep_insn,
*** 3669,3675 ****
/* DFA based scheduling checks address dependency in md file. */
if (s390_use_dfa_pipeline_interface ())
! return cost;
dep_rtx = PATTERN (dep_insn);
--- 3677,3692 ----
/* DFA based scheduling checks address dependency in md file. */
if (s390_use_dfa_pipeline_interface ())
! {
! /* Operand forwarding in case of lr, load and la. */
! if (s390_tune == PROCESSOR_2084_Z990
! && cost == 1
! && (s390_safe_attr_type (dep_insn) == TYPE_LA
! || s390_safe_attr_type (dep_insn) == TYPE_LR
! || s390_safe_attr_type (dep_insn) == TYPE_LOAD))
! return 0;
! return cost;
! }
dep_rtx = PATTERN (dep_insn);
*************** s390_adjust_cost (insn, link, dep_insn,
*** 3687,3698 ****
--- 3704,3750 ----
return cost;
}
+ /* Return the adjusted scheduling priority for INSN, given its current
+ PRIORITY. Increasing the priority makes INSN execute earlier, reducing
+ the priority makes INSN execute later. The priority is only adjusted
+ when tuning for the z990; otherwise PRIORITY is returned unchanged.
+
+ Store instructions (such as STD) should be scheduled earlier,
+ in order to use the bypass. */
+
+ static int
+ s390_adjust_priority (insn, priority)
+ rtx insn ATTRIBUTE_UNUSED;
+ int priority;
+ {
+ if (! INSN_P (insn))
+ return priority;
+
+ if (s390_tune != PROCESSOR_2084_Z990)
+ return priority;
+
+ switch (s390_safe_attr_type (insn))
+ {
+ case TYPE_FSTORED:
+ case TYPE_FSTORES:
+ priority = priority << 3;
+ break;
+ case TYPE_STORE:
+ priority = priority << 1;
+ break;
+ default:
+ break;
+ }
+ return priority;
+ }
/* The number of instructions that can be issued per cycle. */
static int
s390_issue_rate ()
{
+ if (s390_tune == PROCESSOR_2084_Z990)
+ return 3;
return 1;
}
*************** s390_issue_rate ()
*** 3702,3712 ****
static int
s390_use_dfa_pipeline_interface ()
{
! if (s390_tune == PROCESSOR_2064_Z900)
return 1;
return 0;
}
/* Split all branches that exceed the maximum distance.
Returns true if this created a new literal pool entry.
--- 3754,3786 ----
static int
s390_use_dfa_pipeline_interface ()
{
! if (s390_tune == PROCESSOR_2064_Z900
! || s390_tune == PROCESSOR_2084_Z990)
return 1;
+
return 0;
+ }
+
+ static int
+ s390_first_cycle_multipass_dfa_lookahead ()
+ {
+ return s390_use_dfa_pipeline_interface () ? 4 : 0;
+ }
+
+ /* Called after issuing each insn.
+ Triggers default sort algorithm to better slot instructions. */
+ static int
+ s390_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
+ FILE *dump ATTRIBUTE_UNUSED;
+ int sched_verbose ATTRIBUTE_UNUSED;
+ rtx *ready ATTRIBUTE_UNUSED;
+ int *pn_ready ATTRIBUTE_UNUSED;
+ int clock_var ATTRIBUTE_UNUSED;
+ {
+ return s390_issue_rate();
}
+
/* Split all branches that exceed the maximum distance.
Returns true if this created a new literal pool entry.
Index: gcc/config/s390/s390.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/s390/s390.md,v
retrieving revision 1.57
diff -c -p -r1.57 s390.md
*** gcc/config/s390/s390.md 30 Jun 2003 19:47:22 -0000 1.57
--- gcc/config/s390/s390.md 30 Jun 2003 19:57:05 -0000
***************
*** 219,224 ****
--- 219,225 ----
;; Pipeline description for z900
(include "2064.md")
+ (include "2084.md")
;; Length in bytes.
*** /dev/null Fri Mar 21 21:55:32 2003
--- gcc/config/s390/2084.md Mon Jun 30 21:56:09 2003
***************
*** 0 ****
--- 1,262 ----
+ ;; Scheduling description for z990 (cpu 2084).
+ ;; Copyright (C) 2003 Free Software Foundation, Inc.
+ ;; Contributed by Hartmut Penner (hpenner@de.ibm.com) and
+ ;; Ulrich Weigand (uweigand@de.ibm.com).
+ ;;
+ ;; This file is part of GNU CC.
+ ;;
+ ;; GNU CC is free software; you can redistribute it and/or modify
+ ;; it under the terms of the GNU General Public License as published by
+ ;; the Free Software Foundation; either version 2, or (at your option)
+ ;; any later version.
+ ;;
+ ;; GNU CC is distributed in the hope that it will be useful,
+ ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ;; GNU General Public License for more details.
+ ;;
+ ;; You should have received a copy of the GNU General Public License
+ ;; along with GNU CC; see the file COPYING. If not, write to
+ ;; the Free Software Foundation, 59 Temple Place - Suite 330,
+ ;; Boston, MA 02111-1307, USA.
+
+ (define_automaton "x_ipu")
+
+ (define_cpu_unit "x_e1_r,x_e1_s,x_e1_t" "x_ipu")
+ (define_cpu_unit "x_wr_r,x_wr_s,x_wr_t,x_wr_fp" "x_ipu")
+ (define_cpu_unit "x_s1,x_s2,x_s3,x_s4" "x_ipu")
+ (define_cpu_unit "x_t1,x_t2,x_t3,x_t4" "x_ipu")
+ (define_cpu_unit "x_f1,x_f2,x_f3,x_f4,x_f5,x_f6" "x_ipu")
+ (define_cpu_unit "x_store_tok" "x_ipu")
+ (define_cpu_unit "x_ms,x_mt" "x_ipu")
+
+ (define_reservation "x-e1-st" "(x_e1_s | x_e1_t)")
+
+ (define_reservation "x-e1-np" "(x_e1_r + x_e1_s + x_e1_t)")
+
+ (absence_set "x_e1_r" "x_e1_s,x_e1_t")
+ (absence_set "x_e1_s" "x_e1_t")
+
+ ;; Try to avoid int <-> fp transitions.
+
+ (define_reservation "x-x" "x_s1|x_t1,x_s2|x_t2,x_s3|x_t3,x_s4|x_t4")
+ (define_reservation "x-f" "x_f1,x_f2,x_f3,x_f4,x_f5,x_f6")
+ (define_reservation "x-wr-st" "((x_wr_s | x_wr_t),x-x)")
+ (define_reservation "x-wr-np" "((x_wr_r + x_wr_s + x_wr_t),x-x)")
+ (define_reservation "x-wr-fp" "x_wr_fp,x-f")
+ (define_reservation "x-mem" "x_ms|x_mt")
+
+ (absence_set "x_wr_fp"
+ "x_s1,x_s2,x_s3,x_s4,x_t1,x_t2,x_t3,x_t4,x_wr_s,x_wr_t")
+
+ (absence_set "x_e1_r,x_wr_r,x_wr_s,x_wr_t"
+ "x_f1,x_f2,x_f3,x_f4,x_f5,x_f6,x_wr_fp")
+
+ ;; Don't have any load type insn in same group as store
+
+ (absence_set "x_ms,x_mt" "x_store_tok")
+
+
+ ;;
+ ;; Simple insns
+ ;;
+
+ (define_insn_reservation "x_lr" 1
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "type" "lr"))
+ "x-e1-st,x-wr-st")
+
+ (define_insn_reservation "x_la" 1
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "type" "la"))
+ "x-e1-st,x-wr-st")
+
+ (define_insn_reservation "x_larl" 1
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "type" "larl"))
+ "x-e1-st,x-wr-st")
+
+ (define_insn_reservation "x_load" 1
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "type" "load"))
+ "x-e1-st+x-mem,x-wr-st")
+
+ (define_insn_reservation "x_store" 1
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "type" "store"))
+ "x-e1-st+x_store_tok,x-wr-st")
+
+ (define_insn_reservation "x_branch" 1
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "type" "branch"))
+ "x_e1_r,x_wr_r")
+
+ (define_insn_reservation "x_call" 5
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "type" "jsr"))
+ "x-e1-np*5,x-wr-np")
+
+ ;;
+ ;; Multicycle insns
+ ;;
+
+ (define_insn_reservation "x_ss" 1
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "op_type" "SS"))
+ "x-e1-np,x-wr-np")
+
+ (define_insn_reservation "x_stm" 1
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "type" "stm"))
+ "(x-e1-np+x_store_tok)*10,x-wr-np")
+
+ (define_insn_reservation "x_lm" 1
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "type" "lm"))
+ "x-e1-np*10,x-wr-np")
+
+ (define_insn_reservation "x_nn" 1
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "op_type" "NN"))
+ "x-e1-np,x-wr-np")
+
+ (define_insn_reservation "x_o2" 2
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "type" "o2"))
+ "x-e1-np*2,x-wr-np")
+
+ (define_insn_reservation "x_o3" 3
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "type" "o3"))
+ "x-e1-np*3,x-wr-np")
+
+ ;;
+ ;; Floating point insns
+ ;;
+
+ (define_insn_reservation "x_fsimpd" 6
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "type" "fsimpd,fmuld"))
+ "x_e1_t,x-wr-fp")
+
+ (define_insn_reservation "x_fsimps" 6
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "type" "fsimps,fmuls"))
+ "x_e1_t,x-wr-fp")
+
+ (define_insn_reservation "x_fdivd" 36
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "type" "fdivd"))
+ "x_e1_t*30,x-wr-fp")
+
+ (define_insn_reservation "x_fdivs" 36
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "type" "fdivs"))
+ "x_e1_t*30,x-wr-fp")
+
+ (define_insn_reservation "x_floadd" 6
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "type" "floadd"))
+ "x_e1_t,x-wr-fp")
+
+ (define_insn_reservation "x_floads" 6
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "type" "floads"))
+ "x_e1_t,x-wr-fp")
+
+ (define_insn_reservation "x_fstored" 1
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "type" "fstored"))
+ "x_e1_t,x-wr-fp")
+
+ (define_insn_reservation "x_fstores" 1
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "type" "fstores"))
+ "x_e1_t,x-wr-fp")
+
+ (define_insn_reservation "x_ftoi" 1
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "type" "ftoi"))
+ "x_e1_t*3,x-wr-fp")
+
+ (define_insn_reservation "x_itof" 7
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "type" "itof"))
+ "x_e1_t*3,x-wr-fp")
+
+ (define_bypass 1 "x_fsimpd" "x_fstored")
+
+ (define_bypass 1 "x_fsimps" "x_fstores")
+
+ (define_bypass 1 "x_floadd" "x_fsimpd,x_fstored,x_floadd")
+
+ (define_bypass 1 "x_floads" "x_fsimps,x_fstores,x_floads")
+
+ ;;
+ ;; Insns still not mentioned are checked for
+ ;; the usage of the agen unit
+ ;;
+
+ (define_insn_reservation "x_int" 1
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "atype" "reg"))
+ "x-e1-st,x-wr-st")
+
+ (define_insn_reservation "x_agen" 1
+ (and (eq_attr "cpu" "z990")
+ (eq_attr "atype" "agen"))
+ "x-e1-st+x-mem,x-wr-st")
+
+ ;;
+ ;; s390_agen_dep_p returns 1, if a register is set in the
+ ;; first insn and used in the dependent insn to form an address.
+ ;;
+
+ ;;
+ ;; If an instruction uses a register to address memory, that register
+ ;; needs to be set 5 cycles in advance.
+ ;;
+
+ (define_bypass 5 "x_int,x_agen,x_lr"
+ "x_agen,x_la,x_call,x_load,x_store,x_ss,x_stm,x_lm"
+ "s390_agen_dep_p")
+
+ (define_bypass 9 "x_int,x_agen,x_lr"
+ "x_floadd, x_floads, x_fstored, x_fstores,\
+ x_fsimpd, x_fsimps, x_fdivd, x_fdivs"
+ "s390_agen_dep_p")
+ ;;
+ ;; A load type instruction uses a bypass to feed the result back
+ ;; to the address generation pipeline stage.
+ ;;
+
+ (define_bypass 4 "x_load"
+ "x_agen,x_la,x_call,x_load,x_store,x_ss,x_stm,x_lm"
+ "s390_agen_dep_p")
+
+ (define_bypass 5 "x_load"
+ "x_floadd, x_floads, x_fstored, x_fstores,\
+ x_fsimpd, x_fsimps, x_fdivd, x_fdivs"
+ "s390_agen_dep_p")
+
+ ;;
+ ;; A load address type instruction uses a bypass to feed the
+ ;; result back to the address generation pipeline stage.
+ ;;
+
+ (define_bypass 3 "x_larl,x_la"
+ "x_agen,x_la,x_call,x_load,x_store,x_ss,x_stm,x_lm"
+ "s390_agen_dep_p")
+
+ (define_bypass 5 "x_larl, x_la"
+ "x_floadd, x_floads, x_fstored, x_fstores,\
+ x_fsimpd, x_fsimps, x_fdivd, x_fdivs"
+ "s390_agen_dep_p")
+
+ ;;
+ ;; Operand forwarding
+ ;;
+
+ (define_bypass 0 "x_lr,x_la,x_load" "x_int,x_lr")
+
+
--
Dr. Ulrich Weigand
weigand@informatik.uni-erlangen.de