+2002-05-05 Richard Henderson <rth@redhat.com>
+
+ * config/alpha/alpha.c (alpha_adjust_cost): Remove everything but
+ memory latency adjustments.
+ (alpha_variable_issue): Remove.
+ (alpha_use_dfa_pipeline_interface): New.
+ (alpha_multipass_dfa_lookahead): New.
+ * config/alpha/alpha.md: Remove define_function_unit scheduling;
+ include new dfa scheduling.
+ (attr type): Add none.
+ (blockage): Use it.
+ * config/alpha/ev4.md: New.
+ * config/alpha/ev5.md: New.
+ * config/alpha/ev6.md: New.
+
2002-05-05 David S. Miller <davem@redhat.com>
* recog.c (store_data_bypass_p): Handle CLOBBER inside PARALLEL.
PARAMS ((rtx, rtx, rtx, int));
static int alpha_issue_rate
PARAMS ((void));
-static int alpha_variable_issue
- PARAMS ((FILE *, int, rtx, int));
+static int alpha_use_dfa_pipeline_interface
+ PARAMS ((void));
+static int alpha_multipass_dfa_lookahead
+ PARAMS ((void));
#if TARGET_ABI_UNICOSMK
static void alpha_init_machine_status
#define TARGET_SCHED_ADJUST_COST alpha_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE alpha_issue_rate
-#undef TARGET_SCHED_VARIABLE_ISSUE
-#define TARGET_SCHED_VARIABLE_ISSUE alpha_variable_issue
+#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
+#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
+ alpha_use_dfa_pipeline_interface
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
+ alpha_multipass_dfa_lookahead
struct gcc_target targetm = TARGET_INITIALIZER;
\f
/* If the dependence is an anti-dependence, there is no cost. For an
output dependence, there is sometimes a cost, but it doesn't seem
worth handling those few cases. */
-
if (REG_NOTE_KIND (link) != 0)
- return 0;
+ return cost;
/* If we can't recognize the insns, we can't really do anything. */
if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
|| dep_insn_type == TYPE_LDSYM)
cost += alpha_memory_latency-1;
- switch (alpha_cpu)
- {
- case PROCESSOR_EV4:
- /* On EV4, if INSN is a store insn and DEP_INSN is setting the data
- being stored, we can sometimes lower the cost. */
-
- if ((insn_type == TYPE_IST || insn_type == TYPE_FST)
- && (set = single_set (dep_insn)) != 0
- && GET_CODE (PATTERN (insn)) == SET
- && rtx_equal_p (SET_DEST (set), SET_SRC (PATTERN (insn))))
- {
- switch (dep_insn_type)
- {
- case TYPE_ILD:
- case TYPE_FLD:
- /* No savings here. */
- return cost;
-
- case TYPE_IMUL:
- /* In these cases, we save one cycle. */
- return cost - 1;
-
- default:
- /* In all other cases, we save two cycles. */
- return MAX (0, cost - 2);
- }
- }
+ /* Everything else is handled by DFA bypasses now. */
- /* Another case that needs adjustment is an arithmetic or logical
- operation. It's cost is usually one cycle, but we default it to
- two in the MD file. The only case that it is actually two is
- for the address in loads, stores, and jumps. */
-
- if (dep_insn_type == TYPE_IADD || dep_insn_type == TYPE_ILOG)
- {
- switch (insn_type)
- {
- case TYPE_ILD:
- case TYPE_IST:
- case TYPE_FLD:
- case TYPE_FST:
- case TYPE_JSR:
- return cost;
- default:
- return 1;
- }
- }
-
- /* The final case is when a compare feeds into an integer branch;
- the cost is only one cycle in that case. */
-
- if (dep_insn_type == TYPE_ICMP && insn_type == TYPE_IBR)
- return 1;
- break;
-
- case PROCESSOR_EV5:
- /* And the lord DEC saith: "A special bypass provides an effective
- latency of 0 cycles for an ICMP or ILOG insn producing the test
- operand of an IBR or ICMOV insn." */
-
- if ((dep_insn_type == TYPE_ICMP || dep_insn_type == TYPE_ILOG)
- && (set = single_set (dep_insn)) != 0)
- {
- /* A branch only has one input. This must be it. */
- if (insn_type == TYPE_IBR)
- return 0;
- /* A conditional move has three, make sure it is the test. */
- if (insn_type == TYPE_ICMOV
- && GET_CODE (set_src = PATTERN (insn)) == SET
- && GET_CODE (set_src = SET_SRC (set_src)) == IF_THEN_ELSE
- && rtx_equal_p (SET_DEST (set), XEXP (set_src, 0)))
- return 0;
- }
-
- /* "The multiplier is unable to receive data from IEU bypass paths.
- The instruction issues at the expected time, but its latency is
- increased by the time it takes for the input data to become
- available to the multiplier" -- which happens in pipeline stage
- six, when results are comitted to the register file. */
-
- if (insn_type == TYPE_IMUL)
- {
- switch (dep_insn_type)
- {
- /* These insns produce their results in pipeline stage five. */
- case TYPE_ILD:
- case TYPE_ICMOV:
- case TYPE_IMUL:
- case TYPE_MVI:
- return cost + 1;
-
- /* Other integer insns produce results in pipeline stage four. */
- default:
- return cost + 2;
- }
- }
- break;
-
- case PROCESSOR_EV6:
- /* There is additional latency to move the result of (most) FP
- operations anywhere but the FP register file. */
-
- if ((insn_type == TYPE_FST || insn_type == TYPE_FTOI)
- && (dep_insn_type == TYPE_FADD ||
- dep_insn_type == TYPE_FMUL ||
- dep_insn_type == TYPE_FCMOV))
- return cost + 2;
-
- break;
- }
-
- /* Otherwise, return the default cost. */
return cost;
}
-/* Function to initialize the issue rate used by the scheduler. */
+/* The number of instructions that can be issued per cycle. */
+
static int
alpha_issue_rate ()
{
}
static int
-alpha_variable_issue (dump, verbose, insn, cim)
- FILE *dump ATTRIBUTE_UNUSED;
- int verbose ATTRIBUTE_UNUSED;
- rtx insn;
- int cim;
+alpha_use_dfa_pipeline_interface ()
{
- if (recog_memoized (insn) < 0 || get_attr_type (insn) == TYPE_MULTI)
- return 0;
-
- return cim - 1;
+ return true; /* TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hook: always true. */
}
+/* Number of alternative schedules the first-cycle multipass DFA
+   lookahead should try.  Keep this as wide as the scheduling freedom
+   in the DFA, but no wider: a value that is too large only results in
+   extra work for the scheduler.
+
+   For EV4, loads can be issued to either IB0 or IB1, thus we have 2
+   alternative schedules.  For EV5, we can choose between E0/E1 and
+   FA/FM.  For EV6, an arithmetic insn can be issued to U0/U1/L0/L1.  */
+
+static int
+alpha_multipass_dfa_lookahead ()
+{
+  if (alpha_cpu == PROCESSOR_EV6)
+    return 4;
+
+  return 2;
+}
\f
/* Register global variables and machine-specific functions with the
garbage collector. */
(define_attr "type"
"ild,fld,ldsym,ist,fst,ibr,fbr,jsr,iadd,ilog,shift,icmov,fcmov,icmp,imul,\
-fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi"
+fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi,none"
(const_string "iadd"))
;; Describe a user's asm statement.
(define_attr "length" ""
(const_int 4))
\f
-;; On EV4 there are two classes of resources to consider: resources needed
-;; to issue, and resources needed to execute. IBUS[01] are in the first
-;; category. ABOX, BBOX, EBOX, FBOX, IMUL & FDIV make up the second.
-;; (There are a few other register-like resources, but ...)
-
-; First, describe all of the issue constraints with single cycle delays.
-; All insns need a bus, but all except loads require one or the other.
-(define_function_unit "ev4_ibus0" 1 0
- (and (eq_attr "cpu" "ev4")
- (eq_attr "type" "fst,fbr,iadd,imul,ilog,shift,icmov,icmp"))
- 1 1)
-
-(define_function_unit "ev4_ibus1" 1 0
- (and (eq_attr "cpu" "ev4")
- (eq_attr "type" "ist,ibr,jsr,fadd,fcmov,fcpys,fmul,fdiv,misc"))
- 1 1)
-
-; Memory delivers its result in three cycles. Actually return one and
-; take care of this in adjust_cost, since we want to handle user-defined
-; memory latencies.
-(define_function_unit "ev4_abox" 1 0
- (and (eq_attr "cpu" "ev4")
- (eq_attr "type" "ild,fld,ldsym,ist,fst"))
- 1 1)
-
-; Branches have no delay cost, but do tie up the unit for two cycles.
-(define_function_unit "ev4_bbox" 1 1
- (and (eq_attr "cpu" "ev4")
- (eq_attr "type" "ibr,fbr,jsr"))
- 2 2)
-
-; Arithmetic insns are normally have their results available after
-; two cycles. There are a number of exceptions. They are encoded in
-; ADJUST_COST. Some of the other insns have similar exceptions.
-(define_function_unit "ev4_ebox" 1 0
- (and (eq_attr "cpu" "ev4")
- (eq_attr "type" "iadd,ilog,shift,icmov,icmp,misc"))
- 2 1)
-
-(define_function_unit "imul" 1 0
- (and (eq_attr "cpu" "ev4")
- (and (eq_attr "type" "imul")
- (eq_attr "opsize" "si")))
- 21 19)
-
-(define_function_unit "imul" 1 0
- (and (eq_attr "cpu" "ev4")
- (and (eq_attr "type" "imul")
- (eq_attr "opsize" "!si")))
- 23 21)
-
-(define_function_unit "ev4_fbox" 1 0
- (and (eq_attr "cpu" "ev4")
- (eq_attr "type" "fadd,fmul,fcpys,fcmov"))
- 6 1)
-
-(define_function_unit "fdiv" 1 0
- (and (eq_attr "cpu" "ev4")
- (and (eq_attr "type" "fdiv")
- (eq_attr "opsize" "si")))
- 34 30)
-
-(define_function_unit "fdiv" 1 0
- (and (eq_attr "cpu" "ev4")
- (and (eq_attr "type" "fdiv")
- (eq_attr "opsize" "di")))
- 63 59)
-\f
-;; EV5 scheduling. EV5 can issue 4 insns per clock.
-;;
-;; EV5 has two asymetric integer units. Model this with E0 & E1 along
-;; with the combined resource EBOX.
-
-(define_function_unit "ev5_ebox" 2 0
- (and (eq_attr "cpu" "ev5")
- (eq_attr "type" "!fbr,fcmov,fadd,fmul,fcpys,fdiv"))
- 1 1)
-
-; Memory takes at least 2 clocks. Return one from here and fix up with
-; user-defined latencies in adjust_cost.
-(define_function_unit "ev5_ebox" 2 0
- (and (eq_attr "cpu" "ev5")
- (eq_attr "type" "ild,fld,ldsym"))
- 1 1)
-
-; Loads can dual issue with one another, but loads and stores do not mix.
-(define_function_unit "ev5_e0" 1 0
- (and (eq_attr "cpu" "ev5")
- (eq_attr "type" "ild,fld,ldsym"))
- 1 1
- [(eq_attr "type" "ist,fst")])
-
-; Stores, shifts, multiplies can only issue to E0
-(define_function_unit "ev5_e0" 1 0
- (and (eq_attr "cpu" "ev5")
- (eq_attr "type" "ist,fst,shift,imul"))
- 1 1)
-
-; Motion video insns also issue only to E0, and take two ticks.
-(define_function_unit "ev5_e0" 1 0
- (and (eq_attr "cpu" "ev5")
- (eq_attr "type" "mvi"))
- 2 1)
-
-; Conditional moves always take 2 ticks.
-(define_function_unit "ev5_ebox" 2 0
- (and (eq_attr "cpu" "ev5")
- (eq_attr "type" "icmov"))
- 2 1)
-
-; Branches can only issue to E1
-(define_function_unit "ev5_e1" 1 0
- (and (eq_attr "cpu" "ev5")
- (eq_attr "type" "ibr,jsr"))
- 1 1)
-
-; Multiplies also use the integer multiplier.
-; ??? How to: "No instruction can be issued to pipe E0 exactly two
-; cycles before an integer multiplication completes."
-(define_function_unit "imul" 1 0
- (and (eq_attr "cpu" "ev5")
- (and (eq_attr "type" "imul")
- (eq_attr "opsize" "si")))
- 8 4)
-
-(define_function_unit "imul" 1 0
- (and (eq_attr "cpu" "ev5")
- (and (eq_attr "type" "imul")
- (eq_attr "opsize" "di")))
- 12 8)
-
-(define_function_unit "imul" 1 0
- (and (eq_attr "cpu" "ev5")
- (and (eq_attr "type" "imul")
- (eq_attr "opsize" "udi")))
- 14 8)
-
-;; Similarly for the FPU we have two asymetric units. But fcpys can issue
-;; on either so we have to play the game again.
-
-(define_function_unit "ev5_fbox" 2 0
- (and (eq_attr "cpu" "ev5")
- (eq_attr "type" "fadd,fcmov,fmul,fcpys,fbr,fdiv"))
- 4 1)
-
-(define_function_unit "ev5_fm" 1 0
- (and (eq_attr "cpu" "ev5")
- (eq_attr "type" "fmul"))
- 4 1)
-
-; Add and cmov as you would expect; fbr never produces a result;
-; fdiv issues through fa to the divider,
-(define_function_unit "ev5_fa" 1 0
- (and (eq_attr "cpu" "ev5")
- (eq_attr "type" "fadd,fcmov,fbr,fdiv"))
- 4 1)
-
-; ??? How to: "No instruction can be issued to pipe FA exactly five
-; cycles before a floating point divide completes."
-(define_function_unit "fdiv" 1 0
- (and (eq_attr "cpu" "ev5")
- (and (eq_attr "type" "fdiv")
- (eq_attr "opsize" "si")))
- 15 15) ; 15 to 31 data dependent
-
-(define_function_unit "fdiv" 1 0
- (and (eq_attr "cpu" "ev5")
- (and (eq_attr "type" "fdiv")
- (eq_attr "opsize" "di")))
- 22 22) ; 22 to 60 data dependent
-\f
-;; EV6 scheduling. EV6 can issue 4 insns per clock.
-;;
-;; EV6 has two symmetric pairs ("clusters") of two asymetric integer units
-;; ("upper" and "lower"), yielding pipe names U0, U1, L0, L1.
-
-;; Conditional moves decompose into two independent primitives, each
-;; taking one cycle. Since ev6 is out-of-order, we can't see anything
-;; but two cycles.
-(define_function_unit "ev6_ebox" 4 0
- (and (eq_attr "cpu" "ev6")
- (eq_attr "type" "icmov"))
- 2 1)
-
-(define_function_unit "ev6_ebox" 4 0
- (and (eq_attr "cpu" "ev6")
- (eq_attr "type" "!fbr,fcmov,fadd,fmul,fcpys,fdiv,fsqrt"))
- 1 1)
-
-;; Integer loads take at least 3 clocks, and only issue to lower units.
-;; Return one from here and fix up with user-defined latencies in adjust_cost.
-(define_function_unit "ev6_l" 2 0
- (and (eq_attr "cpu" "ev6")
- (eq_attr "type" "ild,ldsym,ist,fst"))
- 1 1)
-
-;; FP loads take at least 4 clocks. Return two from here...
-(define_function_unit "ev6_l" 2 0
- (and (eq_attr "cpu" "ev6")
- (eq_attr "type" "fld"))
- 2 1)
-
-;; Motion video insns also issue only to U0, and take three ticks.
-(define_function_unit "ev6_u0" 1 0
- (and (eq_attr "cpu" "ev6")
- (eq_attr "type" "mvi"))
- 3 1)
-
-(define_function_unit "ev6_u" 2 0
- (and (eq_attr "cpu" "ev6")
- (eq_attr "type" "mvi"))
- 3 1)
-
-;; Shifts issue to either upper pipe.
-(define_function_unit "ev6_u" 2 0
- (and (eq_attr "cpu" "ev6")
- (eq_attr "type" "shift"))
- 1 1)
-
-;; Multiplies issue only to U1, and all take 7 ticks.
-;; Rather than create a new function unit just for U1, reuse IMUL
-(define_function_unit "imul" 1 0
- (and (eq_attr "cpu" "ev6")
- (eq_attr "type" "imul"))
- 7 1)
-
-(define_function_unit "ev6_u" 2 0
- (and (eq_attr "cpu" "ev6")
- (eq_attr "type" "imul"))
- 7 1)
-
-;; Branches issue to either upper pipe
-(define_function_unit "ev6_u" 2 0
- (and (eq_attr "cpu" "ev6")
- (eq_attr "type" "ibr"))
- 3 1)
-
-;; Calls only issue to L0.
-(define_function_unit "ev6_l0" 1 0
- (and (eq_attr "cpu" "ev6")
- (eq_attr "type" "jsr"))
- 1 1)
-
-(define_function_unit "ev6_l" 2 0
- (and (eq_attr "cpu" "ev6")
- (eq_attr "type" "jsr"))
- 1 1)
-
-;; Ftoi/itof only issue to lower pipes
-(define_function_unit "ev6_l" 2 0
- (and (eq_attr "cpu" "ev6")
- (eq_attr "type" "ftoi"))
- 3 1)
-
-(define_function_unit "ev6_l" 2 0
- (and (eq_attr "cpu" "ev6")
- (eq_attr "type" "itof"))
- 4 1)
-
-;; For the FPU we are very similar to EV5, except there's no insn that
-;; can issue to fm & fa, so we get to leave that out.
-
-(define_function_unit "ev6_fm" 1 0
- (and (eq_attr "cpu" "ev6")
- (eq_attr "type" "fmul"))
- 4 1)
-
-(define_function_unit "ev6_fa" 1 0
- (and (eq_attr "cpu" "ev6")
- (eq_attr "type" "fadd,fcpys,fbr,fdiv,fsqrt"))
- 4 1)
-
-(define_function_unit "ev6_fa" 1 0
- (and (eq_attr "cpu" "ev6")
- (eq_attr "type" "fcmov"))
- 8 1)
-
-(define_function_unit "fdiv" 1 0
- (and (eq_attr "cpu" "ev6")
- (and (eq_attr "type" "fdiv")
- (eq_attr "opsize" "si")))
- 12 10)
-
-(define_function_unit "fdiv" 1 0
- (and (eq_attr "cpu" "ev6")
- (and (eq_attr "type" "fdiv")
- (eq_attr "opsize" "di")))
- 15 13)
-
-(define_function_unit "fsqrt" 1 0
- (and (eq_attr "cpu" "ev6")
- (and (eq_attr "type" "fsqrt")
- (eq_attr "opsize" "si")))
- 16 14)
-
-(define_function_unit "fsqrt" 1 0
- (and (eq_attr "cpu" "ev6")
- (and (eq_attr "type" "fsqrt")
- (eq_attr "opsize" "di")))
- 32 30)
-
-; ??? The FPU communicates with memory and the integer register file
-; via two fp store units. We need a slot in the fst immediately, and
-; a slot in LOW after the operand data is ready. At which point the
-; data may be moved either to the store queue or the integer register
-; file and the insn retired.
-
+;; Include scheduling descriptions.
+
+(include "ev4.md")
+(include "ev5.md")
+(include "ev6.md")
\f
;; First define the arithmetic insns. Note that the 32-bit forms also
;; sign-extend.
[(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
""
""
- [(set_attr "length" "0")])
+ [(set_attr "length" "0")
+ (set_attr "type" "none")])
(define_insn "jump"
[(set (pc)
--- /dev/null
+;; Scheduling description for Alpha EV4.
+;; Copyright (C) 2002 Free Software Foundation, Inc.
+;;
+;; This file is part of GNU CC.
+;;
+;; GNU CC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GNU CC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GNU CC; see the file COPYING. If not, write to
+;; the Free Software Foundation, 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
+
+; On EV4 there are two classes of resources to consider: resources needed
+; to issue, and resources needed to execute. IBUS[01] are in the first
+; category. ABOX, BBOX, EBOX, FBOX, IMUL & FDIV make up the second.
+; (There are a few other register-like resources, but ...)
+
+(define_automaton "ev4_0,ev4_1,ev4_2")
+(define_cpu_unit "ev4_ib0,ev4_ib1,ev4_abox,ev4_bbox" "ev4_0") ; issue/abox/bbox
+(define_cpu_unit "ev4_ebox,ev4_imul" "ev4_1") ; integer execution
+(define_cpu_unit "ev4_fbox,ev4_fdiv" "ev4_2") ; floating point execution
+(define_reservation "ev4_ib01" "ev4_ib0|ev4_ib1") ; either issue bus
+
+; Assume type "multi" single issues: it claims both issue buses at once.
+(define_insn_reservation "ev4_multi" 1
+ (and (eq_attr "cpu" "ev4")
+ (eq_attr "type" "multi"))
+ "ev4_ib0+ev4_ib1")
+
+; Loads from L0 complete in three cycles. adjust_cost still factors
+; in user-specified memory latency, so return 1 here.
+(define_insn_reservation "ev4_ld" 1
+ (and (eq_attr "cpu" "ev4")
+ (eq_attr "type" "ild,fld,ldsym"))
+ "ev4_ib01+ev4_abox")
+
+; Stores can issue before the data (but not address) is ready.
+(define_insn_reservation "ev4_ist" 1
+ (and (eq_attr "cpu" "ev4")
+ (eq_attr "type" "ist"))
+ "ev4_ib1+ev4_abox") ; integer stores issue on IB1
+
+(define_insn_reservation "ev4_fst" 1
+ (and (eq_attr "cpu" "ev4")
+ (eq_attr "type" "fst"))
+ "ev4_ib0+ev4_abox") ; FP stores issue on IB0
+
+; Branches have no delay cost, but do tie up the unit for two cycles.
+(define_insn_reservation "ev4_ibr" 2
+ (and (eq_attr "cpu" "ev4")
+ (eq_attr "type" "ibr,jsr"))
+ "ev4_ib1+ev4_bbox,ev4_bbox") ; integer branches and calls issue on IB1
+
+(define_insn_reservation "ev4_fbr" 2
+ (and (eq_attr "cpu" "ev4")
+ (eq_attr "type" "fbr"))
+ "ev4_ib0+ev4_bbox,ev4_bbox") ; FP branches issue on IB0
+
+; Arithmetic insns normally have their results available after
+; two cycles. There are a number of exceptions, modelled as bypasses.
+
+(define_insn_reservation "ev4_iaddlog" 2
+ (and (eq_attr "cpu" "ev4")
+ (eq_attr "type" "iadd,ilog"))
+ "ev4_ib0+ev4_ebox")
+
+(define_bypass 1 ; add/logical results reach these consumers after one cycle
+ "ev4_iaddlog"
+ "ev4_ibr,ev4_iaddlog,ev4_shiftcm,ev4_icmp,ev4_imulsi,ev4_imuldi")
+
+(define_insn_reservation "ev4_shiftcm" 2
+ (and (eq_attr "cpu" "ev4")
+ (eq_attr "type" "shift,icmov"))
+ "ev4_ib0+ev4_ebox")
+
+(define_insn_reservation "ev4_icmp" 2
+ (and (eq_attr "cpu" "ev4")
+ (eq_attr "type" "icmp"))
+ "ev4_ib0+ev4_ebox")
+
+(define_bypass 1 "ev4_icmp" "ev4_ibr") ; compare feeding a branch or call
+
+(define_bypass 0 ; result is the data of an integer store
+ "ev4_iaddlog,ev4_shiftcm,ev4_icmp"
+ "ev4_ist"
+ "store_data_bypass_p")
+
+; Multiplies use a non-pipelined imul unit. Also, "no [ebox] insn can
+; be issued exactly three cycles before an integer multiply completes".
+
+(define_insn_reservation "ev4_imulsi" 21
+ (and (eq_attr "cpu" "ev4")
+ (and (eq_attr "type" "imul")
+ (eq_attr "opsize" "si")))
+ "ev4_ib0+ev4_imul,ev4_imul*18,ev4_ebox") ; ebox is claimed in the last cycle
+
+(define_bypass 20 "ev4_imulsi" "ev4_ist" "store_data_bypass_p") ; store data
+
+(define_insn_reservation "ev4_imuldi" 23
+ (and (eq_attr "cpu" "ev4")
+ (and (eq_attr "type" "imul")
+ (eq_attr "opsize" "!si")))
+ "ev4_ib0+ev4_imul,ev4_imul*20,ev4_ebox") ; ebox is claimed in the last cycle
+
+(define_bypass 22 "ev4_imuldi" "ev4_ist" "store_data_bypass_p") ; store data
+
+; Most FP insns have a 6 cycle latency, but with a 4 cycle bypass back in.
+(define_insn_reservation "ev4_fpop" 6
+ (and (eq_attr "cpu" "ev4")
+ (eq_attr "type" "fadd,fmul,fcpys,fcmov"))
+ "ev4_ib1+ev4_fbox")
+
+(define_bypass 4 "ev4_fpop" "ev4_fpop") ; FP op feeding another FP op
+
+; The floating point divider is not pipelined. Also, "no FPOP insn can be
+; issued exactly five or exactly six cycles before an fdiv insn completes".
+
+(define_insn_reservation "ev4_fdivsf" 34
+ (and (eq_attr "cpu" "ev4")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "opsize" "si")))
+ "ev4_ib1+ev4_fdiv,ev4_fdiv*28,ev4_fdiv+ev4_fbox,ev4_fbox") ; fbox held at end
+
+(define_insn_reservation "ev4_fdivdf" 63
+ (and (eq_attr "cpu" "ev4")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "opsize" "di")))
+ "ev4_ib1+ev4_fdiv,ev4_fdiv*57,ev4_fdiv+ev4_fbox,ev4_fbox") ; fbox held at end
+
+; Traps don't consume or produce data.
+(define_insn_reservation "ev4_misc" 1
+ (and (eq_attr "cpu" "ev4")
+ (eq_attr "type" "misc"))
+ "ev4_ib1")
--- /dev/null
+;; Scheduling description for Alpha EV5.
+;; Copyright (C) 2002 Free Software Foundation, Inc.
+;;
+;; This file is part of GNU CC.
+;;
+;; GNU CC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GNU CC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GNU CC; see the file COPYING. If not, write to
+;; the Free Software Foundation, 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
+
+;; EV5 has two asymmetric integer units, E0 and E1, plus separate
+;; FP add and multiply units.
+
+(define_automaton "ev5_0,ev5_1")
+(define_cpu_unit "ev5_e0,ev5_e1,ev5_fa,ev5_fm" "ev5_0")
+(define_reservation "ev5_e01" "ev5_e0|ev5_e1") ; either integer unit
+(define_reservation "ev5_fam" "ev5_fa|ev5_fm") ; either FP unit
+(define_cpu_unit "ev5_imul" "ev5_0")
+(define_cpu_unit "ev5_fdiv" "ev5_1") ; the divider is alone in ev5_1
+
+; Assume type "multi" single issues: it claims E0, E1, FA and FM together.
+(define_insn_reservation "ev5_multi" 1
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "multi"))
+ "ev5_e0+ev5_e1+ev5_fa+ev5_fm")
+
+; Stores can only issue to E0, and may not issue with loads.
+; Model this with some fake units.
+
+(define_cpu_unit "ev5_l0,ev5_l1,ev5_st" "ev5_0")
+(define_reservation "ev5_ld" "ev5_l0|ev5_l1") ; either fake load unit
+(exclusion_set "ev5_l0,ev5_l1" "ev5_st") ; loads and stores do not mix
+
+(define_insn_reservation "ev5_st" 1
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "ist,fst"))
+ "ev5_e0+ev5_st")
+
+; Loads from L0 complete in two cycles. adjust_cost still factors
+; in user-specified memory latency, so return 1 here.
+(define_insn_reservation "ev5_ld" 1
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "ild,fld,ldsym"))
+ "ev5_e01+ev5_ld")
+
+; Integer branches slot only to E1.
+(define_insn_reservation "ev5_ibr" 1
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "ibr"))
+ "ev5_e1")
+
+(define_insn_reservation "ev5_jsr" 1
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "jsr"))
+ "ev5_e1") ; calls issue to E1 as well
+
+(define_insn_reservation "ev5_shiftmvi" 2
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "shift,mvi"))
+ "ev5_e0") ; shifts and motion video insns issue only to E0
+
+(define_insn_reservation "ev5_cmov" 2
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "icmov"))
+ "ev5_e01") ; either integer unit, two cycle latency
+
+(define_insn_reservation "ev5_iadd" 1
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "iadd"))
+ "ev5_e01")
+
+(define_insn_reservation "ev5_ilogcmp" 1
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "ilog,icmp"))
+ "ev5_e01")
+
+; Conditional move and branch can issue the same cycle as the test.
+(define_bypass 0 "ev5_ilogcmp" "ev5_ibr,ev5_cmov" "if_test_bypass_p")
+
+; Multiplies use a non-pipelined imul unit. Also, "no insn can be issued
+; to E0 exactly two cycles before an integer multiply completes".
+
+(define_insn_reservation "ev5_imull" 8
+ (and (eq_attr "cpu" "ev5")
+ (and (eq_attr "type" "imul")
+ (eq_attr "opsize" "si")))
+ "ev5_e0+ev5_imul,ev5_imul*3,nothing,ev5_e0") ; late E0 reservation
+
+(define_insn_reservation "ev5_imulq" 12
+ (and (eq_attr "cpu" "ev5")
+ (and (eq_attr "type" "imul")
+ (eq_attr "opsize" "di")))
+ "ev5_e0+ev5_imul,ev5_imul*7,nothing,ev5_e0") ; late E0 reservation
+
+(define_insn_reservation "ev5_imulh" 14
+ (and (eq_attr "cpu" "ev5")
+ (and (eq_attr "type" "imul")
+ (eq_attr "opsize" "udi")))
+ "ev5_e0+ev5_imul,ev5_imul*7,nothing*3,ev5_e0") ; late E0 reservation
+
+; The multiplier is unable to receive data from Ebox bypass paths. The
+; instruction issues at the expected time, but its latency is increased
+; by the time it takes for the input data to become available to the
+; multiplier. For example, an IMULL instruction issued one cycle later
+; than an ADDL instruction, which produced one of its operands, has a
+; latency of 10 (8 + 2). If the IMULL instruction is issued two cycles
+; later than the ADDL instruction, the latency is 9 (8 + 1).
+;
+; Model this instead with increased latency on the input instruction.
+
+(define_bypass 3 ; latency of these producers as seen by a multiply
+ "ev5_ld,ev5_shiftmvi,ev5_cmov,ev5_iadd,ev5_ilogcmp"
+ "ev5_imull,ev5_imulq,ev5_imulh")
+
+(define_bypass 9 "ev5_imull" "ev5_imull,ev5_imulq,ev5_imulh") ; 8 + 1
+(define_bypass 13 "ev5_imulq" "ev5_imull,ev5_imulq,ev5_imulh") ; 12 + 1
+(define_bypass 15 "ev5_imulh" "ev5_imull,ev5_imulq,ev5_imulh") ; 14 + 1
+
+; Similarly for the FPU we have two asymmetric units.
+
+(define_insn_reservation "ev5_fadd" 4
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "fadd,fcmov"))
+ "ev5_fa")
+
+(define_insn_reservation "ev5_fbr" 1
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "fbr"))
+ "ev5_fa")
+
+(define_insn_reservation "ev5_fcpys" 4
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "fcpys"))
+ "ev5_fam") ; fcpys can issue on either FP unit
+
+(define_insn_reservation "ev5_fmul" 4
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "fmul"))
+ "ev5_fm")
+
+; The floating point divider is not pipelined. Also, "no insn can be issued
+; to FA exactly five cycles before an fdiv insn completes".
+;
+; ??? Do not model this late reservation due to the enormously increased
+; size of the resulting DFA.
+;
+; ??? Putting ev5_fa and ev5_fdiv alone into the same automaton produces
+; a DFA of acceptable size, but putting ev5_fm and ev5_fa into separate
+; automata produces incorrect results for insns that can choose one or
+; the other, i.e. ev5_fcpys.
+
+(define_insn_reservation "ev5_fdivsf" 15
+ (and (eq_attr "cpu" "ev5")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "opsize" "si")))
+ ; Unused precise form: "ev5_fa+ev5_fdiv,ev5_fdiv*9,ev5_fa+ev5_fdiv,ev5_fdiv*4"
+ "ev5_fa+ev5_fdiv,ev5_fdiv*14")
+
+(define_insn_reservation "ev5_fdivdf" 22
+ (and (eq_attr "cpu" "ev5")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "opsize" "di")))
+ ; Unused precise form: "ev5_fa+ev5_fdiv,ev5_fdiv*17,ev5_fa+ev5_fdiv,ev5_fdiv*4"
+ "ev5_fa+ev5_fdiv,ev5_fdiv*21")
+
+; Traps don't consume or produce data; rpcc is latency 2 if we ever add it.
+(define_insn_reservation "ev5_misc" 2
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "misc"))
+ "ev5_e0")
--- /dev/null
+;; Scheduling description for Alpha EV6.
+;; Copyright (C) 2002 Free Software Foundation, Inc.
+;;
+;; This file is part of GNU CC.
+;;
+;; GNU CC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GNU CC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GNU CC; see the file COPYING. If not, write to
+;; the Free Software Foundation, 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
+
+; EV6 can issue 4 insns per clock. It's out-of-order, so this isn't
+; expected to help over-much, but a precise description can be important
+; for software pipelining.
+;
+; EV6 has two symmetric pairs ("clusters") of two asymmetric integer
+; units ("upper" and "lower"), yielding pipe names U0, U1, L0, L1.
+;
+; ??? The clusters have independent register files that are re-synced
+; every cycle. Thus there is one additional cycle of latency between
+; insns issued on different clusters. Possibly model that by duplicating
+; all EBOX insn_reservations that can issue to either cluster, increasing
+; all latencies by one, and adding bypasses within the cluster.
+;
+; ??? In addition, instruction order affects cluster issue.
+
+(define_automaton "ev6_0,ev6_1")
+(define_cpu_unit "ev6_u0,ev6_u1,ev6_l0,ev6_l1" "ev6_0")
+(define_reservation "ev6_u" "ev6_u0|ev6_u1") ; either upper unit
+(define_reservation "ev6_l" "ev6_l0|ev6_l1") ; either lower unit
+(define_reservation "ev6_ebox" "ev6_u|ev6_l") ; any of the four integer units
+
+(define_cpu_unit "ev6_fa" "ev6_1") ; FA is alone in automaton ev6_1
+(define_cpu_unit "ev6_fm,ev6_fst0,ev6_fst1" "ev6_0")
+(define_reservation "ev6_fst" "ev6_fst0|ev6_fst1") ; either fp store unit
+
+; Assume type "multi" single issues: it claims every unit declared above.
+(define_insn_reservation "ev6_multi" 1
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "multi"))
+ "ev6_u0+ev6_u1+ev6_l0+ev6_l1+ev6_fa+ev6_fm+ev6_fst0+ev6_fst1")
+
+; Integer loads take at least 3 clocks, and only issue to lower units.
+; adjust_cost still factors in user-specified memory latency, so return 1 here.
+(define_insn_reservation "ev6_ild" 1
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "ild,ldsym"))
+ "ev6_l")
+
+(define_insn_reservation "ev6_ist" 1
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "ist"))
+ "ev6_l") ; integer stores also use a lower unit
+
+; FP loads take at least 4 clocks. adjust_cost still factors
+; in user-specified memory latency, so return 2 here.
+(define_insn_reservation "ev6_fld" 2
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "fld"))
+ "ev6_l")
+
+; The FPU communicates with memory and the integer register file
+; via two fp store units. We need a slot in the fst immediately, and
+; a slot in LOW after the operand data is ready. At which point the
+; data may be moved either to the store queue or the integer register
+; file and the insn retired.
+
+(define_insn_reservation "ev6_fst" 3
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "fst"))
+ "ev6_fst,nothing,ev6_l") ; fst slot now, lower unit two cycles later
+
+; Arithmetic goes anywhere.
+(define_insn_reservation "ev6_arith" 1
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "iadd,ilog,icmp"))
+ "ev6_ebox")
+
+; Motion video insns also issue only to U0, and take three ticks.
+(define_insn_reservation "ev6_mvi" 3
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "mvi"))
+ "ev6_u0")
+
+; Shifts issue to upper units.
+(define_insn_reservation "ev6_shift" 1
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "shift"))
+ "ev6_u")
+
+; Multiplies issue only to U1, and all take 7 ticks.
+(define_insn_reservation "ev6_imul" 7
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "imul"))
+ "ev6_u1")
+
+; Conditional moves decompose into two independent primitives, each taking
+; one cycle. Since ev6 is out-of-order, we can't see anything but two cycles.
+(define_insn_reservation "ev6_icmov" 2
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "icmov"))
+ "ev6_ebox,ev6_ebox") ; one integer unit in each of two cycles
+
+; Integer branches issue to upper units
+(define_insn_reservation "ev6_ibr" 1
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "ibr"))
+ "ev6_u")
+
+; Calls only issue to L0.
+(define_insn_reservation "ev6_jsr" 1
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "jsr"))
+ "ev6_l0")
+
+; Ftoi/itof only issue to lower pipes.
+(define_insn_reservation "ev6_itof" 3
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "itof"))
+ "ev6_l")
+
+(define_insn_reservation "ev6_ftoi" 3
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "ftoi"))
+ "ev6_fst,nothing,ev6_l") ; same reservation shape as ev6_fst
+
+(define_insn_reservation "ev6_fmul" 4 ; FP multiplies use FM
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "fmul"))
+ "ev6_fm")
+
+(define_insn_reservation "ev6_fadd" 4 ; adds, copies and FP branches use FA
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "fadd,fcpys,fbr"))
+ "ev6_fa")
+
+(define_insn_reservation "ev6_fcmov" 8
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "fcmov"))
+ "ev6_fa,nothing*3,ev6_fa") ; two FA slots, four cycles apart
+
+(define_insn_reservation "ev6_fdivsf" 12
+ (and (eq_attr "cpu" "ev6")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "opsize" "si")))
+ "ev6_fa*9") ; FA is held for all but the last 3 cycles of latency
+
+(define_insn_reservation "ev6_fdivdf" 15
+ (and (eq_attr "cpu" "ev6")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "opsize" "di")))
+ "ev6_fa*12") ; FA is held for all but the last 3 cycles of latency
+
+(define_insn_reservation "ev6_sqrtsf" 18
+ (and (eq_attr "cpu" "ev6")
+ (and (eq_attr "type" "fsqrt")
+ (eq_attr "opsize" "si")))
+ "ev6_fa*15") ; FA is held for all but the last 3 cycles of latency
+
+(define_insn_reservation "ev6_sqrtdf" 33
+ (and (eq_attr "cpu" "ev6")
+ (and (eq_attr "type" "fsqrt")
+ (eq_attr "opsize" "di")))
+ "ev6_fa*30") ; FA is held for all but the last 3 cycles of latency