two dfa problems
Richard Henderson
rth@redhat.com
Fri May 3 15:39:00 GMT 2002
Both of these issues show up with -mcpu=ev5, with the patch attached,
on the Fortran test case also attached.
(1) Exponential behaviour in max_issue.
While compiling (iirc) iqtest_, we have 18 instructions ready at
the beginning of the block, which as far as I can see causes us
to try 2**18 alternatives. Certainly the compiler hangs for
longer than I cared to wait.
I think the algorithm here is wrong, but I don't know how to fix
it exactly. The hack attached prevents the exponential runaway,
but I think we should not be trying alternatives with instructions
of the same type. That is, once we've tried an instruction of
type TYPE_IADD at position 0, we should not try a different insn
of type TYPE_IADD there, since it won't help produce a better
slotting.
(2) Too many instructions issued per cycle.
The .28.sched2 dump contains for MAIN__
16--> 1144 $29=unspec[$29,0x3e] 10 :ev5_e01
16--> 34 $16=$9 :ev5_e01
16--> 38 $18=0x18 :ev5_e01
16--> 40 $19=0x18 :ev5_e01
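which has overcommitted both E0 and E1: all four insns issued in
cycle 16 reserve ev5_e01, but only two of them can go to E0/E1 in one
cycle. The issue rate is set to 4 because we can issue to E0,E1,FA,FM
in one cycle, but that is not the case here. I'm not sure what is going on.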
r~
-------------- next part --------------
A non-text attachment was scrubbed...
Name: lfk537m.f.gz
Type: application/x-gzip
Size: 84788 bytes
Desc: not available
URL: <http://gcc.gnu.org/pipermail/gcc-bugs/attachments/20020503/d9916bfc/attachment.bin>
-------------- next part --------------
Index: haifa-sched.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/haifa-sched.c,v
retrieving revision 1.197
diff -c -p -d -r1.197 haifa-sched.c
*** haifa-sched.c 3 May 2002 00:05:50 -0000 1.197
--- haifa-sched.c 3 May 2002 22:25:06 -0000
*************** static rtx move_insn PARAMS ((rtx, rtx))
*** 371,377 ****
on the first cycle. It is used only for DFA based scheduler. */
static rtx ready_element PARAMS ((struct ready_list *, int));
static rtx ready_remove PARAMS ((struct ready_list *, int));
! static int max_issue PARAMS ((struct ready_list *, state_t, int *));
static rtx choose_ready PARAMS ((struct ready_list *));
--- 371,377 ----
on the first cycle. It is used only for DFA based scheduler. */
static rtx ready_element PARAMS ((struct ready_list *, int));
static rtx ready_remove PARAMS ((struct ready_list *, int));
! static int max_issue PARAMS ((struct ready_list *, state_t, int *, int));
static rtx choose_ready PARAMS ((struct ready_list *));
*************** move_insn (insn, last)
*** 1807,1821 ****
first cycle multipass scheduling. */
static int
! max_issue (ready, state, index)
struct ready_list *ready;
state_t state;
int *index;
{
int i, best, n, temp_index, delay;
state_t temp_state;
rtx insn;
- int max_lookahead = (*targetm.sched.first_cycle_multipass_dfa_lookahead) ();
if (state_dead_lock_p (state))
return 0;
--- 1807,1821 ----
first cycle multipass scheduling. */
static int
! max_issue (ready, state, index, max_lookahead)
struct ready_list *ready;
state_t state;
int *index;
+ int max_lookahead;
{
int i, best, n, temp_index, delay;
state_t temp_state;
rtx insn;
if (state_dead_lock_p (state))
return 0;
*************** max_issue (ready, state, index)
*** 1858,1871 ****
else if (delay > 0)
continue;
! --max_lookahead;
!
! if (max_lookahead < 0)
break;
ready_try [i] = 1;
! n = max_issue (ready, temp_state, &temp_index);
if (n > 0 || ready_try[0])
n += 1;
--- 1858,1869 ----
else if (delay > 0)
continue;
! if (--max_lookahead < 0)
break;
ready_try [i] = 1;
! n = max_issue (ready, temp_state, &temp_index, max_lookahead);
if (n > 0 || ready_try[0])
n += 1;
*************** static rtx
*** 1888,1906 ****
choose_ready (ready)
struct ready_list *ready;
{
! if (!targetm.sched.first_cycle_multipass_dfa_lookahead
! || (*targetm.sched.first_cycle_multipass_dfa_lookahead) () <= 0)
! return ready_remove_first (ready);
else
! {
! /* Try to choose the better insn. */
! int index;
! if (max_issue (ready, curr_state, &index) == 0)
! return ready_remove_first (ready);
! else
! return ready_remove (ready, index);
! }
}
/* Called from backends from targetm.sched.reorder to emit stuff into
--- 1886,1903 ----
choose_ready (ready)
struct ready_list *ready;
{
! int max_lookahead, index;
!
! if (!targetm.sched.first_cycle_multipass_dfa_lookahead)
! max_lookahead = 0;
else
! max_lookahead = (*targetm.sched.first_cycle_multipass_dfa_lookahead) ();
! if (max_lookahead <= 0
! || max_issue (ready, curr_state, &index, max_lookahead) == 0)
! return ready_remove_first (ready);
! else
! return ready_remove (ready, index);
}
/* Called from backends from targetm.sched.reorder to emit stuff into
Index: config/alpha/alpha.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/alpha/alpha.c,v
retrieving revision 1.245
diff -c -p -d -r1.245 alpha.c
*** config/alpha/alpha.c 10 Apr 2002 05:22:32 -0000 1.245
--- config/alpha/alpha.c 3 May 2002 22:25:06 -0000
*************** static int alpha_adjust_cost
*** 157,164 ****
PARAMS ((rtx, rtx, rtx, int));
static int alpha_issue_rate
PARAMS ((void));
! static int alpha_variable_issue
! PARAMS ((FILE *, int, rtx, int));
#if TARGET_ABI_UNICOSMK
static void alpha_init_machine_status
--- 157,166 ----
PARAMS ((rtx, rtx, rtx, int));
static int alpha_issue_rate
PARAMS ((void));
! static int alpha_use_dfa_pipeline_interface
! PARAMS ((void));
! static int alpha_multipass_dfa_lookahead
! PARAMS ((void));
#if TARGET_ABI_UNICOSMK
static void alpha_init_machine_status
*************** static unsigned int unicosmk_section_typ
*** 231,238 ****
#define TARGET_SCHED_ADJUST_COST alpha_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE alpha_issue_rate
! #undef TARGET_SCHED_VARIABLE_ISSUE
! #define TARGET_SCHED_VARIABLE_ISSUE alpha_variable_issue
struct gcc_target targetm = TARGET_INITIALIZER;
--- 233,244 ----
#define TARGET_SCHED_ADJUST_COST alpha_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE alpha_issue_rate
! #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
! #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
! alpha_use_dfa_pipeline_interface
! #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
! #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
! alpha_multipass_dfa_lookahead
struct gcc_target targetm = TARGET_INITIALIZER;
*************** alpha_adjust_cost (insn, link, dep_insn,
*** 4822,4836 ****
rtx dep_insn;
int cost;
{
- rtx set, set_src;
enum attr_type insn_type, dep_insn_type;
/* If the dependence is an anti-dependence, there is no cost. For an
output dependence, there is sometimes a cost, but it doesn't seem
worth handling those few cases. */
-
if (REG_NOTE_KIND (link) != 0)
! return 0;
/* If we can't recognize the insns, we can't really do anything. */
if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
--- 4828,4840 ----
rtx dep_insn;
int cost;
{
enum attr_type insn_type, dep_insn_type;
/* If the dependence is an anti-dependence, there is no cost. For an
output dependence, there is sometimes a cost, but it doesn't seem
worth handling those few cases. */
if (REG_NOTE_KIND (link) != 0)
! return cost;
/* If we can't recognize the insns, we can't really do anything. */
if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
*************** alpha_adjust_cost (insn, link, dep_insn,
*** 4845,4966 ****
|| dep_insn_type == TYPE_LDSYM)
cost += alpha_memory_latency-1;
! switch (alpha_cpu)
! {
! case PROCESSOR_EV4:
! /* On EV4, if INSN is a store insn and DEP_INSN is setting the data
! being stored, we can sometimes lower the cost. */
!
! if ((insn_type == TYPE_IST || insn_type == TYPE_FST)
! && (set = single_set (dep_insn)) != 0
! && GET_CODE (PATTERN (insn)) == SET
! && rtx_equal_p (SET_DEST (set), SET_SRC (PATTERN (insn))))
! {
! switch (dep_insn_type)
! {
! case TYPE_ILD:
! case TYPE_FLD:
! /* No savings here. */
! return cost;
!
! case TYPE_IMUL:
! /* In these cases, we save one cycle. */
! return cost - 1;
!
! default:
! /* In all other cases, we save two cycles. */
! return MAX (0, cost - 2);
! }
! }
!
! /* Another case that needs adjustment is an arithmetic or logical
! operation. It's cost is usually one cycle, but we default it to
! two in the MD file. The only case that it is actually two is
! for the address in loads, stores, and jumps. */
!
! if (dep_insn_type == TYPE_IADD || dep_insn_type == TYPE_ILOG)
! {
! switch (insn_type)
! {
! case TYPE_ILD:
! case TYPE_IST:
! case TYPE_FLD:
! case TYPE_FST:
! case TYPE_JSR:
! return cost;
! default:
! return 1;
! }
! }
!
! /* The final case is when a compare feeds into an integer branch;
! the cost is only one cycle in that case. */
!
! if (dep_insn_type == TYPE_ICMP && insn_type == TYPE_IBR)
! return 1;
! break;
!
! case PROCESSOR_EV5:
! /* And the lord DEC saith: "A special bypass provides an effective
! latency of 0 cycles for an ICMP or ILOG insn producing the test
! operand of an IBR or ICMOV insn." */
!
! if ((dep_insn_type == TYPE_ICMP || dep_insn_type == TYPE_ILOG)
! && (set = single_set (dep_insn)) != 0)
! {
! /* A branch only has one input. This must be it. */
! if (insn_type == TYPE_IBR)
! return 0;
! /* A conditional move has three, make sure it is the test. */
! if (insn_type == TYPE_ICMOV
! && GET_CODE (set_src = PATTERN (insn)) == SET
! && GET_CODE (set_src = SET_SRC (set_src)) == IF_THEN_ELSE
! && rtx_equal_p (SET_DEST (set), XEXP (set_src, 0)))
! return 0;
! }
!
! /* "The multiplier is unable to receive data from IEU bypass paths.
! The instruction issues at the expected time, but its latency is
! increased by the time it takes for the input data to become
! available to the multiplier" -- which happens in pipeline stage
! six, when results are comitted to the register file. */
!
! if (insn_type == TYPE_IMUL)
! {
! switch (dep_insn_type)
! {
! /* These insns produce their results in pipeline stage five. */
! case TYPE_ILD:
! case TYPE_ICMOV:
! case TYPE_IMUL:
! case TYPE_MVI:
! return cost + 1;
!
! /* Other integer insns produce results in pipeline stage four. */
! default:
! return cost + 2;
! }
! }
! break;
!
! case PROCESSOR_EV6:
! /* There is additional latency to move the result of (most) FP
! operations anywhere but the FP register file. */
!
! if ((insn_type == TYPE_FST || insn_type == TYPE_FTOI)
! && (dep_insn_type == TYPE_FADD ||
! dep_insn_type == TYPE_FMUL ||
! dep_insn_type == TYPE_FCMOV))
! return cost + 2;
!
! break;
! }
- /* Otherwise, return the default cost. */
return cost;
}
! /* Function to initialize the issue rate used by the scheduler. */
static int
alpha_issue_rate ()
{
--- 4849,4861 ----
|| dep_insn_type == TYPE_LDSYM)
cost += alpha_memory_latency-1;
! /* Everything else handled in DFA bypasses now. */
return cost;
}
! /* The number of instructions that can be issued per cycle. */
!
static int
alpha_issue_rate ()
{
*************** alpha_issue_rate ()
*** 4968,4985 ****
}
static int
! alpha_variable_issue (dump, verbose, insn, cim)
! FILE *dump ATTRIBUTE_UNUSED;
! int verbose ATTRIBUTE_UNUSED;
! rtx insn;
! int cim;
{
! if (recog_memoized (insn) < 0 || get_attr_type (insn) == TYPE_MULTI)
! return 0;
!
! return cim - 1;
}
/* Register global variables and machine-specific functions with the
garbage collector. */
--- 4863,4886 ----
}
static int
! alpha_use_dfa_pipeline_interface ()
{
! return 1;
}
+ /* How many alternative schedules to try. This should be as wide as the
+ scheduling freedom in the DFA, but no wider. Making this value too
+ large results in exponential behaviour in the scheduler.
+
+ For EV4, loads can be issued to either IB0 or IB1, thus we have 2
+ alternative schedules. For EV5, we can choose between E0/E1 and
+ FA/FM. For EV6, an arithmetic insn can be issued to U0/U1/L0/L1. */
+
+ static int
+ alpha_multipass_dfa_lookahead ()
+ {
+ return (alpha_cpu == PROCESSOR_EV6 ? 4 : 2);
+ }
/* Register global variables and machine-specific functions with the
garbage collector. */
Index: config/alpha/alpha.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/alpha/alpha.md,v
retrieving revision 1.186
diff -c -p -d -r1.186 alpha.md
*** config/alpha/alpha.md 10 Apr 2002 05:22:33 -0000 1.186
--- config/alpha/alpha.md 3 May 2002 22:25:07 -0000
***************
*** 78,84 ****
(define_attr "type"
"ild,fld,ldsym,ist,fst,ibr,fbr,jsr,iadd,ilog,shift,icmov,fcmov,icmp,imul,\
! fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi"
(const_string "iadd"))
;; Describe a user's asm statement.
--- 78,84 ----
(define_attr "type"
"ild,fld,ldsym,ist,fst,ibr,fbr,jsr,iadd,ilog,shift,icmov,fcmov,icmp,imul,\
! fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi,none"
(const_string "iadd"))
;; Describe a user's asm statement.
*************** fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi
*** 123,435 ****
(define_attr "length" ""
(const_int 4))
! ;; On EV4 there are two classes of resources to consider: resources needed
! ;; to issue, and resources needed to execute. IBUS[01] are in the first
! ;; category. ABOX, BBOX, EBOX, FBOX, IMUL & FDIV make up the second.
! ;; (There are a few other register-like resources, but ...)
!
! ; First, describe all of the issue constraints with single cycle delays.
! ; All insns need a bus, but all except loads require one or the other.
! (define_function_unit "ev4_ibus0" 1 0
! (and (eq_attr "cpu" "ev4")
! (eq_attr "type" "fst,fbr,iadd,imul,ilog,shift,icmov,icmp"))
! 1 1)
!
! (define_function_unit "ev4_ibus1" 1 0
! (and (eq_attr "cpu" "ev4")
! (eq_attr "type" "ist,ibr,jsr,fadd,fcmov,fcpys,fmul,fdiv,misc"))
! 1 1)
!
! ; Memory delivers its result in three cycles. Actually return one and
! ; take care of this in adjust_cost, since we want to handle user-defined
! ; memory latencies.
! (define_function_unit "ev4_abox" 1 0
! (and (eq_attr "cpu" "ev4")
! (eq_attr "type" "ild,fld,ldsym,ist,fst"))
! 1 1)
!
! ; Branches have no delay cost, but do tie up the unit for two cycles.
! (define_function_unit "ev4_bbox" 1 1
! (and (eq_attr "cpu" "ev4")
! (eq_attr "type" "ibr,fbr,jsr"))
! 2 2)
!
! ; Arithmetic insns are normally have their results available after
! ; two cycles. There are a number of exceptions. They are encoded in
! ; ADJUST_COST. Some of the other insns have similar exceptions.
! (define_function_unit "ev4_ebox" 1 0
! (and (eq_attr "cpu" "ev4")
! (eq_attr "type" "iadd,ilog,shift,icmov,icmp,misc"))
! 2 1)
!
! (define_function_unit "imul" 1 0
! (and (eq_attr "cpu" "ev4")
! (and (eq_attr "type" "imul")
! (eq_attr "opsize" "si")))
! 21 19)
!
! (define_function_unit "imul" 1 0
! (and (eq_attr "cpu" "ev4")
! (and (eq_attr "type" "imul")
! (eq_attr "opsize" "!si")))
! 23 21)
!
! (define_function_unit "ev4_fbox" 1 0
! (and (eq_attr "cpu" "ev4")
! (eq_attr "type" "fadd,fmul,fcpys,fcmov"))
! 6 1)
!
! (define_function_unit "fdiv" 1 0
! (and (eq_attr "cpu" "ev4")
! (and (eq_attr "type" "fdiv")
! (eq_attr "opsize" "si")))
! 34 30)
!
! (define_function_unit "fdiv" 1 0
! (and (eq_attr "cpu" "ev4")
! (and (eq_attr "type" "fdiv")
! (eq_attr "opsize" "di")))
! 63 59)
!
! ;; EV5 scheduling. EV5 can issue 4 insns per clock.
! ;;
! ;; EV5 has two asymetric integer units. Model this with E0 & E1 along
! ;; with the combined resource EBOX.
!
! (define_function_unit "ev5_ebox" 2 0
! (and (eq_attr "cpu" "ev5")
! (eq_attr "type" "!fbr,fcmov,fadd,fmul,fcpys,fdiv"))
! 1 1)
!
! ; Memory takes at least 2 clocks. Return one from here and fix up with
! ; user-defined latencies in adjust_cost.
! (define_function_unit "ev5_ebox" 2 0
! (and (eq_attr "cpu" "ev5")
! (eq_attr "type" "ild,fld,ldsym"))
! 1 1)
!
! ; Loads can dual issue with one another, but loads and stores do not mix.
! (define_function_unit "ev5_e0" 1 0
! (and (eq_attr "cpu" "ev5")
! (eq_attr "type" "ild,fld,ldsym"))
! 1 1
! [(eq_attr "type" "ist,fst")])
!
! ; Stores, shifts, multiplies can only issue to E0
! (define_function_unit "ev5_e0" 1 0
! (and (eq_attr "cpu" "ev5")
! (eq_attr "type" "ist,fst,shift,imul"))
! 1 1)
!
! ; Motion video insns also issue only to E0, and take two ticks.
! (define_function_unit "ev5_e0" 1 0
! (and (eq_attr "cpu" "ev5")
! (eq_attr "type" "mvi"))
! 2 1)
!
! ; Conditional moves always take 2 ticks.
! (define_function_unit "ev5_ebox" 2 0
! (and (eq_attr "cpu" "ev5")
! (eq_attr "type" "icmov"))
! 2 1)
!
! ; Branches can only issue to E1
! (define_function_unit "ev5_e1" 1 0
! (and (eq_attr "cpu" "ev5")
! (eq_attr "type" "ibr,jsr"))
! 1 1)
!
! ; Multiplies also use the integer multiplier.
! ; ??? How to: "No instruction can be issued to pipe E0 exactly two
! ; cycles before an integer multiplication completes."
! (define_function_unit "imul" 1 0
! (and (eq_attr "cpu" "ev5")
! (and (eq_attr "type" "imul")
! (eq_attr "opsize" "si")))
! 8 4)
!
! (define_function_unit "imul" 1 0
! (and (eq_attr "cpu" "ev5")
! (and (eq_attr "type" "imul")
! (eq_attr "opsize" "di")))
! 12 8)
!
! (define_function_unit "imul" 1 0
! (and (eq_attr "cpu" "ev5")
! (and (eq_attr "type" "imul")
! (eq_attr "opsize" "udi")))
! 14 8)
!
! ;; Similarly for the FPU we have two asymetric units. But fcpys can issue
! ;; on either so we have to play the game again.
!
! (define_function_unit "ev5_fbox" 2 0
! (and (eq_attr "cpu" "ev5")
! (eq_attr "type" "fadd,fcmov,fmul,fcpys,fbr,fdiv"))
! 4 1)
!
! (define_function_unit "ev5_fm" 1 0
! (and (eq_attr "cpu" "ev5")
! (eq_attr "type" "fmul"))
! 4 1)
!
! ; Add and cmov as you would expect; fbr never produces a result;
! ; fdiv issues through fa to the divider,
! (define_function_unit "ev5_fa" 1 0
! (and (eq_attr "cpu" "ev5")
! (eq_attr "type" "fadd,fcmov,fbr,fdiv"))
! 4 1)
!
! ; ??? How to: "No instruction can be issued to pipe FA exactly five
! ; cycles before a floating point divide completes."
! (define_function_unit "fdiv" 1 0
! (and (eq_attr "cpu" "ev5")
! (and (eq_attr "type" "fdiv")
! (eq_attr "opsize" "si")))
! 15 15) ; 15 to 31 data dependent
!
! (define_function_unit "fdiv" 1 0
! (and (eq_attr "cpu" "ev5")
! (and (eq_attr "type" "fdiv")
! (eq_attr "opsize" "di")))
! 22 22) ; 22 to 60 data dependent
!
! ;; EV6 scheduling. EV6 can issue 4 insns per clock.
! ;;
! ;; EV6 has two symmetric pairs ("clusters") of two asymetric integer units
! ;; ("upper" and "lower"), yielding pipe names U0, U1, L0, L1.
!
! ;; Conditional moves decompose into two independent primitives, each
! ;; taking one cycle. Since ev6 is out-of-order, we can't see anything
! ;; but two cycles.
! (define_function_unit "ev6_ebox" 4 0
! (and (eq_attr "cpu" "ev6")
! (eq_attr "type" "icmov"))
! 2 1)
!
! (define_function_unit "ev6_ebox" 4 0
! (and (eq_attr "cpu" "ev6")
! (eq_attr "type" "!fbr,fcmov,fadd,fmul,fcpys,fdiv,fsqrt"))
! 1 1)
!
! ;; Integer loads take at least 3 clocks, and only issue to lower units.
! ;; Return one from here and fix up with user-defined latencies in adjust_cost.
! (define_function_unit "ev6_l" 2 0
! (and (eq_attr "cpu" "ev6")
! (eq_attr "type" "ild,ldsym,ist,fst"))
! 1 1)
!
! ;; FP loads take at least 4 clocks. Return two from here...
! (define_function_unit "ev6_l" 2 0
! (and (eq_attr "cpu" "ev6")
! (eq_attr "type" "fld"))
! 2 1)
!
! ;; Motion video insns also issue only to U0, and take three ticks.
! (define_function_unit "ev6_u0" 1 0
! (and (eq_attr "cpu" "ev6")
! (eq_attr "type" "mvi"))
! 3 1)
!
! (define_function_unit "ev6_u" 2 0
! (and (eq_attr "cpu" "ev6")
! (eq_attr "type" "mvi"))
! 3 1)
!
! ;; Shifts issue to either upper pipe.
! (define_function_unit "ev6_u" 2 0
! (and (eq_attr "cpu" "ev6")
! (eq_attr "type" "shift"))
! 1 1)
!
! ;; Multiplies issue only to U1, and all take 7 ticks.
! ;; Rather than create a new function unit just for U1, reuse IMUL
! (define_function_unit "imul" 1 0
! (and (eq_attr "cpu" "ev6")
! (eq_attr "type" "imul"))
! 7 1)
!
! (define_function_unit "ev6_u" 2 0
! (and (eq_attr "cpu" "ev6")
! (eq_attr "type" "imul"))
! 7 1)
!
! ;; Branches issue to either upper pipe
! (define_function_unit "ev6_u" 2 0
! (and (eq_attr "cpu" "ev6")
! (eq_attr "type" "ibr"))
! 3 1)
!
! ;; Calls only issue to L0.
! (define_function_unit "ev6_l0" 1 0
! (and (eq_attr "cpu" "ev6")
! (eq_attr "type" "jsr"))
! 1 1)
!
! (define_function_unit "ev6_l" 2 0
! (and (eq_attr "cpu" "ev6")
! (eq_attr "type" "jsr"))
! 1 1)
!
! ;; Ftoi/itof only issue to lower pipes
! (define_function_unit "ev6_l" 2 0
! (and (eq_attr "cpu" "ev6")
! (eq_attr "type" "ftoi"))
! 3 1)
!
! (define_function_unit "ev6_l" 2 0
! (and (eq_attr "cpu" "ev6")
! (eq_attr "type" "itof"))
! 4 1)
!
! ;; For the FPU we are very similar to EV5, except there's no insn that
! ;; can issue to fm & fa, so we get to leave that out.
!
! (define_function_unit "ev6_fm" 1 0
! (and (eq_attr "cpu" "ev6")
! (eq_attr "type" "fmul"))
! 4 1)
!
! (define_function_unit "ev6_fa" 1 0
! (and (eq_attr "cpu" "ev6")
! (eq_attr "type" "fadd,fcpys,fbr,fdiv,fsqrt"))
! 4 1)
!
! (define_function_unit "ev6_fa" 1 0
! (and (eq_attr "cpu" "ev6")
! (eq_attr "type" "fcmov"))
! 8 1)
!
! (define_function_unit "fdiv" 1 0
! (and (eq_attr "cpu" "ev6")
! (and (eq_attr "type" "fdiv")
! (eq_attr "opsize" "si")))
! 12 10)
!
! (define_function_unit "fdiv" 1 0
! (and (eq_attr "cpu" "ev6")
! (and (eq_attr "type" "fdiv")
! (eq_attr "opsize" "di")))
! 15 13)
!
! (define_function_unit "fsqrt" 1 0
! (and (eq_attr "cpu" "ev6")
! (and (eq_attr "type" "fsqrt")
! (eq_attr "opsize" "si")))
! 16 14)
!
! (define_function_unit "fsqrt" 1 0
! (and (eq_attr "cpu" "ev6")
! (and (eq_attr "type" "fsqrt")
! (eq_attr "opsize" "di")))
! 32 30)
!
! ; ??? The FPU communicates with memory and the integer register file
! ; via two fp store units. We need a slot in the fst immediately, and
! ; a slot in LOW after the operand data is ready. At which point the
! ; data may be moved either to the store queue or the integer register
! ; file and the insn retired.
;; First define the arithmetic insns. Note that the 32-bit forms also
;; sign-extend.
--- 123,133 ----
(define_attr "length" ""
(const_int 4))
! ;; Include scheduling descriptions.
+ (include "ev4.md")
+ (include "ev5.md")
+ (include "ev6.md")
;; First define the arithmetic insns. Note that the 32-bit forms also
;; sign-extend.
*************** fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi
*** 5018,5024 ****
[(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
""
""
! [(set_attr "length" "0")])
(define_insn "jump"
[(set (pc)
--- 4716,4723 ----
[(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
""
""
! [(set_attr "length" "0")
! (set_attr "type" "none")])
(define_insn "jump"
[(set (pc)
Index: config/alpha/ev4.md
===================================================================
RCS file: config/alpha/ev4.md
diff -N config/alpha/ev4.md
*** /dev/null 1 Jan 1970 00:00:00 -0000
--- config/alpha/ev4.md 3 May 2002 22:25:07 -0000
***************
*** 0 ****
--- 1,142 ----
+ ;; Scheduling description for Alpha EV4.
+ ;; Copyright (C) 2002 Free Software Foundation, Inc.
+ ;;
+ ;; This file is part of GNU CC.
+ ;;
+ ;; GNU CC is free software; you can redistribute it and/or modify
+ ;; it under the terms of the GNU General Public License as published by
+ ;; the Free Software Foundation; either version 2, or (at your option)
+ ;; any later version.
+ ;;
+ ;; GNU CC is distributed in the hope that it will be useful,
+ ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ;; GNU General Public License for more details.
+ ;;
+ ;; You should have received a copy of the GNU General Public License
+ ;; along with GNU CC; see the file COPYING. If not, write to
+ ;; the Free Software Foundation, 59 Temple Place - Suite 330,
+ ;; Boston, MA 02111-1307, USA.
+
+ ; On EV4 there are two classes of resources to consider: resources needed
+ ; to issue, and resources needed to execute. IBUS[01] are in the first
+ ; category. ABOX, BBOX, EBOX, FBOX, IMUL & FDIV make up the second.
+ ; (There are a few other register-like resources, but ...)
+
+ (define_automaton "ev4_0,ev4_1,ev4_2")
+ (define_cpu_unit "ev4_ib0,ev4_ib1,ev4_abox,ev4_bbox" "ev4_0")
+ (define_cpu_unit "ev4_ebox,ev4_imul" "ev4_1")
+ (define_cpu_unit "ev4_fbox,ev4_fdiv" "ev4_2")
+ (define_reservation "ev4_ib01" "ev4_ib0|ev4_ib1")
+
+ ; Assume type "multi" single issues.
+ (define_insn_reservation "ev4_multi" 1
+ (and (eq_attr "cpu" "ev4")
+ (eq_attr "type" "multi"))
+ "ev4_ib0+ev4_ib1")
+
+ ; Loads from L0 complete in three cycles. adjust_cost still factors
+ ; in user-specified memory latency, so return 1 here.
+ (define_insn_reservation "ev4_ld" 1
+ (and (eq_attr "cpu" "ev4")
+ (eq_attr "type" "ild,fld,ldsym"))
+ "ev4_ib01+ev4_abox")
+
+ ; Stores can issue before the data (but not address) is ready.
+ (define_insn_reservation "ev4_ist" 1
+ (and (eq_attr "cpu" "ev4")
+ (eq_attr "type" "ist"))
+ "ev4_ib1+ev4_abox")
+
+ (define_insn_reservation "ev4_fst" 1
+ (and (eq_attr "cpu" "ev4")
+ (eq_attr "type" "fst"))
+ "ev4_ib0+ev4_abox")
+
+ ; Branches have no delay cost, but do tie up the unit for two cycles.
+ (define_insn_reservation "ev4_ibr" 2
+ (and (eq_attr "cpu" "ev4")
+ (eq_attr "type" "ibr,jsr"))
+ "ev4_ib1+ev4_bbox,ev4_bbox")
+
+ (define_insn_reservation "ev4_fbr" 2
+ (and (eq_attr "cpu" "ev4")
+ (eq_attr "type" "fbr"))
+ "ev4_ib0+ev4_bbox,ev4_bbox")
+
+ ; Arithmetic insns normally have their results available after
+ ; two cycles. There are a number of exceptions.
+
+ (define_insn_reservation "ev4_iaddlog" 2
+ (and (eq_attr "cpu" "ev4")
+ (eq_attr "type" "iadd,ilog"))
+ "ev4_ib0+ev4_ebox")
+
+ (define_bypass 1
+ "ev4_iaddlog"
+ "ev4_ibr,ev4_iaddlog,ev4_shiftcm,ev4_icmp,ev4_imulsi,ev4_imuldi")
+
+ (define_insn_reservation "ev4_shiftcm" 2
+ (and (eq_attr "cpu" "ev4")
+ (eq_attr "type" "shift,icmov"))
+ "ev4_ib0+ev4_ebox")
+
+ (define_insn_reservation "ev4_icmp" 2
+ (and (eq_attr "cpu" "ev4")
+ (eq_attr "type" "icmp"))
+ "ev4_ib0+ev4_ebox")
+
+ (define_bypass 1 "ev4_icmp" "ev4_ibr")
+
+ (define_bypass 0
+ "ev4_iaddlog,ev4_shiftcm,ev4_icmp"
+ "ev4_ist"
+ "store_data_bypass_p")
+
+ ; Multiplies use a non-pipelined imul unit. Also, "no [ebox] insn can
+ ; be issued exactly three cycles before an integer multiply completes".
+
+ (define_insn_reservation "ev4_imulsi" 21
+ (and (eq_attr "cpu" "ev4")
+ (and (eq_attr "type" "imul")
+ (eq_attr "opsize" "si")))
+ "ev4_ib0+ev4_imul,ev4_imul*18,ev4_ebox")
+
+ (define_bypass 20 "ev4_imulsi" "ev4_ist" "store_data_bypass_p")
+
+ (define_insn_reservation "ev4_imuldi" 23
+ (and (eq_attr "cpu" "ev4")
+ (and (eq_attr "type" "imul")
+ (eq_attr "opsize" "!si")))
+ "ev4_ib0+ev4_imul,ev4_imul*20,ev4_ebox")
+
+ (define_bypass 22 "ev4_imuldi" "ev4_ist" "store_data_bypass_p")
+
+ ; Most FP insns have a 6 cycle latency, but with a 4 cycle bypass back in.
+ (define_insn_reservation "ev4_fpop" 6
+ (and (eq_attr "cpu" "ev4")
+ (eq_attr "type" "fadd,fmul,fcpys,fcmov"))
+ "ev4_ib1+ev4_fbox")
+
+ (define_bypass 4 "ev4_fpop" "ev4_fpop")
+
+ ; The floating point divider is not pipelined. Also, "no FPOP insn can be
+ ; issued exactly five or exactly six cycles before an fdiv insn completes".
+
+ (define_insn_reservation "ev4_fdivsf" 34
+ (and (eq_attr "cpu" "ev4")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "opsize" "si")))
+ "ev4_ib1+ev4_fdiv,ev4_fdiv*28,ev4_fdiv+ev4_fbox,ev4_fbox")
+
+ (define_insn_reservation "ev4_fdivdf" 63
+ (and (eq_attr "cpu" "ev4")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "opsize" "di")))
+ "ev4_ib1+ev4_fdiv,ev4_fdiv*57,ev4_fdiv+ev4_fbox,ev4_fbox")
+
+ ; Traps don't consume or produce data.
+ (define_insn_reservation "ev4_misc" 1
+ (and (eq_attr "cpu" "ev4")
+ (eq_attr "type" "misc"))
+ "ev4_ib1")
Index: config/alpha/ev5.md
===================================================================
RCS file: config/alpha/ev5.md
diff -N config/alpha/ev5.md
*** /dev/null 1 Jan 1970 00:00:00 -0000
--- config/alpha/ev5.md 3 May 2002 22:25:07 -0000
***************
*** 0 ****
--- 1,170 ----
+ ;; Scheduling description for Alpha EV5.
+ ;; Copyright (C) 2002 Free Software Foundation, Inc.
+ ;;
+ ;; This file is part of GNU CC.
+ ;;
+ ;; GNU CC is free software; you can redistribute it and/or modify
+ ;; it under the terms of the GNU General Public License as published by
+ ;; the Free Software Foundation; either version 2, or (at your option)
+ ;; any later version.
+ ;;
+ ;; GNU CC is distributed in the hope that it will be useful,
+ ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ;; GNU General Public License for more details.
+ ;;
+ ;; You should have received a copy of the GNU General Public License
+ ;; along with GNU CC; see the file COPYING. If not, write to
+ ;; the Free Software Foundation, 59 Temple Place - Suite 330,
+ ;; Boston, MA 02111-1307, USA.
+
+ ;; EV5 has two asymmetric integer units, E0 and E1, plus separate
+ ;; FP add and multiply units.
+
+ (define_automaton "ev5_0,ev5_1,ev5_2")
+ (define_cpu_unit "ev5_e1,ev5_fm" "ev5_0")
+ (define_cpu_unit "ev5_e0,ev5_imul" "ev5_1")
+ (define_cpu_unit "ev5_fa,ev5_fdiv" "ev5_2")
+ (define_reservation "ev5_e01" "ev5_e0|ev5_e1")
+ (define_reservation "ev5_fam" "ev5_fa|ev5_fm")
+
+ ; Assume type "multi" single issues.
+ (define_insn_reservation "ev5_multi" 1
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "multi"))
+ "ev5_e0+ev5_e1+ev5_fa+ev5_fm")
+
+ ; Stores can only issue to E0, and may not issue with loads.
+ ; Model this with some fake units.
+
+ (define_cpu_unit "ev5_l0,ev5_l1,ev5_st" "ev5_0")
+ (define_reservation "ev5_ld" "ev5_l0|ev5_l1")
+ (exclusion_set "ev5_l0,ev5_l1" "ev5_st")
+
+ (define_insn_reservation "ev5_st" 1
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "ist,fst"))
+ "ev5_e0+ev5_st")
+
+ ; Loads from L0 complete in two cycles. adjust_cost still factors
+ ; in user-specified memory latency, so return 1 here.
+ (define_insn_reservation "ev5_ld" 1
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "ild,fld,ldsym"))
+ "ev5_e01+ev5_ld")
+
+ ; Integer branches slot only to E1.
+ (define_insn_reservation "ev5_ibr" 1
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "ibr"))
+ "ev5_e1")
+
+ (define_insn_reservation "ev5_jsr" 1
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "jsr"))
+ "ev5_e1")
+
+ (define_insn_reservation "ev5_shiftmvi" 2
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "shift,mvi"))
+ "ev5_e0")
+
+ (define_insn_reservation "ev5_cmov" 2
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "icmov"))
+ "ev5_e01")
+
+ (define_insn_reservation "ev5_iadd" 1
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "iadd"))
+ "ev5_e01")
+
+ (define_insn_reservation "ev5_ilogcmp" 1
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "ilog,icmp"))
+ "ev5_e01")
+
+ ; Conditional move and branch can issue the same cycle as the test.
+ (define_bypass 0 "ev5_ilogcmp" "ev5_ibr,ev5_cmov" "if_test_bypass_p")
+
+ ; Multiplies use a non-pipelined imul unit. Also, "no insn can be issued
+ ; to E0 exactly two cycles before an integer multiply completes".
+
+ (define_insn_reservation "ev5_imull" 8
+ (and (eq_attr "cpu" "ev5")
+ (and (eq_attr "type" "imul")
+ (eq_attr "opsize" "si")))
+ "ev5_e0+ev5_imul,ev5_imul*3,nothing,ev5_e0")
+
+ (define_insn_reservation "ev5_imulq" 12
+ (and (eq_attr "cpu" "ev5")
+ (and (eq_attr "type" "imul")
+ (eq_attr "opsize" "di")))
+ "ev5_e0+ev5_imul,ev5_imul*7,nothing,ev5_e0")
+
+ (define_insn_reservation "ev5_imulh" 14
+ (and (eq_attr "cpu" "ev5")
+ (and (eq_attr "type" "imul")
+ (eq_attr "opsize" "udi")))
+ "ev5_e0+ev5_imul,ev5_imul*7,nothing*3,ev5_e0")
+
+ ; The multiplier is unable to receive data from Ebox bypass paths. The
+ ; instruction issues at the expected time, but its latency is increased
+ ; by the time it takes for the input data to become available to the
+ ; multiplier. For example, an IMULL instruction issued one cycle later
+ ; than an ADDL instruction, which produced one of its operands, has a
+ ; latency of 10 (8 + 2). If the IMULL instruction is issued two cycles
+ ; later than the ADDL instruction, the latency is 9 (8 + 1).
+ ;
+ ; Model this instead with increased latency on the input instruction.
+
+ (define_bypass 3
+ "ev5_ld,ev5_shiftmvi,ev5_cmov,ev5_iadd,ev5_ilogcmp"
+ "ev5_imull,ev5_imulq,ev5_imulh")
+
+ (define_bypass 9 "ev5_imull" "ev5_imull,ev5_imulq,ev5_imulh")
+ (define_bypass 13 "ev5_imulq" "ev5_imull,ev5_imulq,ev5_imulh")
+ (define_bypass 15 "ev5_imulh" "ev5_imull,ev5_imulq,ev5_imulh")
+
+ ; Similarly for the FPU we have two asymmetric units.
+
+ (define_insn_reservation "ev5_fadd" 4
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "fadd,fcmov"))
+ "ev5_fa")
+
+ (define_insn_reservation "ev5_fbr" 1
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "fbr"))
+ "ev5_fa")
+
+ (define_insn_reservation "ev5_fcpys" 4
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "fcpys"))
+ "ev5_fam")
+
+ (define_insn_reservation "ev5_fmul" 4
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "fmul"))
+ "ev5_fm")
+
+ ; The floating point divider is not pipelined. Also, "no insn can be issued
+ ; to FA exactly five cycles before an fdiv insn completes".
+
+ (define_insn_reservation "ev5_fdivsf" 15
+ (and (eq_attr "cpu" "ev5")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "opsize" "si")))
+ "ev5_fa+ev5_fdiv,ev5_fdiv*9,ev5_fa+ev5_fdiv,ev5_fdiv*4")
+
+ (define_insn_reservation "ev5_fdivdf" 22
+ (and (eq_attr "cpu" "ev5")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "opsize" "di")))
+ "ev5_fa+ev5_fdiv,ev5_fdiv*17,ev5_fa+ev5_fdiv,ev5_fdiv*4")
+
+ ; Traps don't consume or produce data; rpcc is latency 2 if we ever add it.
+ (define_insn_reservation "ev5_misc" 2
+ (and (eq_attr "cpu" "ev5")
+ (eq_attr "type" "misc"))
+ "ev5_e0")
Index: config/alpha/ev6.md
===================================================================
RCS file: config/alpha/ev6.md
diff -N config/alpha/ev6.md
*** /dev/null 1 Jan 1970 00:00:00 -0000
--- config/alpha/ev6.md 3 May 2002 22:25:07 -0000
***************
*** 0 ****
--- 1,173 ----
+ ;; Scheduling description for Alpha EV6.
+ ;; Copyright (C) 2002 Free Software Foundation, Inc.
+ ;;
+ ;; This file is part of GNU CC.
+ ;;
+ ;; GNU CC is free software; you can redistribute it and/or modify
+ ;; it under the terms of the GNU General Public License as published by
+ ;; the Free Software Foundation; either version 2, or (at your option)
+ ;; any later version.
+ ;;
+ ;; GNU CC is distributed in the hope that it will be useful,
+ ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ;; GNU General Public License for more details.
+ ;;
+ ;; You should have received a copy of the GNU General Public License
+ ;; along with GNU CC; see the file COPYING. If not, write to
+ ;; the Free Software Foundation, 59 Temple Place - Suite 330,
+ ;; Boston, MA 02111-1307, USA.
+
+ ; EV6 can issue 4 insns per clock. It's out-of-order, so this isn't
+ ; expected to help over-much, but a precise description can be important
+ ; for software pipelining.
+ ;
+ ; EV6 has two symmetric pairs ("clusters") of two asymmetric integer
+ ; units ("upper" and "lower"), yielding pipe names U0, U1, L0, L1.
+ ;
+ ; ??? The clusters have independent register files that are re-synced
+ ; every cycle. Thus there is one additional cycle of latency between
+ ; insns issued on different clusters. Possibly model that by duplicating
+ ; all EBOX insn_reservations that can issue to either cluster, increasing
+ ; all latencies by one, and adding bypasses within the cluster.
+ ;
+ ; ??? In addition, instruction order affects cluster issue.
+
+ (define_automaton "ev6_0,ev6_1")
+ (define_cpu_unit "ev6_u0,ev6_u1,ev6_l0,ev6_l1" "ev6_0")
+ (define_reservation "ev6_u" "ev6_u0|ev6_u1")
+ (define_reservation "ev6_l" "ev6_l0|ev6_l1")
+ (define_reservation "ev6_ebox" "ev6_u|ev6_l")
+
+ (define_cpu_unit "ev6_fa" "ev6_1")
+ (define_cpu_unit "ev6_fm,ev6_fst0,ev6_fst1" "ev6_0")
+ (define_reservation "ev6_fst" "ev6_fst0|ev6_fst1")
+
+ ; Assume type "multi" single issues.
+ (define_insn_reservation "ev6_multi" 1
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "multi"))
+ "ev6_u0+ev6_u1+ev6_l0+ev6_l1+ev6_fa+ev6_fm+ev6_fst0+ev6_fst1")
+
+ ; Integer loads take at least 3 clocks, and only issue to lower units.
+ ; adjust_cost still factors in user-specified memory latency, so return 1 here.
+ (define_insn_reservation "ev6_ild" 1
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "ild,ldsym"))
+ "ev6_l")
+
+ (define_insn_reservation "ev6_ist" 1
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "ist"))
+ "ev6_l")
+
+ ; FP loads take at least 4 clocks. adjust_cost still factors
+ ; in user-specified memory latency, so return 2 here.
+ (define_insn_reservation "ev6_fld" 2
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "fld"))
+ "ev6_l")
+
+ ; The FPU communicates with memory and the integer register file
+ ; via two fp store units. We need a slot in the fst immediately, and
+ ; a slot in LOW after the operand data is ready. At which point the
+ ; data may be moved either to the store queue or the integer register
+ ; file and the insn retired.
+
+ (define_insn_reservation "ev6_fst" 3
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "fst"))
+ "ev6_fst,nothing,ev6_l")
+
+ ; Arithmetic goes anywhere.
+ (define_insn_reservation "ev6_arith" 1
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "iadd,ilog,icmp"))
+ "ev6_ebox")
+
+ ; Motion video insns also issue only to U0, and take three ticks.
+ (define_insn_reservation "ev6_mvi" 3
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "mvi"))
+ "ev6_u0")
+
+ ; Shifts issue to upper units.
+ (define_insn_reservation "ev6_shift" 1
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "shift"))
+ "ev6_u")
+
+ ; Multiplies issue only to U1, and all take 7 ticks.
+ (define_insn_reservation "ev6_imul" 7
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "imul"))
+ "ev6_u1")
+
+ ; Conditional moves decompose into two independent primitives, each taking
+ ; one cycle. Since ev6 is out-of-order, we can't see anything but two cycles.
+ (define_insn_reservation "ev6_icmov" 2
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "icmov"))
+ "ev6_ebox,ev6_ebox")
+
+ ; Integer branches issue to upper units.
+ (define_insn_reservation "ev6_ibr" 1
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "ibr"))
+ "ev6_u")
+
+ ; Calls only issue to L0.
+ (define_insn_reservation "ev6_jsr" 1
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "jsr"))
+ "ev6_l0")
+
+ ; Ftoi/itof only issue to lower pipes.
+ (define_insn_reservation "ev6_itof" 3
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "itof"))
+ "ev6_l")
+
+ (define_insn_reservation "ev6_ftoi" 3
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "ftoi"))
+ "ev6_fst,nothing,ev6_l")
+
+ (define_insn_reservation "ev6_fmul" 4
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "fmul"))
+ "ev6_fm")
+
+ (define_insn_reservation "ev6_fadd" 4
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "fadd,fcpys,fbr"))
+ "ev6_fa")
+
+ (define_insn_reservation "ev6_fcmov" 8
+ (and (eq_attr "cpu" "ev6")
+ (eq_attr "type" "fcmov"))
+ "ev6_fa,nothing*3,ev6_fa")
+
+ (define_insn_reservation "ev6_fdivsf" 12
+ (and (eq_attr "cpu" "ev6")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "opsize" "si")))
+ "ev6_fa*9")
+
+ (define_insn_reservation "ev6_fdivdf" 15
+ (and (eq_attr "cpu" "ev6")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "opsize" "di")))
+ "ev6_fa*12")
+
+ (define_insn_reservation "ev6_sqrtsf" 18
+ (and (eq_attr "cpu" "ev6")
+ (and (eq_attr "type" "fsqrt")
+ (eq_attr "opsize" "si")))
+ "ev6_fa*15")
+
+ (define_insn_reservation "ev6_sqrtdf" 33
+ (and (eq_attr "cpu" "ev6")
+ (and (eq_attr "type" "fsqrt")
+ (eq_attr "opsize" "di")))
+ "ev6_fa*30")
More information about the Gcc-bugs
mailing list