alpha.c (alpha_adjust_cost): Remove everything but memory latency adjustments.

author Richard Henderson <rth@redhat.com>

Sun, 5 May 2002 21:54:39 +0000 (14:54 -0700)

committer Richard Henderson <rth@gcc.gnu.org>

Sun, 5 May 2002 21:54:39 +0000 (14:54 -0700)
author Richard Henderson <rth@redhat.com>
Sun, 5 May 2002 21:54:39 +0000 (14:54 -0700)
committer Richard Henderson <rth@gcc.gnu.org>
Sun, 5 May 2002 21:54:39 +0000 (14:54 -0700)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index fd879138c04d7115521ce76a18f89005d74badab..8dfaae729a12c279017fd40353af991d74a76505 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+2002-05-05  Richard Henderson  <rth@redhat.com>
+
+       * config/alpha/alpha.c (alpha_adjust_cost): Remove everything but
+       memory latency adjustments.
+       (alpha_variable_issue): Remove.
+       (alpha_use_dfa_pipeline_interface): New.
+       (alpha_multipass_dfa_lookahead): New.
+       * config/alpha/alpha.md: Remove define_function_unit scheduling;
+       include new dfa scheduling.
+       (attr type): Add none.
+       (blockage): Use it.
+       * config/alpha/ev4.md: New.
+       * config/alpha/ev5.md: New.
+       * config/alpha/ev6.md: New.
+
  2002-05-05  David S. Miller  <davem@redhat.com>
  
         * recog.c (store_data_bypass_p): Handle CLOBBER inside PARALLEL.
diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c

index 915d6e48996563fc89b25d23be4e0cdb8d21a3ec..c40636a4bb13225ef869414a9166bafbac0b4085 100644 (file)
--- a/gcc/config/alpha/alpha.c
+++ b/gcc/config/alpha/alpha.c
@@ -157,8 +157,10 @@ static int alpha_adjust_cost
    PARAMS ((rtx, rtx, rtx, int));
  static int alpha_issue_rate
    PARAMS ((void));
-static int alpha_variable_issue
-  PARAMS ((FILE *, int, rtx, int));
+static int alpha_use_dfa_pipeline_interface
+  PARAMS ((void));
+static int alpha_multipass_dfa_lookahead
+  PARAMS ((void));
  
  #if TARGET_ABI_UNICOSMK
  static void alpha_init_machine_status
@@ -231,8 +233,12 @@ static unsigned int unicosmk_section_type_flags PARAMS ((tree, const char *,
  #define TARGET_SCHED_ADJUST_COST alpha_adjust_cost
  #undef TARGET_SCHED_ISSUE_RATE
  #define TARGET_SCHED_ISSUE_RATE alpha_issue_rate
-#undef TARGET_SCHED_VARIABLE_ISSUE
-#define TARGET_SCHED_VARIABLE_ISSUE alpha_variable_issue
+#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
+#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
+  alpha_use_dfa_pipeline_interface
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
+  alpha_multipass_dfa_lookahead
  
  struct gcc_target targetm = TARGET_INITIALIZER;
  \f
@@ -4828,9 +4834,8 @@ alpha_adjust_cost (insn, link, dep_insn, cost)
    /* If the dependence is an anti-dependence, there is no cost.  For an
       output dependence, there is sometimes a cost, but it doesn't seem
       worth handling those few cases.  */
-
    if (REG_NOTE_KIND (link) != 0)
-    return 0;
+    return cost;
  
    /* If we can't recognize the insns, we can't really do anything.  */
    if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
@@ -4845,122 +4850,13 @@ alpha_adjust_cost (insn, link, dep_insn, cost)
        || dep_insn_type == TYPE_LDSYM)
      cost += alpha_memory_latency-1;
  
-  switch (alpha_cpu)
-    {
-    case PROCESSOR_EV4:
-      /* On EV4, if INSN is a store insn and DEP_INSN is setting the data
-        being stored, we can sometimes lower the cost.  */
-
-      if ((insn_type == TYPE_IST || insn_type == TYPE_FST)
-         && (set = single_set (dep_insn)) != 0
-         && GET_CODE (PATTERN (insn)) == SET
-         && rtx_equal_p (SET_DEST (set), SET_SRC (PATTERN (insn))))
-       {
-         switch (dep_insn_type)
-           {
-           case TYPE_ILD:
-           case TYPE_FLD:
-             /* No savings here.  */
-             return cost;
-
-           case TYPE_IMUL:
-             /* In these cases, we save one cycle.  */
-             return cost - 1;
-
-           default:
-             /* In all other cases, we save two cycles.  */
-             return MAX (0, cost - 2);
-           }
-       }
+  /* Everything else handled in DFA bypasses now.  */
  
-      /* Another case that needs adjustment is an arithmetic or logical
-        operation.  It's cost is usually one cycle, but we default it to
-        two in the MD file.  The only case that it is actually two is
-        for the address in loads, stores, and jumps.  */
-
-      if (dep_insn_type == TYPE_IADD || dep_insn_type == TYPE_ILOG)
-       {
-         switch (insn_type)
-           {
-           case TYPE_ILD:
-           case TYPE_IST:
-           case TYPE_FLD:
-           case TYPE_FST:
-           case TYPE_JSR:
-             return cost;
-           default:
-             return 1;
-           }
-       }
-
-      /* The final case is when a compare feeds into an integer branch;
-        the cost is only one cycle in that case.  */
-
-      if (dep_insn_type == TYPE_ICMP && insn_type == TYPE_IBR)
-       return 1;
-      break;
-
-    case PROCESSOR_EV5:
-      /* And the lord DEC saith:  "A special bypass provides an effective
-        latency of 0 cycles for an ICMP or ILOG insn producing the test
-        operand of an IBR or ICMOV insn." */
-
-      if ((dep_insn_type == TYPE_ICMP || dep_insn_type == TYPE_ILOG)
-         && (set = single_set (dep_insn)) != 0)
-       {
-         /* A branch only has one input.  This must be it.  */
-         if (insn_type == TYPE_IBR)
-           return 0;
-         /* A conditional move has three, make sure it is the test.  */
-         if (insn_type == TYPE_ICMOV
-             && GET_CODE (set_src = PATTERN (insn)) == SET
-             && GET_CODE (set_src = SET_SRC (set_src)) == IF_THEN_ELSE
-             && rtx_equal_p (SET_DEST (set), XEXP (set_src, 0)))
-           return 0;
-       }
-
-      /* "The multiplier is unable to receive data from IEU bypass paths.
-        The instruction issues at the expected time, but its latency is
-        increased by the time it takes for the input data to become
-        available to the multiplier" -- which happens in pipeline stage
-        six, when results are comitted to the register file.  */
-
-      if (insn_type == TYPE_IMUL)
-       {
-         switch (dep_insn_type)
-           {
-           /* These insns produce their results in pipeline stage five.  */
-           case TYPE_ILD:
-           case TYPE_ICMOV:
-           case TYPE_IMUL:
-           case TYPE_MVI:
-             return cost + 1;
-
-           /* Other integer insns produce results in pipeline stage four.  */
-           default:
-             return cost + 2;
-           }
-       }
-      break;
-
-    case PROCESSOR_EV6:
-      /* There is additional latency to move the result of (most) FP 
-         operations anywhere but the FP register file.  */
-
-      if ((insn_type == TYPE_FST || insn_type == TYPE_FTOI)
-         && (dep_insn_type == TYPE_FADD ||
-             dep_insn_type == TYPE_FMUL ||
-             dep_insn_type == TYPE_FCMOV))
-        return cost + 2;
-
-      break;
-    }
-
-  /* Otherwise, return the default cost.  */
    return cost;
  }
  
-/* Function to initialize the issue rate used by the scheduler.  */
+/* The number of instructions that can be issued per cycle.  */
+
  static int
  alpha_issue_rate ()
  {
@@ -4968,18 +4864,24 @@ alpha_issue_rate ()
  }
  
  static int
-alpha_variable_issue (dump, verbose, insn, cim)
-     FILE *dump ATTRIBUTE_UNUSED;
-     int verbose ATTRIBUTE_UNUSED;
-     rtx insn;
-     int cim;
+alpha_use_dfa_pipeline_interface ()
  {
-    if (recog_memoized (insn) < 0 || get_attr_type (insn) == TYPE_MULTI)
-      return 0;
-
-    return cim - 1;
+  return true;
  }
  
+/* How many alternative schedules to try.  This should be as wide as the
+   scheduling freedom in the DFA, but no wider.  Making this value too
+   large results in extra work for the scheduler.
+
+   For EV4, loads can be issued to either IB0 or IB1, thus we have 2
+   alternative schedules.  For EV5, we can choose between E0/E1 and
+   FA/FM.  For EV6, an arithmetic insn can be issued to U0/U1/L0/L1.  */
+
+static int
+alpha_multipass_dfa_lookahead ()
+{
+  return (alpha_cpu == PROCESSOR_EV6 ? 4 : 2);
+}
  \f
  /* Register global variables and machine-specific functions with the
     garbage collector.  */
diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md

index 785a61df01efca0806862f31a154c69b1679a3fa..1df715ad0c27e78bc04422484cf4832e1aba30ad 100644 (file)
--- a/gcc/config/alpha/alpha.md
+++ b/gcc/config/alpha/alpha.md
@@ -78,7 +78,7 @@
  
  (define_attr "type"
    "ild,fld,ldsym,ist,fst,ibr,fbr,jsr,iadd,ilog,shift,icmov,fcmov,icmp,imul,\
-fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi"
+fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi,none"
    (const_string "iadd"))
  
  ;; Describe a user's asm statement.
@@ -123,313 +123,11 @@ fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi"
  (define_attr "length" ""
    (const_int 4))
  \f
-;; On EV4 there are two classes of resources to consider: resources needed
-;; to issue, and resources needed to execute.  IBUS[01] are in the first
-;; category.  ABOX, BBOX, EBOX, FBOX, IMUL & FDIV make up the second.
-;; (There are a few other register-like resources, but ...)
-
-; First, describe all of the issue constraints with single cycle delays.
-; All insns need a bus, but all except loads require one or the other.
-(define_function_unit "ev4_ibus0" 1 0
-  (and (eq_attr "cpu" "ev4")
-       (eq_attr "type" "fst,fbr,iadd,imul,ilog,shift,icmov,icmp"))
-  1 1)
-
-(define_function_unit "ev4_ibus1" 1 0
-  (and (eq_attr "cpu" "ev4")
-       (eq_attr "type" "ist,ibr,jsr,fadd,fcmov,fcpys,fmul,fdiv,misc"))
-  1 1)
-
-; Memory delivers its result in three cycles.  Actually return one and
-; take care of this in adjust_cost, since we want to handle user-defined
-; memory latencies.
-(define_function_unit "ev4_abox" 1 0
-  (and (eq_attr "cpu" "ev4")
-       (eq_attr "type" "ild,fld,ldsym,ist,fst"))
-  1 1)
-
-; Branches have no delay cost, but do tie up the unit for two cycles.
-(define_function_unit "ev4_bbox" 1 1
-  (and (eq_attr "cpu" "ev4")
-       (eq_attr "type" "ibr,fbr,jsr"))
-  2 2)
-
-; Arithmetic insns are normally have their results available after
-; two cycles.  There are a number of exceptions.  They are encoded in
-; ADJUST_COST.  Some of the other insns have similar exceptions.
-(define_function_unit "ev4_ebox" 1 0
-  (and (eq_attr "cpu" "ev4")
-       (eq_attr "type" "iadd,ilog,shift,icmov,icmp,misc"))
-  2 1)
-
-(define_function_unit "imul" 1 0
-  (and (eq_attr "cpu" "ev4")
-       (and (eq_attr "type" "imul")
-           (eq_attr "opsize" "si")))
-  21 19)
-
-(define_function_unit "imul" 1 0
-  (and (eq_attr "cpu" "ev4")
-       (and (eq_attr "type" "imul")
-           (eq_attr "opsize" "!si")))
-  23 21)
-
-(define_function_unit "ev4_fbox" 1 0
-  (and (eq_attr "cpu" "ev4")
-       (eq_attr "type" "fadd,fmul,fcpys,fcmov"))
-  6 1)
-
-(define_function_unit "fdiv" 1 0
-  (and (eq_attr "cpu" "ev4")
-       (and (eq_attr "type" "fdiv")
-           (eq_attr "opsize" "si")))
-  34 30)
-
-(define_function_unit "fdiv" 1 0
-  (and (eq_attr "cpu" "ev4")
-       (and (eq_attr "type" "fdiv")
-           (eq_attr "opsize" "di")))
-  63 59)
-\f
-;; EV5 scheduling.  EV5 can issue 4 insns per clock.
-;;
-;; EV5 has two asymetric integer units.  Model this with E0 & E1 along
-;; with the combined resource EBOX.
-
-(define_function_unit "ev5_ebox" 2 0
-  (and (eq_attr "cpu" "ev5")
-       (eq_attr "type" "!fbr,fcmov,fadd,fmul,fcpys,fdiv"))
-  1 1)
-
-; Memory takes at least 2 clocks.  Return one from here and fix up with
-; user-defined latencies in adjust_cost.
-(define_function_unit "ev5_ebox" 2 0
-  (and (eq_attr "cpu" "ev5")
-       (eq_attr "type" "ild,fld,ldsym"))
-  1 1)
-
-; Loads can dual issue with one another, but loads and stores do not mix.
-(define_function_unit "ev5_e0" 1 0
-  (and (eq_attr "cpu" "ev5")
-       (eq_attr "type" "ild,fld,ldsym"))
-  1 1
-  [(eq_attr "type" "ist,fst")])
-
-; Stores, shifts, multiplies can only issue to E0
-(define_function_unit "ev5_e0" 1 0
-  (and (eq_attr "cpu" "ev5")
-       (eq_attr "type" "ist,fst,shift,imul"))
-  1 1)
-
-; Motion video insns also issue only to E0, and take two ticks.
-(define_function_unit "ev5_e0" 1 0
-  (and (eq_attr "cpu" "ev5")
-       (eq_attr "type" "mvi"))
-  2 1)
-
-; Conditional moves always take 2 ticks.
-(define_function_unit "ev5_ebox" 2 0
-  (and (eq_attr "cpu" "ev5")
-       (eq_attr "type" "icmov"))
-  2 1)
-
-; Branches can only issue to E1
-(define_function_unit "ev5_e1" 1 0
-  (and (eq_attr "cpu" "ev5")
-       (eq_attr "type" "ibr,jsr"))
-  1 1)
-
-; Multiplies also use the integer multiplier.
-; ??? How to: "No instruction can be issued to pipe E0 exactly two
-; cycles before an integer multiplication completes."
-(define_function_unit "imul" 1 0
-  (and (eq_attr "cpu" "ev5")
-       (and (eq_attr "type" "imul")
-           (eq_attr "opsize" "si")))
-  8 4)
-
-(define_function_unit "imul" 1 0
-  (and (eq_attr "cpu" "ev5")
-       (and (eq_attr "type" "imul")
-           (eq_attr "opsize" "di")))
-  12 8)
-
-(define_function_unit "imul" 1 0
-  (and (eq_attr "cpu" "ev5")
-       (and (eq_attr "type" "imul")
-           (eq_attr "opsize" "udi")))
-  14 8)
-
-;; Similarly for the FPU we have two asymetric units.  But fcpys can issue
-;; on either so we have to play the game again.
-
-(define_function_unit "ev5_fbox" 2 0
-  (and (eq_attr "cpu" "ev5")
-       (eq_attr "type" "fadd,fcmov,fmul,fcpys,fbr,fdiv"))
-  4 1)
-
-(define_function_unit "ev5_fm" 1 0
-  (and (eq_attr "cpu" "ev5")
-       (eq_attr "type" "fmul"))
-  4 1)
-
-; Add and cmov as you would expect; fbr never produces a result;
-; fdiv issues through fa to the divider,
-(define_function_unit "ev5_fa" 1 0
-  (and (eq_attr "cpu" "ev5")
-       (eq_attr "type" "fadd,fcmov,fbr,fdiv"))
-  4 1)
-
-; ??? How to: "No instruction can be issued to pipe FA exactly five
-; cycles before a floating point divide completes."
-(define_function_unit "fdiv" 1 0
-  (and (eq_attr "cpu" "ev5")
-       (and (eq_attr "type" "fdiv")
-           (eq_attr "opsize" "si")))
-  15 15)                               ; 15 to 31 data dependent
-
-(define_function_unit "fdiv" 1 0
-  (and (eq_attr "cpu" "ev5")
-       (and (eq_attr "type" "fdiv")
-           (eq_attr "opsize" "di")))
-  22 22)                               ; 22 to 60 data dependent
-\f
-;; EV6 scheduling.  EV6 can issue 4 insns per clock.
-;;
-;; EV6 has two symmetric pairs ("clusters") of two asymetric integer units
-;; ("upper" and "lower"), yielding pipe names U0, U1, L0, L1.
-
-;; Conditional moves decompose into two independent primitives, each
-;; taking one cycle.  Since ev6 is out-of-order, we can't see anything
-;; but two cycles.
-(define_function_unit "ev6_ebox" 4 0
-  (and (eq_attr "cpu" "ev6")
-       (eq_attr "type" "icmov"))
-  2 1)
-
-(define_function_unit "ev6_ebox" 4 0
-  (and (eq_attr "cpu" "ev6")
-       (eq_attr "type" "!fbr,fcmov,fadd,fmul,fcpys,fdiv,fsqrt"))
-  1 1)
-
-;; Integer loads take at least 3 clocks, and only issue to lower units.
-;; Return one from here and fix up with user-defined latencies in adjust_cost.
-(define_function_unit "ev6_l" 2 0
-  (and (eq_attr "cpu" "ev6")
-       (eq_attr "type" "ild,ldsym,ist,fst"))
-  1 1)
-
-;; FP loads take at least 4 clocks.  Return two from here...
-(define_function_unit "ev6_l" 2 0
-  (and (eq_attr "cpu" "ev6")
-       (eq_attr "type" "fld"))
-  2 1)
-
-;; Motion video insns also issue only to U0, and take three ticks.
-(define_function_unit "ev6_u0" 1 0
-  (and (eq_attr "cpu" "ev6")
-       (eq_attr "type" "mvi"))
-  3 1)
-
-(define_function_unit "ev6_u" 2 0
-  (and (eq_attr "cpu" "ev6")
-       (eq_attr "type" "mvi"))
-  3 1)
-
-;; Shifts issue to either upper pipe.
-(define_function_unit "ev6_u" 2 0
-  (and (eq_attr "cpu" "ev6")
-       (eq_attr "type" "shift"))
-  1 1)
-
-;; Multiplies issue only to U1, and all take 7 ticks.
-;; Rather than create a new function unit just for U1, reuse IMUL
-(define_function_unit "imul" 1 0
-  (and (eq_attr "cpu" "ev6")
-       (eq_attr "type" "imul"))
-  7 1)
-
-(define_function_unit "ev6_u" 2 0
-  (and (eq_attr "cpu" "ev6")
-       (eq_attr "type" "imul"))
-  7 1)
-
-;; Branches issue to either upper pipe
-(define_function_unit "ev6_u" 2 0
-  (and (eq_attr "cpu" "ev6")
-       (eq_attr "type" "ibr"))
-  3 1)
-
-;; Calls only issue to L0.
-(define_function_unit "ev6_l0" 1 0
-  (and (eq_attr "cpu" "ev6")
-       (eq_attr "type" "jsr"))
-  1 1)
-
-(define_function_unit "ev6_l" 2 0
-  (and (eq_attr "cpu" "ev6")
-       (eq_attr "type" "jsr"))
-  1 1)
-
-;; Ftoi/itof only issue to lower pipes
-(define_function_unit "ev6_l" 2 0
-  (and (eq_attr "cpu" "ev6")
-       (eq_attr "type" "ftoi"))
-  3 1)
-
-(define_function_unit "ev6_l" 2 0
-  (and (eq_attr "cpu" "ev6")
-       (eq_attr "type" "itof"))
-  4 1)
-
-;; For the FPU we are very similar to EV5, except there's no insn that
-;; can issue to fm & fa, so we get to leave that out.
-
-(define_function_unit "ev6_fm" 1 0
-  (and (eq_attr "cpu" "ev6")
-       (eq_attr "type" "fmul"))
-  4 1)
-
-(define_function_unit "ev6_fa" 1 0
-  (and (eq_attr "cpu" "ev6")
-       (eq_attr "type" "fadd,fcpys,fbr,fdiv,fsqrt"))
-  4 1)
-
-(define_function_unit "ev6_fa" 1 0
-  (and (eq_attr "cpu" "ev6")
-       (eq_attr "type" "fcmov"))
-  8 1)
-
-(define_function_unit "fdiv" 1 0
-  (and (eq_attr "cpu" "ev6")
-       (and (eq_attr "type" "fdiv")
-           (eq_attr "opsize" "si")))
-  12 10)
-
-(define_function_unit "fdiv" 1 0
-  (and (eq_attr "cpu" "ev6")
-       (and (eq_attr "type" "fdiv")
-           (eq_attr "opsize" "di")))
-  15 13)
-
-(define_function_unit "fsqrt" 1 0
-  (and (eq_attr "cpu" "ev6")
-       (and (eq_attr "type" "fsqrt")
-           (eq_attr "opsize" "si")))
-  16 14)
-
-(define_function_unit "fsqrt" 1 0
-  (and (eq_attr "cpu" "ev6")
-       (and (eq_attr "type" "fsqrt")
-           (eq_attr "opsize" "di")))
-  32 30)
-
-; ??? The FPU communicates with memory and the integer register file
-; via two fp store units.  We need a slot in the fst immediately, and
-; a slot in LOW after the operand data is ready.  At which point the
-; data may be moved either to the store queue or the integer register
-; file and the insn retired.
-
+;; Include scheduling descriptions.
+  
+(include "ev4.md")
+(include "ev5.md")
+(include "ev6.md")
  \f
  ;; First define the arithmetic insns.  Note that the 32-bit forms also
  ;; sign-extend.
@@ -5018,7 +4716,8 @@ fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi"
    [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
    ""
    ""
-  [(set_attr "length" "0")])
+  [(set_attr "length" "0")
+   (set_attr "type" "none")])
  
  (define_insn "jump"
    [(set (pc)
diff --git a/gcc/config/alpha/ev4.md b/gcc/config/alpha/ev4.md

new file mode 100644 (file)

index 0000000..6816e44
--- /dev/null
+++ b/gcc/config/alpha/ev4.md
@@ -0,0 +1,142 @@
+;; Scheduling description for Alpha EV4.
+;;   Copyright (C) 2002 Free Software Foundation, Inc.
+;;
+;; This file is part of GNU CC.
+;;
+;; GNU CC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GNU CC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GNU CC; see the file COPYING.  If not, write to
+;; the Free Software Foundation, 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
+
+; On EV4 there are two classes of resources to consider: resources needed
+; to issue, and resources needed to execute.  IBUS[01] are in the first
+; category.  ABOX, BBOX, EBOX, FBOX, IMUL & FDIV make up the second.
+; (There are a few other register-like resources, but ...)
+
+(define_automaton "ev4_0,ev4_1,ev4_2")
+(define_cpu_unit "ev4_ib0,ev4_ib1,ev4_abox,ev4_bbox" "ev4_0")
+(define_cpu_unit "ev4_ebox,ev4_imul" "ev4_1")
+(define_cpu_unit "ev4_fbox,ev4_fdiv" "ev4_2")
+(define_reservation "ev4_ib01" "ev4_ib0|ev4_ib1")
+
+; Assume type "multi" single issues.
+(define_insn_reservation "ev4_multi" 1
+  (and (eq_attr "cpu" "ev4")
+       (eq_attr "type" "multi"))
+  "ev4_ib0+ev4_ib1")
+
+; Loads from L0 complete in three cycles.  adjust_cost still factors
+; in user-specified memory latency, so return 1 here.
+(define_insn_reservation "ev4_ld" 1
+  (and (eq_attr "cpu" "ev4")
+       (eq_attr "type" "ild,fld,ldsym"))
+  "ev4_ib01+ev4_abox")
+
+; Stores can issue before the data (but not address) is ready.
+(define_insn_reservation "ev4_ist" 1
+  (and (eq_attr "cpu" "ev4")
+       (eq_attr "type" "ist"))
+  "ev4_ib1+ev4_abox")
+
+(define_insn_reservation "ev4_fst" 1
+  (and (eq_attr "cpu" "ev4")
+       (eq_attr "type" "fst"))
+  "ev4_ib0+ev4_abox")
+
+; Branches have no delay cost, but do tie up the unit for two cycles.
+(define_insn_reservation "ev4_ibr" 2
+  (and (eq_attr "cpu" "ev4")
+       (eq_attr "type" "ibr,jsr"))
+  "ev4_ib1+ev4_bbox,ev4_bbox")
+
+(define_insn_reservation "ev4_fbr" 2
+  (and (eq_attr "cpu" "ev4")
+       (eq_attr "type" "fbr"))
+  "ev4_ib0+ev4_bbox,ev4_bbox")
+
+; Arithmetic insns normally have their results available after
+; two cycles.  There are a number of exceptions.
+
+(define_insn_reservation "ev4_iaddlog" 2
+  (and (eq_attr "cpu" "ev4")
+       (eq_attr "type" "iadd,ilog"))
+  "ev4_ib0+ev4_ebox")
+
+(define_bypass 1
+  "ev4_iaddlog"
+  "ev4_ibr,ev4_iaddlog,ev4_shiftcm,ev4_icmp,ev4_imulsi,ev4_imuldi")
+
+(define_insn_reservation "ev4_shiftcm" 2
+  (and (eq_attr "cpu" "ev4")
+       (eq_attr "type" "shift,icmov"))
+  "ev4_ib0+ev4_ebox")
+
+(define_insn_reservation "ev4_icmp" 2
+  (and (eq_attr "cpu" "ev4")
+       (eq_attr "type" "icmp"))
+  "ev4_ib0+ev4_ebox")
+
+(define_bypass 1 "ev4_icmp" "ev4_ibr")
+
+(define_bypass 0
+  "ev4_iaddlog,ev4_shiftcm,ev4_icmp"
+  "ev4_ist"
+  "store_data_bypass_p")
+
+; Multiplies use a non-pipelined imul unit.  Also, "no [ebox] insn can
+; be issued exactly three cycles before an integer multiply completes".
+
+(define_insn_reservation "ev4_imulsi" 21
+  (and (eq_attr "cpu" "ev4")
+       (and (eq_attr "type" "imul")
+           (eq_attr "opsize" "si")))
+  "ev4_ib0+ev4_imul,ev4_imul*18,ev4_ebox")
+
+(define_bypass 20 "ev4_imulsi" "ev4_ist" "store_data_bypass_p")
+
+(define_insn_reservation "ev4_imuldi" 23
+  (and (eq_attr "cpu" "ev4")
+       (and (eq_attr "type" "imul")
+           (eq_attr "opsize" "!si")))
+  "ev4_ib0+ev4_imul,ev4_imul*20,ev4_ebox")
+
+(define_bypass 22 "ev4_imuldi" "ev4_ist" "store_data_bypass_p")
+
+; Most FP insns have a 6 cycle latency, but with a 4 cycle bypass back in.
+(define_insn_reservation "ev4_fpop" 6
+  (and (eq_attr "cpu" "ev4")
+       (eq_attr "type" "fadd,fmul,fcpys,fcmov"))
+  "ev4_ib1+ev4_fbox")
+
+(define_bypass 4 "ev4_fpop" "ev4_fpop")
+
+; The floating point divider is not pipelined.  Also, "no FPOP insn can be
+; issued exactly five or exactly six cycles before an fdiv insn completes".
+
+(define_insn_reservation "ev4_fdivsf" 34
+  (and (eq_attr "cpu" "ev4")
+       (and (eq_attr "type" "fdiv")
+           (eq_attr "opsize" "si")))
+  "ev4_ib1+ev4_fdiv,ev4_fdiv*28,ev4_fdiv+ev4_fbox,ev4_fbox")
+
+(define_insn_reservation "ev4_fdivdf" 63
+  (and (eq_attr "cpu" "ev4")
+       (and (eq_attr "type" "fdiv")
+           (eq_attr "opsize" "di")))
+  "ev4_ib1+ev4_fdiv,ev4_fdiv*57,ev4_fdiv+ev4_fbox,ev4_fbox")
+
+; Traps don't consume or produce data.
+(define_insn_reservation "ev4_misc" 1
+  (and (eq_attr "cpu" "ev4")
+       (eq_attr "type" "misc"))
+  "ev4_ib1")
diff --git a/gcc/config/alpha/ev5.md b/gcc/config/alpha/ev5.md

new file mode 100644 (file)

index 0000000..f0dfbdf
--- /dev/null
+++ b/gcc/config/alpha/ev5.md
@@ -0,0 +1,180 @@
+;; Scheduling description for Alpha EV5.
+;;   Copyright (C) 2002 Free Software Foundation, Inc.
+;;
+;; This file is part of GNU CC.
+;;
+;; GNU CC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GNU CC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GNU CC; see the file COPYING.  If not, write to
+;; the Free Software Foundation, 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
+
+;; EV5 has two asymmetric integer units, E0 and E1, plus separate
+;; FP add and multiply units.
+
+(define_automaton "ev5_0,ev5_1")
+(define_cpu_unit "ev5_e0,ev5_e1,ev5_fa,ev5_fm" "ev5_0")
+(define_reservation "ev5_e01" "ev5_e0|ev5_e1")
+(define_reservation "ev5_fam" "ev5_fa|ev5_fm")
+(define_cpu_unit "ev5_imul" "ev5_0")
+(define_cpu_unit "ev5_fdiv" "ev5_1")
+
+; Assume type "multi" single issues.
+(define_insn_reservation "ev5_multi" 1
+  (and (eq_attr "cpu" "ev5")
+       (eq_attr "type" "multi"))
+  "ev5_e0+ev5_e1+ev5_fa+ev5_fm")
+
+; Stores can only issue to E0, and may not issue with loads.
+; Model this with some fake units.
+
+(define_cpu_unit "ev5_l0,ev5_l1,ev5_st" "ev5_0")
+(define_reservation "ev5_ld" "ev5_l0|ev5_l1")
+(exclusion_set "ev5_l0,ev5_l1" "ev5_st")
+
+(define_insn_reservation "ev5_st" 1
+  (and (eq_attr "cpu" "ev5")
+       (eq_attr "type" "ist,fst"))
+  "ev5_e0+ev5_st")
+
+; Loads from L0 complete in two cycles.  adjust_cost still factors
+; in user-specified memory latency, so return 1 here.
+(define_insn_reservation "ev5_ld" 1
+  (and (eq_attr "cpu" "ev5")
+       (eq_attr "type" "ild,fld,ldsym"))
+  "ev5_e01+ev5_ld")
+
+; Integer branches slot only to E1.
+(define_insn_reservation "ev5_ibr" 1
+  (and (eq_attr "cpu" "ev5")
+       (eq_attr "type" "ibr"))
+  "ev5_e1")
+
+(define_insn_reservation "ev5_jsr" 1
+  (and (eq_attr "cpu" "ev5")
+       (eq_attr "type" "jsr"))
+  "ev5_e1")
+
+(define_insn_reservation "ev5_shiftmvi" 2
+  (and (eq_attr "cpu" "ev5")
+       (eq_attr "type" "shift,mvi"))
+  "ev5_e0")
+
+(define_insn_reservation "ev5_cmov" 2
+  (and (eq_attr "cpu" "ev5")
+       (eq_attr "type" "icmov"))
+  "ev5_e01")
+
+(define_insn_reservation "ev5_iadd" 1
+  (and (eq_attr "cpu" "ev5")
+       (eq_attr "type" "iadd"))
+  "ev5_e01")
+
+(define_insn_reservation "ev5_ilogcmp" 1
+  (and (eq_attr "cpu" "ev5")
+       (eq_attr "type" "ilog,icmp"))
+  "ev5_e01")
+
+; Conditional move and branch can issue the same cycle as the test.
+(define_bypass 0 "ev5_ilogcmp" "ev5_ibr,ev5_cmov" "if_test_bypass_p")
+
+; Multiplies use a non-pipelined imul unit.  Also, "no insn can be issued
+; to E0 exactly two cycles before an integer multiply completes".
+
+(define_insn_reservation "ev5_imull" 8
+  (and (eq_attr "cpu" "ev5")
+       (and (eq_attr "type" "imul")
+           (eq_attr "opsize" "si")))
+  "ev5_e0+ev5_imul,ev5_imul*3,nothing,ev5_e0")
+
+(define_insn_reservation "ev5_imulq" 12
+  (and (eq_attr "cpu" "ev5")
+       (and (eq_attr "type" "imul")
+           (eq_attr "opsize" "di")))
+  "ev5_e0+ev5_imul,ev5_imul*7,nothing,ev5_e0")
+
+(define_insn_reservation "ev5_imulh" 14
+  (and (eq_attr "cpu" "ev5")
+       (and (eq_attr "type" "imul")
+           (eq_attr "opsize" "udi")))
+  "ev5_e0+ev5_imul,ev5_imul*7,nothing*3,ev5_e0")
+
+; The multiplier is unable to receive data from Ebox bypass paths.  The
+; instruction issues at the expected time, but its latency is increased
+; by the time it takes for the input data to become available to the
+; multiplier.  For example, an IMULL instruction issued one cycle later
+; than an ADDL instruction, which produced one of its operands, has a
+; latency of 10 (8 + 2).  If the IMULL instruction is issued two cycles
+; later than the ADDL instruction, the latency is 9 (8 + 1).
+;
+; Model this instead with increased latency on the input instruction.
+
+(define_bypass 3
+  "ev5_ld,ev5_shiftmvi,ev5_cmov,ev5_iadd,ev5_ilogcmp"
+  "ev5_imull,ev5_imulq,ev5_imulh")
+
+(define_bypass  9 "ev5_imull" "ev5_imull,ev5_imulq,ev5_imulh")
+(define_bypass 13 "ev5_imulq" "ev5_imull,ev5_imulq,ev5_imulh")
+(define_bypass 15 "ev5_imulh" "ev5_imull,ev5_imulq,ev5_imulh")
+
+; Similarly for the FPU we have two asymmetric units.
+
+(define_insn_reservation "ev5_fadd" 4
+  (and (eq_attr "cpu" "ev5")
+       (eq_attr "type" "fadd,fcmov"))
+  "ev5_fa")
+
+(define_insn_reservation "ev5_fbr" 1
+  (and (eq_attr "cpu" "ev5")
+       (eq_attr "type" "fbr"))
+  "ev5_fa")
+
+(define_insn_reservation "ev5_fcpys" 4
+  (and (eq_attr "cpu" "ev5")
+       (eq_attr "type" "fcpys"))
+  "ev5_fam")
+
+(define_insn_reservation "ev5_fmul" 4
+  (and (eq_attr "cpu" "ev5")
+       (eq_attr "type" "fmul"))
+  "ev5_fm")
+
+; The floating point divider is not pipelined.  Also, "no insn can be issued
+; to FA exactly five cycles before an fdiv insn completes".
+;
+; ??? Do not model this late reservation due to the enormously increased
+; size of the resulting DFA.
+;
+; ??? Putting ev5_fa and ev5_fdiv alone into the same automaton produces
+; a DFA of acceptable size, but putting ev5_fm and ev5_fa into separate
+; automata produces incorrect results for insns that can choose one or
+; the other, i.e. ev5_fcpys.
+
+(define_insn_reservation "ev5_fdivsf" 15
+  (and (eq_attr "cpu" "ev5")
+       (and (eq_attr "type" "fdiv")
+           (eq_attr "opsize" "si")))
+  ; "ev5_fa+ev5_fdiv,ev5_fdiv*9,ev5_fa+ev5_fdiv,ev5_fdiv*4"
+  "ev5_fa+ev5_fdiv,ev5_fdiv*14")
+
+(define_insn_reservation "ev5_fdivdf" 22
+  (and (eq_attr "cpu" "ev5")
+       (and (eq_attr "type" "fdiv")
+           (eq_attr "opsize" "di")))
+  ; "ev5_fa+ev5_fdiv,ev5_fdiv*17,ev5_fa+ev5_fdiv,ev5_fdiv*4"
+  "ev5_fa+ev5_fdiv,ev5_fdiv*21")
+
+; Traps don't consume or produce data; rpcc is latency 2 if we ever add it.
+(define_insn_reservation "ev5_misc" 2
+  (and (eq_attr "cpu" "ev5")
+       (eq_attr "type" "misc"))
+  "ev5_e0")
diff --git a/gcc/config/alpha/ev6.md b/gcc/config/alpha/ev6.md

new file mode 100644 (file)

index 0000000..78bb51f
--- /dev/null
+++ b/gcc/config/alpha/ev6.md
@@ -0,0 +1,173 @@
+;; Scheduling description for Alpha EV6.
+;;   Copyright (C) 2002 Free Software Foundation, Inc.
+;;
+;; This file is part of GNU CC.
+;;
+;; GNU CC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GNU CC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GNU CC; see the file COPYING.  If not, write to
+;; the Free Software Foundation, 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
+
+; EV6 can issue 4 insns per clock.  It's out-of-order, so this isn't
+; expected to help over-much, but a precise description can be important
+; for software pipelining.
+;
+; EV6 has two symmetric pairs ("clusters") of two asymmetric integer
+; units ("upper" and "lower"), yielding pipe names U0, U1, L0, L1.
+;
+; ??? The clusters have independent register files that are re-synced
+; every cycle.  Thus there is one additional cycle of latency between
+; insns issued on different clusters.  Possibly model that by duplicating
+; all EBOX insn_reservations that can issue to either cluster, increasing
+; all latencies by one, and adding bypasses within the cluster.
+;
+; ??? In addition, instruction order affects cluster issue.
+
+(define_automaton "ev6_0,ev6_1")
+(define_cpu_unit "ev6_u0,ev6_u1,ev6_l0,ev6_l1" "ev6_0")
+(define_reservation "ev6_u" "ev6_u0|ev6_u1")
+(define_reservation "ev6_l" "ev6_l0|ev6_l1")
+(define_reservation "ev6_ebox" "ev6_u|ev6_l")
+
+(define_cpu_unit "ev6_fa" "ev6_1")
+(define_cpu_unit "ev6_fm,ev6_fst0,ev6_fst1" "ev6_0")
+(define_reservation "ev6_fst" "ev6_fst0|ev6_fst1")
+
+; Assume type "multi" single issues.
+(define_insn_reservation "ev6_multi" 1
+  (and (eq_attr "cpu" "ev6")
+       (eq_attr "type" "multi"))
+  "ev6_u0+ev6_u1+ev6_l0+ev6_l1+ev6_fa+ev6_fm+ev6_fst0+ev6_fst1")
+
+; Integer loads take at least 3 clocks, and only issue to lower units.
+; adjust_cost still factors in user-specified memory latency, so use 1 here.
+(define_insn_reservation "ev6_ild" 1
+  (and (eq_attr "cpu" "ev6")
+       (eq_attr "type" "ild,ldsym"))
+  "ev6_l")
+
+(define_insn_reservation "ev6_ist" 1
+  (and (eq_attr "cpu" "ev6")
+       (eq_attr "type" "ist"))
+  "ev6_l")
+
+; FP loads take at least 4 clocks.  adjust_cost still factors
+; in user-specified memory latency, so use a latency of 2 here.
+(define_insn_reservation "ev6_fld" 2
+  (and (eq_attr "cpu" "ev6")
+       (eq_attr "type" "fld"))
+  "ev6_l")
+
+; The FPU communicates with memory and the integer register file
+; via two fp store units.  We need a slot in the fst immediately, and
+; a slot in LOW after the operand data is ready.  At which point the
+; data may be moved either to the store queue or the integer register
+; file and the insn retired.
+
+(define_insn_reservation "ev6_fst" 3
+  (and (eq_attr "cpu" "ev6")
+       (eq_attr "type" "fst"))
+  "ev6_fst,nothing,ev6_l")
+
+; Arithmetic goes anywhere.
+(define_insn_reservation "ev6_arith" 1
+  (and (eq_attr "cpu" "ev6")
+       (eq_attr "type" "iadd,ilog,icmp"))
+  "ev6_ebox")
+
+; Motion video insns issue only to U0, and take three ticks.
+(define_insn_reservation "ev6_mvi" 3
+  (and (eq_attr "cpu" "ev6")
+       (eq_attr "type" "mvi"))
+  "ev6_u0")
+
+; Shifts issue to upper units.
+(define_insn_reservation "ev6_shift" 1
+  (and (eq_attr "cpu" "ev6")
+       (eq_attr "type" "shift"))
+  "ev6_u")
+
+; Multiplies issue only to U1, and all take 7 ticks.
+(define_insn_reservation "ev6_imul" 7
+  (and (eq_attr "cpu" "ev6")
+       (eq_attr "type" "imul"))
+  "ev6_u1")
+
+; Conditional moves decompose into two independent primitives, each taking
+; one cycle.  Since ev6 is out-of-order, we can't see anything but two cycles.
+(define_insn_reservation "ev6_icmov" 2
+  (and (eq_attr "cpu" "ev6")
+       (eq_attr "type" "icmov"))
+  "ev6_ebox,ev6_ebox")
+
+; Integer branches issue to upper units.
+(define_insn_reservation "ev6_ibr" 1
+  (and (eq_attr "cpu" "ev6")
+       (eq_attr "type" "ibr"))
+  "ev6_u")
+
+; Calls only issue to L0.
+(define_insn_reservation "ev6_jsr" 1
+  (and (eq_attr "cpu" "ev6")
+       (eq_attr "type" "jsr"))
+  "ev6_l0")
+
+; Ftoi/itof only issue to lower pipes.
+(define_insn_reservation "ev6_itof" 3
+  (and (eq_attr "cpu" "ev6")
+       (eq_attr "type" "itof"))
+  "ev6_l")
+
+(define_insn_reservation "ev6_ftoi" 3
+  (and (eq_attr "cpu" "ev6")
+       (eq_attr "type" "ftoi"))
+  "ev6_fst,nothing,ev6_l")
+
+(define_insn_reservation "ev6_fmul" 4
+  (and (eq_attr "cpu" "ev6")
+       (eq_attr "type" "fmul"))
+  "ev6_fm")
+
+(define_insn_reservation "ev6_fadd" 4
+  (and (eq_attr "cpu" "ev6")
+       (eq_attr "type" "fadd,fcpys,fbr"))
+  "ev6_fa")
+
+(define_insn_reservation "ev6_fcmov" 8
+  (and (eq_attr "cpu" "ev6")
+       (eq_attr "type" "fcmov"))
+  "ev6_fa,nothing*3,ev6_fa")
+
+(define_insn_reservation "ev6_fdivsf" 12
+  (and (eq_attr "cpu" "ev6")
+       (and (eq_attr "type" "fdiv")
+           (eq_attr "opsize" "si")))
+  "ev6_fa*9")
+
+(define_insn_reservation "ev6_fdivdf" 15
+  (and (eq_attr "cpu" "ev6")
+       (and (eq_attr "type" "fdiv")
+           (eq_attr "opsize" "di")))
+  "ev6_fa*12")
+
+(define_insn_reservation "ev6_sqrtsf" 18
+  (and (eq_attr "cpu" "ev6")
+       (and (eq_attr "type" "fsqrt")
+           (eq_attr "opsize" "si")))
+  "ev6_fa*15")
+
+(define_insn_reservation "ev6_sqrtdf" 33
+  (and (eq_attr "cpu" "ev6")
+       (and (eq_attr "type" "fsqrt")
+           (eq_attr "opsize" "di")))
+  "ev6_fa*30")
author	Richard Henderson <rth@redhat.com>
	Sun, 5 May 2002 21:54:39 +0000 (14:54 -0700)
committer	Richard Henderson <rth@gcc.gnu.org>
	Sun, 5 May 2002 21:54:39 +0000 (14:54 -0700)
gcc/ChangeLog		patch \| blob \| blame \| history
gcc/config/alpha/alpha.c		patch \| blob \| blame \| history
gcc/config/alpha/alpha.md		patch \| blob \| blame \| history
gcc/config/alpha/ev4.md	[new file with mode: 0644]	patch \| blob
gcc/config/alpha/ev5.md	[new file with mode: 0644]	patch \| blob
gcc/config/alpha/ev6.md	[new file with mode: 0644]	patch \| blob