[PATCH]: GCC Scheduler support for R10000 on MIPS

Kumba kumba@gentoo.org
Sat Aug 2 04:29:00 GMT 2008


Kumba wrote:
> 
> Hi all,
> 
> Included is a patch that adds support to GCC for scheduling on the 
> R10000 MIPS processor family, including the R12000 & R14000 (which I've 
> tested on for quite some time).  R16000 should similarly function quite 
> well with this patch, as they're all just die shrinks of the original 
> R10000.
> 
> It adds 'r10000', 'r12000', 'r14000' and 'r16000' (which all currently
> mean the same thing) as accepted values for the -march and -mtune options.
> 
> I've sent this up before, but I believe it just got lost in the 
> shuffle.  Is there anything else that I can do to assist in getting this 
> into GCC?


My bad, I sent the wrong patch up.  That one was against gcc-4.3.1.  Here's the 
version against gcc-trunk.

gcc/
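	* config/mips/10000.md: New file.  Add R10000 scheduler description.
	* config/mips/mips.c: Add r1x000 CPU names, rtx costs and issue rate.
	* config/mips/mips.h: Add PROCESSOR_R1x000, TARGET_MIPS1x000 and TUNE_MIPS1x000.
	* config/mips/mips.md: Add r1x000 to the "cpu" attribute; include 10000.md.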


diff -Naurp gcc.orig/gcc/config/mips/10000.md gcc/gcc/config/mips/10000.md
--- gcc.orig/gcc/config/mips/10000.md	1969-12-31 19:00:00.000000000 -0500
+++ gcc/gcc/config/mips/10000.md	2008-08-01 22:33:46.000000000 -0400
@@ -0,0 +1,246 @@
+;; VR1x000 pipeline description.
+;;   Copyright (C) 2005, 2006 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 2, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING.  If not, write to the
+;; Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+;; MA 02110-1301, USA.
+
+
+;; This file overrides parts of generic.md.  It is derived from the
+;; old define_function_unit description.
+
+
+
+;; R12K/R14K/R16K are derivatives of R10K, thus copy its description
+;; until specific tuning for each is added
+
+
+;; R10000 has int queue, fp queue, address queue
+(define_automaton "r10k_int, r10k_fp, r10k_addr")
+
+;; R10000 has 2 integer ALUs, fp-adder and fp-multiplier, load/store
+(define_cpu_unit "r10k_alu1" "r10k_int")
+(define_cpu_unit "r10k_alu2" "r10k_int")
+(define_cpu_unit "r10k_fpadd" "r10k_fp")
+(define_cpu_unit "r10k_fpmpy" "r10k_fp")
+(define_cpu_unit "r10k_loadstore" "r10k_addr")
+
+;; R10000 has separate fp-div and fp-sqrt units as well and these can
+;; execute in parallel, however their issue & completion logic is shared
+;; by the fp-multiplier
+(define_cpu_unit "r10k_fpdiv" "r10k_fp")
+(define_cpu_unit "r10k_fpsqrt" "r10k_fp")
+
+
+
+
+;; loader
+(define_insn_reservation "r10k_load" 2
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (eq_attr "type" "load,prefetch,prefetchx"))
+  "r10k_loadstore")
+
+(define_insn_reservation "r10k_store" 0
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (eq_attr "type" "store,fpstore,fpidxstore"))
+  "r10k_loadstore")
+
+(define_insn_reservation "r10k_fpload" 3
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (eq_attr "type" "fpload,fpidxload"))
+  "r10k_loadstore")
+
+
+
+
+;; Integer add/sub + logic ops, and mf/mt hi/lo can be done by alu1 or alu2
+;; Miscellaneous arith goes here too (this is a guess)
+(define_insn_reservation "r10k_arith" 1
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (eq_attr "type" "arith,mfhilo,mthilo,slt,clz,const,nop,trap"))
+  "r10k_alu1 | r10k_alu2")
+
+
+
+
+;; ALU1 handles shifts, branch evaluation, and conditional moves.
+;;
+;; The branch unit is distinct from, but attached to, ALU1, and can
+;; only do one branch per cycle (may still need implementing?).
+;;
+;; jump, call - unsure if the branch unit handles these too (added for now)
+(define_insn_reservation "r10k_shift" 1
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (eq_attr "type" "shift,branch,jump,call"))
+  "r10k_alu1")
+
+(define_insn_reservation "r10k_int_cmove" 1
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (and (eq_attr "type" "condmove")
+            (eq_attr "mode" "SI,DI")))
+  "r10k_alu1")
+
+
+
+
+;; Coprocessor Moves
+;; mtc1/dmtc1 are handled by ALU1
+;; mfc1/dmfc1 are handled by the fp-multiplier
+(define_insn_reservation "r10k_mt_xfer" 3
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (eq_attr "type" "mtc"))
+  "r10k_alu1")
+
+(define_insn_reservation "r10k_mf_xfer" 2
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (eq_attr "type" "mfc"))
+  "r10k_fpmpy")
+
+
+
+
+;; Only ALU2 does int multiplications and divisions.
+;; R10K allows an int insn using register Lo to be issued
+;; one cycle earlier than an insn using register Hi for
+;; the insns below.  This is not modeled for now, until the
+;; correct usage of lo_operand() is figured out.
+;;
+;; Divides keep ALU2 busy; the "r10k_alu2 * N" reservations below
+;; appear to express this for the full latency (needs confirming).
+(define_insn_reservation "r10k_imul_single" 6
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (and (eq_attr "type" "imul,imul3,imadd")
+            (eq_attr "mode" "SI")))
+  "r10k_alu2 * 6")
+
+(define_insn_reservation "r10k_imul_double" 10
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (and (eq_attr "type" "imul,imul3,imadd")
+            (eq_attr "mode" "DI")))
+  "r10k_alu2 * 10")
+
+(define_insn_reservation "r10k_idiv_single" 35
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (and (eq_attr "type" "idiv")
+            (eq_attr "mode" "SI")))
+  "r10k_alu2 * 35")
+
+(define_insn_reservation "r10k_idiv_double" 67
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (and (eq_attr "type" "idiv")
+            (eq_attr "mode" "DI")))
+  "r10k_alu2 * 67")
+
+
+
+
+;; FP add/sub, mul, abs value, neg, comp, & moves
+(define_insn_reservation "r10k_fp_miscadd" 2
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (eq_attr "type" "fadd,fabs,fneg,fcmp"))
+  "r10k_fpadd")
+
+(define_insn_reservation "r10k_fp_miscmul" 2
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (eq_attr "type" "fmul,fmove"))
+  "r10k_fpmpy")
+
+(define_insn_reservation "r10k_fp_cmove" 2
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (and (eq_attr "type" "condmove")
+            (eq_attr "mode" "SF,DF")))
+  "r10k_fpmpy")
+
+
+
+
+;; fcvt.s.[wl] has latency 4, repeat 2
+;; All other fcvt have latency 2, repeat 1
+(define_insn_reservation "r10k_fcvt_single" 4
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (and (eq_attr "type" "fcvt")
+            (eq_attr "cnv_mode" "I2S")))
+  "r10k_fpadd * 2")
+
+(define_insn_reservation "r10k_fcvt_other" 2
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (and (eq_attr "type" "fcvt")
+            (eq_attr "cnv_mode" "!I2S")))
+  "r10k_fpadd")
+
+
+
+
+;; fmadd -  Runs through fp-adder first, then fp-multiplier
+;;
+;; The latency for fmadd is 2 cycles if the result is used
+;; by another fmadd instruction
+(define_insn_reservation "r10k_fmadd" 4
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (eq_attr "type" "fmadd"))
+  "r10k_fpadd, r10k_fpmpy")
+
+(define_bypass 2 "r10k_fmadd" "r10k_fmadd")
+
+
+
+
+;; fp Divisions & square roots
+(define_insn_reservation "r10k_fdiv_single" 12
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (and (eq_attr "type" "fdiv,frdiv")
+            (eq_attr "mode" "SF")))
+  "r10k_fpdiv * 14")
+
+(define_insn_reservation "r10k_fdiv_double" 19
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (and (eq_attr "type" "fdiv,frdiv")
+            (eq_attr "mode" "DF")))
+  "r10k_fpdiv * 21")
+
+(define_insn_reservation "r10k_fsqrt_single" 18
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (and (eq_attr "type" "fsqrt")
+            (eq_attr "mode" "SF")))
+  "r10k_fpsqrt * 20")
+
+(define_insn_reservation "r10k_fsqrt_double" 33
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (and (eq_attr "type" "fsqrt")
+            (eq_attr "mode" "DF")))
+  "r10k_fpsqrt * 35")
+
+(define_insn_reservation "r10k_frsqrt_single" 30
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (and (eq_attr "type" "frsqrt")
+            (eq_attr "mode" "SF")))
+  "r10k_fpsqrt * 20")
+
+(define_insn_reservation "r10k_frsqrt_double" 52
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (and (eq_attr "type" "frsqrt")
+            (eq_attr "mode" "DF")))
+  "r10k_fpsqrt * 35")
+
+
+
+
+;; Unknown/multi (this is a guess)
+(define_insn_reservation "r10k_unknown" 1
+  (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
+       (eq_attr "type" "unknown,multi"))
+  "r10k_alu1 + r10k_alu2")
+
diff -Naurp gcc.orig/gcc/config/mips/mips.c gcc/gcc/config/mips/mips.c
--- gcc.orig/gcc/config/mips/mips.c	2008-08-01 21:55:41.000000000 -0400
+++ gcc/gcc/config/mips/mips.c	2008-08-01 22:33:46.000000000 -0400
@@ -593,6 +593,10 @@ static const struct mips_cpu_info mips_c

    /* MIPS IV processors. */
    { "r8000", PROCESSOR_R8000, 4, 0 },
+  { "r10000", PROCESSOR_R10000, 4, 0 },
+  { "r12000", PROCESSOR_R12000, 4, 0 },
+  { "r14000", PROCESSOR_R14000, 4, 0 },
+  { "r16000", PROCESSOR_R16000, 4, 0 },
    { "vr5000", PROCESSOR_R5000, 4, 0 },
    { "vr5400", PROCESSOR_R5400, 4, 0 },
    { "vr5500", PROCESSOR_R5500, 4, PTF_AVOID_BRANCHLIKELY },
@@ -988,6 +992,58 @@ static const struct mips_rtx_cost_data m
  		     1,           /* branch_cost */
  		     4            /* memory_latency */
    },
+  { /* R10000 */
+    COSTS_N_INSNS (2),            /* fp_add */
+    COSTS_N_INSNS (2),            /* fp_mult_sf */
+    COSTS_N_INSNS (2),            /* fp_mult_df */
+    COSTS_N_INSNS (12),           /* fp_div_sf */
+    COSTS_N_INSNS (19),           /* fp_div_df */
+    COSTS_N_INSNS (6),            /* int_mult_si */
+    COSTS_N_INSNS (10),           /* int_mult_di */
+    COSTS_N_INSNS (35),           /* int_div_si */
+    COSTS_N_INSNS (67),           /* int_div_di */
+		     1,           /* branch_cost */
+		     4            /* memory_latency */
+  },
+  { /* R12000 */
+    COSTS_N_INSNS (2),            /* fp_add */
+    COSTS_N_INSNS (2),            /* fp_mult_sf */
+    COSTS_N_INSNS (2),            /* fp_mult_df */
+    COSTS_N_INSNS (12),           /* fp_div_sf */
+    COSTS_N_INSNS (19),           /* fp_div_df */
+    COSTS_N_INSNS (6),            /* int_mult_si */
+    COSTS_N_INSNS (10),           /* int_mult_di */
+    COSTS_N_INSNS (35),           /* int_div_si */
+    COSTS_N_INSNS (67),           /* int_div_di */
+		     1,           /* branch_cost */
+		     4            /* memory_latency */
+  },
+  { /* R14000 */
+    COSTS_N_INSNS (2),            /* fp_add */
+    COSTS_N_INSNS (2),            /* fp_mult_sf */
+    COSTS_N_INSNS (2),            /* fp_mult_df */
+    COSTS_N_INSNS (12),           /* fp_div_sf */
+    COSTS_N_INSNS (19),           /* fp_div_df */
+    COSTS_N_INSNS (6),            /* int_mult_si */
+    COSTS_N_INSNS (10),           /* int_mult_di */
+    COSTS_N_INSNS (35),           /* int_div_si */
+    COSTS_N_INSNS (67),           /* int_div_di */
+		     1,           /* branch_cost */
+		     4            /* memory_latency */
+  },
+  { /* R16000 */
+    COSTS_N_INSNS (2),            /* fp_add */
+    COSTS_N_INSNS (2),            /* fp_mult_sf */
+    COSTS_N_INSNS (2),            /* fp_mult_df */
+    COSTS_N_INSNS (12),           /* fp_div_sf */
+    COSTS_N_INSNS (19),           /* fp_div_df */
+    COSTS_N_INSNS (6),            /* int_mult_si */
+    COSTS_N_INSNS (10),           /* int_mult_di */
+    COSTS_N_INSNS (35),           /* int_div_si */
+    COSTS_N_INSNS (67),           /* int_div_di */
+		     1,           /* branch_cost */
+		     4            /* memory_latency */
+  },
    { /* SB1 */
      /* These costs are the same as the SB-1A below.  */
      COSTS_N_INSNS (4),            /* fp_add */
@@ -9872,7 +9928,13 @@ mips_issue_rate (void)
  	 but in reality only a maximum of 3 insns can be issued as
  	 floating-point loads and stores also require a slot in the
  	 AGEN pipe.  */
-     return 4;
+    case PROCESSOR_R10000:
+    case PROCESSOR_R12000:
+    case PROCESSOR_R14000:
+    case PROCESSOR_R16000:
+      /* All R10K Processors are quad-issue (being the first MIPS
+         processors to support this feature). */
+      return 4;

      case PROCESSOR_20KC:
      case PROCESSOR_R4130:
diff -Naurp gcc.orig/gcc/config/mips/mips.h gcc/gcc/config/mips/mips.h
--- gcc.orig/gcc/config/mips/mips.h	2008-08-01 21:55:41.000000000 -0400
+++ gcc/gcc/config/mips/mips.h	2008-08-01 22:33:46.000000000 -0400
@@ -66,6 +66,10 @@ enum processor_type {
    PROCESSOR_R7000,
    PROCESSOR_R8000,
    PROCESSOR_R9000,
+  PROCESSOR_R10000,
+  PROCESSOR_R12000,
+  PROCESSOR_R14000,
+  PROCESSOR_R16000,
    PROCESSOR_SB1,
    PROCESSOR_SB1A,
    PROCESSOR_SR71000,
@@ -241,6 +245,10 @@ enum mips_code_readable_setting {
  #define TARGET_MIPS5500             (mips_arch == PROCESSOR_R5500)
  #define TARGET_MIPS7000             (mips_arch == PROCESSOR_R7000)
  #define TARGET_MIPS9000             (mips_arch == PROCESSOR_R9000)
+#define TARGET_MIPS10000            (mips_arch == PROCESSOR_R10000)
+#define TARGET_MIPS12000            (mips_arch == PROCESSOR_R12000)
+#define TARGET_MIPS14000            (mips_arch == PROCESSOR_R14000)
+#define TARGET_MIPS16000            (mips_arch == PROCESSOR_R16000)
  #define TARGET_SB1                  (mips_arch == PROCESSOR_SB1		\
  				     || mips_arch == PROCESSOR_SB1A)
  #define TARGET_SR71K                (mips_arch == PROCESSOR_SR71000)
@@ -267,6 +275,10 @@ enum mips_code_readable_setting {
  #define TUNE_MIPS6000               (mips_tune == PROCESSOR_R6000)
  #define TUNE_MIPS7000               (mips_tune == PROCESSOR_R7000)
  #define TUNE_MIPS9000               (mips_tune == PROCESSOR_R9000)
+#define TUNE_MIPS10000              (mips_tune == PROCESSOR_R10000)
+#define TUNE_MIPS12000              (mips_tune == PROCESSOR_R12000)
+#define TUNE_MIPS14000              (mips_tune == PROCESSOR_R14000)
+#define TUNE_MIPS16000              (mips_tune == PROCESSOR_R16000)
  #define TUNE_SB1                    (mips_tune == PROCESSOR_SB1		\
  				     || mips_tune == PROCESSOR_SB1A)

diff -Naurp gcc.orig/gcc/config/mips/mips.md gcc/gcc/config/mips/mips.md
--- gcc.orig/gcc/config/mips/mips.md	2008-08-01 21:55:41.000000000 -0400
+++ gcc/gcc/config/mips/mips.md	2008-08-01 23:05:01.000000000 -0400
@@ -553,7 +553,7 @@
  ;; Attribute describing the processor.  This attribute must match exactly
  ;; with the processor_type enumeration in mips.h.
  (define_attr "cpu"
-  "r3000,4kc,4kp,5kc,5kf,20kc,24kc,24kf2_1,24kf1_1,74kc,74kf2_1,74kf1_1,74kf3_2,loongson_2e,loongson_2f,m4k,r3900,r6000,r4000,r4100,r4111,r4120,r4130,r4300,r4600,r4650,r5000,r5400,r5500,r7000,r8000,r9000,sb1,sb1a,sr71000,xlr"
+  "r3000,4kc,4kp,5kc,5kf,20kc,24kc,24kf2_1,24kf1_1,74kc,74kf2_1,74kf1_1,74kf3_2,loongson_2e,loongson_2f,m4k,r3900,r6000,r4000,r4100,r4111,r4120,r4130,r4300,r4600,r4650,r5000,r5400,r5500,r7000,r8000,r9000,r10000,r12000,r14000,r16000,sb1,sb1a,sr71000,xlr"
    (const (symbol_ref "mips_tune")))

  ;; The type of hardware hazard associated with this instruction.
@@ -903,6 +903,7 @@
  (include "6000.md")
  (include "7000.md")
  (include "9000.md")
+(include "10000.md")
  (include "sb1.md")
  (include "sr71k.md")
  (include "xlr.md")







-- 
Joshua Kinard
Gentoo/MIPS
kumba@gentoo.org

"The past tempts us, the present confuses us, the future frightens us.  And our 
lives slip away, moment by moment, lost in that vast, terrible in-between."

--Emperor Turhan, Centauri Republic



More information about the Gcc-patches mailing list