This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[committed] Tweak config/mips/5500.md for the VR5500A


This patch changes the multiplication handling in config/mips/5500.md
so that it performs better on the newer VR5500A.  It also changes the
latency of stores to zero, which seemed to give a consistent improvement
in benchmark results.

The original idea behind setting the store latency to zero was that most
store-load dependencies are conservative (being based on alias sets, etc.).
Few of them actually turn out to be real dependencies at run time.
It seemed that for most cases, assigning a nonzero cost to store-load
dependencies was basically saying "this store has a higher cost because
this load can't issue before it".  But the same is true of output
dependencies and anti-dependencies in general, which mips_adjust_cost
gives a cost of zero.  It seemed more consistent to give store-load
dependencies a cost of zero as well.

So anyway, I tried it, and it produced good results, so I decided to
keep it.  I don't really know how much of the improvement is down to
the original motivation, though. ;)  The VR5500 is an aggressively
out-of-order target, so the compiler can't really predict execution
profiles all that accurately.

Tested on mips64vrel-elf and mips64el-linux-gnu.  Applied to head.

Richard


	* config/mips/5500.md (ir_vr55_store): Set latency to 0.
	(ir_vr55_hilo): Split into...
	(ir_vr55_mfhilo, ir_vr55_mthilo): ...these new reservations.
	(ir_vr55_imul_si, ir_vr55_imadd): Change latency to 5.
	(ir_vr55_imul_di): Change latency to 9.  Reserve vr55_mac for 4 cycles.
	Add various multiplication bypasses.
	* config/mips/mips.c (mips_rtx_costs): Adjust VR5500 costs for integer
	multiplication.

Index: config/mips/5500.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/mips/5500.md,v
retrieving revision 1.6
diff -u -p -F^\([(a-zA-Z0-9_]\|#define\) -r1.6 5500.md
--- config/mips/5500.md	18 Mar 2004 23:47:31 -0000	1.6
+++ config/mips/5500.md	6 May 2004 08:08:33 -0000
@@ -31,7 +31,12 @@ (define_insn_reservation "ir_vr55_load" 
        (eq_attr "type" "load,fpload,fpidxload"))
   "vr55_mem")
 
-(define_insn_reservation "ir_vr55_store" 1
+(define_bypass 4
+  "ir_vr55_load"
+  "ir_vr55_mthilo,ir_vr55_imul_si,ir_vr55_imul_di,ir_vr55_imadd,
+   ir_vr55_idiv_si,ir_vr55_idiv_di")
+
+(define_insn_reservation "ir_vr55_store" 0
   (and (eq_attr "cpu" "r5500")
        (eq_attr "type" "store,fpstore,fpidxstore"))
   "vr55_mem")
@@ -49,32 +54,65 @@ (define_insn_reservation "ir_vr55_xfer" 
        (eq_attr "type" "xfer"))
   "vr55_dp0|vr55_dp1")
 
-(define_insn_reservation "ir_vr55_hilo" 2
-  (and (eq_attr "cpu" "r5500")
-       (eq_attr "type" "mthilo,mfhilo"))
-  "vr55_dp0|vr55_dp1")
-
 (define_insn_reservation "ir_vr55_arith" 1
   (and (eq_attr "cpu" "r5500")
        (eq_attr "type" "arith,shift,slt,clz,const,nop,trap"))
   "vr55_dp0|vr55_dp1")
 
-(define_insn_reservation "ir_vr55_imul_si" 3
+(define_bypass 2
+  "ir_vr55_arith"
+  "ir_vr55_mthilo,ir_vr55_imul_si,ir_vr55_imul_di,ir_vr55_imadd,
+   ir_vr55_idiv_si,ir_vr55_idiv_di")
+
+(define_insn_reservation "ir_vr55_mthilo" 1
+  (and (eq_attr "cpu" "r5500")
+       (eq_attr "type" "mthilo"))
+  "vr55_mac")
+
+(define_insn_reservation "ir_vr55_mfhilo" 5
+  (and (eq_attr "cpu" "r5500")
+       (eq_attr "type" "mfhilo"))
+  "vr55_mac")
+
+;; The default latency is for the GPR result of a mul.  Bypasses handle the
+;; latency of {mul,mult}->{mfhi,mflo}.
+(define_insn_reservation "ir_vr55_imul_si" 5
   (and (eq_attr "cpu" "r5500")
        (and (eq_attr "type" "imul")
             (eq_attr "mode" "SI")))
   "vr55_mac")
 
-(define_insn_reservation "ir_vr55_imul_di" 4
+;; The default latency is for pre-reload scheduling and handles the case
+;; where a pseudo destination will be stored in a GPR (as it usually is).
+;; The delay includes the latency of the dmult itself and the anticipated
+;; mflo or mfhi.
+;;
+;; Once the mflo or mfhi has been created, bypasses handle the latency
+;; between it and the dmult.
+(define_insn_reservation "ir_vr55_imul_di" 9
   (and (eq_attr "cpu" "r5500")
        (and (eq_attr "type" "imul")
             (eq_attr "mode" "DI")))
-  "vr55_mac")
+  "vr55_mac*4")
 
-(define_insn_reservation "ir_vr55_imadd_si" 3
+;; The default latency is as for ir_vr55_imul_si.
+(define_insn_reservation "ir_vr55_imadd" 5
   (and (eq_attr "cpu" "r5500")
-       (eq_attr "type" "imul"))
+       (eq_attr "type" "imadd"))
   "vr55_mac")
+
+(define_bypass 1
+  "ir_vr55_imul_si,ir_vr55_imadd"
+  "ir_vr55_imadd"
+  "mips_linked_madd_p")
+
+(define_bypass 2
+  "ir_vr55_imul_si,ir_vr55_imadd"
+  "ir_vr55_mfhilo")
+
+(define_bypass 4
+  "ir_vr55_imul_di"
+  "ir_vr55_mfhilo")
 
 ;; Divide algorithm is early out with best latency of 7 pcycles.
 ;; Use worst case for scheduling purposes.
Index: config/mips/mips.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/mips/mips.c,v
retrieving revision 1.409
diff -u -p -F^\([(a-zA-Z0-9_]\|#define\) -r1.409 mips.c
--- config/mips/mips.c	4 May 2004 23:57:42 -0000	1.409
+++ config/mips/mips.c	6 May 2004 08:08:35 -0000
@@ -2347,9 +2347,9 @@ mips_rtx_costs (rtx x, int code, int out
         *total = COSTS_N_INSNS (12);
       else if (TUNE_MIPS3900)
         *total = COSTS_N_INSNS (2);
-      else if (TUNE_MIPS5400 || TUNE_MIPS5500 || TUNE_SB1)
-        *total = COSTS_N_INSNS ((mode == DImode) ? 4 : 3);
-      else if (TUNE_MIPS7000)
+      else if (TUNE_MIPS5400 || TUNE_SB1)
+        *total = COSTS_N_INSNS (mode == DImode ? 4 : 3);
+      else if (TUNE_MIPS5500 || TUNE_MIPS7000)
         *total = COSTS_N_INSNS (mode == DImode ? 9 : 5);
       else if (TUNE_MIPS9000)
         *total = COSTS_N_INSNS (mode == DImode ? 8 : 3);


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]