This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH, committed] POWER4/POWER5 scheduling tweak


	After recent scheduler changes, removing the delay between group
dispatch and instruction issue for dispatch slots 3 and 4 in the POWER4
and POWER5 scheduler descriptions shows a small but consistent
performance benefit.

David


	* config/rs6000/power4.md: Remove delay between dispatch and issue
	associated with dispatch group slots 3 and 4.
	* config/rs6000/power5.md: Same.

Index: power4.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/power4.md,v
retrieving revision 1.19
diff -c -p -u -r1.19 power4.md
--- power4.md	10 Nov 2004 21:57:13 -0000	1.19
+++ power4.md	22 Nov 2004 17:13:39 -0000
@@ -38,38 +38,37 @@
 (define_reservation "lsq_power4"
 		    "(du1_power4,lsu1_power4)\
 		    |(du2_power4,lsu2_power4)\
-		    |(du3_power4,nothing,lsu2_power4)\
-		    |(du4_power4,nothing,lsu1_power4)")
+		    |(du3_power4,lsu2_power4)\
+		    |(du4_power4,lsu1_power4)")
 
 (define_reservation "lsuq_power4"
 		    "(du1_power4+du2_power4,lsu1_power4+iu2_power4)\
 		    |(du2_power4+du3_power4,lsu2_power4+iu2_power4)\
 		    |(du3_power4+du4_power4,lsu2_power4+iu1_power4)")
-;		    |(du2_power4+du3_power4,nothing,lsu2_power4,iu2_power4)
 
 (define_reservation "iq_power4"
 		    "(du1_power4,iu1_power4)\
 		    |(du2_power4,iu2_power4)\
-		    |(du3_power4,nothing,iu2_power4)\
-		    |(du4_power4,nothing,iu1_power4)")
+		    |(du3_power4,iu2_power4)\
+		    |(du4_power4,iu1_power4)")
 
 (define_reservation "fpq_power4"
 		    "(du1_power4,fpu1_power4)\
 		    |(du2_power4,fpu2_power4)\
-		    |(du3_power4,nothing,fpu2_power4)\
-		    |(du4_power4,nothing,fpu1_power4)")
+		    |(du3_power4,fpu2_power4)\
+		    |(du4_power4,fpu1_power4)")
 
 (define_reservation "vq_power4"
 		    "(du1_power4,vec_power4)\
 		    |(du2_power4,vec_power4)\
-		    |(du3_power4,nothing,vec_power4)\
-		    |(du4_power4,nothing,vec_power4)")
+		    |(du3_power4,vec_power4)\
+		    |(du4_power4,vec_power4)")
 
 (define_reservation "vpq_power4"
 		    "(du1_power4,vecperm_power4)\
 		    |(du2_power4,vecperm_power4)\
-		    |(du3_power4,nothing,vecperm_power4)\
-		    |(du4_power4,nothing,vecperm_power4)")
+		    |(du3_power4,vecperm_power4)\
+		    |(du4_power4,vecperm_power4)")
 
 
 ; Dispatch slots are allocated in order conforming to program order.
@@ -135,8 +134,8 @@
        (eq_attr "cpu" "power4"))
   "(du1_power4,lsu1_power4,iu1_power4)\
   |(du2_power4,lsu2_power4,iu2_power4)\
-  |(du3_power4,lsu2_power4,nothing,iu2_power4)\
-  |(du4_power4,lsu1_power4,nothing,iu1_power4)")
+  |(du3_power4,lsu2_power4,iu2_power4)\
+  |(du4_power4,lsu1_power4,iu1_power4)")
 
 (define_insn_reservation "power4-store-update" 12
   (and (eq_attr "type" "store_u")
@@ -157,8 +156,8 @@
        (eq_attr "cpu" "power4"))
   "(du1_power4,lsu1_power4,fpu1_power4)\
   |(du2_power4,lsu2_power4,fpu2_power4)\
-  |(du3_power4,lsu2_power4,nothing,fpu2_power4)\
-  |(du4_power4,lsu1_power4,nothing,fpu1_power4)")
+  |(du3_power4,lsu2_power4,fpu2_power4)\
+  |(du4_power4,lsu1_power4,fpu1_power4)")
 
 (define_insn_reservation "power4-fpstore-update" 12
   (and (eq_attr "type" "fpstore_u,fpstore_ux")
@@ -166,15 +165,14 @@
   "(du1_power4+du2_power4,lsu1_power4+iu2_power4,fpu1_power4)\
   |(du2_power4+du3_power4,lsu2_power4+iu2_power4,fpu2_power4)\
   |(du3_power4+du4_power4,lsu2_power4+iu1_power4,fpu2_power4)")
-;  |(du3_power4+du4_power4,nothing,lsu2_power4+iu1_power4,fpu2_power4)")
 
 (define_insn_reservation "power4-vecstore" 12
   (and (eq_attr "type" "vecstore")
        (eq_attr "cpu" "power4"))
   "(du1_power4,lsu1_power4,vec_power4)\
   |(du2_power4,lsu2_power4,vec_power4)\
-  |(du3_power4,lsu2_power4,nothing,vec_power4)\
-  |(du4_power4,lsu1_power4,nothing,vec_power4)")
+  |(du3_power4,lsu2_power4,vec_power4)\
+  |(du4_power4,lsu1_power4,vec_power4)")
 
 
 ; Integer latency is 2 cycles
@@ -220,7 +218,7 @@
        (eq_attr "cpu" "power4"))
   "(du1_power4+du2_power4,iu1_power4,iu2_power4)\
   |(du2_power4+du3_power4,iu2_power4,iu2_power4)\
-  |(du3_power4+du4_power4,nothing,iu2_power4,iu1_power4)")
+  |(du3_power4+du4_power4,iu2_power4,iu1_power4)")
 
 (define_bypass 4 "power4-compare" "power4-branch,power4-crlogical,power4-delayedcr,power4-mfcr,power4-mfcrf")
 
@@ -230,7 +228,6 @@
   "(du1_power4+du2_power4,iu1_power4*6,iu2_power4)\
   |(du2_power4+du3_power4,iu2_power4*6,iu2_power4)\
   |(du3_power4+du4_power4,iu2_power4*6,iu1_power4)")
-;  |(du3_power4+du4_power4,nothing,iu2_power4*6,iu1_power4)")
 
 (define_bypass 10 "power4-lmul-cmp" "power4-branch,power4-crlogical,power4-delayedcr,power4-mfcr,power4-mfcrf")
 
@@ -240,7 +237,6 @@
   "(du1_power4+du2_power4,iu1_power4*4,iu2_power4)\
   |(du2_power4+du3_power4,iu2_power4*4,iu2_power4)\
   |(du3_power4+du4_power4,iu2_power4*4,iu1_power4)")
-;  |(du3_power4+du4_power4,nothing,iu2_power4*4,iu1_power4)")
 
 (define_bypass 8 "power4-imul-cmp" "power4-branch,power4-crlogical,power4-delayedcr,power4-mfcr,power4-mfcrf")
 
@@ -251,8 +247,6 @@
   |(du2_power4,iu2_power4*6)\
   |(du3_power4,iu2_power4*6)\
   |(du4_power4,iu1_power4*6)")
-;  |(du3_power4,nothing,iu2_power4*6)\
-;  |(du4_power4,nothing,iu1_power4*6)")
 
 (define_insn_reservation "power4-imul" 5
   (and (eq_attr "type" "imul")
@@ -261,8 +255,6 @@
   |(du2_power4,iu2_power4*4)\
   |(du3_power4,iu2_power4*4)\
   |(du4_power4,iu1_power4*4)")
-;  |(du3_power4,nothing,iu2_power4*4)\
-;  |(du4_power4,nothing,iu1_power4*4)")
 
 (define_insn_reservation "power4-imul3" 4
   (and (eq_attr "type" "imul2,imul3")
@@ -271,8 +263,6 @@
   |(du2_power4,iu2_power4*3)\
   |(du3_power4,iu2_power4*3)\
   |(du4_power4,iu1_power4*3)")
-;  |(du3_power4,nothing,iu2_power4*3)\
-;  |(du4_power4,nothing,iu1_power4*3)")
 
 
 ; SPR move only executes in first IU.
@@ -355,8 +345,6 @@
   |(du2_power4,fpu2_power4*28)\
   |(du3_power4,fpu2_power4*28)\
   |(du4_power4,fpu1_power4*28)")
-;  |(du3_power4,nothing,fpu2_power4*28)\
-;  |(du4_power4,nothing,fpu1_power4*28)")
 
 (define_insn_reservation "power4-sqrt" 40
   (and (eq_attr "type" "ssqrt,dsqrt")
@@ -365,8 +353,6 @@
   |(du2_power4,fpu2_power4*35)\
   |(du3_power4,fpu2_power4*35)\
   |(du4_power4,fpu2_power4*35)")
-;  |(du3_power4,nothing,fpu2_power4*35)\
-;  |(du4_power4,nothing,fpu2_power4*35)")
 
 
 ; VMX
Index: power5.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/power5.md,v
retrieving revision 1.4
diff -c -p -u -r1.4 power5.md
--- power5.md	10 Nov 2004 21:57:13 -0000	1.4
+++ power5.md	22 Nov 2004 17:13:39 -0000
@@ -37,20 +37,20 @@
 (define_reservation "lsq_power5"
 		    "(du1_power5,lsu1_power5)\
 		    |(du2_power5,lsu2_power5)\
-		    |(du3_power5,nothing,lsu2_power5)\
-		    |(du4_power5,nothing,lsu1_power5)")
+		    |(du3_power5,lsu2_power5)\
+		    |(du4_power5,lsu1_power5)")
 
 (define_reservation "iq_power5"
 		    "(du1_power5,iu1_power5)\
 		    |(du2_power5,iu2_power5)\
-		    |(du3_power5,nothing,iu2_power5)\
-		    |(du4_power5,nothing,iu1_power5)")
+		    |(du3_power5,iu2_power5)\
+		    |(du4_power5,iu1_power5)")
 
 (define_reservation "fpq_power5"
 		    "(du1_power5,fpu1_power5)\
 		    |(du2_power5,fpu2_power5)\
-		    |(du3_power5,nothing,fpu2_power5)\
-		    |(du4_power5,nothing,fpu1_power5)")
+		    |(du3_power5,fpu2_power5)\
+		    |(du4_power5,fpu1_power5)")
 
 ; Dispatch slots are allocated in order conforming to program order.
 (absence_set "du1_power5" "du2_power5,du3_power5,du4_power5,du5_power5")
@@ -108,8 +108,8 @@
        (eq_attr "cpu" "power5"))
   "(du1_power5,lsu1_power5,iu1_power5)\
   |(du2_power5,lsu2_power5,iu2_power5)\
-  |(du3_power5,lsu2_power5,nothing,iu2_power5)\
-  |(du4_power5,lsu1_power5,nothing,iu1_power5)")
+  |(du3_power5,lsu2_power5,iu2_power5)\
+  |(du4_power5,lsu1_power5,iu1_power5)")
 
 (define_insn_reservation "power5-store-update" 12
   (and (eq_attr "type" "store_u")
@@ -127,8 +127,8 @@
        (eq_attr "cpu" "power5"))
   "(du1_power5,lsu1_power5,fpu1_power5)\
   |(du2_power5,lsu2_power5,fpu2_power5)\
-  |(du3_power5,lsu2_power5,nothing,fpu2_power5)\
-  |(du4_power5,lsu1_power5,nothing,fpu1_power5)")
+  |(du3_power5,lsu2_power5,fpu2_power5)\
+  |(du4_power5,lsu1_power5,fpu1_power5)")
 
 (define_insn_reservation "power5-fpstore-update" 12
   (and (eq_attr "type" "fpstore_u,fpstore_ux")
@@ -200,8 +200,6 @@
   |(du2_power5,iu2_power5*6)\
   |(du3_power5,iu2_power5*6)\
   |(du4_power5,iu1_power5*6)")
-;  |(du3_power5,nothing,iu2_power5*6)\
-;  |(du4_power5,nothing,iu1_power5*6)")
 
 (define_insn_reservation "power5-imul" 5
   (and (eq_attr "type" "imul")
@@ -210,8 +208,6 @@
   |(du2_power5,iu2_power5*4)\
   |(du3_power5,iu2_power5*4)\
   |(du4_power5,iu1_power5*4)")
-;  |(du3_power5,nothing,iu2_power5*4)\
-;  |(du4_power5,nothing,iu1_power5*4)")
 
 (define_insn_reservation "power5-imul3" 4
   (and (eq_attr "type" "imul2,imul3")
@@ -220,8 +216,6 @@
   |(du2_power5,iu2_power5*3)\
   |(du3_power5,iu2_power5*3)\
   |(du4_power5,iu1_power5*3)")
-;  |(du3_power5,nothing,iu2_power5*3)\
-;  |(du4_power5,nothing,iu1_power5*3)")
 
 
 ; SPR move only executes in first IU.
@@ -304,8 +298,6 @@
   |(du2_power5,fpu2_power5*28)\
   |(du3_power5,fpu2_power5*28)\
   |(du4_power5,fpu1_power5*28)")
-;  |(du3_power5,nothing,fpu2_power5*28)\
-;  |(du4_power5,nothing,fpu1_power5*28)")
 
 (define_insn_reservation "power5-sqrt" 40
   (and (eq_attr "type" "ssqrt,dsqrt")
@@ -314,6 +306,4 @@
   |(du2_power5,fpu2_power5*35)\
   |(du3_power5,fpu2_power5*35)\
   |(du4_power5,fpu2_power5*35)")
-;  |(du3_power5,nothing,fpu2_power5*35)\
-;  |(du4_power5,nothing,fpu2_power5*35)")
 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]