This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Fix 2 moderately simple sparc PRs



First, PR 6496, we currently kill performance on Sparc V9 and later
processors by emitting the "call then jump" peephole.  When a call
instruction is executed, the "call-return stack" is pushed with the
return address.  This call-return stack is used to predict the
instruction stream on returns from a procedure.  The peephole in
question modifies the return address so we return to the branch
destination, and this causes the prediction to miss every time this
peephole is used on such a processor.

Furthermore, the call-return stack is only popped on a hit, so we
guarantee every prediction in the call-return stack will be wasted
by modifying the return address in this way.

Next, PR 6500, the prefetch implementation had three problems:

1) It erroneously used "prefetch page" which prefetches virtual
   memory translations, not the data.  Just use the "prefetch for
   several reads/writes" function code for all non-zero locality
   values.

   The V9 architecture manual clearly states that this is to
   be used to prefetch TLB translations, not data into caches.

   Implementation-wise, for some reason UltraSPARC-II treats
   prefetch page like a prefetch for "several reads"; however,
   UltraSPARC-III treats it as a NOP, so ignoring this
   UltraSPARC-II'ism wrt. prefetch page is a really good idea.

2) Did not emit proper RTL for both 64-bit and 32-bit cases.  Using an
   expander to call the right gen_prefetch_{32,64}, we fix this.

3) PREFETCH_BLOCK and SIMULTANEOUS_PREFETCHES not defined and thus
   totally wrong for UltraSPARC.

To make sure this all actually worked I did a "-O2 -mcpu=v9 \
-mtune=ultrasparc -fprefetch-loop-arrays" bootstrap on
sparc-linux-gnu and a "-O2 -mcpu=ultrasparc -fprefetch-loop-arrays"
bootstrap on sparc64-linux-gnu.

Mark, ok for 3.1 branch?

2002-04-28  David S. Miller  <davem@redhat.com>

	PR target/6496
	* config/sparc/sparc.md (call followed by jump peephole): Do not
	match for TARGET_V9, kill TARGET_ARCH64 variants.

	PR target/6500
	* config/sparc/sparc.md (prefetch): Emit properly for 32-bit vs.
	64-bit TARGET_V9.  Do not use prefetch page, use prefetch for
	several {reads,writes} instead.
	* config/sparc/sparc.h (PREFETCH_BLOCK, SIMULTANEOUS_PREFETCHES):
	Define.

--- config/sparc/sparc.md.~1~	Sun Apr 21 18:12:24 2002
+++ config/sparc/sparc.md	Sun Apr 28 06:28:07 2002
@@ -9354,6 +9354,9 @@
    (set_attr "length" "2")])
 
 ;; Now peepholes to do a call followed by a jump.
+;; Do not match this on V9 and later processors, which have a call-return
+;; stack as this corrupts it and causes the code to run slower not faster.
+;; There are not TARGET_ARCH64 patterns because that implies TARGET_V9.
 
 (define_peephole
   [(parallel [(set (match_operand 0 "" "")
@@ -9361,7 +9364,8 @@
 			 (match_operand 2 "" "")))
 	      (clobber (reg:SI 15))])
    (set (pc) (label_ref (match_operand 3 "" "")))]
-  "short_branch (INSN_UID (insn), INSN_UID (operands[3]))
+  "! TARGET_V9
+   && short_branch (INSN_UID (insn), INSN_UID (operands[3]))
    && (USING_SJLJ_EXCEPTIONS || ! can_throw_internal (ins1))"
   "call\\t%a1, %2\\n\\tadd\\t%%o7, (%l3-.-4), %%o7")
 
@@ -9370,49 +9374,70 @@
 		    (match_operand 1 "" ""))
 	      (clobber (reg:SI 15))])
    (set (pc) (label_ref (match_operand 2 "" "")))]
-  "short_branch (INSN_UID (insn), INSN_UID (operands[2]))
-   && (USING_SJLJ_EXCEPTIONS || ! can_throw_internal (ins1))"
-  "call\\t%a0, %1\\n\\tadd\\t%%o7, (%l2-.-4), %%o7")
-
-(define_peephole
-  [(parallel [(set (match_operand 0 "" "")
-		   (call (mem:SI (match_operand:DI 1 "call_operand_address" "ps"))
-			 (match_operand 2 "" "")))
-	      (clobber (reg:DI 15))])
-   (set (pc) (label_ref (match_operand 3 "" "")))]
-  "TARGET_ARCH64
-   && short_branch (INSN_UID (insn), INSN_UID (operands[3]))
-   && (USING_SJLJ_EXCEPTIONS || ! can_throw_internal (ins1))"
-  "call\\t%a1, %2\\n\\tadd\\t%%o7, (%l3-.-4), %%o7")
-
-(define_peephole
-  [(parallel [(call (mem:SI (match_operand:DI 0 "call_operand_address" "ps"))
-		    (match_operand 1 "" ""))
-	      (clobber (reg:DI 15))])
-   (set (pc) (label_ref (match_operand 2 "" "")))]
-  "TARGET_ARCH64
+  "! TARGET_V9
    && short_branch (INSN_UID (insn), INSN_UID (operands[2]))
    && (USING_SJLJ_EXCEPTIONS || ! can_throw_internal (ins1))"
   "call\\t%a0, %1\\n\\tadd\\t%%o7, (%l2-.-4), %%o7")
 
-(define_insn "prefetch"
+;; ??? UltraSPARC-III note: A memory operation loading into the floating point register
+;; ??? file, if it hits the prefetch cache, has a chance to dual-issue with other memory
+;; ??? operations.  With DFA we might be able to model this, but it requires a lot of
+;; ??? state.
+(define_expand "prefetch"
+  [(match_operand 0 "address_operand" "")
+   (match_operand 1 "const_int_operand" "")
+   (match_operand 2 "const_int_operand" "")]
+  "TARGET_V9"
+  "
+{
+  if (TARGET_ARCH64)
+    emit_insn (gen_prefetch_64 (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_prefetch_32 (operands[0], operands[1], operands[2]));
+  DONE;
+}")
+
+(define_insn "prefetch_64"
   [(prefetch (match_operand:DI 0 "address_operand" "p")
 	     (match_operand:DI 1 "const_int_operand" "n")
 	     (match_operand:DI 2 "const_int_operand" "n"))]
-  "TARGET_V9"
+  ""
+{
+  static const char * const prefetch_instr[2][2] = {
+    {
+      "prefetch\\t[%a0], 1", /* no locality: prefetch for one read */
+      "prefetch\\t[%a0], 0", /* medium to high locality: prefetch for several reads */
+    },
+    {
+      "prefetch\\t[%a0], 3", /* no locality: prefetch for one write */
+      "prefetch\\t[%a0], 2", /* medium to high locality: prefetch for several writes */
+    }
+  };
+  int read_or_write = INTVAL (operands[1]);
+  int locality = INTVAL (operands[2]);
+
+  if (read_or_write != 0 && read_or_write != 1)
+    abort ();
+  if (locality < 0 || locality > 3)
+    abort ();
+  return prefetch_instr [read_or_write][locality == 0 ? 0 : 1];
+}
+  [(set_attr "type" "load")])
+
+(define_insn "prefetch_32"
+  [(prefetch (match_operand:SI 0 "address_operand" "p")
+	     (match_operand:SI 1 "const_int_operand" "n")
+	     (match_operand:SI 2 "const_int_operand" "n"))]
+  ""
 {
-  static const char * const prefetch_instr[2][4] = {
+  static const char * const prefetch_instr[2][2] = {
     {
       "prefetch\\t[%a0], 1", /* no locality: prefetch for one read */
-      "prefetch\\t[%a0], 0", /* medium locality: prefetch for several reads */
-      "prefetch\\t[%a0], 0", /* medium locality: prefetch for several reads */
-      "prefetch\\t[%a0], 4", /* high locality: prefetch page */
+      "prefetch\\t[%a0], 0", /* medium to high locality: prefetch for several reads */
     },
     {
       "prefetch\\t[%a0], 3", /* no locality: prefetch for one write */
-      "prefetch\\t[%a0], 2", /* medium locality: prefetch for several writes */
-      "prefetch\\t[%a0], 2", /* medium locality: prefetch for several writes */
-      "prefetch\\t[%a0], 4", /* high locality: prefetch page */
+      "prefetch\\t[%a0], 2", /* medium to high locality: prefetch for several writes */
     }
   };
   int read_or_write = INTVAL (operands[1]);
@@ -9422,7 +9447,7 @@
     abort ();
   if (locality < 0 || locality > 3)
     abort ();
-  return prefetch_instr [read_or_write][locality];
+  return prefetch_instr [read_or_write][locality == 0 ? 0 : 1];
 }
   [(set_attr "type" "load")])
 
--- config/sparc/sparc.h.~1~	Thu Apr 25 18:58:52 2002
+++ config/sparc/sparc.h	Sun Apr 28 06:13:53 2002
@@ -2679,6 +2679,13 @@ do {                                    
   case FLOAT:						\
   case FIX:						\
     return 19;
+
+#define PREFETCH_BLOCK \
+	((sparc_cpu == PROCESSOR_ULTRASPARC) ? 64 : 32)
+
+/* ??? UltraSPARC-III note: Can set this to 8 for ultra3.  */
+#define SIMULTANEOUS_PREFETCHES \
+	((sparc_cpu == PROCESSOR_ULTRASPARC) ? 2 : 3)
 
 /* Control the assembler format that we output.  */
 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]