This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Fix 2 moderately simple sparc PRs
- From: "David S. Miller" <davem at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: mark at codesourcery dot com
- Date: Sun, 28 Apr 2002 07:14:09 -0700 (PDT)
- Subject: Fix 2 moderately simple sparc PRs
First, PR 6496, we currently kill performance on Sparc V9 and later
processors by emitting the "call then jump" peephole. When a call
instruction is executed, the "call-return stack" is pushed with the
return address. This call-return stack is used to predict the
instruction stream on returns from a procedure. The peephole in
question modifies the return address so we return to the branch
destination, and this causes the prediction to miss every time this
peephole is used on such a processor.
Furthermore, the call-return stack is popped only on a hit, so we
guarantee that every prediction in the call-return stack will be wasted
by modifying the return address in this way.
Next, PR 6500, the prefetch implementation had three problems:
1) It erroneously used "prefetch page" which prefetches virtual
memory translations, not the data. Just use the "prefetch for
several reads/writes" function code for all non-zero locality
values.
The V9 architecture manual clearly states that this is to
be used to prefetch TLB translations, not data into caches.
Implementation-wise, for some reason UltraSPARC-II treats
prefetch page like a prefetch for "several reads"; however,
UltraSPARC-III treats it as a NOP, so our ignoring this
UltraSPARC-II'ism wrt. prefetch page is a really good idea.
2) Did not emit proper RTL for both 64-bit and 32-bit cases. Using an
expander to call the right gen_prefetch_{32,64}, we fix this.
3) PREFETCH_BLOCK and SIMULTANEOUS_PREFETCHES not defined and thus
totally wrong for UltraSPARC.
To make sure this all actually worked I did a "-O2 -mcpu=v9 \
-mtune=ultrasparc -fprefetch-loop-arrays" bootstrap on
sparc-linux-gnu and a "-O2 -mcpu=ultrasparc -fprefetch-loop-arrays"
bootstrap on sparc64-linux-gnu.
Mark, ok for 3.1 branch?
2002-04-28 David S. Miller <davem@redhat.com>
PR target/6496
* config/sparc/sparc.md (call followed by jump peephole): Do not
match for TARGET_V9, kill TARGET_ARCH64 variants.
PR target/6500
* config/sparc/sparc.md (prefetch): Emit properly for 32-bit vs.
64-bit TARGET_V9. Do not use prefetch page, use prefetch for
several {reads,writes} instead.
* config/sparc/sparc.h (PREFETCH_BLOCK, SIMULTANEOUS_PREFETCHES):
Define.
--- config/sparc/sparc.md.~1~ Sun Apr 21 18:12:24 2002
+++ config/sparc/sparc.md Sun Apr 28 06:28:07 2002
@@ -9354,6 +9354,9 @@
(set_attr "length" "2")])
;; Now peepholes to do a call followed by a jump.
+;; Do not match this on V9 and later processors, which have a call-return
+;; stack as this corrupts it and causes the code to run slower not faster.
+;; There are not TARGET_ARCH64 patterns because that implies TARGET_V9.
(define_peephole
[(parallel [(set (match_operand 0 "" "")
@@ -9361,7 +9364,8 @@
(match_operand 2 "" "")))
(clobber (reg:SI 15))])
(set (pc) (label_ref (match_operand 3 "" "")))]
- "short_branch (INSN_UID (insn), INSN_UID (operands[3]))
+ "! TARGET_V9
+ && short_branch (INSN_UID (insn), INSN_UID (operands[3]))
&& (USING_SJLJ_EXCEPTIONS || ! can_throw_internal (ins1))"
"call\\t%a1, %2\\n\\tadd\\t%%o7, (%l3-.-4), %%o7")
@@ -9370,49 +9374,70 @@
(match_operand 1 "" ""))
(clobber (reg:SI 15))])
(set (pc) (label_ref (match_operand 2 "" "")))]
- "short_branch (INSN_UID (insn), INSN_UID (operands[2]))
- && (USING_SJLJ_EXCEPTIONS || ! can_throw_internal (ins1))"
- "call\\t%a0, %1\\n\\tadd\\t%%o7, (%l2-.-4), %%o7")
-
-(define_peephole
- [(parallel [(set (match_operand 0 "" "")
- (call (mem:SI (match_operand:DI 1 "call_operand_address" "ps"))
- (match_operand 2 "" "")))
- (clobber (reg:DI 15))])
- (set (pc) (label_ref (match_operand 3 "" "")))]
- "TARGET_ARCH64
- && short_branch (INSN_UID (insn), INSN_UID (operands[3]))
- && (USING_SJLJ_EXCEPTIONS || ! can_throw_internal (ins1))"
- "call\\t%a1, %2\\n\\tadd\\t%%o7, (%l3-.-4), %%o7")
-
-(define_peephole
- [(parallel [(call (mem:SI (match_operand:DI 0 "call_operand_address" "ps"))
- (match_operand 1 "" ""))
- (clobber (reg:DI 15))])
- (set (pc) (label_ref (match_operand 2 "" "")))]
- "TARGET_ARCH64
+ "! TARGET_V9
&& short_branch (INSN_UID (insn), INSN_UID (operands[2]))
&& (USING_SJLJ_EXCEPTIONS || ! can_throw_internal (ins1))"
"call\\t%a0, %1\\n\\tadd\\t%%o7, (%l2-.-4), %%o7")
-(define_insn "prefetch"
+;; ??? UltraSPARC-III note: A memory operation loading into the floating point register
+;; ??? file, if it hits the prefetch cache, has a chance to dual-issue with other memory
+;; ??? operations. With DFA we might be able to model this, but it requires a lot of
+;; ??? state.
+(define_expand "prefetch"
+ [(match_operand 0 "address_operand" "")
+ (match_operand 1 "const_int_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_V9"
+ "
+{
+ if (TARGET_ARCH64)
+ emit_insn (gen_prefetch_64 (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_prefetch_32 (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_insn "prefetch_64"
[(prefetch (match_operand:DI 0 "address_operand" "p")
(match_operand:DI 1 "const_int_operand" "n")
(match_operand:DI 2 "const_int_operand" "n"))]
- "TARGET_V9"
+ ""
+{
+ static const char * const prefetch_instr[2][2] = {
+ {
+ "prefetch\\t[%a0], 1", /* no locality: prefetch for one read */
+ "prefetch\\t[%a0], 0", /* medium to high locality: prefetch for several reads */
+ },
+ {
+ "prefetch\\t[%a0], 3", /* no locality: prefetch for one write */
+ "prefetch\\t[%a0], 2", /* medium to high locality: prefetch for several writes */
+ }
+ };
+ int read_or_write = INTVAL (operands[1]);
+ int locality = INTVAL (operands[2]);
+
+ if (read_or_write != 0 && read_or_write != 1)
+ abort ();
+ if (locality < 0 || locality > 3)
+ abort ();
+ return prefetch_instr [read_or_write][locality == 0 ? 0 : 1];
+}
+ [(set_attr "type" "load")])
+
+(define_insn "prefetch_32"
+ [(prefetch (match_operand:SI 0 "address_operand" "p")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (match_operand:SI 2 "const_int_operand" "n"))]
+ ""
{
- static const char * const prefetch_instr[2][4] = {
+ static const char * const prefetch_instr[2][2] = {
{
"prefetch\\t[%a0], 1", /* no locality: prefetch for one read */
- "prefetch\\t[%a0], 0", /* medium locality: prefetch for several reads */
- "prefetch\\t[%a0], 0", /* medium locality: prefetch for several reads */
- "prefetch\\t[%a0], 4", /* high locality: prefetch page */
+ "prefetch\\t[%a0], 0", /* medium to high locality: prefetch for several reads */
},
{
"prefetch\\t[%a0], 3", /* no locality: prefetch for one write */
- "prefetch\\t[%a0], 2", /* medium locality: prefetch for several writes */
- "prefetch\\t[%a0], 2", /* medium locality: prefetch for several writes */
- "prefetch\\t[%a0], 4", /* high locality: prefetch page */
+ "prefetch\\t[%a0], 2", /* medium to high locality: prefetch for several writes */
}
};
int read_or_write = INTVAL (operands[1]);
@@ -9422,7 +9447,7 @@
abort ();
if (locality < 0 || locality > 3)
abort ();
- return prefetch_instr [read_or_write][locality];
+ return prefetch_instr [read_or_write][locality == 0 ? 0 : 1];
}
[(set_attr "type" "load")])
--- config/sparc/sparc.h.~1~ Thu Apr 25 18:58:52 2002
+++ config/sparc/sparc.h Sun Apr 28 06:13:53 2002
@@ -2679,6 +2679,13 @@ do {
case FLOAT: \
case FIX: \
return 19;
+
+#define PREFETCH_BLOCK \
+ ((sparc_cpu == PROCESSOR_ULTRASPARC) ? 64 : 32)
+
+/* ??? UltraSPARC-III note: Can set this to 8 for ultra3. */
+#define SIMULTANEOUS_PREFETCHES \
+ ((sparc_cpu == PROCESSOR_ULTRASPARC) ? 2 : 3)
/* Control the assembler format that we output. */