This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
K8 tweek 2
- From: Jan Hubicka <jh at suse dot cz>
- To: gcc-patches at gcc dot gnu dot org, rth at redhat dot com
- Date: Fri, 23 May 2003 22:50:16 +0200
- Subject: K8 tweek 2
Hi,
AMD recommends avoiding the single-byte RET penalty by adding a rep prefix
to the ret instead of inserting a nop before it, because this saves decode bandwidth.
Bootstrapped/regtested on i386-linux. OK for mainline?
Honza
Fri May 23 22:47:51 CEST 2003 Jan Hubicka <jh@suse.cz>
* i386.c (ix86_reorg): Replace the jump instead of adding nop.
* i386.md (UNSPEC_REP): New constant.
(return_internal_long): New pattern.
Index: i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.565
diff -c -3 -p -r1.565 i386.c
*** i386.c 23 May 2003 20:25:21 -0000 1.565
--- i386.c 23 May 2003 20:47:20 -0000
*************** ix86_reorg ()
*** 15547,15555 ****
basic_block bb = e->src;
rtx ret = bb->end;
rtx prev;
! bool insert = false;
if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
continue;
for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
--- 15547,15555 ----
basic_block bb = e->src;
rtx ret = bb->end;
rtx prev;
! bool replace = false;
if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
continue;
for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
*************** ix86_reorg ()
*** 15560,15581 ****
for (e = bb->pred; e; e = e->pred_next)
if (EDGE_FREQUENCY (e) && e->src->index >= 0
&& !(e->flags & EDGE_FALLTHRU))
! insert = 1;
}
! if (!insert)
{
prev = prev_active_insn (ret);
if (prev
&& ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
|| GET_CODE (prev) == CALL_INSN))
! insert = 1;
/* Empty functions get branch misspredict even when the jump destination
is not visible to us. */
if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
! insert = 1;
}
- if (insert)
- emit_insn_before (gen_nop (), ret);
}
}
--- 15561,15585 ----
for (e = bb->pred; e; e = e->pred_next)
if (EDGE_FREQUENCY (e) && e->src->index >= 0
&& !(e->flags & EDGE_FALLTHRU))
! replace = true;
}
! if (!replace)
{
prev = prev_active_insn (ret);
if (prev
&& ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
|| GET_CODE (prev) == CALL_INSN))
! replace = true;
/* Empty functions get branch misspredict even when the jump destination
is not visible to us. */
if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
! replace = true;
! }
! if (replace)
! {
! emit_insn_before (gen_return_internal_long (), ret);
! delete_insn (ret);
}
}
}
Index: i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.460
diff -c -3 -p -r1.460 i386.md
*** i386.md 14 May 2003 21:13:45 -0000 1.460
--- i386.md 23 May 2003 20:47:23 -0000
***************
*** 113,118 ****
--- 113,121 ----
; x87 Floating point
(UNSPEC_FPATAN 65)
(UNSPEC_FYL2X 66)
+
+ ; REP instruction
+ (UNSPEC_REP 67)
])
(define_constants
***************
*** 14232,14237 ****
--- 14235,14253 ----
"ret"
[(set_attr "length" "1")
(set_attr "length_immediate" "0")
+ (set_attr "modrm" "0")])
+
+ ;; Used by x86_machine_dependent_reorg to avoid penalty on single byte RET
+ ;; instruction Athlon and K8 have.
+
+ (define_insn "return_internal_long"
+ [(return)
+ (unspec [(const_int 0)] UNSPEC_REP)]
+ "reload_completed"
+ "rep {;} ret"
+ [(set_attr "length" "1")
+ (set_attr "length_immediate" "0")
+ (set_attr "prefix_rep" "1")
(set_attr "modrm" "0")])
(define_insn "return_pop_internal"