This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: Disable accumulate-outgoing-args for Generic and Buldozers
- From: Jan Hubicka <hubicka at ucw dot cz>
- To: Jan Hubicka <hubicka at ucw dot cz>
- Cc: Eric Botcazou <ebotcazou at adacore dot com>, gcc-patches at gcc dot gnu dot org
- Date: Fri, 24 Jan 2014 01:39:15 +0100
- Subject: Re: Disable accumulate-outgoing-args for Generic and Buldozers
- Authentication-results: sourceware.org; auth=none
- References: <20140101143004 dot GD26209 at kam dot mff dot cuni dot cz> <2893290 dot IYlSZnu3Ef at polaris> <20140123231639 dot GA6783 at kam dot mff dot cuni dot cz>
Hi,
this is the improved patch I am testing. The basic idea is to remove push
expanders for cases where we do not have a push instruction anyway.
emit_move_insns then resorts to unconditionally calling the move expander
with a push operand. I extended ix86_expand_vector_move to handle
it gracefully and for that I borrowed the emit_move_resolve_push
function from expr.c since it seemed pointless to preserve
duplicated logic in ix86_expand_push.
I can easily imagine that scheduling around function call sequences
matters, so I also updated push/pop expanders to preserve memory attributes.
Eventually I found the attributes to be blank because of logic in expr.c
that clears alias info when sibcall is enabled. We can now do better
by only disabling it in functions that actually do sibcalls.
Bootstrap/regtest running on x86_64-linux, OK for the non-i386 parts
if it passes?
Honza
* expr.c (emit_move_resolve_push): Export; be a bit more selective
on when to clear alias set.
* expr.h (emit_move_resolve_push): Declare.
* function.h (struct function): Add tail_call_marked.
* tree-tailcall.c (optimize_tail_call): Set tail_call_marked.
* config/i386/i386-protos.h (ix86_expand_push): Remove.
* config/i386/i386.md (TImode move expander): Do not call
ix86_expand_push.
(FP push expanders): Preserve memory attributes.
* config/i386/sse.md (push<mode>1): Remove.
* config/i386/i386.c (ix86_expand_vector_move): Handle push
operation.
(ix86_expand_push): Remove.
* config/i386/mmx.md (push<mode>1): Remove.
Index: expr.c
===================================================================
--- expr.c (revision 206946)
+++ expr.c (working copy)
@@ -3221,7 +3221,7 @@
/* A subroutine of emit_move_insn_1. X is a push_operand in MODE.
Return an equivalent MEM that does not use an auto-increment. */
-static rtx
+rtx
emit_move_resolve_push (enum machine_mode mode, rtx x)
{
enum rtx_code code = GET_CODE (XEXP (x, 0));
@@ -4070,7 +4070,7 @@
{
set_mem_attributes (dest, type, 1);
- if (flag_optimize_sibling_calls)
+ if (cfun->tail_call_marked)
/* Function incoming arguments may overlap with sibling call
outgoing arguments and we cannot allow reordering of reads
from function arguments with stores to outgoing arguments
Index: expr.h
===================================================================
--- expr.h (revision 206946)
+++ expr.h (working copy)
@@ -413,6 +413,7 @@
extern rtx emit_move_complex_push (enum machine_mode, rtx, rtx);
extern rtx emit_move_complex_parts (rtx, rtx);
+extern rtx emit_move_resolve_push (enum machine_mode, rtx);
/* Push a block of length SIZE (perhaps variable)
and return an rtx to address the beginning of the block. */
Index: function.h
===================================================================
--- function.h (revision 206946)
+++ function.h (working copy)
@@ -667,6 +667,9 @@
/* Nonzero if the current function contains any loops with
nonzero value in loop->simduid. */
unsigned int has_simduid_loops : 1;
+
+ /* Set when the tail call has been identified. */
+ unsigned int tail_call_marked : 1;
};
/* Add the decl D to the local_decls list of FUN. */
Index: tree-tailcall.c
===================================================================
--- tree-tailcall.c (revision 206946)
+++ tree-tailcall.c (working copy)
@@ -909,6 +909,7 @@
gimple stmt = gsi_stmt (t->call_gsi);
gimple_call_set_tail (stmt, true);
+ cfun->tail_call_marked = true;
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "Found tail call ");
Index: config/i386/i386-protos.h
===================================================================
--- config/i386/i386-protos.h (revision 206946)
+++ config/i386/i386-protos.h (working copy)
@@ -84,7 +84,6 @@
extern void ix86_expand_move (enum machine_mode, rtx[]);
extern void ix86_expand_vector_move (enum machine_mode, rtx[]);
extern void ix86_expand_vector_move_misalign (enum machine_mode, rtx[]);
-extern void ix86_expand_push (enum machine_mode, rtx);
extern rtx ix86_fixup_binary_operands (enum rtx_code,
enum machine_mode, rtx[]);
extern void ix86_fixup_binary_operands_no_copy (enum rtx_code,
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 206946)
+++ config/i386/i386.md (working copy)
@@ -1818,8 +1818,6 @@
{
if (TARGET_64BIT)
ix86_expand_move (TImode, operands);
- else if (push_operand (operands[0], TImode))
- ix86_expand_push (TImode, operands[1]);
else
ix86_expand_vector_move (TImode, operands);
DONE;
@@ -2665,7 +2663,11 @@
(match_operand:TF 1 "sse_reg_operand"))]
"TARGET_SSE && reload_completed"
[(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16)))
- (set (mem:TF (reg:P SP_REG)) (match_dup 1))])
+ (set (match_dup 0) (match_dup 1))]
+{
+ /* Preserve memory attributes. */
+ operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
+})
(define_insn "*pushxf"
[(set (match_operand:XF 0 "push_operand" "=<,<")
@@ -2691,8 +2693,12 @@
(match_operand:XF 1 "fp_register_operand"))]
"reload_completed"
[(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
- (set (mem:XF (reg:P SP_REG)) (match_dup 1))]
- "operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode));")
+ (set (match_dup 0) (match_dup 1))]
+{
+ operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode));
+ /* Preserve memory attributes. */
+ operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
+})
(define_insn "*pushdf"
[(set (match_operand:DF 0 "push_operand" "=<,<,<,<")
@@ -2713,7 +2719,11 @@
(match_operand:DF 1 "any_fp_register_operand"))]
"reload_completed"
[(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
- (set (mem:DF (reg:P SP_REG)) (match_dup 1))])
+ (set (match_dup 0) (match_dup 1))]
+{
+ /* Preserve memory attributes. */
+ operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
+})
(define_insn "*pushsf_rex64"
[(set (match_operand:SF 0 "push_operand" "=X,X,X")
@@ -2747,8 +2757,12 @@
(match_operand:SF 1 "any_fp_register_operand"))]
"reload_completed"
[(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
- (set (mem:SF (reg:P SP_REG)) (match_dup 1))]
- "operands[2] = GEN_INT (-<P:MODE_SIZE>);")
+ (set (match_dup 0) (match_dup 1))]
+{
+ operands[2] = GEN_INT (-<P:MODE_SIZE>);
+ /* Preserve memory attributes. */
+ operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
+})
(define_split
[(set (match_operand:SF 0 "push_operand")
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md (revision 206946)
+++ config/i386/sse.md (working copy)
@@ -911,14 +911,6 @@
operands[2] = CONST0_RTX (DFmode);
})
-(define_expand "push<mode>1"
- [(match_operand:VMOVE 0 "register_operand")]
- "TARGET_SSE"
-{
- ix86_expand_push (<MODE>mode, operands[0]);
- DONE;
-})
-
(define_expand "movmisalign<mode>"
[(set (match_operand:VMOVE 0 "nonimmediate_operand")
(match_operand:VMOVE 1 "nonimmediate_operand"))]
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 206946)
+++ config/i386/i386.c (working copy)
@@ -16795,6 +16795,9 @@
rtx op0 = operands[0], op1 = operands[1];
unsigned int align = GET_MODE_ALIGNMENT (mode);
+ if (push_operand (op0, VOIDmode))
+ op0 = emit_move_resolve_push (mode, op0);
+
/* Force constants other than zero into memory. We do not know how
the instructions used to build constants modify the upper 64 bits
of the register, once we have that information we may be able
@@ -17222,30 +17225,6 @@
gcc_unreachable ();
}
-/* Expand a push in MODE. This is some mode for which we do not support
- proper push instructions, at least from the registers that we expect
- the value to live in. */
-
-void
-ix86_expand_push (enum machine_mode mode, rtx x)
-{
- rtx tmp;
-
- tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
- GEN_INT (-GET_MODE_SIZE (mode)),
- stack_pointer_rtx, 1, OPTAB_DIRECT);
- if (tmp != stack_pointer_rtx)
- emit_move_insn (stack_pointer_rtx, tmp);
-
- tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
-
- /* When we push an operand onto stack, it has to be aligned at least
- at the function argument boundary. However since we don't have
- the argument type, we can't determine the actual argument
- boundary. */
- emit_move_insn (tmp, x);
-}
-
/* Helper function of ix86_fixup_binary_operands to canonicalize
operand order. Returns true if the operands should be swapped. */
Index: config/i386/mmx.md
===================================================================
--- config/i386/mmx.md (revision 206946)
+++ config/i386/mmx.md (working copy)
@@ -213,14 +213,6 @@
[(const_int 0)]
"ix86_split_long_move (operands); DONE;")
-(define_expand "push<mode>1"
- [(match_operand:MMXMODE 0 "register_operand")]
- "TARGET_MMX"
-{
- ix86_expand_push (<MODE>mode, operands[0]);
- DONE;
-})
-
(define_expand "movmisalign<mode>"
[(set (match_operand:MMXMODE 0 "nonimmediate_operand")
(match_operand:MMXMODE 1 "nonimmediate_operand"))]