This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [Patch, MIPS] MIPS specific optimization for o32 ABI
- From: Steve Ellcey <sellcey at imgtec dot com>
- To: Joseph Myers <joseph at codesourcery dot com>
- Cc: GCC Patches <gcc-patches at gcc dot gnu dot org>, clm <clm at codesourcery dot com>, Matthew Fortune <Matthew dot Fortune at imgtec dot com>
- Date: Fri, 31 Jul 2015 09:51:47 -0700
- Subject: Re: [Patch, MIPS] MIPS specific optimization for o32 ABI
- Authentication-results: sourceware.org; auth=none
- References: <1438101523 dot 19674 dot 219 dot camel at ubuntu-sellcey> <alpine dot DEB dot 2 dot 10 dot 1507310032080 dot 8408 at digraph dot polyomino dot org dot uk>
- Reply-to: <sellcey at imgtec dot com>
On Fri, 2015-07-31 at 00:32 +0000, Joseph Myers wrote:
> New command-line options need documenting in invoke.texi.
Good point, thanks for catching that. Here is an updated patch with
invoke.texi. There are no other changes to the patch.
Steve Ellcey
sellcey@imgtec.com
2015-07-31 Steve Ellcey <sellcey@imgtec.com>
Zoran Jovanovic <zoran.jovanovic@imgtec.com>
Catherine Moore <clm@codesourcery.com>
Tom de Vries <tom@codesourcery.com>
* config/mips/mips.opt (mframe-header-opt): New option.
* config/mips/mips.c (struct mips_frame_info): Add
skip_stack_frame_allocation_p field.
(struct machine_function): Add callees_use_frame_header_p,
uses_frame_header_p, and initial_total_size fields.
(mips_frame_header_usage): New hash.
(mips_find_if_frame_header_is_used): New function.
(mips_callee_use_frame_header): New function.
(mips_callees_use_frame_header_p): New function.
(mips_cfun_use_frame_header_p): New function.
(mips_get_updated_offset): New function.
(mips_skip_stack_frame_alloc): New function.
(mips_frame_header_update_insn): New function.
(mips_rest_of_frame_header_opt): New function.
(mips_compute_frame_info): Add recalculate and frame arguments.
(mips_frame_pointer_required): Add new args to
mips_compute_frame_info call.
(mips_initial_elimination_offset): Ditto.
(mips_gp_expand_needed_p): New function factored out of
mips_expand_ghost_gp_insns.
(mips_expand_ghost_gp_insns): Use mips_gp_expand_needed_p.
(mips_reorg): Use mips_rest_of_frame_header_opt.
* doc/invoke.texi (MIPS Options): Document -mframe-header-opt flag.
2015-07-31 Steve Ellcey <sellcey@imgtec.com>
Tom de Vries <tom@codesourcery.com>
* gcc.target/mips/fho-1.c: New test.
* gcc.target/mips/fho-2.c: New test.
* gcc.target/mips/mips.exp: Add -mframe-header-opt to
mips_option_groups.
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index c3cd52d..7cdef89 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -77,6 +77,7 @@ along with GCC; see the file COPYING3. If not see
#include "cgraph.h"
#include "builtins.h"
#include "rtl-iter.h"
+#include "dumpfile.h"
/* This file should be included last. */
#include "target-def.h"
@@ -380,6 +381,9 @@ struct GTY(()) mips_frame_info {
/* The offset of hard_frame_pointer_rtx from the bottom of the frame. */
HOST_WIDE_INT hard_frame_pointer_offset;
+
+ /* Skip stack frame allocation if possible. */
+ bool skip_stack_frame_allocation_p;
};
/* Enumeration for masked vectored (VI) and non-masked (EIC) interrupts. */
@@ -472,6 +476,15 @@ struct GTY(()) machine_function {
/* True if this is an interrupt handler that should use DERET
instead of ERET. */
bool use_debug_exception_return_p;
+
+ /* True if some of the callees uses its frame header. */
+ bool callees_use_frame_header_p;
+
+ /* True if current function uses its frame header. */
+ bool uses_frame_header_p;
+
+ /* Frame size before it is updated by the frame header optimization. */
+ HOST_WIDE_INT initial_total_size;
};
/* Information about a single argument. */
@@ -574,6 +587,8 @@ struct mips_rtx_cost_data
/* Global variables for machine-dependent things. */
+/* Map from a function decl to true, recording the functions that
+   mips_rest_of_frame_header_opt has found not to use their incoming frame
+   header.  Allocated lazily; NULL until the first such function is seen.  */
+static hash_map<tree, bool> *mips_frame_header_usage;
+
/* The -G setting, or the configuration's default small-data limit if
no -G option is given. */
static unsigned int mips_small_data_threshold;
@@ -1296,6 +1311,7 @@ static const struct mips_rtx_cost_data
}
};
+static void mips_rest_of_frame_header_opt (void);
static rtx mips_find_pic_call_symbol (rtx_insn *, rtx, bool);
static int mips_register_move_cost (machine_mode, reg_class_t,
reg_class_t);
@@ -10358,6 +10374,114 @@ mips_save_reg_p (unsigned int regno)
return false;
}
+/* Return true if the function FNDECL may use its incoming frame header.
+   Only functions that have been recorded in mips_frame_header_usage as
+   not using the header yield false; every other function is conservatively
+   assumed to use it.  */
+
+static bool
+mips_find_if_frame_header_is_used (tree fndecl)
+{
+  /* Nothing has been recorded yet, so no function is known to be safe.  */
+  if (!mips_frame_header_usage)
+    return true;
+
+  /* hash_map::get returns a pointer to the stored value, or NULL when
+     FNDECL has no entry.  Look at the value itself rather than only at
+     the pointer.  */
+  bool *frame_header_unused = mips_frame_header_usage->get (fndecl);
+
+  return !(frame_header_unused && *frame_header_unused);
+}
+
+/* Return true if INSN is a call, or a SEQUENCE containing a call, and the
+   called function may use its incoming frame header.  Any call that is not
+   a direct call to a function with a known decl is conservatively treated
+   as using the header.  */
+
+static bool
+mips_callee_use_frame_header (rtx_insn *insn)
+{
+  rtx set, call;
+  tree fndecl;
+
+  if (insn == NULL_RTX || !USEFUL_INSN_P (insn))
+    return false;
+
+  /* Handle sequence of instructions.  */
+  if (GET_CODE (PATTERN (insn)) == SEQUENCE)
+    {
+      rtx_insn *subinsn;
+      FOR_EACH_SUBINSN (subinsn, insn)
+        if (INSN_P (subinsn) && mips_callee_use_frame_header (subinsn))
+          return true;
+    }
+
+  if (!CALL_P (insn))
+    return false;
+
+  /* Only calls of the form (parallel [(set ... (call ...)) ...]) are
+     analyzed; any other call pattern is assumed to use the header.  */
+  if (GET_CODE (PATTERN (insn)) != PARALLEL)
+    return true;
+
+  set = XVECEXP (PATTERN (insn), 0, 0);
+  if (GET_CODE (set) != SET)
+    return true;
+
+  /* The call target must be a (mem (symbol_ref ...)), i.e. a direct call.  */
+  call = SET_SRC (set);
+  if (GET_CODE (call) != CALL
+      || !MEM_P (XEXP (call, 0))
+      || GET_CODE (XEXP (XEXP (call, 0), 0)) != SYMBOL_REF)
+    return true;
+
+  fndecl = SYMBOL_REF_DECL (XEXP (XEXP (call, 0), 0));
+
+  /* Without a decl nothing is known about the callee.  A recursive call is
+     also treated as using the header, since the current function has not
+     been recorded yet.  */
+  if (fndecl == NULL_TREE || fndecl == current_function_decl)
+    return true;
+
+  /* NOTE(review): a recorded callee that can be replaced at link time
+     (weak or preemptible definitions) might be overridden by one that does
+     use its frame header -- confirm whether a binds-local check is needed
+     here.  */
+  return mips_find_if_frame_header_is_used (fndecl);
+}
+
+/* Return true if the current function contains a call to a function that
+   may use its incoming frame header.  */
+
+static bool
+mips_callees_use_frame_header_p (void)
+{
+  /* Walk the whole insn stream.  Only callees that have already been seen
+     are considered; any unseen function is assumed to use its incoming
+     frame header.  */
+  rtx_insn *cur = get_insns ();
+
+  while (cur != NULL_RTX)
+    {
+      if (mips_callee_use_frame_header (cur))
+        return true;
+      cur = NEXT_INSN (cur);
+    }
+
+  return false;
+}
+
+/* Return true if the current function may use its incoming frame header,
+   i.e. if it contains a store whose destination address is the stack
+   pointer plus a constant offset that is greater than or equal to
+   cfun->machine->initial_total_size.  Only insns whose pattern is a plain
+   SET are examined.  */
+
+static bool
+mips_cfun_use_frame_header_p (void)
+{
+  rtx_insn *insn;
+
+  for (insn = get_insns (); insn != NULL_RTX; insn = NEXT_INSN (insn))
+    {
+      rtx dest, addr;
+      HOST_WIDE_INT offset;
+
+      if (!INSN_P (insn) || GET_CODE (PATTERN (insn)) != SET)
+        continue;
+
+      dest = SET_DEST (PATTERN (insn));
+      if (!MEM_P (dest))
+        continue;
+
+      addr = XEXP (dest, 0);
+      if (REG_P (addr) && REGNO (addr) == STACK_POINTER_REGNUM)
+        /* A store to offset 0 has a bare stack pointer as its address.
+           It reaches the incoming frame header when the initial frame
+           size is 0.  */
+        offset = 0;
+      else if (GET_CODE (addr) == PLUS
+               && CONST_INT_P (XEXP (addr, 1))
+               && REG_P (XEXP (addr, 0))
+               && REGNO (XEXP (addr, 0)) == STACK_POINTER_REGNUM)
+        /* Keep the full HOST_WIDE_INT value; do not narrow it to int.  */
+        offset = INTVAL (XEXP (addr, 1));
+      else
+        continue;
+
+      if (offset >= cfun->machine->initial_total_size)
+        return true;
+    }
+
+  return false;
+}
+
/* Populate the current function's mips_frame_info structure.
MIPS stack frames look like:
@@ -10429,9 +10553,8 @@ mips_save_reg_p (unsigned int regno)
hard_frame_pointer_rtx unchanged. */
static void
-mips_compute_frame_info (void)
+mips_compute_frame_info (bool recalculate, struct mips_frame_info *frame)
{
- struct mips_frame_info *frame;
HOST_WIDE_INT offset, size;
unsigned int regno, i;
@@ -10457,11 +10580,11 @@ mips_compute_frame_info (void)
}
}
- frame = &cfun->machine->frame;
memset (frame, 0, sizeof (*frame));
size = get_frame_size ();
cfun->machine->global_pointer = mips_global_pointer ();
+ frame->cprestore_size = 0;
/* The first two blocks contain the outgoing argument area and the $gp save
slot. This area isn't needed in leaf functions, but if the
@@ -10477,12 +10600,18 @@ mips_compute_frame_info (void)
frame->args_size = REG_PARM_STACK_SPACE (cfun->decl);
else
frame->args_size = 0;
- frame->cprestore_size = 0;
}
else
{
- frame->args_size = crtl->outgoing_args_size;
- frame->cprestore_size = MIPS_GP_SAVE_AREA_SIZE;
+ /* If recalculate do not take args_size into account. */
+ if (recalculate)
+ frame->args_size = 0;
+ else
+ frame->args_size = crtl->outgoing_args_size;
+
+ /* Check if space allocated on stack for gp will be used. */
+ if (!recalculate || mips_must_initialize_gp_p ())
+ frame->cprestore_size = MIPS_GP_SAVE_AREA_SIZE;
}
offset = frame->args_size + frame->cprestore_size;
@@ -10606,6 +10735,9 @@ mips_compute_frame_info (void)
instructions for local variables and incoming arguments. */
if (TARGET_MIPS16)
frame->hard_frame_pointer_offset = frame->args_size;
+
+ if (!recalculate)
+ cfun->machine->initial_total_size = frame->total_size;
}
/* Return the style of GP load sequence that is being used for the
@@ -10642,7 +10774,7 @@ mips_frame_pointer_required (void)
without using a second temporary register. */
if (TARGET_MIPS16)
{
- mips_compute_frame_info ();
+ mips_compute_frame_info (false, &cfun->machine->frame);
if (!SMALL_OPERAND (cfun->machine->frame.total_size))
return true;
}
@@ -10668,7 +10800,7 @@ mips_initial_elimination_offset (int from, int to)
{
HOST_WIDE_INT offset;
- mips_compute_frame_info ();
+ mips_compute_frame_info (false, &cfun->machine->frame);
/* Set OFFSET to the offset from the end-of-prologue stack pointer. */
switch (from)
@@ -16838,12 +16970,8 @@ mips_has_long_branch_p (void)
return false;
}
-/* If we are using a GOT, but have not decided to use a global pointer yet,
- see whether we need one to implement long branches. Convert the ghost
- global-pointer instructions into real ones if so. */
-
static bool
-mips_expand_ghost_gp_insns (void)
+mips_gp_expand_needed_p (void)
{
/* Quick exit if we already know that we will or won't need a
global pointer. */
@@ -16857,12 +16985,28 @@ mips_expand_ghost_gp_insns (void)
return false;
/* We've now established that we need $gp. */
- cfun->machine->must_initialize_gp_p = true;
- split_all_insns_noflow ();
-
return true;
}
+
+/* If we are using a GOT, but have not decided to use a global pointer yet,
+   see whether we need one to implement long branches.  Convert the ghost
+   global-pointer instructions into real ones if so.  Return true if the
+   conversion was done.  */
+
+static bool
+mips_expand_ghost_gp_insns (void)
+{
+  if (!mips_gp_expand_needed_p ())
+    return false;
+
+  /* We've now established that we need $gp.  */
+  cfun->machine->must_initialize_gp_p = true;
+  split_all_insns_noflow ();
+  return true;
+}
+
/* Subroutine of mips_reorg to manage passes that require DF. */
static void
@@ -17004,6 +17148,9 @@ mips_reorg (void)
mips_df_reorg ();
free_bb_for_insn ();
}
+
+ if (flag_frame_header_optimization)
+ mips_rest_of_frame_header_opt ();
}
/* We use a machine specific pass to do a second machine dependent reorg
@@ -18802,6 +18949,164 @@ mips_prepare_pch_save (void)
mips_set_compression_mode (0);
mips16_globals = 0;
}
+
+/* Return the new stack-pointer offset for a load or store that used
+   OLD_OFFSET before the frame was shrunk by the frame header optimization.
+
+   Offsets that fall inside the original frame are moved down by the
+   original frame size less REG_PARM_STACK_SPACE.  Offsets above the
+   original frame (incoming arguments) are moved down by the original frame
+   size.  Any other offset is returned unchanged.  */
+
+static HOST_WIDE_INT
+mips_get_updated_offset (HOST_WIDE_INT old_offset)
+{
+  struct mips_frame_info *frame = &cfun->machine->frame;
+  /* Frame sizes are HOST_WIDE_INT; keep them at full width.  */
+  HOST_WIDE_INT initial_total_size = cfun->machine->initial_total_size;
+
+  /* Slots within the original frame: the $gp save slot, saved GPRs and
+     accumulators, according to the original comments.  Both original
+     ranges applied the same adjustment, so they are tested together.  */
+  if ((old_offset > 0 && old_offset <= frame->gp_sp_offset)
+      || (old_offset >= frame->gp_sp_offset
+          && old_offset <= initial_total_size))
+    return old_offset - (initial_total_size
+                         - REG_PARM_STACK_SPACE (cfun->decl));
+
+  /* Incoming args.  */
+  if (old_offset > initial_total_size)
+    return old_offset - initial_total_size;
+
+  return old_offset;
+}
+
+/* Return true if the current function can skip allocating its own stack
+   frame.  TODO: Try to do stack frame allocation removal even if local
+   variables are used.  */
+
+static bool
+mips_skip_stack_frame_alloc (void)
+{
+  struct mips_frame_info *cur_frame = &cfun->machine->frame;
+  struct mips_frame_info shrunk_frame;
+
+  if (!flag_frame_header_optimization)
+    return false;
+
+  /* Properties of the function itself that rule the optimization out.  */
+  if (cfun->calls_setjmp != 0
+      || cfun->calls_alloca != 0
+      || cfun->stdarg != 0
+      || crtl->shrink_wrapped)
+    return false;
+
+  /* Local variables, or more outgoing argument space than
+     REG_PARM_STACK_SPACE provides.  */
+  if (cur_frame->var_size != 0
+      || cur_frame->args_size > REG_PARM_STACK_SPACE (cfun->decl))
+    return false;
+
+  /* Only plain (non-MIPS16, non-microMIPS) o32 code is handled.  */
+  if (mips_abi != ABI_32 || TARGET_MIPS16 || TARGET_MICROMIPS)
+    return false;
+
+  if (frame_pointer_needed != 0 || mips_gp_expand_needed_p ())
+    return false;
+
+  if (mips_callees_use_frame_header_p ())
+    return false;
+
+  /* Compute what the frame would look like after the optimization, without
+     touching the real frame info.  */
+  mips_compute_frame_info (true, &shrunk_frame);
+
+  return !(shrunk_frame.total_size > REG_PARM_STACK_SPACE (cfun->decl)
+           || cfun->machine->uses_frame_header_p);
+}
+
+/* Return true if X has the form (plus (reg sp) (const_int N)).  */
+
+static bool
+mips_sp_plus_const_p (rtx x)
+{
+  return (GET_CODE (x) == PLUS
+          && REG_P (XEXP (x, 0))
+          && CONST_INT_P (XEXP (x, 1))
+          && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM);
+}
+
+/* Update the stack related instruction INSN after the frame has been
+   shrunk by the frame header optimization:
+
+   - a register store to, or register load from, sp + constant has its
+     offset replaced by the result of mips_get_updated_offset;
+   - an adjustment of the form sp = sp + constant is deleted.
+
+   Insns that are not a single SET, and all other insns, are left alone.  */
+
+static void
+mips_frame_header_update_insn (rtx_insn *insn)
+{
+  rtx set, src, dst, mem;
+
+  if (insn == NULL_RTX || !USEFUL_INSN_P (insn))
+    return;
+
+  set = single_set (insn);
+  if (set == NULL_RTX)
+    return;
+
+  src = SET_SRC (set);
+  dst = SET_DEST (set);
+
+  /* Pick the memory operand of a register store or a register load.  */
+  if (REG_P (src) && MEM_P (dst))
+    mem = dst;
+  else if (MEM_P (src) && REG_P (dst))
+    mem = src;
+  else
+    mem = NULL_RTX;
+
+  if (mem != NULL_RTX && mips_sp_plus_const_p (XEXP (mem, 0)))
+    {
+      /* It is a load or store through sp - update offset.  */
+      rtx addr = XEXP (mem, 0);
+      HOST_WIDE_INT new_offset
+        = mips_get_updated_offset (INTVAL (XEXP (addr, 1)));
+
+      XEXP (addr, 1) = GEN_INT (new_offset);
+      return;
+    }
+
+  /* A stack pointer adjustment.  REGNO is only valid on a REG, so make
+     sure the destination is one before looking at its number.  */
+  if (REG_P (dst)
+      && REGNO (dst) == STACK_POINTER_REGNUM
+      && mips_sp_plus_const_p (src))
+    delete_insn (insn);
+}
+
+/* Entry function for the frame header optimization, called from mips_reorg
+   when -mframe-header-opt is enabled.
+
+   Determine whether the current function uses its incoming frame header
+   and whether it can skip allocating its own stack frame.  If it can,
+   recompute the frame layout and rewrite the stack related insns.  Record
+   functions that do not use their incoming frame header in
+   mips_frame_header_usage so that the callers processed after them can
+   take advantage of it.  */
+
+static void
+mips_rest_of_frame_header_opt (void)
+{
+  rtx_insn *insn;
+  bool skip_stack_frame_alloc;
+  struct mips_frame_info *frame = &cfun->machine->frame;
+
+  cfun->machine->uses_frame_header_p = mips_cfun_use_frame_header_p ();
+  skip_stack_frame_alloc = mips_skip_stack_frame_alloc ();
+
+  /* Check if it is needed to recalculate stack frame info.  */
+  if (skip_stack_frame_alloc)
+    mips_compute_frame_info (true, frame);
+
+  if ((skip_stack_frame_alloc && frame->total_size == 0)
+      || (!skip_stack_frame_alloc && !cfun->machine->uses_frame_header_p
+          && !cfun->stdarg))
+    {
+      /* Function does not use its incoming frame header.  */
+
+      if (!mips_frame_header_usage)
+        mips_frame_header_usage = new hash_map<tree, bool>;
+
+      tree fndecl = current_function_decl;
+      bool existed;
+      bool &frame_hdr_unused
+        = mips_frame_header_usage->get_or_insert (fndecl, &existed);
+      if (!existed)
+        frame_hdr_unused = true;
+    }
+
+  if (skip_stack_frame_alloc)
+    {
+      /* Both sizes are HOST_WIDE_INT, which need not be long, so print
+         them with HOST_WIDE_INT_PRINT_DEC rather than %ld.  */
+      if (dump_file && cfun->machine->initial_total_size > frame->total_size)
+        fprintf (dump_file, "Frame size reduced by frame header optimization"
+                 " from " HOST_WIDE_INT_PRINT_DEC " to "
+                 HOST_WIDE_INT_PRINT_DEC ".\n",
+                 cfun->machine->initial_total_size, frame->total_size);
+
+      /* Update instructions.  */
+      for (insn = get_insns (); insn != NULL_RTX; insn = NEXT_INSN (insn))
+        mips_frame_header_update_insn (insn);
+    }
+}
/* Generate or test for an insn that supports a constant permutation. */
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
index 348c6e0..3e72936 100644
--- a/gcc/config/mips/mips.opt
+++ b/gcc/config/mips/mips.opt
@@ -412,6 +412,10 @@ modd-spreg
Target Report Mask(ODD_SPREG)
Enable use of odd-numbered single-precision registers
+mframe-header-opt
+Target Report Var(flag_frame_header_optimization) Optimization
+Optimize frame header
+
noasmopt
Driver
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 413ac16..3b7b1b6 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -813,7 +813,8 @@ Objective-C and Objective-C++ Dialects}.
-mbranch-cost=@var{num} -mbranch-likely -mno-branch-likely @gol
-mfp-exceptions -mno-fp-exceptions @gol
-mvr4130-align -mno-vr4130-align -msynci -mno-synci @gol
--mrelax-pic-calls -mno-relax-pic-calls -mmcount-ra-address}
+-mrelax-pic-calls -mno-relax-pic-calls -mmcount-ra-address @gol
+-mframe-header-opt -mno-frame-header-opt}
@emph{MMIX Options}
@gccoptlist{-mlibfuncs -mno-libfuncs -mepsilon -mno-epsilon -mabi=gnu @gol
@@ -18013,6 +18014,18 @@ if @var{ra-address} is nonnull.
The default is @option{-mno-mcount-ra-address}.
+@item -mframe-header-opt
+@itemx -mno-frame-header-opt
+@opindex mframe-header-opt
+Enable (disable) frame header optimization in the O32 ABI. When using
+the O32 ABI, calling functions allocate 16 bytes on the stack in case
+the called function needs to write out register arguments to memory so
+that their address can be taken. When enabled, this optimization allows
+the called function to use those 16 bytes for other purposes if the
+arguments do not need to be written to memory.
+
+This optimization is off by default at all optimization levels.
+
@end table
@node MMIX Options
diff --git a/gcc/testsuite/gcc.target/mips/fho-1.c b/gcc/testsuite/gcc.target/mips/fho-1.c
new file mode 100644
index 0000000..e373da4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/fho-1.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */
+/* { dg-options "-mabi=32 -mframe-header-opt -fdump-rtl-mach" } */
+/* Testing -mframe-header-opt optimization option. */
+
+/* B makes no calls and only does arithmetic on its argument. */
+NOCOMPRESSION int __attribute__((noinline))
+B (int x)
+{
+ return x + 3;
+}
+
+/* B does not use its incoming frame header, so A does not have to
+ allocate a stack frame of its own. */
+NOCOMPRESSION int __attribute__((noinline))
+A (int x)
+{
+ return B (x) + 2;
+}
+
+/* Read $sp before and after the call to A. The result is 0 only if A
+ returned 10 and the two $sp values are equal. */
+NOCOMPRESSION int
+main (void)
+{
+ int a;
+ void *volatile sp1, *volatile sp2;
+ register void *sp asm ("$sp");
+ sp1 = sp;
+ a = A (5);
+ sp2 = sp;
+ return !(a == 10 && sp1 == sp2);
+}
+
+/* { dg-final { scan-rtl-dump "Frame size reduced by frame header optimization" "mach" } } */
+
+/* For enabled targets, test that only one stack allocation is present, the one
+ in main. The one in A should have been removed by -mframe-header-opt. */
+/* { dg-final { scan-assembler-times "addiu\t\\\$sp,\\\$sp,-" 1 } } */
diff --git a/gcc/testsuite/gcc.target/mips/fho-2.c b/gcc/testsuite/gcc.target/mips/fho-2.c
new file mode 100644
index 0000000..d3599b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/fho-2.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-options "-mabi=32 -mframe-header-opt" } */
+/* Testing -mframe-header-opt optimization option. */
+
+/* B makes no calls and only does arithmetic on its argument. */
+NOCOMPRESSION int __attribute__((noinline))
+B (int x)
+{
+ return x + 3;
+}
+
+/* B does not use its incoming frame header, so A does not have to
+ allocate a stack frame of its own. */
+NOCOMPRESSION int __attribute__((noinline))
+A (int x)
+{
+ return B (x) + 2;
+}
+
+/* Read $sp before and after the call to A. The exit status is 0 only if A
+ returned 10 and the two $sp values are equal. */
+NOCOMPRESSION int
+main (void)
+{
+ int a;
+ void *volatile sp1, *volatile sp2;
+ register void *sp asm ("$sp");
+ sp1 = sp;
+ a = A (5);
+ sp2 = sp;
+ return !(a == 10 && sp1 == sp2);
+}
diff --git a/gcc/testsuite/gcc.target/mips/mips.exp b/gcc/testsuite/gcc.target/mips/mips.exp
index b3617e4..6e6450e 100644
--- a/gcc/testsuite/gcc.target/mips/mips.exp
+++ b/gcc/testsuite/gcc.target/mips/mips.exp
@@ -237,6 +237,7 @@ set mips_option_groups {
fpu "-m(double|single)-float"
forbid_cpu "forbid_cpu=.*"
fp "-mfp(32|xx|64)"
+ frame_header_opt "-mframe-header-opt|-mno-frame-header-opt"
gp "-mgp(32|64)"
long "-mlong(32|64)"
micromips "-mmicromips|-mno-micromips"