This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [Patch, MIPS] MIPS specific optimization for o32 ABI


On Fri, 2015-07-31 at 00:32 +0000, Joseph Myers wrote:
> New command-line options need documenting in invoke.texi.

Good point, thanks for catching that.  Here is an updated patch with
invoke.texi.  There are no other changes to the patch.

Steve Ellcey
sellcey@imgtec.com



2015-07-31  Steve Ellcey  <sellcey@imgtec.com>
	    Zoran Jovanovic  <zoran.jovanovic@imgtec.com>
	    Catherine Moore  <clm@codesourcery.com>
	    Tom de Vries  <tom@codesourcery.com>

	* config/mips/mips.opt (mframe-header-opt): New option.
	* config/mips/mips.c (struct mips_frame_info): Add
	skip_stack_frame_allocation_p field.
	(struct machine_function): Add callees_use_frame_header_p,
	uses_frame_header_p, and initial_total_size fields.
	(mips_frame_header_usage): New hash.
	(mips_find_if_frame_header_is_used): New Function.
	(mips_callee_use_frame_header): New Function.
	(mips_callees_use_frame_header_p): New Function.
	(mips_cfun_use_frame_header_p): New Function.
	(mips_get_updated_offset): New Function.
	(mips_skip_stack_frame_alloc): New Function.
	(mips_frame_header_update_insn): New Function.
	(mips_rest_of_frame_header_opt): New function.
	(mips_compute_frame_info): Add recalculate and frame arguments.
	(mips_frame_pointer_required): Add new args to
	mips_compute_frame_info call.
	(mips_initial_elimination_offset): Ditto.
	(mips_gp_expand_needed_p): New function factored out of
	mips_expand_ghost_gp_insns.
	(mips_expand_ghost_gp_insns): Use mips_gp_expand_needed_p.
	(mips_reorg): Use mips_rest_of_frame_header_opt.
	* doc/invoke.texi (MIPS Options): Document -mframe-header-opt flag.


2015-07-31  Steve Ellcey  <sellcey@imgtec.com>
	    Tom de Vries  <tom@codesourcery.com>

	* gcc.target/mips/fho-1.c: New test.
	* gcc.target/mips/fho-2.c: New test.
	* gcc.target/mips/mips.exp: Add -mframe-header-opt to
	mips_option_groups.





diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index c3cd52d..7cdef89 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -77,6 +77,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "cgraph.h"
 #include "builtins.h"
 #include "rtl-iter.h"
+#include "dumpfile.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -380,6 +381,9 @@ struct GTY(())  mips_frame_info {
 
   /* The offset of hard_frame_pointer_rtx from the bottom of the frame.  */
   HOST_WIDE_INT hard_frame_pointer_offset;
+
+  /* Skip stack frame allocation if possible.  */
+  bool skip_stack_frame_allocation_p;
 };
 
 /* Enumeration for masked vectored (VI) and non-masked (EIC) interrupts.  */
@@ -472,6 +476,15 @@ struct GTY(())  machine_function {
   /* True if this is an interrupt handler that should use DERET
      instead of ERET.  */
   bool use_debug_exception_return_p;
+
+  /* True if some of the callees uses its frame header.  */
+  bool callees_use_frame_header_p;
+
+  /* True if current function uses its frame header.  */
+  bool uses_frame_header_p;
+
+  /* Frame size before updated by optimizations.  */
+  HOST_WIDE_INT initial_total_size;
 };
 
 /* Information about a single argument.  */
@@ -574,6 +587,8 @@ struct mips_rtx_cost_data
 
 /* Global variables for machine-dependent things.  */
 
+static hash_map<tree, bool> *mips_frame_header_usage;
+
 /* The -G setting, or the configuration's default small-data limit if
    no -G option is given.  */
 static unsigned int mips_small_data_threshold;
@@ -1296,6 +1311,7 @@ static const struct mips_rtx_cost_data
   }
 };
 
+static void mips_rest_of_frame_header_opt (void);
 static rtx mips_find_pic_call_symbol (rtx_insn *, rtx, bool);
 static int mips_register_move_cost (machine_mode, reg_class_t,
 				    reg_class_t);
@@ -10358,6 +10374,114 @@ mips_save_reg_p (unsigned int regno)
   return false;
 }
 
+/* Try to find if function may use its incoming frame header.  */
+
+static bool
+mips_find_if_frame_header_is_used (tree fndecl)
+{
+  bool *frame_header_unused;
+
+  if (mips_frame_header_usage)
+    frame_header_unused = mips_frame_header_usage->get (fndecl);
+  else
+    frame_header_unused = false;
+
+  return !frame_header_unused;
+}
+
+/* Return true if the instruction is a call and the called function may use its
+   incoming frame header.  */
+
+static bool
+mips_callee_use_frame_header (rtx_insn *insn)
+{
+  rtx call_insn;
+  tree fndecl;
+
+  if (insn == NULL_RTX || !USEFUL_INSN_P (insn))
+    return false;
+
+  /* Handle sequence of instructions.  */
+  if (GET_CODE (PATTERN (insn)) == SEQUENCE)
+    {
+      rtx_insn *subinsn;
+      FOR_EACH_SUBINSN (subinsn, insn)
+	if (INSN_P (subinsn) && mips_callee_use_frame_header (subinsn))
+	  return true;
+    }
+
+  if (GET_CODE (insn) != CALL_INSN)
+    return false;
+
+  if (GET_CODE (PATTERN (insn)) != PARALLEL
+      || GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) != SET)
+    return true;
+
+  call_insn = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+
+  if (GET_CODE (call_insn) != CALL
+      || GET_CODE (XEXP (call_insn, 0)) != MEM
+      || GET_CODE (XEXP (XEXP (call_insn, 0), 0)) != SYMBOL_REF)
+    return true;
+
+  fndecl = SYMBOL_REF_DECL (XEXP (XEXP (call_insn, 0), 0));
+
+  if (fndecl == current_function_decl)
+    return true;
+
+  return mips_find_if_frame_header_is_used (fndecl);
+}
+
+/* Return true if any of the callee functions may use its incoming frame
+   header.  */
+
+static bool
+mips_callees_use_frame_header_p (void)
+{
+  rtx_insn *insn;
+
+  /* Iterate through all instructions in the current function and check whether
+     only already seen functions may be called.  Assume that any unseen function
+     may use its incoming frame header.  */
+  for (insn = get_insns (); insn != NULL_RTX; insn = NEXT_INSN (insn))
+    if (mips_callee_use_frame_header (insn))
+      return true;
+
+  return false;
+}
+
+/* Return true if the current function may use its incoming frame header.
+   If destination of memory store in format sp + offset and offset is greater
+   or equal than frame->total_size than this function returns true.
+   */
+
+static bool
+mips_cfun_use_frame_header_p (void)
+{
+  rtx_insn *insn;
+
+  for (insn = get_insns (); insn != NULL_RTX; insn = NEXT_INSN (insn))
+    {
+      if (insn != NULL_RTX && INSN_P (insn)
+	  && GET_CODE (PATTERN (insn)) == SET
+	  && MEM_P (XEXP (PATTERN (insn), 0)))
+	{
+	  rtx mem_dst = XEXP (XEXP (PATTERN (insn), 0), 0);
+	  if (GET_CODE (mem_dst) == PLUS
+	      && CONST_INT_P (XEXP (mem_dst, 1))
+	      && REG_P (XEXP (mem_dst, 0))
+	      && REGNO (XEXP (mem_dst, 0)) == STACK_POINTER_REGNUM)
+	    {
+	      int offset = INTVAL (XEXP (mem_dst, 1));
+	      if (offset >= cfun->machine->initial_total_size)
+		return true;
+	    }
+	}
+    }
+
+  return false;
+}
+
 /* Populate the current function's mips_frame_info structure.
 
    MIPS stack frames look like:
@@ -10429,9 +10553,8 @@ mips_save_reg_p (unsigned int regno)
    hard_frame_pointer_rtx unchanged.  */
 
 static void
-mips_compute_frame_info (void)
+mips_compute_frame_info (bool recalculate, struct mips_frame_info *frame)
 {
-  struct mips_frame_info *frame;
   HOST_WIDE_INT offset, size;
   unsigned int regno, i;
 
@@ -10457,11 +10580,11 @@ mips_compute_frame_info (void)
 	}
     }
 
-  frame = &cfun->machine->frame;
   memset (frame, 0, sizeof (*frame));
   size = get_frame_size ();
 
   cfun->machine->global_pointer = mips_global_pointer ();
+  frame->cprestore_size = 0;
 
   /* The first two blocks contain the outgoing argument area and the $gp save
      slot.  This area isn't needed in leaf functions, but if the
@@ -10477,12 +10600,18 @@ mips_compute_frame_info (void)
 	frame->args_size = REG_PARM_STACK_SPACE (cfun->decl);
       else
 	frame->args_size = 0;
-      frame->cprestore_size = 0;
     }
   else
     {
-      frame->args_size = crtl->outgoing_args_size;
-      frame->cprestore_size = MIPS_GP_SAVE_AREA_SIZE;
+      /* If recalculate do not take args_size into account.  */
+      if (recalculate)
+	frame->args_size = 0;
+      else
+	frame->args_size = crtl->outgoing_args_size;
+
+      /* Check if space allocated on stack for gp will be used.  */
+      if (!recalculate || mips_must_initialize_gp_p ())
+	frame->cprestore_size = MIPS_GP_SAVE_AREA_SIZE;
     }
   offset = frame->args_size + frame->cprestore_size;
 
@@ -10606,6 +10735,9 @@ mips_compute_frame_info (void)
      instructions for local variables and incoming arguments.  */
   if (TARGET_MIPS16)
     frame->hard_frame_pointer_offset = frame->args_size;
+
+  if (!recalculate)
+    cfun->machine->initial_total_size = frame->total_size;
 }
 
 /* Return the style of GP load sequence that is being used for the
@@ -10642,7 +10774,7 @@ mips_frame_pointer_required (void)
      without using a second temporary register.  */
   if (TARGET_MIPS16)
     {
-      mips_compute_frame_info ();
+      mips_compute_frame_info (false, &cfun->machine->frame);
       if (!SMALL_OPERAND (cfun->machine->frame.total_size))
 	return true;
     }
@@ -10668,7 +10800,7 @@ mips_initial_elimination_offset (int from, int to)
 {
   HOST_WIDE_INT offset;
 
-  mips_compute_frame_info ();
+  mips_compute_frame_info (false, &cfun->machine->frame);
 
   /* Set OFFSET to the offset from the end-of-prologue stack pointer.  */
   switch (from)
@@ -16838,12 +16970,8 @@ mips_has_long_branch_p (void)
   return false;
 }
 
-/* If we are using a GOT, but have not decided to use a global pointer yet,
-   see whether we need one to implement long branches.  Convert the ghost
-   global-pointer instructions into real ones if so.  */
-
 static bool
-mips_expand_ghost_gp_insns (void)
+mips_gp_expand_needed_p (void)
 {
   /* Quick exit if we already know that we will or won't need a
      global pointer.  */
@@ -16857,12 +16985,28 @@ mips_expand_ghost_gp_insns (void)
     return false;
 
   /* We've now established that we need $gp.  */
-  cfun->machine->must_initialize_gp_p = true;
-  split_all_insns_noflow ();
-
   return true;
 }
 
+
+/* If we are using a GOT, but have not decided to use a global pointer yet,
+   see whether we need one to implement long branches.  Convert the ghost
+   global-pointer instructions into real ones if so.  */
+
+static bool
+mips_expand_ghost_gp_insns (void)
+{
+
+  if (mips_gp_expand_needed_p ())
+    {
+      /* We've now established that we need $gp.  */
+      cfun->machine->must_initialize_gp_p = true;
+      split_all_insns_noflow ();
+      return true;
+    }
+  return false;
+}
+
 /* Subroutine of mips_reorg to manage passes that require DF.  */
 
 static void
@@ -17004,6 +17148,9 @@ mips_reorg (void)
       mips_df_reorg ();
       free_bb_for_insn ();
     }
+
+  if (flag_frame_header_optimization)
+    mips_rest_of_frame_header_opt ();
 }
 
 /* We use a machine specific pass to do a second machine dependent reorg
@@ -18802,6 +18949,164 @@ mips_prepare_pch_save (void)
   mips_set_compression_mode (0);
   mips16_globals = 0;
 }
+
+/* Return new offset for stack load/store operations.  */
+
+static int
+mips_get_updated_offset (int old_offset)
+{
+  struct mips_frame_info *frame = &cfun->machine->frame;
+  int res = old_offset;
+  int initial_total_size = cfun->machine->initial_total_size;
+
+  if (old_offset > 0 && old_offset <= frame->gp_sp_offset)
+    /* It should be only gp.  */
+    res = old_offset - (initial_total_size
+			- REG_PARM_STACK_SPACE (cfun->decl));
+  else if (old_offset >= frame->gp_sp_offset
+	   && old_offset <= initial_total_size)
+    /* gp registers, accumulators.  */
+    res = old_offset - (initial_total_size
+			- REG_PARM_STACK_SPACE (cfun->decl));
+  else if (old_offset > initial_total_size)
+    /* Incoming args.  */
+    res = old_offset - initial_total_size;
+
+  return res;
+}
+
+/* Test whether to skip frame header allocation.  TODO: Try to do stack
+   frame allocation removal even if local variables are used.  */
+
+static bool
+mips_skip_stack_frame_alloc (void)
+{
+  struct mips_frame_info *frame = &cfun->machine->frame;
+  struct mips_frame_info opt_frame;
+
+  if (!flag_frame_header_optimization)
+    return false;
+
+  if (cfun->calls_setjmp != 0
+      || cfun->calls_alloca != 0
+      || cfun->stdarg != 0
+      || crtl->shrink_wrapped
+      || frame->var_size != 0
+      || frame->args_size > REG_PARM_STACK_SPACE (cfun->decl)
+      || mips_abi != ABI_32
+      || TARGET_MIPS16
+      || TARGET_MICROMIPS
+      || frame_pointer_needed != 0
+      || mips_gp_expand_needed_p ())
+    return false;
+
+  if (mips_callees_use_frame_header_p ())
+    return false;
+
+  mips_compute_frame_info (true, &opt_frame);
+
+  if (opt_frame.total_size > REG_PARM_STACK_SPACE (cfun->decl)
+      || cfun->machine->uses_frame_header_p)
+    return false;
+
+  return true;
+}
+
+/* Update stack related instructions.  */
+
+static void
+mips_frame_header_update_insn (rtx_insn *insn)
+{
+  rtx set_insn, src, dst;
+
+  if (insn == NULL_RTX || !USEFUL_INSN_P (insn))
+    return;
+
+  set_insn = single_set (insn);
+  if (set_insn == NULL_RTX)
+    return;
+
+  src = SET_SRC (set_insn);
+  dst = SET_DEST (set_insn);
+
+  if (GET_CODE (src) == REG && GET_CODE (dst) == MEM
+      && GET_CODE (XEXP (dst, 0)) == PLUS
+      && GET_CODE (XEXP (XEXP (dst, 0), 0)) == REG
+      && CONST_INT_P (XEXP (XEXP (dst, 0), 1))
+      && (REGNO (XEXP (XEXP (dst, 0), 0))
+          == STACK_POINTER_REGNUM))
+    {
+      /* It is a store through sp - update offset.  */
+      XEXP (XEXP (dst, 0), 1)
+        = GEN_INT (mips_get_updated_offset (INTVAL (XEXP (XEXP (dst, 0), 1))));
+      return;
+    }
+
+  if (GET_CODE (src) == MEM && GET_CODE (dst) == REG
+      && GET_CODE (XEXP (src, 0)) == PLUS
+      && GET_CODE (XEXP (XEXP (src, 0), 0)) == REG
+      && CONST_INT_P (XEXP (XEXP (src, 0), 1))
+      && (REGNO (XEXP (XEXP (src, 0), 0))
+          == STACK_POINTER_REGNUM))
+    {
+      /* It is a load through sp - update offset.  */
+      XEXP (XEXP (src, 0), 1)
+        = GEN_INT (mips_get_updated_offset (INTVAL (XEXP (XEXP (src, 0), 1))));
+      return;
+    }
+
+  if (GET_CODE (src) == PLUS
+      && GET_CODE (XEXP (src, 0)) == REG
+      && CONST_INT_P (XEXP (src, 1))
+      && REGNO (XEXP (src, 0)) == STACK_POINTER_REGNUM
+      && REGNO (SET_DEST (set_insn)) == STACK_POINTER_REGNUM)
+    delete_insn (insn);
+}
+
+/* Entry function for the frame header optimization.  */
+
+static void
+mips_rest_of_frame_header_opt (void)
+{
+  rtx_insn *insn;
+  bool skip_stack_frame_alloc;
+  struct mips_frame_info *frame = &cfun->machine->frame;
+
+  cfun->machine->uses_frame_header_p = mips_cfun_use_frame_header_p ();
+  skip_stack_frame_alloc = mips_skip_stack_frame_alloc ();
+
+  /* Check if it is needed to recalculate stack frame info.  */
+  if (skip_stack_frame_alloc)
+    mips_compute_frame_info (true, frame);
+
+  if ((skip_stack_frame_alloc && frame->total_size == 0)
+      || (!skip_stack_frame_alloc && !cfun->machine->uses_frame_header_p
+	  && !cfun->stdarg))
+    {
+      /* Function does not use its incoming frame header.  */
+
+      if (!mips_frame_header_usage)
+	mips_frame_header_usage = new hash_map<tree, bool>;
+
+      tree fndecl = current_function_decl;
+      bool existed;
+      bool &frame_hdr_unused = mips_frame_header_usage->get_or_insert (fndecl, &existed);
+      if (!existed)
+	frame_hdr_unused = true;
+    }
+
+  if (skip_stack_frame_alloc)
+    {
+      if (dump_file && cfun->machine->initial_total_size > frame->total_size)
+	fprintf (dump_file, "Frame size reduced by frame header optimization"
+		 " from %ld to %ld.\n", cfun->machine->initial_total_size,
+		 frame->total_size);
+
+      /* Update instructions.  */
+      for (insn = get_insns (); insn != NULL_RTX; insn = NEXT_INSN (insn))
+	mips_frame_header_update_insn (insn);
+    }
+}
 
 /* Generate or test for an insn that supports a constant permutation.  */
 
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
index 348c6e0..3e72936 100644
--- a/gcc/config/mips/mips.opt
+++ b/gcc/config/mips/mips.opt
@@ -412,6 +412,10 @@ modd-spreg
 Target Report Mask(ODD_SPREG)
 Enable use of odd-numbered single-precision registers
 
+mframe-header-opt
+Target Report Var(flag_frame_header_optimization) Optimization
+Optimize frame header
+
 noasmopt
 Driver
 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 413ac16..3b7b1b6 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -813,7 +813,8 @@ Objective-C and Objective-C++ Dialects}.
 -mbranch-cost=@var{num}  -mbranch-likely  -mno-branch-likely @gol
 -mfp-exceptions -mno-fp-exceptions @gol
 -mvr4130-align -mno-vr4130-align -msynci -mno-synci @gol
--mrelax-pic-calls -mno-relax-pic-calls -mmcount-ra-address}
+-mrelax-pic-calls -mno-relax-pic-calls -mmcount-ra-address @gol
+-mframe-header-opt}
 
 @emph{MMIX Options}
 @gccoptlist{-mlibfuncs  -mno-libfuncs  -mepsilon  -mno-epsilon  -mabi=gnu @gol
@@ -18013,6 +18014,18 @@ if @var{ra-address} is nonnull.
 
 The default is @option{-mno-mcount-ra-address}.
 
+@item -mframe-header-opt
+@itemx -mno-frame-header-opt
+@opindex mframe-header-opt
+Enable (disable) frame header optimization in the O32 ABI.  When using
+the O32 ABI, calling functions allocate 16 bytes on the stack in case
+the called function needs to write out register arguments to memory so
+that their address can be taken.  When enabled, this optimization allows
+the called function to use those 16 bytes for other purposes if the
+arguments do not need to be written to memory.
+
+This optimization is off by default at all optimization levels.
+
 @end table
 
 @node MMIX Options
diff --git a/gcc/testsuite/gcc.target/mips/fho-1.c b/gcc/testsuite/gcc.target/mips/fho-1.c
new file mode 100644
index 0000000..e373da4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/fho-1.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */
+/* { dg-options "-mabi=32 -mframe-header-opt -fdump-rtl-mach" } */
+/* Testing -mframe-header-opt optimization option.  */
+
+NOCOMPRESSION int __attribute__((noinline))
+B (int x)
+{
+  return x + 3;
+}
+
+/* We are sure that B is not using its incoming stack frame so we can skip
+   its allocation.  */
+NOCOMPRESSION int __attribute__((noinline))
+A (int x)
+{
+  return B (x) + 2;
+}
+
+NOCOMPRESSION int
+main (void)
+{
+  int a;
+  void *volatile sp1, *volatile sp2;
+  register void *sp asm ("$sp");
+  sp1 = sp;
+  a = A (5);
+  sp2 = sp;
+  return !(a == 10 && sp1 == sp2);
+}
+
+/* { dg-final { scan-rtl-dump "Frame size reduced by frame header optimization" "mach" } } */
+
+/* For enabled targets, test that only one stack allocation is present, the one
+   in main.  The one in A should have been removed by -mframe-header-opt.  */
+/* { dg-final { scan-assembler-times "addiu\t\\\$sp,\\\$sp,-" 1 } } */
diff --git a/gcc/testsuite/gcc.target/mips/fho-2.c b/gcc/testsuite/gcc.target/mips/fho-2.c
new file mode 100644
index 0000000..d3599b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/fho-2.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-options "-mabi=32 -mframe-header-opt" } */
+/* Testing -mframe-header-opt optimization option.  */
+
+NOCOMPRESSION int __attribute__((noinline))
+B (int x)
+{
+  return x + 3;
+}
+
+/* We are sure that B is not using its incoming stack frame so we can skip
+   its allocation.  */
+NOCOMPRESSION int __attribute__((noinline))
+A (int x)
+{
+  return B (x) + 2;
+}
+
+NOCOMPRESSION int
+main (void)
+{
+  int a;
+  void *volatile sp1, *volatile sp2;
+  register void *sp asm ("$sp");
+  sp1 = sp;
+  a = A (5);
+  sp2 = sp;
+  return !(a == 10 && sp1 == sp2);
+}
diff --git a/gcc/testsuite/gcc.target/mips/mips.exp b/gcc/testsuite/gcc.target/mips/mips.exp
index b3617e4..6e6450e 100644
--- a/gcc/testsuite/gcc.target/mips/mips.exp
+++ b/gcc/testsuite/gcc.target/mips/mips.exp
@@ -237,6 +237,7 @@ set mips_option_groups {
     fpu "-m(double|single)-float"
     forbid_cpu "forbid_cpu=.*"
     fp "-mfp(32|xx|64)"
+    frame_header_opt "-mframe-header-opt|-mno-frame-header-opt"
     gp "-mgp(32|64)"
     long "-mlong(32|64)"
     micromips "-mmicromips|-mno-micromips"

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]