[PATCH 9/9] Add remainder of moutline-msabi-xlogues implementation

Daniel Santos daniel.santos@pobox.com
Wed Nov 23 05:16:00 GMT 2016


Adds functions emit_msabi_outlined_save and emit_msabi_outlined_restore,
which are called from ix86_expand_prologue and ix86_expand_epilogue,
respectively. Also adds the code to ix86_expand_call that enables the
optimization (setting the machine_function's outline_ms_sysv field).
---
 gcc/config/i386/i386.c | 298 +++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 279 insertions(+), 19 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 1dc244e..6345c61 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -13882,6 +13882,103 @@ ix86_elim_entry_set_got (rtx reg)
     }
 }
 
+/* Return a SET that stores (STORE) or loads REG at FRAME_REG + OFFSET.  */
+static rtx
+gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
+{
+  rtx slot_addr = offset ? gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset))
+			 : frame_reg;
+  rtx slot = gen_frame_mem (GET_MODE (reg), slot_addr);
+  return gen_rtx_SET (store ? slot : reg, store ? reg : slot);
+}
+
+/* Wrappers for gen_frame_set with STORE fixed to false and to true.  */
+static inline rtx
+gen_frame_load (rtx reg, rtx frame_reg, int offset)
+{
+  return gen_frame_set (reg, frame_reg, offset, false);
+}
+
+static inline rtx
+gen_frame_store (rtx reg, rtx frame_reg, int offset)
+{
+  return gen_frame_set (reg, frame_reg, offset, true);
+}
+
+/* Emit the prologue part that saves registers via an out-of-line stub:
+   realign the stack if needed, set RAX (base of the stores) from SP,
+   allocate stack sized from FRAME, and emit a frame-related PARALLEL of a
+   USE of the stub symbol, a realign flag and one store per register.  */
+static void
+emit_msabi_outlined_save (const struct ix86_frame &frame)
+{
+  struct machine_function *m = cfun->machine;
+  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
+			  + m->outline_ms_sysv_extra_regs;
+  /* Elements: USE of the stub, realign flag, then NCREGS stores.  */
+  rtvec v = rtvec_alloc (ncregs + 2);
+  rtx insn, sym, tmp;
+  rtx rax = gen_rtx_REG (word_mode, AX_REG);
+  unsigned i = 0;
+  const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
+  HOST_WIDE_INT stack_used = xlogue.get_stack_space_used ();
+  HOST_WIDE_INT stack_alloc_size = stack_used;
+  HOST_WIDE_INT rax_offset = xlogue.get_stub_ptr_offset ();
+
+  /* Verify that the incoming stack 16-byte alignment offset matches the
+     layout we're using.  */
+  gcc_assert ((m->fs.sp_offset & 15) == xlogue.get_stack_align_off_in ());
+
+  sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
+						  : XLOGUE_STUB_SAVE);
+  RTVEC_ELT (v, i++) = gen_rtx_USE (VOIDmode, sym);
+
+  /* Combine as many other allocations as possible.  */
+  if (frame.nregs == 0)
+    {
+      if (frame.nsseregs == 0)
+	/* If no other GP or SSE regs, we allocate the whole stack frame.  */
+	stack_alloc_size = frame.stack_pointer_offset - m->fs.sp_offset;
+      else
+	stack_alloc_size = frame.reg_save_offset - m->fs.sp_offset;
+
+      gcc_assert (stack_alloc_size >= stack_used);
+    }
+
+  if (crtl->stack_realign_needed)
+    {
+      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
+
+      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
+      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx, stack_pointer_rtx,
+					GEN_INT (-align_bytes)));
+      RTX_FRAME_RELATED_P (insn) = 1;
+      RTVEC_ELT (v, i++) = const1_rtx;
+    }
+  else
+    RTVEC_ELT (v, i++) = const0_rtx;
+
+  tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-rax_offset));
+  emit_insn (gen_rtx_SET (rax, tmp));
+
+  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+			     GEN_INT (-stack_alloc_size), -1,
+			     m->fs.cfa_reg == stack_pointer_rtx);
+
+  for (unsigned j = 0; j < ncregs; ++j)
+    {
+      const xlogue_layout::reginfo &r = xlogue.get_reginfo (j);
+      rtx reg = gen_rtx_REG (SSE_REGNO_P (r.regno) ? V4SFmode : word_mode,
+			     r.regno);
+      RTVEC_ELT (v, i++) = gen_frame_store (reg, rax, -r.offset);
+    }
+
+  gcc_assert (i == (unsigned) GET_NUM_ELEM (v));
+
+  insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
+  RTX_FRAME_RELATED_P (insn) = 1;
+}
+
 /* Expand the prologue into a bunch of separate insns.  */
 
 void
@@ -14095,6 +14192,11 @@ ix86_expand_prologue (void)
 	}
     }
 
+  /* Call to outlining stub occurs after pushing frame pointer (if it was
+     needed).  */
+  if (m->outline_ms_sysv)
+      emit_msabi_outlined_save (frame);
+
   if (!int_registers_saved)
     {
       /* If saving registers via PUSH, do so now.  */
@@ -14123,20 +14225,24 @@ ix86_expand_prologue (void)
       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
       gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
 
-      /* The computation of the size of the re-aligned stack frame means
-	 that we must allocate the size of the register save area before
-	 performing the actual alignment.  Otherwise we cannot guarantee
-	 that there's enough storage above the realignment point.  */
-      if (m->fs.sp_offset != frame.sse_reg_save_offset)
-        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
-				   GEN_INT (m->fs.sp_offset
-					    - frame.sse_reg_save_offset),
-				   -1, false);
+      /* If using stub, stack will have already been aligned.  */
+      if (!m->outline_ms_sysv)
+	{
+	  /* The computation of the size of the re-aligned stack frame means
+	    that we must allocate the size of the register save area before
+	    performing the actual alignment.  Otherwise we cannot guarantee
+	    that there's enough storage above the realignment point.  */
+	  if (m->fs.sp_offset != frame.sse_reg_save_offset)
+	    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+				      GEN_INT (m->fs.sp_offset
+						- frame.sse_reg_save_offset),
+				      -1, false);
 
-      /* Align the stack.  */
-      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
-					stack_pointer_rtx,
-					GEN_INT (-align_bytes)));
+	  /* Align the stack.  */
+	  insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
+					    stack_pointer_rtx,
+					    GEN_INT (-align_bytes)));
+	}
 
       /* For the purposes of register save area addressing, the stack
          pointer is no longer valid.  As for the value of sp_offset,
@@ -14466,17 +14572,19 @@ ix86_emit_restore_regs_using_pop (void)
   unsigned int regno;
 
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
-    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false))
+    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
       ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
 }
 
-/* Emit code and notes for the LEAVE instruction.  */
+/* Emit code and notes for the LEAVE instruction.  If INSN is non-null,
+   skip the emit and only attach the notes to INSN.  */
 
 static void
-ix86_emit_leave (void)
+ix86_emit_leave (rtx_insn *insn)
 {
   struct machine_function *m = cfun->machine;
-  rtx_insn *insn = emit_insn (ix86_gen_leave ());
+  if (!insn)
+    insn = emit_insn (ix86_gen_leave ());
 
   ix86_add_queued_cfa_restore_notes (insn);
 
@@ -14568,6 +14676,140 @@ ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
       }
 }
 
+/* Emit the epilogue part that restores registers via an out-of-line stub.
+   RSI is set from SP as the base for the register loads, which go into a
+   frame-related PARALLEL together with a USE of the stub symbol.  If
+   USE_CALL, the PARALLEL is emitted as a plain insn and SP is adjusted
+   afterwards, passing STYLE on; otherwise it is emitted as a jump insn
+   that also returns.  FRAME is the function's frame layout.  */
+static void
+emit_msabi_outlined_restore (const struct ix86_frame &frame, bool use_call,
+			     int style)
+{
+  struct machine_function *m = cfun->machine;
+  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
+			  + m->outline_ms_sysv_extra_regs;
+  unsigned elems_needed = ncregs + 1;
+  rtvec v;
+  rtx_insn *insn;
+  rtx sym, tmp;
+  rtx rsi = gen_rtx_REG (word_mode, SI_REG);
+  rtx r10 = NULL_RTX;
+  rtx cfa_adjust_note = NULL_RTX;
+  unsigned i = 0;
+  const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
+  HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
+  HOST_WIDE_INT stack_restore_offset
+    = m->fs.sp_offset - frame.hard_frame_pointer_offset;
+  HOST_WIDE_INT rsi_offset = stack_restore_offset - stub_ptr_offset;
+  rtx rsi_frame_load = NULL_RTX;
+  HOST_WIDE_INT rsi_restore_offset = 0x7fffffff;
+  enum xlogue_stub stub;
+
+  gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
+  tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (rsi_offset));
+  emit_insn (gen_rtx_SET (rsi, tmp));
+
+  if (frame_pointer_needed)
+    stub = use_call ? XLOGUE_STUB_RESTORE_HFP : XLOGUE_STUB_RESTORE_HFP_TAIL;
+  else
+    stub = use_call ? XLOGUE_STUB_RESTORE : XLOGUE_STUB_RESTORE_TAIL;
+
+  sym = xlogue.get_stub_rtx (stub);
+
+  if (!use_call)
+    elems_needed += frame_pointer_needed ? 2 : 3;
+  v = rtvec_alloc (elems_needed);
+
+  /* If we need to pop incoming args or a sibling call will follow, then
+     we want to call the epilogue stub instead of jumping to it.  */
+  if (use_call)
+    RTVEC_ELT (v, i++) = gen_rtx_USE (VOIDmode, sym);
+  else
+    {
+      RTVEC_ELT (v, i++) = ret_rtx;
+      RTVEC_ELT (v, i++) = gen_rtx_USE (VOIDmode, sym);
+      if (!frame_pointer_needed)
+	{
+	  gcc_assert (!m->fs.fp_valid);
+	  gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
+	  gcc_assert (m->fs.sp_valid);
+
+	  tmp = GEN_INT (stub_ptr_offset);
+	  tmp = gen_rtx_PLUS (Pmode, rsi, tmp);
+	  r10 = gen_rtx_REG (DImode, R10_REG);
+	  emit_insn (gen_rtx_SET (r10, tmp));
+	  RTVEC_ELT (v, i++) = const0_rtx;
+	}
+      else
+	{
+	  gcc_assert (m->fs.fp_valid);
+	  gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
+
+	  RTVEC_ELT (v, i++) = const1_rtx;
+	}
+    }
+
+  for (unsigned j = 0; j < ncregs; ++j)
+    {
+      const xlogue_layout::reginfo &r = xlogue.get_reginfo (j);
+      enum machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
+      rtx reg = gen_rtx_REG (mode, r.regno);
+      rtx restore_note = gen_frame_load (reg, rsi, r.offset);
+
+      /* Save RSI frame load insn & note to add later.  */
+      if (r.regno == SI_REG)
+	{
+	  gcc_assert (!rsi_frame_load);
+	  rsi_frame_load = restore_note;
+	  rsi_restore_offset = r.offset;
+	}
+      else
+	{
+	  RTVEC_ELT (v, i++) = restore_note;
+	  ix86_add_cfa_restore_note (NULL, reg, r.offset);
+	}
+    }
+
+  /* Add RSI frame load & restore note at the end.  */
+  gcc_assert (rsi_frame_load);
+  RTVEC_ELT (v, i++) = rsi_frame_load;
+  ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
+			     rsi_restore_offset);
+
+  /* Finally, for tail-call w/o a hard frame pointer, set SP to R10.  */
+  if (!use_call && !frame_pointer_needed)
+    {
+      cfa_adjust_note = gen_rtx_SET (stack_pointer_rtx, r10);
+      RTVEC_ELT (v, i++) = cfa_adjust_note;
+      m->fs.cfa_offset -= stack_restore_offset;
+      m->fs.sp_offset -= stack_restore_offset;
+    }
+
+  gcc_assert (i == (unsigned) GET_NUM_ELEM (v));
+  tmp = gen_rtx_PARALLEL (VOIDmode, v);
+  if (use_call)
+    insn = emit_insn (tmp);
+  else
+    {
+      insn = emit_jump_insn (tmp);
+      JUMP_LABEL (insn) = ret_rtx;
+
+      if (frame_pointer_needed)
+	ix86_emit_leave (insn);
+      else
+	add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa_adjust_note);
+    }
+
+  RTX_FRAME_RELATED_P (insn) = true;
+  ix86_add_queued_cfa_restore_notes (insn);
+
+  if (use_call)
+    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+			       GEN_INT (stack_restore_offset), style,
+			       m->fs.cfa_reg == stack_pointer_rtx);
+}
+
 /* Restore function stack, frame, and registers.  */
 
 void
@@ -14578,6 +14820,7 @@ ix86_expand_epilogue (int style)
   struct ix86_frame frame;
   bool restore_regs_via_mov;
   bool using_drap;
+  bool restore_stub_uses_call = false;
 
   ix86_finalize_stack_realign_flags ();
   ix86_compute_frame_layout (&frame);
@@ -14782,6 +15025,10 @@ ix86_expand_epilogue (int style)
 					      - frame.reg_save_offset),
 				     style, false);
 	}
+      /* If using an out-of-line stub and there are no int regs to restore
+	 inline, then we want to let the stub handle the stack restore.  */
+      else if (m->outline_ms_sysv && !frame.nregs)
+	;
       else if (m->fs.sp_offset != frame.reg_save_offset)
 	{
 	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
@@ -14794,6 +15041,15 @@ ix86_expand_epilogue (int style)
       ix86_emit_restore_regs_using_pop ();
     }
 
+  if (m->outline_ms_sysv)
+    {
+      int popc = crtl->args.pops_args && crtl->args.size ? crtl->args.size : 0;
+
+      restore_stub_uses_call = popc || style == 0 || (m->fs.fp_valid
+			       && !crtl->stack_realign_needed);
+      emit_msabi_outlined_restore (frame, restore_stub_uses_call, style);
+    }
+
   /* If we used a stack pointer and haven't already got rid of it,
      then do so now.  */
   if (m->fs.fp_valid)
@@ -14807,7 +15063,7 @@ ix86_expand_epilogue (int style)
       else if (TARGET_USE_LEAVE
 	       || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
 	       || !cfun->machine->use_fast_prologue_epilogue)
-	ix86_emit_leave ();
+	ix86_emit_leave (NULL);
       else
         {
 	  pro_epilogue_adjust_stack (stack_pointer_rtx,
@@ -14917,7 +15173,7 @@ ix86_expand_epilogue (int style)
       else
 	emit_jump_insn (gen_simple_return_pop_internal (popc));
     }
-  else
+  else if (!m->outline_ms_sysv || restore_stub_uses_call)
     emit_jump_insn (gen_simple_return_internal ());
 
   /* Restore the state back to the state from the prologue,
@@ -28568,6 +28824,10 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
 
 	  clobber_reg (&use, gen_rtx_REG (mode, regno));
 	}
+
+      /* Set here, but it may get cleared later.  */
+      if (TARGET_OUTLINE_MSABI_XLOGUES)
+	cfun->machine->outline_ms_sysv = true;
     }
 
   if (vec_len > 1)
-- 
2.9.0



More information about the Gcc-patches mailing list