[PATCH 11/12 rev1] [i386] Add remainder of -mcall-ms2sysv-xlogues implementation

Daniel Santos daniel.santos@pobox.com
Thu May 4 22:11:00 GMT 2017


Now generates RTL with appropriate stack restore and leave patterns.  Slightly
cleaned up code that calculates the number of vector elements for clarity.

Tests are good when rebased onto gcc-7_1_0-release as HEAD currently fails to
bootstrap.

Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
---
 gcc/config/i386/i386.c | 287 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 278 insertions(+), 9 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index f2772b2d10e..e43dc819f9a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -14148,6 +14148,78 @@ ix86_elim_entry_set_got (rtx reg)
     }
 }
 
+static rtx
+gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
+{
+  rtx addr, mem;
+
+  if (offset)
+    addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
+  mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
+  return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
+}
+
+static inline rtx
+gen_frame_load (rtx reg, rtx frame_reg, int offset)
+{
+  return gen_frame_set (reg, frame_reg, offset, false);
+}
+
+static inline rtx
+gen_frame_store (rtx reg, rtx frame_reg, int offset)
+{
+  return gen_frame_set (reg, frame_reg, offset, true);
+}
+
+static void
+ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
+{
+  struct machine_function *m = cfun->machine;
+  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
+			  + m->call_ms2sysv_extra_regs;
+  rtvec v = rtvec_alloc (ncregs + 1);
+  unsigned int align, i, vi = 0;
+  rtx_insn *insn;
+  rtx sym, addr;
+  rtx rax = gen_rtx_REG (word_mode, AX_REG);
+  const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
+  HOST_WIDE_INT rax_offset = xlogue.get_stub_ptr_offset () + m->fs.sp_offset;
+  HOST_WIDE_INT stack_alloc_size = frame.stack_pointer_offset - m->fs.sp_offset;
+  HOST_WIDE_INT stack_align_off_in = xlogue.get_stack_align_off_in ();
+
+  /* Verify that the incoming stack 16-byte alignment offset matches the
+     layout we're using.  */
+  gcc_assert (stack_align_off_in == (m->fs.sp_offset & UNITS_PER_WORD));
+
+  /* Get the stub symbol.  */
+  sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
+						  : XLOGUE_STUB_SAVE);
+  RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
+
+  /* Setup RAX as the stub's base pointer.  */
+  align = GET_MODE_ALIGNMENT (V4SFmode);
+  addr = choose_baseaddr (rax_offset, &align);
+  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
+  insn = emit_insn (gen_rtx_SET (rax, addr));
+
+  gcc_assert (stack_alloc_size >= xlogue.get_stack_space_used ());
+  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+			     GEN_INT (-stack_alloc_size), -1,
+			     m->fs.cfa_reg == stack_pointer_rtx);
+  for (i = 0; i < ncregs; ++i)
+    {
+      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
+      rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
+			     r.regno);
+      RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);;
+    }
+
+  gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
+
+  insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
+  RTX_FRAME_RELATED_P (insn) = true;
+}
+
 /* Expand the prologue into a bunch of separate insns.  */
 
 void
@@ -14395,7 +14467,7 @@ ix86_expand_prologue (void)
 	 performing the actual alignment.  Otherwise we cannot guarantee
 	 that there's enough storage above the realignment point.  */
       allocate = frame.stack_realign_allocate_offset - m->fs.sp_offset;
-      if (allocate)
+      if (allocate && !m->call_ms2sysv)
         pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
 				   GEN_INT (-allocate), -1, false);
 
@@ -14403,7 +14475,6 @@ ix86_expand_prologue (void)
       insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
 					stack_pointer_rtx,
 					GEN_INT (-align_bytes)));
-
       /* For the purposes of register save area addressing, the stack
 	 pointer can no longer be used to access anything in the frame
 	 below m->fs.sp_realigned_offset and the frame pointer cannot be
@@ -14420,6 +14491,9 @@ ix86_expand_prologue (void)
 	m->fs.sp_valid = false;
     }
 
+  if (m->call_ms2sysv)
+    ix86_emit_outlined_ms2sysv_save (frame);
+
   allocate = frame.stack_pointer_offset - m->fs.sp_offset;
 
   if (flag_stack_usage_info)
@@ -14740,17 +14814,19 @@ ix86_emit_restore_regs_using_pop (void)
   unsigned int regno;
 
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
-    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false))
+    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
       ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
 }
 
-/* Emit code and notes for the LEAVE instruction.  */
+/* Emit code and notes for the LEAVE instruction.  If insn is non-null,
+   omits the emit and only attaches the notes.  */
 
 static void
-ix86_emit_leave (void)
+ix86_emit_leave (rtx_insn *insn)
 {
   struct machine_function *m = cfun->machine;
-  rtx_insn *insn = emit_insn (ix86_gen_leave ());
+  if (!insn)
+    insn = emit_insn (ix86_gen_leave ());
 
   ix86_add_queued_cfa_restore_notes (insn);
 
@@ -14844,6 +14920,164 @@ ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
       }
 }
 
+static void
+ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
+				  bool use_call, int style)
+{
+  struct machine_function *m = cfun->machine;
+  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
+			  + m->call_ms2sysv_extra_regs;
+  rtvec v;
+  unsigned int elems_needed, align, i, vi = 0;
+  rtx_insn *insn;
+  rtx sym, tmp;
+  rtx rsi = gen_rtx_REG (word_mode, SI_REG);
+  rtx r10 = NULL_RTX;
+  const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
+  HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
+  HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
+  rtx rsi_frame_load = NULL_RTX;
+  HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
+  enum xlogue_stub stub;
+
+  gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
+
+  /* If using a realigned stack, we should never start with padding.  */
+  gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
+
+  /* Setup RSI as the stub's base pointer.  */
+  align = GET_MODE_ALIGNMENT (V4SFmode);
+  tmp = choose_baseaddr (rsi_offset, &align);
+  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
+  emit_insn (gen_rtx_SET (rsi, tmp));
+
+  /* Get a symbol for the stub.  */
+  if (frame_pointer_needed)
+    stub = use_call ? XLOGUE_STUB_RESTORE_HFP
+		    : XLOGUE_STUB_RESTORE_HFP_TAIL;
+  else
+    stub = use_call ? XLOGUE_STUB_RESTORE
+		    : XLOGUE_STUB_RESTORE_TAIL;
+  sym = xlogue.get_stub_rtx (stub);
+
+  elems_needed = ncregs;
+  if (use_call)
+    elems_needed += 1;
+  else
+    elems_needed += frame_pointer_needed ? 5 : 3;
+  v = rtvec_alloc (elems_needed);
+
+  /* We call the epilogue stub when we need to pop incoming args or we are
+     doing a sibling call as the tail.  Otherwise, we will emit a jmp to the
+     epilogue stub and it is the tail-call.  */
+  if (use_call)
+      RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
+  else
+    {
+      RTVEC_ELT (v, vi++) = ret_rtx;
+      RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
+      if (frame_pointer_needed)
+	{
+	  rtx rbp = gen_rtx_REG (DImode, BP_REG);
+	  gcc_assert (m->fs.fp_valid);
+	  gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
+
+	  tmp = gen_rtx_PLUS (DImode, rbp, GEN_INT (8));
+	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
+	  RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
+	  tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
+	  RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
+	}
+      else
+	{
+	  /* If no hard frame pointer, we set R10 to the SP restore value.  */
+	  gcc_assert (!m->fs.fp_valid);
+	  gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
+	  gcc_assert (m->fs.sp_valid);
+
+	  r10 = gen_rtx_REG (DImode, R10_REG);
+	  tmp = gen_rtx_PLUS (Pmode, rsi, GEN_INT (stub_ptr_offset));
+	  emit_insn (gen_rtx_SET (r10, tmp));
+
+	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
+	}
+    }
+
+  /* Generate frame load insns and restore notes.  */
+  for (i = 0; i < ncregs; ++i)
+    {
+      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
+      enum machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
+      rtx reg, frame_load;
+
+      reg = gen_rtx_REG (mode, r.regno);
+      frame_load = gen_frame_load (reg, rsi, r.offset);
+
+      /* Save RSI frame load insn & note to add last.  */
+      if (r.regno == SI_REG)
+	{
+	  gcc_assert (!rsi_frame_load);
+	  rsi_frame_load = frame_load;
+	  rsi_restore_offset = r.offset;
+	}
+      else
+	{
+	  RTVEC_ELT (v, vi++) = frame_load;
+	  ix86_add_cfa_restore_note (NULL, reg, r.offset);
+	}
+    }
+
+  /* Add RSI frame load & restore note at the end.  */
+  gcc_assert (rsi_frame_load);
+  gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
+  RTVEC_ELT (v, vi++) = rsi_frame_load;
+  ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
+			     rsi_restore_offset);
+
+  /* Finally, for tail-call w/o a hard frame pointer, set SP to R10.  */
+  if (!use_call && !frame_pointer_needed)
+    {
+      gcc_assert (m->fs.sp_valid);
+      gcc_assert (!m->fs.sp_realigned);
+
+      /* At this point, R10 should point to frame.stack_realign_offset.  */
+      if (m->fs.cfa_reg == stack_pointer_rtx)
+	m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
+      m->fs.sp_offset = frame.stack_realign_offset;
+    }
+
+  gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
+  tmp = gen_rtx_PARALLEL (VOIDmode, v);
+  if (use_call)
+      insn = emit_insn (tmp);
+  else
+    {
+      insn = emit_jump_insn (tmp);
+      JUMP_LABEL (insn) = ret_rtx;
+
+      if (frame_pointer_needed)
+	ix86_emit_leave (insn);
+      else
+	{
+	  /* Need CFA adjust note.  */
+	  tmp = gen_rtx_SET (stack_pointer_rtx, r10);
+	  add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
+	}
+    }
+
+  RTX_FRAME_RELATED_P (insn) = true;
+  ix86_add_queued_cfa_restore_notes (insn);
+
+  /* If we're not doing a tail-call, we need to adjust the stack.  */
+  if (use_call && m->fs.sp_valid)
+    {
+      HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
+      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+				GEN_INT (dealloc), style,
+				m->fs.cfa_reg == stack_pointer_rtx);
+    }
+}
+
 /* Restore function stack, frame, and registers.  */
 
 void
@@ -14854,6 +15088,7 @@ ix86_expand_epilogue (int style)
   struct ix86_frame frame;
   bool restore_regs_via_mov;
   bool using_drap;
+  bool restore_stub_is_tail = false;
 
   ix86_finalize_stack_realign_flags ();
   ix86_compute_frame_layout (&frame);
@@ -14956,7 +15191,37 @@ ix86_expand_epilogue (int style)
     ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
 					  style == 2);
 
-  if (restore_regs_via_mov)
+  if (m->call_ms2sysv)
+    {
+      int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
+
+      /* We cannot use a tail-call for the stub if:
+	 1. We have to pop incoming args,
+	 2. We have additional int regs to restore, or
+	 3. A sibling call will be the tail-call, or
+	 4. We are emitting an eh_return_internal epilogue.
+
+	 TODO: Item 4 has not yet tested!
+
+	 If any of the above are true, we will call the stub rather than
+	 jump to it.  */
+      restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
+      ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
+    }
+
+  /* If using out-of-line stub that is a tail-call, then...*/
+  if (m->call_ms2sysv && restore_stub_is_tail)
+    {
+      /* TODO: parinoid tests. (remove eventually)  */
+      gcc_assert (m->fs.sp_valid);
+      gcc_assert (!m->fs.sp_realigned);
+      gcc_assert (!m->fs.fp_valid);
+      gcc_assert (!m->fs.realigned);
+      gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
+      gcc_assert (!crtl->drap_reg);
+      gcc_assert (!frame.nregs);
+    }
+  else if (restore_regs_via_mov)
     {
       rtx t;
 
@@ -15087,7 +15352,7 @@ ix86_expand_epilogue (int style)
       else if (TARGET_USE_LEAVE
 	       || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
 	       || !cfun->machine->use_fast_prologue_epilogue)
-	ix86_emit_leave ();
+	ix86_emit_leave (NULL);
       else
         {
 	  pro_epilogue_adjust_stack (stack_pointer_rtx,
@@ -15198,7 +15463,7 @@ ix86_expand_epilogue (int style)
       else
 	emit_jump_insn (gen_simple_return_pop_internal (popc));
     }
-  else
+  else if (!m->call_ms2sysv || !restore_stub_is_tail)
     emit_jump_insn (gen_simple_return_internal ());
 
   /* Restore the state back to the state from the prologue,
@@ -28927,6 +29192,10 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
 
 	  clobber_reg (&use, gen_rtx_REG (mode, regno));
 	}
+
+      /* Set here, but it may get cleared later.  */
+      if (TARGET_CALL_MS2SYSV_XLOGUES)
+	cfun->machine->call_ms2sysv = true;
     }
 
   if (vec_len > 1)
-- 
2.11.0



More information about the Gcc-patches mailing list