This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Question about sibling call epilogues & registers


It would probably be useful to post the actual code. The function below, emit_msabi_outlined_restore(), is called from ix86_expand_epilogue() to emit the RTL that calls the restore stub. Like ix86_expand_epilogue(), it uses style == 0 to indicate that a sibling call will follow the epilogue, in which case we call the stub rather than jmp to it. It also uses a call if we need to pop incoming args or are using a hard frame pointer.

The problem appears to be the lack of a function declaration causing get_call_reg_set_usage() (in final.c) to use the target default "regs_invalidated_by_call" value instead of what I've supplied with add_function_usage_to() and the gen_frame_load() insns for each register restored. I'm developing on 5.4.0 since I need a known good compiler for Wine testing and I plan to rebase it later.

/* Emit the epilogue RTL that restores registers through an out-of-line
   restore stub.

   FRAME supplies hard_frame_pointer_offset, used to compute how far the
   stack pointer must move to undo the frame.  USE_CALL selects between
   calling the XLOGUE_STUB_RESTORE stub (the stack pointer is then adjusted
   here, after the call) and jumping to the XLOGUE_STUB_RESTORE_RET stub
   (which is handed the final stack pointer value in R10).  STYLE is only
   forwarded to pro_epilogue_adjust_stack on the call path.

   A single PARALLEL is emitted holding the call or return, a clobber of
   the flags register, and one frame load per restored register; matching
   CFA restore notes are queued and attached to that insn.

   Returns USE_CALL unchanged.  */
static bool
emit_msabi_outlined_restore (const struct ix86_frame &frame, bool use_call,
                             int style)
{
  struct machine_function *m = cfun->machine;
  /* Number of registers the stub restores.  */
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
                          + m->outline_ms_sysv_extra_regs;
  /* PARALLEL size: NCREGS frame loads plus CALL + CLOBBER (2 elements) on
     the call path, or RETURN + USE + SET r10 + CLOBBER (4 elements) on the
     jump path.  The assert before the PARALLEL is built checks this.  */
  rtvec v = rtvec_alloc (ncregs - 1 + (use_call ? 3 : 5));
  rtx insn, sym, tmp;
  rtx rsi = gen_rtx_REG (word_mode, SI_REG);
  rtx use = NULL_RTX;
  rtx note = NULL_RTX;
  unsigned i = 0;
  const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
  HOST_WIDE_INT stack_restore_offset;
  HOST_WIDE_INT reg_data_offset;
  HOST_WIDE_INT rsi_offset;
  rtx rsi_frame_load = NULL_RTX;
  /* Placeholder; overwritten when SI_REG is met in the loop below, which
     the rsi_frame_load assert after the loop guarantees has happened.  */
  HOST_WIDE_INT rsi_restore_offset = 0x7fffffff;
  const typeof (xlogue.regs[0]) *ri;

  gcc_assert (m->fs.sp_valid);

  stack_restore_offset = m->fs.sp_offset - frame.hard_frame_pointer_offset;
  rsi_offset = stack_restore_offset - xlogue.get_offset ();
  reg_data_offset = stack_restore_offset;

  /* adjust for alignment */
  /* NOTE(review): reg_data_offset is computed here but never read again in
     this function.  */
  if (m->outline_ms_sysv_offset_in)
    reg_data_offset -= UNITS_PER_WORD;

  /* Point RSI at the save area (sp + rsi_offset) and record it in USE so
     it can be attached to the call as a used register.
     NOTE(review): the value assigned to insn here is overwritten before
     it is read.  */
  tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT(rsi_offset));
  insn = emit_insn (gen_rtx_SET (VOIDmode, rsi, tmp));
  use_reg (&use, rsi);

  /* construct restore_multiple/restore_multiple_and_return insn */
  sym = xlogue.get_stub_rtx (use_call ? XLOGUE_STUB_RESTORE
                                      : XLOGUE_STUB_RESTORE_RET);

  /* Verify that note queue is empty. */
  gcc_assert(!queued_cfa_restores);

  /* If:
     * we need to pop incoming args,
     * this is a sibcall, or
     * we have a hard frame pointer
     then we want to call the epilogue stub instead of jumping to it. */
  if (use_call)
    {
      tmp = gen_rtx_MEM (QImode, sym);
      RTVEC_ELT (v, i++) = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
    }
  else
    {
      rtx r10;

      /* Jump path: the PARALLEL is a return that uses the stub symbol and
         sets R10 to sp + stack_restore_offset.  */
      RTVEC_ELT (v, i++) = ret_rtx;
      RTVEC_ELT (v, i++) = gen_rtx_USE (VOIDmode, sym);
      tmp = GEN_INT(stack_restore_offset);
      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, tmp);
      r10 = gen_rtx_REG (DImode, R10_REG);
      RTVEC_ELT (v, i++) = gen_rtx_SET (VOIDmode, r10, tmp);

      /* Update the tracked frame state for the stack pointer move; the
         __msabi_restore_ret_* stubs end with `mov %r10,%rsp; ret'.  */
      gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
      gcc_assert (m->fs.sp_valid);
      m->fs.sp_offset -= stack_restore_offset;

      /* sp = sp + stack_restore_offset; attached below as the
         REG_CFA_ADJUST_CFA note of the jump insn.  */
      note = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                           GEN_INT(stack_restore_offset));
      note = gen_rtx_SET (VOIDmode, stack_pointer_rtx, note);
    }

  RTVEC_ELT (v, i++) = gen_rtx_CLOBBER (VOIDmode,
                                        gen_rtx_REG (CCmode, FLAGS_REG));

  /* One frame load per restored register: SSE registers in V4SFmode,
     everything else in word_mode.  */
  for (ri = &xlogue.regs[0]; ri != &xlogue.regs[ncregs]; ++ri)
    {
enum machine_mode mode = SSE_REGNO_P(ri->regno) ? V4SFmode : word_mode;
      rtx reg, restore_note;
      /* NOTE(review): 0x70 presumably matches the bias the stubs expect on
         RSI (their comments say RSI points "to -0x70 offset for data") --
         confirm against xlogue_layout.  */
      HOST_WIDE_INT offset = ri->offset - 0x70;

      reg = gen_rtx_REG (mode, ri->regno);
      restore_note = gen_frame_load (reg, rsi, offset);

      /* Make sure RSI frame load/restore note is last */
      /* TODO: Do I really need to reorder this? */
      if (ri->regno == SI_REG)
        {
          /* Held back and appended after the loop; RSI may appear only
             once in the register list.  */
          gcc_assert (!rsi_frame_load);
          rsi_frame_load = restore_note;
          rsi_restore_offset = offset;
        }
      else
        {
          RTVEC_ELT (v, i++) = restore_note;
          ix86_add_cfa_restore_note (NULL_RTX, reg, offset);
        }
    }

  /* add frame load & restore note for RSI last */
  gcc_assert (rsi_frame_load);
  RTVEC_ELT (v, i++) = rsi_frame_load;
  ix86_add_cfa_restore_note (NULL_RTX, gen_rtx_REG (DImode, SI_REG),
                             rsi_restore_offset);

  /* Every slot allocated by rtvec_alloc above must now be filled.  */
  gcc_assert (i == (unsigned)GET_NUM_ELEM (v));

  tmp = gen_rtx_PARALLEL (VOIDmode, v);
  if (use_call)
    {
      /* Call path: tag the call with the stub symbol via REG_CALL_DECL and
         attach the RSI use recorded earlier.  */
      insn = emit_call_insn (tmp);
      add_reg_note (insn, REG_CALL_DECL, sym);
      add_function_usage_to (insn, use);
    }
  else
    {
      /* Jump path: a return jump carrying the CFA adjustment built
         above.  */
      insn = emit_jump_insn (tmp);
      JUMP_LABEL (insn) = ret_rtx;
      add_reg_note(insn, REG_CFA_ADJUST_CFA, note);
    }

  /* Attach the CFA restore notes queued in the loop to the call/jump.  */
  RTX_FRAME_RELATED_P(insn) = true;
  ix86_add_queued_cfa_restore_notes (insn);

  /* After a call the stack pointer has not been moved yet, so adjust it by
     stack_restore_offset here.  */
  if (use_call)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (stack_restore_offset), style,
                               m->fs.cfa_reg == stack_pointer_rtx);

  return use_call;
}




Finally, these are the actual stubs:

#ifdef __x86_64__

/* ELFFN(fn) emits a `.type fn,@function' directive when __ELF__ is defined
   and expands to nothing otherwise.  */
# ifdef __ELF__
#  define ELFFN(fn) .type fn,@function
# else
#  define ELFFN(fn)
# endif

/* HIDDEN_FUNC(fn) emits the .global and .hidden directives and the ELFFN
   annotation for fn, followed by the label `fn:' itself, so the code of
   the entry point begins right after the macro use.  */
# define HIDDEN_FUNC(fn)        \
        .global fn;             \
        .hidden fn;             \
        ELFFN(fn);              \
fn:

/* FUNC_END(fn) sets the size of fn to the distance from its label to the
   current location.  */
# define FUNC_END(fn) .size fn,.-fn

/* MOVAPS is the aligned 128-bit move used by SSE_SAVE/SSE_RESTORE:
   vmovaps when __AVX__ is defined, movaps otherwise.  */
# ifdef __AVX__
#  define MOVAPS vmovaps
# else
#  define MOVAPS movaps
# endif


/* Save SSE registers 6-15.  off is the displacement from %rax at which
   xmm6 is stored; xmm7 through xmm15 are stored in successively lower
   16-byte slots, down to (off - 0x90) for xmm15.  The stores use MOVAPS. */
.macro SSE_SAVE off=0
        MOVAPS %xmm15,(\off - 0x90)(%rax)
        MOVAPS %xmm14,(\off - 0x80)(%rax)
        MOVAPS %xmm13,(\off - 0x70)(%rax)
        MOVAPS %xmm12,(\off - 0x60)(%rax)
        MOVAPS %xmm11,(\off - 0x50)(%rax)
        MOVAPS %xmm10,(\off - 0x40)(%rax)
        MOVAPS %xmm9, (\off - 0x30)(%rax)
        MOVAPS %xmm8, (\off - 0x20)(%rax)
        MOVAPS %xmm7, (\off - 0x10)(%rax)
        MOVAPS %xmm6, \off(%rax)
.endm

/* Restore SSE registers 6-15.  off is the displacement from %rsi at which
   xmm6 is loaded; xmm7 through xmm15 are loaded from successively lower
   16-byte slots, down to (off - 0x90) for xmm15 -- the same slot layout
   SSE_SAVE uses relative to %rax.  %rsi itself is not modified. */
.macro SSE_RESTORE off=0
        MOVAPS (\off - 0x90)(%rsi), %xmm15
        MOVAPS (\off - 0x80)(%rsi), %xmm14
        MOVAPS (\off - 0x70)(%rsi), %xmm13
        MOVAPS (\off - 0x60)(%rsi), %xmm12
        MOVAPS (\off - 0x50)(%rsi), %xmm11
        MOVAPS (\off - 0x40)(%rsi), %xmm10
        MOVAPS (\off - 0x30)(%rsi), %xmm9
        MOVAPS (\off - 0x20)(%rsi), %xmm8
        MOVAPS (\off - 0x10)(%rsi), %xmm7
        MOVAPS \off(%rsi), %xmm6
.endm


        .text
/*
 * Register save stubs.
 *
 * to call:
 * lea          -xxx(%rsp), %rax  # xxx is 0x70 or 0x78 (depending upon
 *                                # incoming stack alignment offset)
 * subq         $xxx, %rsp        # xxx is however much stack space the fn needs
 * callq        __msabi_save_<nregs>
 *
 * The entry points fall through into one another: __msabi_save_18 stores
 * %r15 and continues into __msabi_save_17, and so on down to
 * __msabi_save_12, which stores %rdi, %rsi and xmm6-xmm15 and returns.
 * __msabi_save_<n> therefore stores n registers in total.  All stores are
 * relative to %rax.
 */
HIDDEN_FUNC(__msabi_save_18)
        mov        %r15,-0x70(%rax)
HIDDEN_FUNC(__msabi_save_17)
        mov        %r14,-0x68(%rax)
HIDDEN_FUNC(__msabi_save_16)
        mov        %r13,-0x60(%rax)
HIDDEN_FUNC(__msabi_save_15)
        mov        %r12,-0x58(%rax)
HIDDEN_FUNC(__msabi_save_14)
        mov        %rbp,-0x50(%rax)
HIDDEN_FUNC(__msabi_save_13)
        mov        %rbx,-0x48(%rax)
HIDDEN_FUNC(__msabi_save_12)
        mov        %rdi,-0x40(%rax)
        mov        %rsi,-0x38(%rax)
        /* xmm15 lands at -0x30(%rax), directly above the %rsi slot.  */
        SSE_SAVE off=0x60
        ret
/* Every entry point's size extends to this common end.  */
FUNC_END(__msabi_save_12)
FUNC_END(__msabi_save_13)
FUNC_END(__msabi_save_14)
FUNC_END(__msabi_save_15)
FUNC_END(__msabi_save_16)
FUNC_END(__msabi_save_17)
FUNC_END(__msabi_save_18)

/*
 * Register restore stubs that also restore the stack pointer and return
 * on behalf of the function that jumped here.
 *
 * to call:
 * lea          xxx(%rsp), %rsi   # xxx = SP adjustment to point to -0x70
 *                                # offset for data
 * lea          xxx(%rsp), r10    # xxx = SP adjustment to restore stack
 * jmp          __msabi_restore_ret_<nregs>
 *
 * The entry points fall through into one another in the same way as the
 * save stubs: __msabi_restore_ret_18 reloads %r15 and continues downward
 * to __msabi_restore_ret_12.  All loads are relative to %rsi.
 */
HIDDEN_FUNC(__msabi_restore_ret_18)
        mov        -0x70(%rsi),%r15
HIDDEN_FUNC(__msabi_restore_ret_17)
        mov        -0x68(%rsi),%r14
HIDDEN_FUNC(__msabi_restore_ret_16)
        mov        -0x60(%rsi),%r13
HIDDEN_FUNC(__msabi_restore_ret_15)
        mov        -0x58(%rsi),%r12
HIDDEN_FUNC(__msabi_restore_ret_14)
        mov        -0x50(%rsi),%rbp
HIDDEN_FUNC(__msabi_restore_ret_13)
        mov        -0x48(%rsi),%rbx
HIDDEN_FUNC(__msabi_restore_ret_12)
        mov        -0x40(%rsi),%rdi
        SSE_RESTORE off=0x60
        /* %rsi is the base register for every load above, so it is
           reloaded last.  */
        mov        -0x38(%rsi),%rsi
        /* Install the stack pointer value the caller placed in %r10, then
           return.  */
        mov        %r10,%rsp
        ret
/* Every entry point's size extends to this common end.  */
FUNC_END(__msabi_restore_ret_12)
FUNC_END(__msabi_restore_ret_13)
FUNC_END(__msabi_restore_ret_14)
FUNC_END(__msabi_restore_ret_15)
FUNC_END(__msabi_restore_ret_16)
FUNC_END(__msabi_restore_ret_17)
FUNC_END(__msabi_restore_ret_18)

/*
 * Register restore stubs that return to their caller and leave the stack
 * pointer untouched; the caller adjusts %rsp itself afterwards.
 *
 * to call:
 * lea          xxx(%rsp), %rsi  # xxx = SP adjustment to point to -0x70
 *                               # offset for data
 * callq        __msabi_restore_<nregs>
 * subq         $xxx,%rsp        # xxx = SP adjustment to restore stack
 *
 * NOTE(review): the compiler side passes stack_restore_offset to
 * pro_epilogue_adjust_stack after the call; confirm whether the `subq'
 * above is meant with a negative xxx or should read `addq'.
 *
 * The entry points fall through into one another: __msabi_restore_18
 * reloads %r15 and continues downward to __msabi_restore_12.  All loads
 * are relative to %rsi.
 */
HIDDEN_FUNC(__msabi_restore_18)
        mov        -0x70(%rsi),%r15
HIDDEN_FUNC(__msabi_restore_17)
        mov        -0x68(%rsi),%r14
HIDDEN_FUNC(__msabi_restore_16)
        mov        -0x60(%rsi),%r13
HIDDEN_FUNC(__msabi_restore_15)
        mov        -0x58(%rsi),%r12
HIDDEN_FUNC(__msabi_restore_14)
        mov        -0x50(%rsi),%rbp
HIDDEN_FUNC(__msabi_restore_13)
        mov        -0x48(%rsi),%rbx
HIDDEN_FUNC(__msabi_restore_12)
        mov        -0x40(%rsi),%rdi
        SSE_RESTORE off=0x60
        /* %rsi is the base register for every load above, so it is
           reloaded last.  */
        mov        -0x38(%rsi),%rsi
        ret
/* Every entry point's size extends to this common end.  */
FUNC_END(__msabi_restore_12)
FUNC_END(__msabi_restore_13)
FUNC_END(__msabi_restore_14)
FUNC_END(__msabi_restore_15)
FUNC_END(__msabi_restore_16)
FUNC_END(__msabi_restore_17)
FUNC_END(__msabi_restore_18)

#endif /* __x86_64__ */

Thanks!
Daniel


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]