[PATCH 2/2] [i386] PR82002 Part 2: Correct non-immediate offset/invalid INSN
Daniel Santos
daniel.santos@pobox.com
Thu Nov 2 22:37:00 GMT 2017
On 10/31/2017 04:31 AM, Uros Bizjak wrote:
> On Tue, Oct 31, 2017 at 3:09 AM, Daniel Santos <daniel.santos@pobox.com> wrote:
>> When we are realigning the stack pointer, making an ms_abi to sysv_abi
>> call and alllocating 2GiB or more on the stack we end up with an invalid
>> INSN due to a non-immediate offset. This occurs both with and without
>> -mcall-ms2sysv-xlogues. Additionally, I've discovered that the stack
>> allocation with -mcall-ms2sysv-xlogues is incorrect as it ignores stack
>> checking, stack clash checking and probing.
>>
>> This patch fixes these problems by
>>
>> 1. No longer allocate stack space in ix86_emit_outlined_ms2sysv_save.
>> 2. Rearrange where we emit SSE saves or stub call:
>> a. Before frame allocation when offset from frame to save area is >= 2GiB.
>> b. After frame allocation when frame is < 2GiB. (Stack allocations
>> prior to the stub call can't be combined with those afterwards, so
>> this is better when possible.)
>> 3. Modify choose_baseaddr to take an optional scratch_regno argument
>> and never return rtx that cannot be used as an immediate.
>>
>> gcc:
>> config/i386/i386.c (choose_basereg): Use optional scratch
>> register and add assertion.
>> (x86_emit_outlined_ms2sysv_save): use scratch register when
>> needed, and don't allocate stack.
>> (ix86_expand_prologue): Rearrange where SSE saves/stub call is
>> emitted, correct wrong allocation with -mcall-ms2sysv-xlogues.
>> (ix86_emit_outlined_ms2sysv_restore): Fix non-immediate offsets.
>>
>> gcc/testsuite:
>> gcc.target/i386/pr82002-2a.c: Change from xfail to fail.
>> gcc.target/i386/pr82002-2b.c: Likewise.
>>
>> Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
>> ---
>> gcc/config/i386/i386.c | 76 ++++++++++++++++++++++++------
>> gcc/testsuite/gcc.target/i386/pr82002-2a.c | 2 -
>> gcc/testsuite/gcc.target/i386/pr82002-2b.c | 2 -
>> 3 files changed, 62 insertions(+), 18 deletions(-)
>>
>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> index 83a07afb3e1..abd8e937e0d 100644
>> --- a/gcc/config/i386/i386.c
>> +++ b/gcc/config/i386/i386.c
>> @@ -11520,7 +11520,8 @@ choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
>> The valid base registers are taken from CFUN->MACHINE->FS. */
>>
>> static rtx
>> -choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align)
>> +choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
>> + int scratch_regno = -1)
>> {
>> rtx base_reg = NULL;
>> HOST_WIDE_INT base_offset = 0;
>> @@ -11534,6 +11535,28 @@ choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align)
>> choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
>>
>> gcc_assert (base_reg != NULL);
>> +
>> + if (TARGET_64BIT)
>> + {
>> + rtx base_offset_rtx = GEN_INT (base_offset);
>> +
>> + if (scratch_regno >= 0)
>> + {
>> + if (!x86_64_immediate_operand (base_offset_rtx, DImode))
>> + {
>> + rtx tmp;
>> + rtx scratch_reg = gen_rtx_REG (DImode, scratch_regno);
>> +
>> + emit_insn (gen_rtx_SET (scratch_reg, base_offset_rtx));
>> + tmp = gen_rtx_PLUS (DImode, scratch_reg, base_reg);
>> + emit_insn (gen_rtx_SET (scratch_reg, tmp));
>> + return scratch_reg;
>> + }
>> + }
>> + else
>> + gcc_assert (x86_64_immediate_operand (base_offset_rtx, DImode));
>> + }
>> +
>> return plus_constant (Pmode, base_reg, base_offset);
>> }
> This function doesn't need to return a register, it can return plus
> RTX. I'd suggest the following implementation:
>
> --cut here--
> Index: i386.c
> ===================================================================
> --- i386.c (revision 254243)
> +++ i386.c (working copy)
> @@ -11520,7 +11520,8 @@
> The valid base registers are taken from CFUN->MACHINE->FS. */
>
> static rtx
> -choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align)
> +choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
> + unsigned int scratch_regno = INVALID_REGNUM)
> {
> rtx base_reg = NULL;
> HOST_WIDE_INT base_offset = 0;
> @@ -11534,6 +11535,19 @@
> choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
>
> gcc_assert (base_reg != NULL);
> +
> + rtx base_offset_rtx = GEN_INT (base_offset);
> +
> + if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
> + {
> + gcc_assert (scratch_regno != INVALID_REGNUM);
> +
> + rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
> + emit_move_insn (scratch_reg, base_offset_rtx);
> +
> + return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
> + }
> +
> return plus_constant (Pmode, base_reg, base_offset);
> }
Oh, that's much better, thanks.
> --cut here--
>
> You have to always return Pmode, otherwise x32 will complain (you may
> try with -maddress-mode=short). Also, the above will immediately ICE
> when too large base_offset is used without the scratch, so one can
> backtrace to offending function.
>
>> @@ -12793,23 +12816,22 @@ ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
>> rtx sym, addr;
>> rtx rax = gen_rtx_REG (word_mode, AX_REG);
>> const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
>> - HOST_WIDE_INT allocate = frame.stack_pointer_offset - m->fs.sp_offset;
>>
>> /* AL should only be live with sysv_abi. */
>> gcc_assert (!ix86_eax_live_at_start_p ());
>> + gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
>>
>> /* Setup RAX as the stub's base pointer. We use stack_realign_offset rather
>> we've actually realigned the stack or not. */
>> align = GET_MODE_ALIGNMENT (V4SFmode);
>> addr = choose_baseaddr (frame.stack_realign_offset
>> - + xlogue.get_stub_ptr_offset (), &align);
>> + + xlogue.get_stub_ptr_offset (), &align, AX_REG);
>> gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
>> - emit_insn (gen_rtx_SET (rax, addr));
>>
>> - /* Allocate stack if not already done. */
>> - if (allocate > 0)
>> - pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
>> - GEN_INT (-allocate), -1, false);
>> + /* If choose_baseaddr returned our scratch register, then we don't need to
>> + do another SET. */
>> + if (!REG_P (addr) || REGNO (addr) != AX_REG)
>> + emit_insn (gen_rtx_SET (rax, addr));
> You won't need the above change with a choose_baseaddr that returns PLUS RTX.
>
>> /* Get the stub symbol. */
>> sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
>> @@ -12841,6 +12863,7 @@ ix86_expand_prologue (void)
>> HOST_WIDE_INT allocate;
>> bool int_registers_saved;
>> bool sse_registers_saved;
>> + bool save_stub_call_needed;
>> rtx static_chain = NULL_RTX;
>>
>> if (ix86_function_naked (current_function_decl))
>> @@ -13016,6 +13039,8 @@ ix86_expand_prologue (void)
>>
>> int_registers_saved = (frame.nregs == 0);
>> sse_registers_saved = (frame.nsseregs == 0);
>> + save_stub_call_needed = (m->call_ms2sysv);
>> + gcc_assert (!(!sse_registers_saved && save_stub_call_needed));
> Oooh, double negation :(
I'm just saying that we shouldn't be saving SSE registers inline and via
the stub. If I followed the naming convention of e.g.,
"see_registers_saved" then my variable would end up being called
"save_stub_called" which would be incorrect and misleading, similar to
how "see_registers_saved" is misleading when there are in fact no SSE
register that need to be saved. Maybe I should rename
(int|sse)_registers_saved to (int|sse)_register_saves_needed with
inverted logic instead.
>> if (frame_pointer_needed && !m->fs.fp_valid)
>> {
>> @@ -13110,10 +13135,27 @@ ix86_expand_prologue (void)
>> target. */
>> if (TARGET_SEH)
>> m->fs.sp_valid = false;
>> - }
>>
>> - if (m->call_ms2sysv)
>> - ix86_emit_outlined_ms2sysv_save (frame);
>> + /* If SP offset is non-immediate after allocation of the stack frame,
>> + then emit SSE saves or stub call prior to allocating the rest of the
>> + stack frame. This is less efficient for the out-of-line stub because
>> + we can't combine allocations across the call barrier, but it's better
>> + than using a scratch register. */
>> + else if (frame.stack_pointer_offset - m->fs.sp_realigned_offset
>> + > 0x7fffffff)
> Should we use x86_64_immediate_operand here that betters document the
> limitation instead of using magic constants?
Probably so.
>
>> + {
>> + if (!sse_registers_saved)
>> + {
>> + ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
>> + sse_registers_saved = true;
>> + }
>> + else if (save_stub_call_needed)
>> + {
>> + ix86_emit_outlined_ms2sysv_save (frame);
>> + save_stub_call_needed = false;
>> + }
>> + }
>> + }
>>
>> allocate = frame.stack_pointer_offset - m->fs.sp_offset;
>>
>> @@ -13337,6 +13379,8 @@ ix86_expand_prologue (void)
>> ix86_emit_save_regs_using_mov (frame.reg_save_offset);
>> if (!sse_registers_saved)
>> ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
>> + else if (save_stub_call_needed)
>> + ix86_emit_outlined_ms2sysv_save (frame);
>>
>> /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
>> in PROLOGUE. */
>> @@ -13560,7 +13604,7 @@ ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
>> rtvec v;
>> unsigned int elems_needed, align, i, vi = 0;
>> rtx_insn *insn;
>> - rtx sym, tmp;
>> + rtx sym, addr, tmp;
>> rtx rsi = gen_rtx_REG (word_mode, SI_REG);
>> rtx r10 = NULL_RTX;
>> const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
>> @@ -13577,9 +13621,13 @@ ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
>>
>> /* Setup RSI as the stub's base pointer. */
>> align = GET_MODE_ALIGNMENT (V4SFmode);
>> - tmp = choose_baseaddr (rsi_offset, &align);
>> + addr = choose_baseaddr (rsi_offset, &align, SI_REG);
>> gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
>> - emit_insn (gen_rtx_SET (rsi, tmp));
>> +
>> + /* If choose_baseaddr returned our scratch register, then we don't need to
>> + do another SET. */
>> + if (!REG_P (addr) || REGNO (addr) != SI_REG)
>> + emit_insn (gen_rtx_SET (rsi, addr));
> Again, no need for these changes with the above implementation of
> choose_baseaddr.
>
>> /* Get a symbol for the stub. */
>> if (frame_pointer_needed)
>> diff --git a/gcc/testsuite/gcc.target/i386/pr82002-2a.c b/gcc/testsuite/gcc.target/i386/pr82002-2a.c
>> index bc85080ba8e..c31440debe2 100644
>> --- a/gcc/testsuite/gcc.target/i386/pr82002-2a.c
>> +++ b/gcc/testsuite/gcc.target/i386/pr82002-2a.c
>> @@ -1,7 +1,5 @@
>> /* { dg-do compile { target lp64 } } */
>> /* { dg-options "-Ofast -mstackrealign -mabi=ms" } */
>> -/* { dg-xfail-if "" { *-*-* } } */
>> -/* { dg-xfail-run-if "" { *-*-* } } */
>>
>> void __attribute__((sysv_abi)) a (char *);
>> void
>> diff --git a/gcc/testsuite/gcc.target/i386/pr82002-2b.c b/gcc/testsuite/gcc.target/i386/pr82002-2b.c
>> index 10e44cd7b1d..939e069517d 100644
>> --- a/gcc/testsuite/gcc.target/i386/pr82002-2b.c
>> +++ b/gcc/testsuite/gcc.target/i386/pr82002-2b.c
>> @@ -1,7 +1,5 @@
>> /* { dg-do compile { target lp64 } } */
>> /* { dg-options "-Ofast -mstackrealign -mabi=ms -mcall-ms2sysv-xlogues" } */
>> -/* { dg-xfail-if "" { *-*-* } } */
>> -/* { dg-xfail-run-if "" { *-*-* } } */
>>
>> void __attribute__((sysv_abi)) a (char *);
>> void
>> --
>> 2.14.3
>>
Thanks,
Daniel
More information about the Gcc-patches
mailing list