[PATCH][middle-end][i386][Version 4] Add -fzero-call-used-regs=[skip|used-gpr-arg|used-arg|all-arg|used-gpr|all-gpr|used|all]
Qing Zhao
QING.ZHAO@ORACLE.COM
Mon Oct 26 23:06:39 GMT 2020
Hi, Uros,
Could you please check the change compared to the previous version for i386.c as following:
Let me know any issue there.
Thanks a lot.
Qing
---
gcc/config/i386/i386.c | 136 ++++++++++++++++++---
.../gcc.target/i386/zero-scratch-regs-28.c | 17 +++
.../gcc.target/i386/zero-scratch-regs-29.c | 11 ++
.../gcc.target/i386/zero-scratch-regs-30.c | 11 ++
4 files changed, 155 insertions(+), 20 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/zero-scratch-regs-28.c
create mode 100644 gcc/testsuite/gcc.target/i386/zero-scratch-regs-29.c
create mode 100644 gcc/testsuite/gcc.target/i386/zero-scratch-regs-30.c
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index e66dcf0d587..65f778112d9 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -3554,17 +3554,17 @@ ix86_function_value_regno_p (const unsigned int regno)
/* Check whether the register REGNO should be zeroed on X86.
When ALL_SSE_ZEROED is true, all SSE registers have been zeroed
together, no need to zero it again.
- Stack registers (st0-st7) and mm0-mm7 are aliased with each other.
- very hard to be zeroed individually, don't zero individual st or
- mm registgers. */
+ When NEED_ZERO_MMX is true, MMX registers should be cleared. */
static bool
zero_call_used_regno_p (const unsigned int regno,
- bool all_sse_zeroed)
+ bool all_sse_zeroed,
+ bool need_zero_mmx)
{
return GENERAL_REGNO_P (regno)
|| (!all_sse_zeroed && SSE_REGNO_P (regno))
- || MASK_REGNO_P (regno);
+ || MASK_REGNO_P (regno)
+ || (need_zero_mmx && MMX_REGNO_P (regno));
}
/* Return the machine_mode that is used to zero register REGNO. */
@@ -3579,8 +3579,12 @@ zero_call_used_regno_mode (const unsigned int regno)
return SImode;
else if (SSE_REGNO_P (regno))
return V4SFmode;
- else
+ else if (MASK_REGNO_P (regno))
return HImode;
+ else if (MMX_REGNO_P (regno))
+ return DImode;
+ else
+ gcc_unreachable ();
}
/* Generate a rtx to zero all vector registers together if possible,
@@ -3603,7 +3607,7 @@ zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
return gen_avx_vzeroall ();
}
-/* Generate insns to zero all st/mm registers together.
+/* Generate insns to zero all st registers together.
Return true when zeroing instructions are generated.
Assume the number of st registers that are zeroed is num_of_st,
we will emit the following sequence to zero them together:
@@ -3616,23 +3620,50 @@ zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
...
fstp %%st(0);
i.e., num_of_st fldz followed by num_of_st fstp to clear the stack
- mark stack slots empty. */
+ mark stack slots empty.
+
+ How to compute the num_of_st?
+ There is no direct mapping from stack registers to hard register
+ numbers. If one stack register need to be cleared, we don't know
+ where in the stack the value remains. So, if any stack register
+ need to be cleared, the whole stack should be cleared. However,
+ x87 stack registers that hold the return value should be excluded.
+ x87 returns in the top (two for complex values) register, so
+ num_of_st should be 7/6 when x87 returns, otherwise it will be 8. */
+
static bool
-zero_all_st_mm_registers (HARD_REG_SET need_zeroed_hardregs)
+zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs)
{
unsigned int num_of_st = 0;
for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
- if (STACK_REGNO_P (regno)
- && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno)
- /* When the corresponding mm register also need to be cleared too. */
- && TEST_HARD_REG_BIT (need_zeroed_hardregs,
- (regno - FIRST_STACK_REG + FIRST_MMX_REG)))
- num_of_st++;
+ if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
+ && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
+ {
+ num_of_st++;
+ break;
+ }
if (num_of_st == 0)
return false;
+ bool return_with_x87 = false;
+ return_with_x87 = (crtl->return_rtx
+ && (GET_CODE (crtl->return_rtx) == REG)
+ && (STACK_REG_P (crtl->return_rtx)));
+
+ bool complex_return = false;
+ complex_return = (crtl->return_rtx
+ && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx)));
+
+ if (return_with_x87)
+ if (complex_return)
+ num_of_st = 6;
+ else
+ num_of_st = 7;
+ else
+ num_of_st = 8;
+
rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG);
for (unsigned int i = 0; i < num_of_st; i++)
emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode)));
@@ -3646,6 +3677,43 @@ zero_all_st_mm_registers (HARD_REG_SET need_zeroed_hardregs)
return true;
}
+
+/* When the routine exit with MMX mode, if there is any ST registers
+ need to be zeroed, we should clear all MMX registers except the
+ one that holds the return value RET_MMX_REGNO. */
+static bool
+zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs,
+ unsigned int ret_mmx_regno)
+{
+ bool need_zero_all_mm = false;
+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (STACK_REGNO_P (regno)
+ && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
+ {
+ need_zero_all_mm = true;
+ break;
+ }
+
+ if (!need_zero_all_mm)
+ return false;
+
+ rtx zero_mmx = NULL_RTX;
+ machine_mode mode = DImode;
+ for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
+ if (regno != ret_mmx_regno)
+ {
+ rtx reg = gen_rtx_REG (mode, regno);
+ if (zero_mmx == NULL_RTX)
+ {
+ zero_mmx = reg;
+ emit_insn (gen_rtx_SET (reg, const0_rtx));
+ }
+ else
+ emit_move_insn (reg, zero_mmx);
+ }
+ return true;
+}
+
/* TARGET_ZERO_CALL_USED_REGS. */
/* Generate a sequence of instructions that zero registers specified by
NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
@@ -3655,7 +3723,8 @@ ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
{
HARD_REG_SET zeroed_hardregs;
bool all_sse_zeroed = false;
- bool st_zeroed = false;
+ bool all_st_zeroed = false;
+ bool all_mm_zeroed = false;
/* first, let's see whether we can zero all vector registers together. */
rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
@@ -3665,24 +3734,42 @@ ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
all_sse_zeroed = true;
}
- /* then, let's see whether we can zero all st+mm registers togeter. */
- st_zeroed = zero_all_st_mm_registers (need_zeroed_hardregs);
+ /* Then, decide which mode (MMX mode or x87 mode) the function exit with.
+ In order to decide whether we need to clear the MMX registers or the
+ stack registers. */
+
+ bool exit_with_mmx_mode = (crtl->return_rtx
+ && (GET_CODE (crtl->return_rtx) == REG)
+ && (MMX_REG_P (crtl->return_rtx)));
+
+ /* then, let's see whether we can zero all st registers together. */
+ if (!exit_with_mmx_mode)
+ all_st_zeroed = zero_all_st_registers (need_zeroed_hardregs);
+ /* Or should we zero all MMX registers. */
+ else
+ {
+ unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
+ all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
+ exit_mmx_regno);
+ }
/* Now, generate instructions to zero all the registers. */
CLEAR_HARD_REG_SET (zeroed_hardregs);
- if (st_zeroed)
+ if (all_st_zeroed)
SET_HARD_REG_BIT (zeroed_hardregs, FIRST_STACK_REG);
rtx zero_gpr = NULL_RTX;
rtx zero_vector = NULL_RTX;
rtx zero_mask = NULL_RTX;
+ rtx zero_mmx = NULL_RTX;
for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
{
if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
continue;
- if (!zero_call_used_regno_p (regno, all_sse_zeroed))
+ if (!zero_call_used_regno_p (regno, all_sse_zeroed,
+ exit_with_mmx_mode && !all_mm_zeroed))
continue;
SET_HARD_REG_BIT (zeroed_hardregs, regno);
@@ -3728,6 +3815,15 @@ ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
}
else
emit_move_insn (reg, zero_mask);
+ else if (mode == DImode)
+ if (zero_mmx == NULL_RTX)
+ {
+ zero_mmx = reg;
+ tmp = gen_rtx_SET (reg, const0_rtx);
+ emit_insn (tmp);
+ }
+ else
+ emit_move_insn (reg, zero_mmx);
else
gcc_unreachable ();
}
diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-28.c b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-28.c
new file mode 100644
index 00000000000..61c0bb7a35c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-28.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -m32 -mmmx -fzero-call-used-regs=all" } */
+
+typedef int __v2si __attribute__ ((vector_size (8)));
+
+__v2si ret_mmx (void)
+{
+ return (__v2si) { 123, 345 };
+}
+
+/* { dg-final { scan-assembler "pxor\[ \t\]*%mm1, %mm1" } } */
+/* { dg-final { scan-assembler "movq\[ \t\]*%mm1, %mm2" } } */
+/* { dg-final { scan-assembler "movq\[ \t\]*%mm1, %mm3" } } */
+/* { dg-final { scan-assembler "movq\[ \t\]*%mm1, %mm4" } } */
+/* { dg-final { scan-assembler "movq\[ \t\]*%mm1, %mm5" } } */
+/* { dg-final { scan-assembler "movq\[ \t\]*%mm1, %mm6" } } */
+/* { dg-final { scan-assembler "movq\[ \t\]*%mm1, %mm7" } } */
diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-29.c b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-29.c
new file mode 100644
index 00000000000..db636654e70
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-29.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -m32 -mmmx -fzero-call-used-regs=all" } */
+typedef int __v2si __attribute__ ((vector_size (8)));
+
+long double ret_x87 (void)
+{
+ return 1.1L;
+}
+
+/* { dg-final { scan-assembler-times "fldz" 7 } } */
+/* { dg-final { scan-assembler-times "fstp" 7 } } */
diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-30.c b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-30.c
new file mode 100644
index 00000000000..7c20b569bfa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-30.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fzero-call-used-regs=all" } */
+typedef int __v2si __attribute__ ((vector_size (8)));
+
+_Complex long double ret_x87_cplx (void)
+{
+ return 1.1L + 1.2iL;
+}
+
+/* { dg-final { scan-assembler-times "fldz" 6 } } */
+/* { dg-final { scan-assembler-times "fstp" 6 } } */
--
2.11.0
> On Oct 26, 2020, at 4:23 PM, Qing Zhao via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
>
>
>
>> On Oct 26, 2020, at 3:33 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>
>> On Mon, Oct 26, 2020 at 9:05 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>>>
>>> On Mon, Oct 26, 2020 at 8:10 PM Qing Zhao <QING.ZHAO@oracle.com> wrote:
>>>>
>>>>
>>>>
>>>>> On Oct 26, 2020, at 1:42 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>>>
>>>>> On Mon, Oct 26, 2020 at 6:30 PM Qing Zhao <QING.ZHAO@oracle.com> wrote:
>>>>>>
>>>>>>
>>>>>> The following is the current change in i386.c, could you check whether the logic is good?
>>>>>
>>>>> x87 handling looks good to me.
>>>>>
>>>>> One remaining question: If the function uses MMX regs (either
>>>>> internally or as an argument register), but exits in x87 mode, does
>>>>> your logic clear the x87 stack?
>>>>
>>>> Yes but not completely yes.
>>>>
>>>> FIRST, As following:
>>>>
>>>> /* Then, decide which mode (MMX mode or x87 mode) the function exit with.
>>>> In order to decide whether we need to clear the MMX registers or the
>>>> stack registers. */
>>>> bool exit_with_mmx_mode = false;
>>>>
>>>> exit_with_mmx_mode = ((GET_CODE (crtl->return_rtx) == REG)
>>>> && (MMX_REG_P (crtl->return_rtx)));
>>>>
>>>> /* then, let's see whether we can zero all st registers togeter. */
>>>> if (!exit_with_mmx_mode)
>>>> st_zeroed = zero_all_st_registers (need_zeroed_hardregs);
>>>>
>>>>
>>>> We first check whether this routine exit with mmx mode, if Not then it’s X87 mode
>>>> (at exit, “EMMS” should already been called per ABI), then
>>>> The st/mm registers will be cleared as x87 stack registers.
>>>>
>>>> However, within the routine “zero_all_st_registers”:
>>>>
>>>> static bool
>>>> zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs)
>>>> {
>>>> unsigned int num_of_st = 0;
>>>> for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
>>>> if (STACK_REGNO_P (regno)
>>>> && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
>>>> {
>>>> num_of_st++;
>>>> break;
>>>> }
>>>>
>>>> if (num_of_st == 0)
>>>> return false;
>>>>
>>>>
>>>> In the above, I currently only check whether any “Stack” registers need to be zeroed or not.
>>>> But looks like we should also check any “MMX” register need to be zeroed or not too. If there is any
>>>> “MMX” register need to be zeroed, we still need to clear the whole X87 stack?
>>>
>>> I think so, but I have to check the details.
>>
>> Please compile the following testcase with "-m32 -mmmx":
>>
>> --cut here--
>> #include <stdio.h>
>>
>> typedef int __v2si __attribute__ ((vector_size (8)));
>>
>> __v2si zzz;
>>
>> void
>> __attribute__ ((noinline))
>> mmx (__v2si a, __v2si b, __v2si c)
>> {
>> __v2si res;
>>
>> res = __builtin_ia32_paddd (a, b);
>> zzz = __builtin_ia32_paddd (res, c);
>>
>> __builtin_ia32_emms ();
>> }
>>
>>
>> int main ()
>> {
>> __v2si a = { 123, 345 };
>> __v2si b = { 234, 456 };
>> __v2si c = { 345, 567 };
>>
>> mmx (a, b, c);
>>
>> printf ("%i, %i\n", zzz[0], zzz[1]);
>>
>> return 0;
>> }
>> --cut here--
>>
>> at the end of mmx() function:
>>
>> 0x080491ed in mmx ()
>> (gdb) disass
>> Dump of assembler code for function mmx:
>> 0x080491e0 <+0>: paddd %mm1,%mm0
>> 0x080491e3 <+3>: paddd %mm2,%mm0
>> 0x080491e6 <+6>: movq %mm0,0x804c020
>> => 0x080491ed <+13>: emms
>> 0x080491ef <+15>: ret
>> End of assembler dump.
>> (gdb) i r flo
>> st0 <invalid float value> (raw 0xffff00000558000002be)
>> st1 <invalid float value> (raw 0xffff000001c8000000ea)
>> st2 <invalid float value> (raw 0xffff0000023700000159)
>> st3 0 (raw 0x00000000000000000000)
>> st4 0 (raw 0x00000000000000000000)
>> st5 0 (raw 0x00000000000000000000)
>> st6 0 (raw 0x00000000000000000000)
>> st7 0 (raw 0x00000000000000000000)
>> fctrl 0x37f 895
>> fstat 0x0 0
>> ftag 0x556a 21866
>> fiseg 0x0 0
>> fioff 0x0 0
>> foseg 0x0 0
>> fooff 0x0 0
>> fop 0x0 0
>>
>> There are still values in the MMX registers. However, we are in x87
>> mode, so the whole stack has to be cleared.
>
> Yes. And I just tried, my current implementation behaved correctly.
>>
>> Now, what to do if the function uses x87 registers and exits in MMX
>> mode? I guess we have to clear all MMX registers (modulo return value
>> reg).
>
> Need to add this part.
>
> thanks.
> Qing
>>
>> Uros.
More information about the Gcc-patches
mailing list