This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Fix 70083, lra-induced crash
- From: Bernd Schmidt <bschmidt at redhat dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>, Vladimir Makarov <vmakarov at redhat dot com>
- Date: Thu, 10 Mar 2016 09:26:20 +0100
- Subject: Fix 70083, lra-induced crash
- Authentication-results: sourceware.org; auth=none
- References: <56E12ADA dot 5060400 at t-online dot de>
This crash happens because LRA tries to save an AVX hard reg in a large
mode, even though that register only appears in the function in smaller
modes. Stack alignment isn't set up to support the larger mode.
Currently, biggest_mode for hard registers is initialized from
regno_reg_rtx, which is set up with a large mode for argument regs. That
mode is not necessarily seen in the function itself and may be too large.
If that initialization
is changed to use VOIDmode, we compute the correct value during
lra_push_insns, but then subsequently we clear it to VOIDmode again, and
it never seems to get updated. Hence, the patch has several parts:
initialize hard reg biggest_mode with VOIDmode, ensure it gets updated
during process_bb_lives, and use the value in split_reg.
Bootstrapped and tested on x86_64-linux, ok?
Bernd
PR target/70083
* lra-lives.c (process_bb_lives): Also update biggest mode for hard
regs.
(lra_create_live_ranges_1): Initialize hard register biggest_mode to
VOIDmode.
* lra-constraints.c (split_reg): For hard regs, try to find the
biggest single-register mode used in the function.
testsuite/
PR target/70083
* gcc.dg/torture/pr70083.c: New test.
* gcc.target/i386/pr70083.c: New test.
Index: gcc/lra-lives.c
===================================================================
--- gcc/lra-lives.c (revision 234025)
+++ gcc/lra-lives.c (working copy)
@@ -700,12 +700,13 @@ process_bb_lives (basic_block bb, int &c
/* Update max ref width and hard reg usage. */
for (reg = curr_id->regs; reg != NULL; reg = reg->next)
- if (reg->regno >= FIRST_PSEUDO_REGISTER
- && (GET_MODE_SIZE (reg->biggest_mode)
- > GET_MODE_SIZE (lra_reg_info[reg->regno].biggest_mode)))
- lra_reg_info[reg->regno].biggest_mode = reg->biggest_mode;
- else if (reg->regno < FIRST_PSEUDO_REGISTER)
- lra_hard_reg_usage[reg->regno] += freq;
+ {
+ if (GET_MODE_SIZE (reg->biggest_mode)
+ > GET_MODE_SIZE (lra_reg_info[reg->regno].biggest_mode))
+ lra_reg_info[reg->regno].biggest_mode = reg->biggest_mode;
+ if (reg->regno < FIRST_PSEUDO_REGISTER)
+ lra_hard_reg_usage[reg->regno] += freq;
+ }
call_p = CALL_P (curr_insn);
src_regno = (set != NULL_RTX && REG_P (SET_SRC (set))
@@ -1208,7 +1209,7 @@ lra_create_live_ranges_1 (bool all_p, bo
conservative because of recent transformation. Here in this
file we recalculate it again as it costs practically
nothing. */
- if (regno_reg_rtx[i] != NULL_RTX)
+ if (i >= FIRST_PSEUDO_REGISTER && regno_reg_rtx[i] != NULL_RTX)
lra_reg_info[i].biggest_mode = GET_MODE (regno_reg_rtx[i]);
else
lra_reg_info[i].biggest_mode = VOIDmode;
Index: gcc/lra-constraints.c
===================================================================
--- gcc/lra-constraints.c (revision 234025)
+++ gcc/lra-constraints.c (working copy)
@@ -4972,6 +4972,7 @@ split_reg (bool before_p, int original_r
rtx_insn *restore, *save;
bool after_p;
bool call_save_p;
+ machine_mode mode;
if (original_regno < FIRST_PSEUDO_REGISTER)
{
@@ -4979,24 +4980,32 @@ split_reg (bool before_p, int original_r
hard_regno = original_regno;
call_save_p = false;
nregs = 1;
+ mode = lra_reg_info[hard_regno].biggest_mode;
+ machine_mode reg_rtx_mode = GET_MODE (regno_reg_rtx[hard_regno]);
+ if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (reg_rtx_mode))
+ {
+ original_reg = regno_reg_rtx[hard_regno];
+ mode = reg_rtx_mode;
+ }
+ else
+ original_reg = gen_rtx_REG (mode, hard_regno);
}
else
{
+ mode = PSEUDO_REGNO_MODE (original_regno);
hard_regno = reg_renumber[original_regno];
- nregs = hard_regno_nregs[hard_regno][PSEUDO_REGNO_MODE (original_regno)];
+ nregs = hard_regno_nregs[hard_regno][mode];
rclass = lra_get_allocno_class (original_regno);
original_reg = regno_reg_rtx[original_regno];
call_save_p = need_for_call_save_p (original_regno);
}
- original_reg = regno_reg_rtx[original_regno];
lra_assert (hard_regno >= 0);
if (lra_dump_file != NULL)
fprintf (lra_dump_file,
" ((((((((((((((((((((((((((((((((((((((((((((((((\n");
+
if (call_save_p)
{
- machine_mode mode = GET_MODE (original_reg);
-
mode = HARD_REGNO_CALLER_SAVE_MODE (hard_regno,
hard_regno_nregs[hard_regno][mode],
mode);
@@ -5004,8 +5013,7 @@ split_reg (bool before_p, int original_r
}
else
{
- rclass = choose_split_class (rclass, hard_regno,
- GET_MODE (original_reg));
+ rclass = choose_split_class (rclass, hard_regno, mode);
if (rclass == NO_REGS)
{
if (lra_dump_file != NULL)
@@ -5023,8 +5031,7 @@ split_reg (bool before_p, int original_r
}
return false;
}
- new_reg = lra_create_new_reg (GET_MODE (original_reg), original_reg,
- rclass, "split");
+ new_reg = lra_create_new_reg (mode, original_reg, rclass, "split");
reg_renumber[REGNO (new_reg)] = hard_regno;
}
save = emit_spill_move (true, new_reg, original_reg);
Index: gcc/testsuite/gcc.dg/torture/pr70083.c
===================================================================
--- gcc/testsuite/gcc.dg/torture/pr70083.c (revision 0)
+++ gcc/testsuite/gcc.dg/torture/pr70083.c (working copy)
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-Wno-psabi" } */
+
+typedef short v16hi __attribute__ ((vector_size (32)));
+typedef int v8si __attribute__ ((vector_size (32)));
+typedef long long v4di __attribute__ ((vector_size (32)));
+
+int
+foo(int u32_0, int u64_0, int u64_1, v16hi v32u16_0, v8si v32u32_0, v4di v32u64_0, v16hi v32u16_1, v8si v32u32_1, v4di v32u64_1)
+{
+ v32u32_1 %= (v8si) v32u16_1 | 1;
+ v32u64_1[1] |= ((1));
+ v32u16_0 /= (v16hi){~u64_1, 1, 0xb56c, 0xd279, 0x26b6, 0x74d9, 0xf764, 0, 0, -v32u16_1[6]} | 1;
+ v32u16_1 ^= (v16hi){0xc98d, 1, 0x8c71, u32_0, 0x5366, 0, ~v32u64_1[1]} & 31;
+ v32u32_0 -= (v8si)~v32u64_1;
+ v32u32_1[2] |= 0x1f;
+ v32u16_0 %= (v16hi){2, 0xffff, u32_0, 1, v32u64_0[1], u32_0 };
+ v32u32_1 /= (v8si){0x1e7390, v32u16_0[12], ~v32u16_1[2], -u64_0};
+ return v32u16_0[4] + v32u16_0[5] + v32u32_0[5] + v32u32_1[6] + v32u64_1[3];
+}
Index: gcc/testsuite/gcc.target/i386/pr70083.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr70083.c (revision 0)
+++ gcc/testsuite/gcc.target/i386/pr70083.c (working copy)
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-Wno-psabi -O2 -fno-dce -fschedule-insns -fno-sched-critical-path-heuristic -mavx512dq --param=max-cse-insns=1" } */
+
+typedef short v16hi __attribute__ ((vector_size (32)));
+typedef int v8si __attribute__ ((vector_size (32)));
+typedef long long v4di __attribute__ ((vector_size (32)));
+
+int
+foo(int u32_0, int u64_0, int u64_1, v16hi v32u16_0, v8si v32u32_0, v4di v32u64_0, v16hi v32u16_1, v8si v32u32_1, v4di v32u64_1)
+{
+ v32u32_1 %= (v8si) v32u16_1 | 1;
+ v32u64_1[1] |= ((1));
+ v32u16_0 /= (v16hi){~u64_1, 1, 0xb56c, 0xd279, 0x26b6, 0x74d9, 0xf764, 0, 0, -v32u16_1[6]} | 1;
+ v32u16_1 ^= (v16hi){0xc98d, 1, 0x8c71, u32_0, 0x5366, 0, ~v32u64_1[1]} & 31;
+ v32u32_0 -= (v8si)~v32u64_1;
+ v32u32_1[2] |= 0x1f;
+ v32u16_0 %= (v16hi){2, 0xffff, u32_0, 1, v32u64_0[1], u32_0 };
+ v32u32_1 /= (v8si){0x1e7390, v32u16_0[12], ~v32u16_1[2], -u64_0};
+ return v32u16_0[4] + v32u16_0[5] + v32u32_0[5] + v32u32_1[6] + v32u64_1[3];
+}