commit e0d3041c9caece8b48be016fa515747eb2746d35 Author: Vladimir Makarov Date: Thu Feb 18 22:40:54 2021 +0000 [PR96264] LRA: Check output insn hard regs when updating available rematerialization after the insn An insn for rematerialization can contain a clobbered hard register. We cannot move such an insn through another insn that sets the same hard register. The patch adds such a check. gcc/ChangeLog: PR rtl-optimization/96264 * lra-remat.c (reg_overlap_for_remat_p): Check also output insn hard regs. gcc/testsuite/ChangeLog: PR rtl-optimization/96264 * gcc.target/powerpc/pr96264.c: New. diff --git a/gcc/lra-remat.c b/gcc/lra-remat.c index 8bd9ffa..d983731 100644 --- a/gcc/lra-remat.c +++ b/gcc/lra-remat.c @@ -651,7 +651,11 @@ calculate_local_reg_remat_bb_data (void) -/* Return true if REG overlaps an input operand of INSN. */ +/* Return true if REG overlaps an input operand or non-input hard register of + INSN. Basically the function returns false if we can move rematerialization + candidate INSN through another insn with output REG or dead input REG (we + consider it to avoid extending reg live range) with possible output pseudo + renaming in INSN. 
*/ static bool reg_overlap_for_remat_p (lra_insn_reg *reg, rtx_insn *insn) { @@ -675,10 +679,11 @@ reg_overlap_for_remat_p (lra_insn_reg *reg, rtx_insn *insn) reg2 != NULL; reg2 = reg2->next) { - if (reg2->type != OP_IN) - continue; - unsigned regno2 = reg2->regno; int nregs2; + unsigned regno2 = reg2->regno; + + if (reg2->type != OP_IN && regno2 >= FIRST_PSEUDO_REGISTER) + continue; if (regno2 >= FIRST_PSEUDO_REGISTER && reg_renumber[regno2] >= 0) regno2 = reg_renumber[regno2]; diff --git a/gcc/testsuite/gcc.target/powerpc/pr96264.c b/gcc/testsuite/gcc.target/powerpc/pr96264.c new file mode 100644 index 0000000..e89979b --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr96264.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ +/* { dg-options "-Os -fno-forward-propagate -fschedule-insns -fno-tree-ter -Wno-psabi" } */ +/* { dg-require-effective-target p8vector_hw } */ + +typedef unsigned char __attribute__ ((__vector_size__ (64))) v512u8; +typedef unsigned short u16; +typedef unsigned short __attribute__ ((__vector_size__ (64))) v512u16; +typedef unsigned __int128 __attribute__ ((__vector_size__ (64))) v512u128; + +v512u16 d; +v512u128 f; + +v512u8 +foo (u16 e) +{ + v512u128 g = f - -e; + d = (5 / (d + 1)) < e; + return (v512u8) g; +} + +int +main (void) +{ + v512u8 x = foo (2); + for (unsigned i = 0; i < sizeof (x); i++) + if (x[i] != (i % 16 ? 0 : 2)) + __builtin_abort (); +}