[PATCH] tree-optimization/97081 - fix wrong-code with vectorized shift
Richard Biener
rguenther@suse.de
Fri Sep 18 11:39:16 GMT 2020
This corrects the mask used when creating x << s | x >> (-s & mask)
from a rotate x <<r s, so that it is derived from the precision of x.
Bootstrap / regtest running on x86_64-unknown-linux-gnu.
2020-09-18 Richard Biener <rguenther@suse.de>
PR tree-optimization/97081
* tree-vect-patterns.c (vect_recog_rotate_pattern): Use the
precision of the shifted operand to determine the mask.
* gcc.dg/vect/pr97081.c: New testcase.
---
gcc/testsuite/gcc.dg/vect/pr97081.c | 26 ++++++++++++++++++++++++++
gcc/tree-vect-patterns.c | 3 +--
2 files changed, 27 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/pr97081.c
diff --git a/gcc/testsuite/gcc.dg/vect/pr97081.c b/gcc/testsuite/gcc.dg/vect/pr97081.c
new file mode 100644
index 00000000000..bc83c88c019
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr97081.c
@@ -0,0 +1,26 @@
+#include "tree-vect.h"
+
+#define EXEC_ROR2(a, b, sz) (a >> b) | (a << (64 - b))
+
+#define TYPE __UINT64_TYPE__
+
+void __attribute__((noipa))
+exec_VRORudi_i(TYPE *__restrict__ pvd,
+ TYPE *__restrict__ const pva, unsigned char IMM)
+{
+ unsigned char I2 = IMM & 63;
+
+ for (unsigned i = 0; i < 4; i++)
+ pvd[i] = EXEC_ROR2(pva[i], I2, 8);
+}
+
+int main()
+{
+ check_vect ();
+
+ TYPE pvd[4], pva[4] = { 0x0102030405060708, 0x0102030405060708, 0x0102030405060708, 0x0102030405060708 };
+ exec_VRORudi_i (pvd, pva, 7);
+ if (pvd[0] != 0x10020406080a0c0e)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index 478a45a2281..db45740da3c 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -2456,7 +2456,6 @@ vect_recog_rotate_pattern (vec_info *vinfo,
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
}
stype = TREE_TYPE (def);
- scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
if (TREE_CODE (def) == INTEGER_CST)
{
@@ -2485,7 +2484,7 @@ vect_recog_rotate_pattern (vec_info *vinfo,
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
def2 = vect_recog_temp_ssa_var (stype, NULL);
- tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
+ tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
gimple_assign_lhs (def_stmt), mask);
if (ext_def)
--
2.26.2
More information about the Gcc-patches
mailing list