This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Fix type handling in undistribute_bitref_for_vector (PR tree-optimization/92618)
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Richard Biener <rguenther at suse dot de>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Fri, 22 Nov 2019 15:25:05 +0100
- Subject: [PATCH] Fix type handling in undistribute_bitref_for_vector (PR tree-optimization/92618)
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
The undistribute_bitref_for_vector function assumes that BIT_FIELD_REFs
with a vector first argument must always have the type of the vector
element, but that is not the case: BIT_FIELD_REF can extract any type with
the corresponding size from any other vector.
So, without this patch, when it saw e.g. an addition in unsigned long long
type, even when the vector was 2x long long, it computed the addition
in a 2x long long vector (which in theory could run into undefined overflows)
and failed IL checking, because the conversion from long long to unsigned long
long is not useless. Even worse, for a double addition with a 2x long long
vector, it would again perform the addition in the 2x long long vector type,
which is completely wrong.
The following patch determines the right vector type and adds VIEW_CONVERT_EXPRs
(VCE) around the operands when needed.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2019-11-22 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/92618
* tree-ssa-reassoc.c (v_info): Change from auto_vec to a struct
containing the auto_vec and a tree.
(undistribute_bitref_for_vector): Handle the case when element type
of vec is not the same as type of the BIT_FIELD_REF. Formatting
fixes.
* gcc.c-torture/compile/pr92618.c: New test.
* gcc.c-torture/execute/pr92618.c: New test.
--- gcc/tree-ssa-reassoc.c.jj 2019-11-13 10:54:56.038884580 +0100
+++ gcc/tree-ssa-reassoc.c 2019-11-22 10:57:24.956931307 +0100
@@ -1775,7 +1775,10 @@ undistribute_ops_list (enum tree_code op
first: element index for each relevant BIT_FIELD_REF.
second: the index of vec ops* for each relevant BIT_FIELD_REF. */
typedef std::pair<unsigned, unsigned> v_info_elem;
-typedef auto_vec<v_info_elem, 32> v_info;
+struct v_info {
+ tree vec_type;
+ auto_vec<v_info_elem, 32> vec;
+};
typedef v_info *v_info_ptr;
/* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
@@ -1840,8 +1843,11 @@ undistribute_bitref_for_vector (enum tre
if (ops->length () <= 1)
return false;
- if (opcode != PLUS_EXPR && opcode != MULT_EXPR && opcode != BIT_XOR_EXPR
- && opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
+ if (opcode != PLUS_EXPR
+ && opcode != MULT_EXPR
+ && opcode != BIT_XOR_EXPR
+ && opcode != BIT_IOR_EXPR
+ && opcode != BIT_AND_EXPR)
return false;
hash_map<tree, v_info_ptr> v_info_map;
@@ -1879,9 +1885,45 @@ undistribute_bitref_for_vector (enum tre
if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
continue;
+ if (VECTOR_TYPE_P (TREE_TYPE (rhs))
+ || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
+ continue;
+
+ /* The type of BIT_FIELD_REF might not be equal to the element type of
+ the vector. We want to use a vector type with element type the
+ same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
+ if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
+ {
+ machine_mode simd_mode;
+ unsigned HOST_WIDE_INT size, nunits;
+ unsigned HOST_WIDE_INT elem_size
+ = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
+ if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
+ continue;
+ if (size <= elem_size || (size % elem_size) != 0)
+ continue;
+ nunits = size / elem_size;
+ if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
+ nunits).exists (&simd_mode))
+ continue;
+ vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
+
+ /* Ignore it if target machine can't support this VECTOR type. */
+ if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
+ continue;
+
+ /* Check const vector type, constrain BIT_FIELD_REF offset and
+ size. */
+ if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
+ continue;
+
+ if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
+ GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
+ continue;
+ }
+
tree elem_type = TREE_TYPE (vec_type);
- unsigned HOST_WIDE_INT elem_size
- = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
+ unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
if (maybe_ne (bit_field_size (rhs), elem_size))
continue;
@@ -1898,8 +1940,13 @@ undistribute_bitref_for_vector (enum tre
bool existed;
v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
if (!existed)
- info = new v_info;
- info->safe_push (std::make_pair (idx, i));
+ {
+ info = new v_info;
+ info->vec_type = vec_type;
+ }
+ else if (!types_compatible_p (vec_type, info->vec_type))
+ continue;
+ info->vec.safe_push (std::make_pair (idx, i));
}
/* At least two VECTOR to combine. */
@@ -1919,14 +1966,15 @@ undistribute_bitref_for_vector (enum tre
{
tree cand_vec = (*it).first;
v_info_ptr cand_info = (*it).second;
- unsigned int num_elems = VECTOR_CST_NELTS (cand_vec).to_constant ();
- if (cand_info->length () != num_elems)
+ unsigned int num_elems
+ = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
+ if (cand_info->vec.length () != num_elems)
continue;
sbitmap holes = sbitmap_alloc (num_elems);
bitmap_ones (holes);
bool valid = true;
v_info_elem *curr;
- FOR_EACH_VEC_ELT (*cand_info, i, curr)
+ FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
{
if (!bitmap_bit_p (holes, curr->first))
{
@@ -1962,25 +2010,53 @@ undistribute_bitref_for_vector (enum tre
unsigned int idx, j;
gimple *sum = NULL;
- v_info_ptr info_ptr;
tree sum_vec = tvec;
+ v_info_ptr info_ptr = *(v_info_map.get (tvec));
v_info_elem *elem;
+ tree vec_type = info_ptr->vec_type;
/* Build the sum for all candidates with same mode. */
do
{
- sum = build_and_add_sum (TREE_TYPE (sum_vec), sum_vec,
+ sum = build_and_add_sum (vec_type, sum_vec,
valid_vecs[i + 1], opcode);
+ if (!useless_type_conversion_p (vec_type,
+ TREE_TYPE (valid_vecs[i + 1])))
+ {
+ /* Update the operands only after build_and_add_sum,
+ so that we don't have to repeat the placement algorithm
+ of build_and_add_sum. */
+ gimple_stmt_iterator gsi = gsi_for_stmt (sum);
+ tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
+ valid_vecs[i + 1]);
+ tree lhs = make_ssa_name (vec_type);
+ gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
+ gimple_set_uid (g, gimple_uid (sum));
+ gsi_insert_before (&gsi, g, GSI_NEW_STMT);
+ gimple_assign_set_rhs2 (sum, lhs);
+ if (sum_vec == tvec)
+ {
+ vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
+ lhs = make_ssa_name (vec_type);
+ g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
+ gimple_set_uid (g, gimple_uid (sum));
+ gsi_insert_before (&gsi, g, GSI_NEW_STMT);
+ gimple_assign_set_rhs1 (sum, lhs);
+ }
+ update_stmt (sum);
+ }
sum_vec = gimple_get_lhs (sum);
info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
+ gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
/* Update those related ops of current candidate VECTOR. */
- FOR_EACH_VEC_ELT (*info_ptr, j, elem)
+ FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
{
idx = elem->second;
gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
/* Set this then op definition will get DCEd later. */
gimple_set_visited (def, true);
- if (opcode == PLUS_EXPR || opcode == BIT_XOR_EXPR
+ if (opcode == PLUS_EXPR
+ || opcode == BIT_XOR_EXPR
|| opcode == BIT_IOR_EXPR)
(*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
else if (opcode == MULT_EXPR)
@@ -2007,16 +2083,16 @@ undistribute_bitref_for_vector (enum tre
BIT_FIELD_REF statements accordingly. */
info_ptr = *(v_info_map.get (tvec));
gcc_assert (sum);
- tree elem_type = TREE_TYPE (TREE_TYPE (tvec));
- FOR_EACH_VEC_ELT (*info_ptr, j, elem)
+ tree elem_type = TREE_TYPE (vec_type);
+ FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
{
idx = elem->second;
tree dst = make_ssa_name (elem_type);
- gimple *gs = gimple_build_assign (
- dst, BIT_FIELD_REF,
- build3 (BIT_FIELD_REF, elem_type, sum_vec, TYPE_SIZE (elem_type),
- bitsize_int (elem->first
- * tree_to_uhwi (TYPE_SIZE (elem_type)))));
+ tree pos = bitsize_int (elem->first
+ * tree_to_uhwi (TYPE_SIZE (elem_type)));
+ tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
+ TYPE_SIZE (elem_type), pos);
+ gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
insert_stmt_after (gs, sum);
gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
/* Set this then op definition will get DCEd later. */
--- gcc/testsuite/gcc.c-torture/compile/pr92618.c.jj 2019-11-22 11:11:52.971470532 +0100
+++ gcc/testsuite/gcc.c-torture/compile/pr92618.c 2019-11-22 11:11:39.434680213 +0100
@@ -0,0 +1,67 @@
+/* PR tree-optimization/92618 */
+
+typedef long long __m128i __attribute__((__may_alias__, __vector_size__(2 * sizeof (long long))));
+typedef long long __m256i __attribute__((__may_alias__, __vector_size__(4 * sizeof (long long))));
+typedef long long __m512i __attribute__((__may_alias__, __vector_size__(8 * sizeof (long long))));
+
+double a[32];
+unsigned long long b[32];
+__m128i bar (void);
+__m256i qux (void);
+__m512i corge (void);
+
+void
+foo (unsigned long long *x)
+{
+ __m128i c = bar ();
+ __m128i d = bar ();
+ __m256i e = qux ();
+ __m256i f = qux ();
+ __m256i g = qux ();
+ __m512i h = corge ();
+ __m512i i = corge ();
+ *(__m128i *) &b[0] = c;
+ *(__m128i *) &b[2] = d;
+ *(__m256i *) &b[4] = e;
+ *(__m256i *) &b[8] = f;
+ *(__m256i *) &b[12] = g;
+ *(__m512i *) &b[16] = h;
+ *(__m512i *) &b[24] = i;
+ *x = b[0] + b[1] + b[2] + b[3]
+ + b[4] + b[5] + b[6] + b[7]
+ + b[8] + b[9] + b[10] + b[11]
+ + b[12] + b[13] + b[14] + b[15]
+ + b[16] + b[17] + b[18] + b[19]
+ + b[20] + b[21] + b[22] + b[23]
+ + b[24] + b[25] + b[26] + b[27]
+ + b[28] + b[29] + b[30] + b[31];
+}
+
+void
+baz (double *x)
+{
+#if __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__
+ __m128i c = bar ();
+ __m128i d = bar ();
+ __m256i e = qux ();
+ __m256i f = qux ();
+ __m256i g = qux ();
+ __m512i h = corge ();
+ __m512i i = corge ();
+ *(__m128i *) &a[0] = c;
+ *(__m128i *) &a[2] = d;
+ *(__m256i *) &a[4] = e;
+ *(__m256i *) &a[8] = f;
+ *(__m256i *) &a[12] = g;
+ *(__m512i *) &a[16] = h;
+ *(__m512i *) &a[24] = i;
+ *x = a[0] + a[1] + a[2] + a[3]
+ + a[4] + a[5] + a[6] + a[7]
+ + a[8] + a[9] + a[10] + a[11]
+ + a[12] + a[13] + a[14] + a[15]
+ + a[16] + a[17] + a[18] + a[19]
+ + a[20] + a[21] + a[22] + a[23]
+ + a[24] + a[25] + a[26] + a[27]
+ + a[28] + a[29] + a[30] + a[31];
+#endif
+}
--- gcc/testsuite/gcc.c-torture/execute/pr92618.c.jj 2019-11-22 11:11:10.543127733 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr92618.c 2019-11-22 11:10:56.930338594 +0100
@@ -0,0 +1,63 @@
+/* PR tree-optimization/92618 */
+
+typedef long long __m128i __attribute__((__may_alias__, __vector_size__(2 * sizeof (long long))));
+
+double a[4];
+unsigned long long b[4];
+
+__attribute__((noipa)) __m128i
+bar (void)
+{
+ static int cnt;
+ cnt += 2;
+ return (__m128i) { cnt, cnt + 1 };
+}
+
+#if __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__
+typedef double __m128d __attribute__((__may_alias__, __vector_size__(2 * sizeof (double))));
+
+__attribute__((noipa)) __m128i
+qux (void)
+{
+ static double cnt;
+ cnt += 2.0;
+ return (__m128i) (__m128d) { cnt, cnt + 1.0 };
+}
+#endif
+
+void
+foo (unsigned long long *x)
+{
+ __m128i c = bar ();
+ __m128i d = bar ();
+ *(__m128i *) &b[0] = c;
+ *(__m128i *) &b[2] = d;
+ *x = b[0] + b[1] + b[2] + b[3];
+}
+
+void
+baz (double *x)
+{
+#if __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__
+ __m128i c = qux ();
+ __m128i d = qux ();
+ *(__m128i *) &a[0] = c;
+ *(__m128i *) &a[2] = d;
+ *x = a[0] + a[1] + a[2] + a[3];
+#endif
+}
+
+int
+main ()
+{
+ unsigned long long c = 0;
+ foo (&c);
+ if (c != 2 + 3 + 4 + 5)
+ __builtin_abort ();
+#if __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__
+ double d = 0.0;
+ baz (&d);
+ if (d != 2.0 + 3.0 + 4.0 + 5.0)
+ __builtin_abort ();
+#endif
+}
Jakub