This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Re: [PATCH] Fix type handling in undistribute_bitref_for_vector (PR tree-optimization/92618)


On November 22, 2019 3:25:05 PM GMT+01:00, Jakub Jelinek <jakub@redhat.com> wrote:
>Hi!
>
>The undistribute_bitref_for_vector function assumes that BIT_FIELD_REFs
>with a vector first argument always have the type of the vector element,
>but that is not the case: a BIT_FIELD_REF can extract any type of the
>corresponding size from any vector.
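>
>For illustration, here is a sketch of the kind of source that produces
>such BIT_FIELD_REFs (assuming the element loads get folded back into
>BIT_FIELD_REFs of the stored vector SSA name; the function name is made
>up, only the pattern matters):
>
>  typedef long long v2di __attribute__((__may_alias__, __vector_size__ (2 * sizeof (long long))));
>
>  unsigned long long
>  sum2 (v2di v)
>  {
>    unsigned long long b[2];
>    *(v2di *) b = v;
>    /* b[0] and b[1] are seen as BIT_FIELD_REFs of the long long vector,
>       but their type is unsigned long long, not long long.  */
>    return b[0] + b[1];
>  }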
>
>So, without this patch, when it saw e.g. an addition in unsigned long
>long type, even when the vector was 2x long long, it computed the
>addition in a 2x long long vector (which in theory could run into
>undefined overflow) and failed IL checking, because the conversion from
>long long to unsigned long long is not useless.  Even worse, for a
>double addition with a 2x long long vector it would again perform the
>addition in the 2x long long vector type, which is completely wrong.
>
>The following patch determines the right vector type and adds a
>VIEW_CONVERT_EXPR (VCE) around the operands when needed.
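>
>Conceptually, for the unsigned case in the sketch above the transform
>now behaves like this hand-written equivalent (an illustrative sketch
>using the GNU vector extension; the vector casts play the role of the
>VIEW_CONVERT_EXPRs):
>
>  typedef long long v2di __attribute__((__vector_size__ (2 * sizeof (long long))));
>  typedef unsigned long long v2du __attribute__((__vector_size__ (2 * sizeof (long long))));
>
>  unsigned long long
>  sum_both (v2di x, v2di y)
>  {
>    /* Reinterpret the long long vectors as unsigned long long vectors,
>       add them element-wise, then sum the lanes of the single result.  */
>    v2du xu = (v2du) x;
>    v2du yu = (v2du) y;
>    v2du s = xu + yu;
>    return s[0] + s[1];
>  }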
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok. 

Thanks, 
Richard. 

>2019-11-22  Jakub Jelinek  <jakub@redhat.com>
>
>	PR tree-optimization/92618
>	* tree-ssa-reassoc.c (v_info): Change from auto_vec to a struct
>	containing the auto_vec and a tree.
>	(undistribute_bitref_for_vector): Handle the case when element type
>	of vec is not the same as type of the BIT_FIELD_REF.  Formatting
>	fixes.
>
>	* gcc.c-torture/compile/pr92618.c: New test.
>	* gcc.c-torture/execute/pr92618.c: New test.
>
>--- gcc/tree-ssa-reassoc.c.jj	2019-11-13 10:54:56.038884580 +0100
>+++ gcc/tree-ssa-reassoc.c	2019-11-22 10:57:24.956931307 +0100
>@@ -1775,7 +1775,10 @@ undistribute_ops_list (enum tree_code op
>    first: element index for each relevant BIT_FIELD_REF.
>    second: the index of vec ops* for each relevant BIT_FIELD_REF.  */
> typedef std::pair<unsigned, unsigned> v_info_elem;
>-typedef auto_vec<v_info_elem, 32> v_info;
>+struct v_info {
>+  tree vec_type;
>+  auto_vec<v_info_elem, 32> vec;
>+};
> typedef v_info *v_info_ptr;
> 
> /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode.  */
>@@ -1840,8 +1843,11 @@ undistribute_bitref_for_vector (enum tre
>   if (ops->length () <= 1)
>     return false;
> 
>-  if (opcode != PLUS_EXPR && opcode != MULT_EXPR && opcode != BIT_XOR_EXPR
>-      && opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
>+  if (opcode != PLUS_EXPR
>+      && opcode != MULT_EXPR
>+      && opcode != BIT_XOR_EXPR
>+      && opcode != BIT_IOR_EXPR
>+      && opcode != BIT_AND_EXPR)
>     return false;
> 
>   hash_map<tree, v_info_ptr> v_info_map;
>@@ -1879,9 +1885,45 @@ undistribute_bitref_for_vector (enum tre
>       if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
> 	continue;
> 
>+      if (VECTOR_TYPE_P (TREE_TYPE (rhs))
>+	  || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
>+	continue;
>+
>+      /* The type of BIT_FIELD_REF might not be equal to the element type of
>+	 the vector.  We want to use a vector type with element type the
>+	 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec).  */
>+      if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
>+	{
>+	  machine_mode simd_mode;
>+	  unsigned HOST_WIDE_INT size, nunits;
>+	  unsigned HOST_WIDE_INT elem_size
>+	    = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
>+	  if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
>+	    continue;
>+	  if (size <= elem_size || (size % elem_size) != 0)
>+	    continue;
>+	  nunits = size / elem_size;
>+	  if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
>+				nunits).exists (&simd_mode))
>+	    continue;
>+	  vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
>+
>+	  /* Ignore it if target machine can't support this VECTOR type.  */
>+	  if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
>+	    continue;
>+
>+	  /* Check const vector type, constrain BIT_FIELD_REF offset and
>+	     size.  */
>+	  if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
>+	    continue;
>+
>+	  if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
>+			GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
>+	    continue;
>+	}
>+
>       tree elem_type = TREE_TYPE (vec_type);
>-      unsigned HOST_WIDE_INT elem_size
>-	= TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
>+      unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
>       if (maybe_ne (bit_field_size (rhs), elem_size))
> 	continue;
> 
>@@ -1898,8 +1940,13 @@ undistribute_bitref_for_vector (enum tre
>       bool existed;
>       v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
>       if (!existed)
>-	info = new v_info;
>-      info->safe_push (std::make_pair (idx, i));
>+	{
>+	  info = new v_info;
>+	  info->vec_type = vec_type;
>+	}
>+      else if (!types_compatible_p (vec_type, info->vec_type))
>+	continue;
>+      info->vec.safe_push (std::make_pair (idx, i));
>     }
> 
>   /* At least two VECTOR to combine.  */
>@@ -1919,14 +1966,15 @@ undistribute_bitref_for_vector (enum tre
>     {
>       tree cand_vec = (*it).first;
>       v_info_ptr cand_info = (*it).second;
>-      unsigned int num_elems = VECTOR_CST_NELTS (cand_vec).to_constant ();
>-      if (cand_info->length () != num_elems)
>+      unsigned int num_elems
>+	= TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
>+      if (cand_info->vec.length () != num_elems)
> 	continue;
>       sbitmap holes = sbitmap_alloc (num_elems);
>       bitmap_ones (holes);
>       bool valid = true;
>       v_info_elem *curr;
>-      FOR_EACH_VEC_ELT (*cand_info, i, curr)
>+      FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
> 	{
> 	  if (!bitmap_bit_p (holes, curr->first))
> 	    {
>@@ -1962,25 +2010,53 @@ undistribute_bitref_for_vector (enum tre
> 
>       unsigned int idx, j;
>       gimple *sum = NULL;
>-      v_info_ptr info_ptr;
>       tree sum_vec = tvec;
>+      v_info_ptr info_ptr = *(v_info_map.get (tvec));
>       v_info_elem *elem;
>+      tree vec_type = info_ptr->vec_type;
> 
>       /* Build the sum for all candidates with same mode.  */
>       do
> 	{
>-	  sum = build_and_add_sum (TREE_TYPE (sum_vec), sum_vec,
>+	  sum = build_and_add_sum (vec_type, sum_vec,
> 				   valid_vecs[i + 1], opcode);
>+	  if (!useless_type_conversion_p (vec_type,
>+					  TREE_TYPE (valid_vecs[i + 1])))
>+	    {
>+	      /* Update the operands only after build_and_add_sum,
>+		 so that we don't have to repeat the placement algorithm
>+		 of build_and_add_sum.  */
>+	      gimple_stmt_iterator gsi = gsi_for_stmt (sum);
>+	      tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
>+				 valid_vecs[i + 1]);
>+	      tree lhs = make_ssa_name (vec_type);
>+	      gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
>+	      gimple_set_uid (g, gimple_uid (sum));
>+	      gsi_insert_before (&gsi, g, GSI_NEW_STMT);
>+	      gimple_assign_set_rhs2 (sum, lhs);
>+	      if (sum_vec == tvec)
>+		{
>+		  vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
>+		  lhs = make_ssa_name (vec_type);
>+		  g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
>+		  gimple_set_uid (g, gimple_uid (sum));
>+		  gsi_insert_before (&gsi, g, GSI_NEW_STMT);
>+		  gimple_assign_set_rhs1 (sum, lhs);
>+		}
>+	      update_stmt (sum);
>+	    }
> 	  sum_vec = gimple_get_lhs (sum);
> 	  info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
>+	  gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
> 	  /* Update those related ops of current candidate VECTOR.  */
>-	  FOR_EACH_VEC_ELT (*info_ptr, j, elem)
>+	  FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
> 	    {
> 	      idx = elem->second;
> 	      gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
> 	      /* Set this then op definition will get DCEd later.  */
> 	      gimple_set_visited (def, true);
>-	      if (opcode == PLUS_EXPR || opcode == BIT_XOR_EXPR
>+	      if (opcode == PLUS_EXPR
>+		  || opcode == BIT_XOR_EXPR
> 		  || opcode == BIT_IOR_EXPR)
> 		(*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
> 	      else if (opcode == MULT_EXPR)
>@@ -2007,16 +2083,16 @@ undistribute_bitref_for_vector (enum tre
>          BIT_FIELD_REF statements accordingly.  */
>       info_ptr = *(v_info_map.get (tvec));
>       gcc_assert (sum);
>-      tree elem_type = TREE_TYPE (TREE_TYPE (tvec));
>-      FOR_EACH_VEC_ELT (*info_ptr, j, elem)
>+      tree elem_type = TREE_TYPE (vec_type);
>+      FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
> 	{
> 	  idx = elem->second;
> 	  tree dst = make_ssa_name (elem_type);
>-	  gimple *gs = gimple_build_assign (
>-	    dst, BIT_FIELD_REF,
>-	    build3 (BIT_FIELD_REF, elem_type, sum_vec, TYPE_SIZE (elem_type),
>-		    bitsize_int (elem->first
>-				 * tree_to_uhwi (TYPE_SIZE (elem_type)))));
>+	  tree pos = bitsize_int (elem->first
>+				  * tree_to_uhwi (TYPE_SIZE (elem_type)));
>+	  tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
>+			     TYPE_SIZE (elem_type), pos);
>+	  gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
> 	  insert_stmt_after (gs, sum);
> 	  gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
> 	  /* Set this then op definition will get DCEd later.  */
>--- gcc/testsuite/gcc.c-torture/compile/pr92618.c.jj	2019-11-22 11:11:52.971470532 +0100
>+++ gcc/testsuite/gcc.c-torture/compile/pr92618.c	2019-11-22 11:11:39.434680213 +0100
>@@ -0,0 +1,67 @@
>+/* PR tree-optimization/92618 */
>+
>+typedef long long __m128i __attribute__((__may_alias__, __vector_size__(2 * sizeof (long long))));
>+typedef long long __m256i __attribute__((__may_alias__, __vector_size__(4 * sizeof (long long))));
>+typedef long long __m512i __attribute__((__may_alias__, __vector_size__(8 * sizeof (long long))));
>+
>+double a[32];
>+unsigned long long b[32];
>+__m128i bar (void);
>+__m256i qux (void);
>+__m512i corge (void);
>+
>+void
>+foo (unsigned long long *x)
>+{
>+  __m128i c = bar ();
>+  __m128i d = bar ();
>+  __m256i e = qux ();
>+  __m256i f = qux ();
>+  __m256i g = qux ();
>+  __m512i h = corge ();
>+  __m512i i = corge ();
>+  *(__m128i *) &b[0] = c;
>+  *(__m128i *) &b[2] = d;
>+  *(__m256i *) &b[4] = e;
>+  *(__m256i *) &b[8] = f;
>+  *(__m256i *) &b[12] = g;
>+  *(__m512i *) &b[16] = h;
>+  *(__m512i *) &b[24] = i;
>+  *x = b[0] + b[1] + b[2] + b[3]
>+     + b[4] + b[5] + b[6] + b[7]
>+     + b[8] + b[9] + b[10] + b[11]
>+     + b[12] + b[13] + b[14] + b[15]
>+     + b[16] + b[17] + b[18] + b[19]
>+     + b[20] + b[21] + b[22] + b[23]
>+     + b[24] + b[25] + b[26] + b[27]
>+     + b[28] + b[29] + b[30] + b[31];
>+}
>+
>+void
>+baz (double *x)
>+{
>+#if __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__
>+  __m128i c = bar ();
>+  __m128i d = bar ();
>+  __m256i e = qux ();
>+  __m256i f = qux ();
>+  __m256i g = qux ();
>+  __m512i h = corge ();
>+  __m512i i = corge ();
>+  *(__m128i *) &a[0] = c;
>+  *(__m128i *) &a[2] = d;
>+  *(__m256i *) &a[4] = e;
>+  *(__m256i *) &a[8] = f;
>+  *(__m256i *) &a[12] = g;
>+  *(__m512i *) &a[16] = h;
>+  *(__m512i *) &a[24] = i;
>+  *x = a[0] + a[1] + a[2] + a[3]
>+     + a[4] + a[5] + a[6] + a[7]
>+     + a[8] + a[9] + a[10] + a[11]
>+     + a[12] + a[13] + a[14] + a[15]
>+     + a[16] + a[17] + a[18] + a[19]
>+     + a[20] + a[21] + a[22] + a[23]
>+     + a[24] + a[25] + a[26] + a[27]
>+     + a[28] + a[29] + a[30] + a[31];
>+#endif
>+}
>--- gcc/testsuite/gcc.c-torture/execute/pr92618.c.jj	2019-11-22 11:11:10.543127733 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr92618.c	2019-11-22 11:10:56.930338594 +0100
>@@ -0,0 +1,63 @@
>+/* PR tree-optimization/92618 */
>+
>+typedef long long __m128i __attribute__((__may_alias__, __vector_size__(2 * sizeof (long long))));
>+
>+double a[4];
>+unsigned long long b[4];
>+
>+__attribute__((noipa)) __m128i
>+bar (void)
>+{
>+  static int cnt;
>+  cnt += 2;
>+  return (__m128i) { cnt, cnt + 1 };
>+}
>+
>+#if __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__
>+typedef double __m128d __attribute__((__may_alias__, __vector_size__(2 * sizeof (double))));
>+
>+__attribute__((noipa)) __m128i
>+qux (void)
>+{
>+  static double cnt;
>+  cnt += 2.0;
>+  return (__m128i) (__m128d) { cnt, cnt + 1.0 };
>+}
>+#endif
>+
>+void
>+foo (unsigned long long *x)
>+{
>+  __m128i c = bar ();
>+  __m128i d = bar ();
>+  *(__m128i *) &b[0] = c;
>+  *(__m128i *) &b[2] = d;
>+  *x = b[0] + b[1] + b[2] + b[3];
>+}
>+
>+void
>+baz (double *x)
>+{
>+#if __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__
>+  __m128i c = qux ();
>+  __m128i d = qux ();
>+  *(__m128i *) &a[0] = c;
>+  *(__m128i *) &a[2] = d;
>+  *x = a[0] + a[1] + a[2] + a[3];
>+#endif
>+}
>+
>+int
>+main ()
>+{
>+  unsigned long long c = 0;
>+  foo (&c);
>+  if (c != 2 + 3 + 4 + 5)
>+    __builtin_abort ();
>+#if __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__
>+  double d = 0.0;
>+  baz (&d);
>+  if (d != 2.0 + 3.0 + 4.0 + 5.0)
>+    __builtin_abort ();
>+#endif
>+}
>
>	Jakub

