Use unsigned arithmetic for demoted vector plus/minus/mult (PR 88064)

Richard Sandiford <richard.sandiford@arm.com>
Wed Dec 5 08:38:00 GMT 2018


As Jakub pointed out, if we narrow a plus, minus or mult operation based
on the number of bits that consumers need, we have to convert a signed
operation to an unsigned one in order to avoid new undefined behaviour.
This patch does that and generalises vect_convert_input and
vect_recog_over_widening_pattern to cope with the extra casts.
(The changes to both functions are covered by existing tests.)
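
To illustrate the problem (my sketch, not taken from the patch): in
the function below, the addition is done in 32-bit arithmetic, where
it cannot overflow for 16-bit inputs.  If we demote the addition to
16 bits because its consumers only need the low bits of the result,
doing it as a signed 16-bit operation would introduce overflow cases
that are undefined, whereas an unsigned 16-bit operation is always
well-defined and produces the same low bits:

    unsigned char
    low_bits_of_sum (short a, short b)
    {
      int sum = a + b;             /* int addition: cannot overflow */
      return (unsigned char) sum;  /* only the low 8 bits are live */
    }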

Tested on x86_64-linux-gnu, aarch64-linux-gnu and aarch64_be-elf.
OK to install?

Richard


2018-12-05  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	PR tree-optimization/88064
	* tree-vect-patterns.c (vect_convert_input): Convert the result of
	an existing cast if it has the right width but the wrong sign.
	Do not test the signedness of the required result when
	considering whether to split an existing cast; instead split to
	a type with the same signedness as the source of the cast, then
	convert it to the opposite signedness where necessary.
	(vect_recog_over_widening_pattern): Use an unsigned operation when
	truncating an addition, subtraction or multiplication.  Cast the
	result back to the "real" signedness before promoting.
	(vect_recog_average_pattern): Handle sign changes between
	the final PLUS_EXPR and the RSHIFT_EXPR.

gcc/testsuite/
	PR tree-optimization/88064
	* gcc.dg/vect/vect-over-widen-23.c: New test.
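
As a concrete illustration of the vect_recog_average_pattern change
(my example, not part of the patch), the pattern matches loops of
this shape, where the addition is carried out in a wider type and
then shifted back down:

    void
    avg_floor (unsigned char *restrict out, unsigned char *restrict a,
               unsigned char *restrict b, int n)
    {
      for (int i = 0; i < n; ++i)
        out[i] = (a[i] + b[i]) >> 1;
    }

With the over-widening change above, the PLUS_EXPR feeding the shift
may now be computed in an unsigned type and cast back to the original
signedness, so the pattern has to look through that sign change
before checking the addition.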

Index: gcc/tree-vect-patterns.c
===================================================================
--- gcc/tree-vect-patterns.c	2018-12-05 08:33:40.658922796 +0000
+++ gcc/tree-vect-patterns.c	2018-12-05 08:33:55.130797055 +0000
@@ -720,34 +720,60 @@ vect_convert_input (stmt_vec_info stmt_i
   if (TREE_CODE (unprom->op) == INTEGER_CST)
     return wide_int_to_tree (type, wi::to_widest (unprom->op));
 
-  /* See if we can reuse an existing result.  */
+  tree input = unprom->op;
   if (unprom->caster)
     {
       tree lhs = gimple_get_lhs (unprom->caster->stmt);
-      if (types_compatible_p (TREE_TYPE (lhs), type))
-	return lhs;
+      tree lhs_type = TREE_TYPE (lhs);
+
+      /* If the result of the existing cast is the right width, use it
+	 instead of the source of the cast.  */
+      if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
+	input = lhs;
+      /* If the precision we want is between the source and result
+	 precisions of the existing cast, try splitting the cast into
+	 two and tapping into a mid-way point.  */
+      else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
+	       && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
+	{
+	  /* In order to preserve the semantics of the original cast,
+	     give the mid-way point the same signedness as the input value.
+
+	     It would be possible to use a signed type here instead if
+	     TYPE is signed and UNPROM->TYPE is unsigned, but that would
+	     make the sign of the midtype sensitive to the order in
+	     which we process the statements, since the signedness of
+	     TYPE is the signedness required by just one of possibly
+	     many users.  Also, unsigned promotions are usually as cheap
+	     as or cheaper than signed ones, so it's better to keep an
+	     unsigned promotion.  */
+	  tree midtype = build_nonstandard_integer_type
+	    (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
+	  tree vec_midtype = get_vectype_for_scalar_type (midtype);
+	  if (vec_midtype)
+	    {
+	      input = vect_recog_temp_ssa_var (midtype, NULL);
+	      gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
+						       unprom->op);
+	      if (!vect_split_statement (unprom->caster, input, new_stmt,
+					 vec_midtype))
+		append_pattern_def_seq (stmt_info, new_stmt, vec_midtype);
+	    }
+	}
+
+      /* See if we can reuse an existing result.  */
+      if (types_compatible_p (type, TREE_TYPE (input)))
+	return input;
     }
 
   /* We need a new conversion statement.  */
   tree new_op = vect_recog_temp_ssa_var (type, NULL);
-  gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, unprom->op);
-
-  /* If the operation is the input to a vectorizable cast, try splitting
-     that cast into two, taking the required result as a mid-way point.  */
-  if (unprom->caster)
-    {
-      tree lhs = gimple_get_lhs (unprom->caster->stmt);
-      if (TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION (type)
-	  && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type)
-	  && (TYPE_UNSIGNED (unprom->type) || !TYPE_UNSIGNED (type))
-	  && vect_split_statement (unprom->caster, new_op, new_stmt, vectype))
-	return new_op;
-    }
+  gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
 
   /* If OP is an external value, see if we can insert the new statement
      on an incoming edge.  */
-  if (unprom->dt == vect_external_def)
-    if (edge e = vect_get_external_def_edge (stmt_info->vinfo, unprom->op))
+  if (input == unprom->op && unprom->dt == vect_external_def)
+    if (edge e = vect_get_external_def_edge (stmt_info->vinfo, input))
       {
 	basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
 	gcc_assert (!new_bb);
@@ -1644,28 +1670,37 @@ vect_recog_over_widening_pattern (stmt_v
   bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
   tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
 
+  /* If we're truncating an operation, we need to make sure that we
+     don't introduce new undefined overflow.  The codes tested here are
+     a subset of those accepted by vect_truncatable_operation_p.  */
+  tree op_type = new_type;
+  if (TYPE_OVERFLOW_UNDEFINED (new_type)
+      && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
+    op_type = build_nonstandard_integer_type (new_precision, true);
+
   /* We specifically don't check here whether the target supports the
      new operation, since it might be something that a later pattern
      wants to rewrite anyway.  If targets have a minimum element size
      for some optabs, we should pattern-match smaller ops to larger ops
      where beneficial.  */
   tree new_vectype = get_vectype_for_scalar_type (new_type);
-  if (!new_vectype)
+  tree op_vectype = get_vectype_for_scalar_type (op_type);
+  if (!new_vectype || !op_vectype)
     return NULL;
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
 		     type, new_type);
 
-  /* Calculate the rhs operands for an operation on NEW_TYPE.  */
+  /* Calculate the rhs operands for an operation on OP_TYPE.  */
   tree ops[3] = {};
   for (unsigned int i = 1; i < first_op; ++i)
     ops[i - 1] = gimple_op (last_stmt, i);
   vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
-		       new_type, &unprom[0], new_vectype);
+		       op_type, &unprom[0], op_vectype);
 
-  /* Use the operation to produce a result of type NEW_TYPE.  */
-  tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
+  /* Use the operation to produce a result of type OP_TYPE.  */
+  tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
   gimple *pattern_stmt = gimple_build_assign (new_var, code,
 					      ops[0], ops[1], ops[2]);
   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
@@ -1674,6 +1709,13 @@ vect_recog_over_widening_pattern (stmt_v
     dump_printf_loc (MSG_NOTE, vect_location,
 		     "created pattern stmt: %G", pattern_stmt);
 
+  /* Convert back to the original signedness, if OP_TYPE is different
+     from NEW_TYPE.  */
+  if (op_type != new_type)
+    pattern_stmt = vect_convert_output (last_stmt_info, new_type,
+					pattern_stmt, op_vectype);
+
+  /* Promote the result to the original type.  */
   pattern_stmt = vect_convert_output (last_stmt_info, type,
 				      pattern_stmt, new_vectype);
 
@@ -1719,8 +1761,16 @@ vect_recog_average_pattern (stmt_vec_inf
   if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
     return NULL;
 
-  /* Get the definition of the shift input.  */
+  /* Look through any change in sign on the shift input.  */
   tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
+  vect_unpromoted_value unprom_plus;
+  rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
+						     &unprom_plus);
+  if (!rshift_rhs
+      || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
+    return NULL;
+
+  /* Get the definition of the shift input.  */
   stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
   if (!plus_stmt_info)
     return NULL;
Index: gcc/testsuite/gcc.dg/vect/vect-over-widen-23.c
===================================================================
--- /dev/null	2018-11-29 13:15:04.463550658 +0000
+++ gcc/testsuite/gcc.dg/vect/vect-over-widen-23.c	2018-12-05 08:33:55.130797055 +0000
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-forwprop -fno-tree-vrp" } */
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS / 2)
+#else
+#define N 64
+#endif
+
+int a[N], b[N], c[N];
+
+void
+foo ()
+{
+  int i;
+  for (i = 0; i < N; i++)
+    {
+      long long d = a[i];
+      long long e = b[i];
+      d += e;
+      c[i] = d;
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 2 "vect" { target vect_widen_shift } } } */
+/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* \+} "vect" } } */
+/* { dg-final { scan-tree-dump {VIEW_CONVERT_EXPR<vector[^ ]* unsigned} "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */