This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Check TYPE_OVERFLOW_WRAPS for parloops reductions


[ was: Re: [RFC, PR66873] Use graphite for parloops ]

On 22/07/15 13:02, Richard Biener wrote:
On Wed, Jul 22, 2015 at 1:01 PM, Richard Biener
<richard.guenther@gmail.com> wrote:
On Tue, Jul 21, 2015 at 8:42 PM, Sebastian Pop <sebpop@gmail.com> wrote:
Tom de Vries wrote:
Fix reduction safety checks


diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 9145dbf..e014be2 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2613,16 +2613,30 @@ vect_is_simple_reduction_1 (loop_vec_info
loop_info, gimple phi,
                         "reduction: unsafe fp math optimization: ");
        return NULL;
      }
-  else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
-          && check_reduction)
+  else if (INTEGRAL_TYPE_P (type) && check_reduction)
      {
...

You didn't need to adjust any testcases?
 That's probably because the
checking above is
not always executed (see PR66623 for a related testcase).  The code
needs refactoring.
And we need a way-out, that is, we do _not_ want to not vectorize
signed reductions.
So you need to fix code generation instead.

Btw, for the vectorizer the current "trick" is that nobody takes advantage about
overflow undefinedness for vector types.


AFAIU, you're saying here that there's no current bug related to assuming wrapping overflow in the vectorizer?

I've updated the patch accordingly, so we only bother about TYPE_OVERFLOW_WRAPS for parloops reductions.

Currently bootstrapping and reg-testing on x86_64.

OK for trunk?

Thanks,
- Tom

Check TYPE_OVERFLOW_WRAPS for parloops reductions

2015-07-21  Tom de Vries  <tom@codesourcery.com>

	* tree-parloops.c (gather_scalar_reductions): Add arg to call to
	vect_force_simple_reduction.
	* tree-vect-loop.c (vect_analyze_scalar_cycles_1): Same.
	(vect_is_simple_reduction_1): Add and handle
	need_wrapping_integral_overflow parameter.
	(vect_is_simple_reduction, vect_force_simple_reduction): Add and pass
	need_wrapping_integral_overflow parameter.
	(vectorizable_reduction): Add arg to call to vect_is_simple_reduction.
	* tree-vectorizer.h (vect_force_simple_reduction): Add parameter to decl.

	* gcc.dg/autopar/outer-4.c: Add xfail.
	* gcc.dg/autopar/outer-5.c: Same.
	* gcc.dg/autopar/outer-6.c: Same.
	* gcc.dg/autopar/reduc-2.c: Same.
	* gcc.dg/autopar/reduc-2char.c: Same.
	* gcc.dg/autopar/reduc-2short.c: Same.
	* gcc.dg/autopar/reduc-8.c: Same.
	* gcc.dg/autopar/uns-outer-4.c: New test.
	* gcc.dg/autopar/uns-outer-5.c: New test.
	* gcc.dg/autopar/uns-outer-6.c: New test.
---
 gcc/testsuite/gcc.dg/autopar/outer-4.c      |  2 +-
 gcc/testsuite/gcc.dg/autopar/outer-5.c      |  2 +-
 gcc/testsuite/gcc.dg/autopar/outer-6.c      |  4 +--
 gcc/testsuite/gcc.dg/autopar/reduc-2.c      |  4 +--
 gcc/testsuite/gcc.dg/autopar/reduc-2char.c  |  4 +--
 gcc/testsuite/gcc.dg/autopar/reduc-2short.c |  4 +--
 gcc/testsuite/gcc.dg/autopar/reduc-8.c      |  4 +--
 gcc/testsuite/gcc.dg/autopar/uns-outer-4.c  | 36 ++++++++++++++++++++
 gcc/testsuite/gcc.dg/autopar/uns-outer-5.c  | 49 +++++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/autopar/uns-outer-6.c  | 51 +++++++++++++++++++++++++++++
 gcc/tree-parloops.c                         |  6 ++--
 gcc/tree-vect-loop.c                        | 44 +++++++++++++++++--------
 gcc/tree-vectorizer.h                       |  3 +-
 13 files changed, 183 insertions(+), 30 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/autopar/uns-outer-4.c
 create mode 100644 gcc/testsuite/gcc.dg/autopar/uns-outer-5.c
 create mode 100644 gcc/testsuite/gcc.dg/autopar/uns-outer-6.c

diff --git a/gcc/testsuite/gcc.dg/autopar/outer-4.c b/gcc/testsuite/gcc.dg/autopar/outer-4.c
index 6fd37c5..2027499 100644
--- a/gcc/testsuite/gcc.dg/autopar/outer-4.c
+++ b/gcc/testsuite/gcc.dg/autopar/outer-4.c
@@ -32,4 +32,4 @@ int main(void)
 
 
 /* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/autopar/outer-5.c b/gcc/testsuite/gcc.dg/autopar/outer-5.c
index 6a0ae91..d6e0dd3 100644
--- a/gcc/testsuite/gcc.dg/autopar/outer-5.c
+++ b/gcc/testsuite/gcc.dg/autopar/outer-5.c
@@ -45,4 +45,4 @@ int main(void)
 }
 
 /* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/autopar/outer-6.c b/gcc/testsuite/gcc.dg/autopar/outer-6.c
index 6bef7cc..726794c 100644
--- a/gcc/testsuite/gcc.dg/autopar/outer-6.c
+++ b/gcc/testsuite/gcc.dg/autopar/outer-6.c
@@ -44,6 +44,6 @@ int main(void)
 
 
 /* Check that outer loop is parallelized.  */
-/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
 /* { dg-final { scan-tree-dump-times "parallelizing inner loop" 0 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-2.c b/gcc/testsuite/gcc.dg/autopar/reduc-2.c
index 3ad16e4..2f4883d 100644
--- a/gcc/testsuite/gcc.dg/autopar/reduc-2.c
+++ b/gcc/testsuite/gcc.dg/autopar/reduc-2.c
@@ -63,6 +63,6 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 "parloops" { xfail *-*-* } } } */
 
diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-2char.c b/gcc/testsuite/gcc.dg/autopar/reduc-2char.c
index 072489f..14867f3 100644
--- a/gcc/testsuite/gcc.dg/autopar/reduc-2char.c
+++ b/gcc/testsuite/gcc.dg/autopar/reduc-2char.c
@@ -60,7 +60,7 @@ int main (void)
 }
 
 
-/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */
 
 
diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-2short.c b/gcc/testsuite/gcc.dg/autopar/reduc-2short.c
index 4dbbc8a..7c19cc5 100644
--- a/gcc/testsuite/gcc.dg/autopar/reduc-2short.c
+++ b/gcc/testsuite/gcc.dg/autopar/reduc-2short.c
@@ -59,6 +59,6 @@ int main (void)
 }
 
 
-/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */
 
diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-8.c b/gcc/testsuite/gcc.dg/autopar/reduc-8.c
index 16fb954..1d05c48 100644
--- a/gcc/testsuite/gcc.dg/autopar/reduc-8.c
+++ b/gcc/testsuite/gcc.dg/autopar/reduc-8.c
@@ -84,5 +84,5 @@ int main (void)
 }
 
 
-/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/autopar/uns-outer-4.c b/gcc/testsuite/gcc.dg/autopar/uns-outer-4.c
new file mode 100644
index 0000000..ef9fc2a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/autopar/uns-outer-4.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */
+
+void abort (void);
+
+unsigned int g_sum=0;
+unsigned int x[500][500];
+
+void __attribute__((noinline))
+parloop (int N)
+{
+  int i, j;
+  unsigned int sum;
+
+  /* Double reduction is currently not supported, outer loop is not
+     parallelized.  Inner reduction is detected, inner loop is
+     parallelized.  */
+  sum = 0;
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      sum += x[i][j];
+
+  g_sum = sum;
+}
+
+int
+main (void)
+{
+  parloop (500);
+
+  return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/autopar/uns-outer-5.c b/gcc/testsuite/gcc.dg/autopar/uns-outer-5.c
new file mode 100644
index 0000000..a929e5d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/autopar/uns-outer-5.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */
+
+void abort (void);
+
+unsigned int x[500][500];
+unsigned int y[500];
+unsigned int g_sum=0;
+
+void __attribute__((noinline))
+init (int i, int j)
+{
+  x[i][j]=1;
+}
+
+void __attribute__((noinline))
+parloop (int N)
+{
+  int i, j;
+  unsigned int sum;
+
+  /* Inner cycle is currently not supported, outer loop is not
+     parallelized.  Inner reduction is detected, inner loop is
+     parallelized.  */
+  for (i = 0; i < N; i++)
+    {
+      sum = 0;
+      for (j = 0; j < N; j++)
+	sum += x[i][j];
+      y[i]=sum;
+    }
+  g_sum = sum;
+}
+
+int
+main (void)
+{
+  int i, j;
+  for (i = 0; i < 500; i++)
+    for (j = 0; j < 500; j++)
+      init (i, j);
+
+  parloop (500);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/autopar/uns-outer-6.c b/gcc/testsuite/gcc.dg/autopar/uns-outer-6.c
new file mode 100644
index 0000000..5c745f8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/autopar/uns-outer-6.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */
+
+void abort (void);
+
+unsigned int x[500][500];
+unsigned int y[500];
+unsigned int g_sum=0;
+
+
+void __attribute__((noinline))
+init (int i, int j)
+{
+  x[i][j]=1;
+}
+
+void __attribute__((noinline))
+parloop (int N)
+{
+  int i, j;
+  unsigned int sum;
+
+  /* Outer loop reduction, outerloop is parallelized.  */
+  sum=0;
+  for (i = 0; i < N; i++)
+    {
+      for (j = 0; j < N; j++)
+	y[i]=x[i][j];
+      sum += y[i];
+    }
+  g_sum = sum;
+}
+
+int
+main (void)
+{
+  int i, j;
+  for (i = 0; i < 500; i++)
+    for (j = 0; j < 500; j++)
+      init (i, j);
+
+  parloop (500);
+
+  return 0;
+}
+
+
+/* Check that outer loop is parallelized.  */
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "parallelizing inner loop" 0 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index ec41834..88f22e8 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -2376,9 +2376,9 @@ gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list
       if (!simple_iv (loop, loop, res, &iv, true)
 	&& simple_loop_info)
 	{
-           gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
-							    phi, true,
-							    &double_reduc);
+	   gimple reduc_stmt
+	     = vect_force_simple_reduction (simple_loop_info, phi, true,
+					    &double_reduc, true);
 	   if (reduc_stmt && !double_reduc)
               build_new_reduction (reduction_list, reduc_stmt, phi);
         }
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 9145dbf..c31bfbd 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -715,7 +715,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
 
       nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
       reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
-						&double_reduc);
+						&double_reduc, false);
       if (reduc_stmt)
         {
           if (double_reduc)
@@ -2339,7 +2339,7 @@ vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
 static gimple
 vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
 			    bool check_reduction, bool *double_reduc,
-			    bool modify)
+			    bool modify, bool need_wrapping_integral_overflow)
 {
   struct loop *loop = (gimple_bb (phi))->loop_father;
   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
@@ -2613,14 +2613,26 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
 			"reduction: unsafe fp math optimization: ");
       return NULL;
     }
-  else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
-	   && check_reduction)
+  else if (INTEGRAL_TYPE_P (type) && check_reduction)
     {
-      /* Changing the order of operations changes the semantics.  */
-      if (dump_enabled_p ())
-	report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-			"reduction: unsafe int math optimization: ");
-      return NULL;
+      if (TYPE_OVERFLOW_TRAPS (type))
+	{
+	  /* Changing the order of operations changes the semantics.  */
+	  if (dump_enabled_p ())
+	    report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+			    "reduction: unsafe int math optimization"
+			    " (overflow traps): ");
+	  return NULL;
+	}
+      if (need_wrapping_integral_overflow && !TYPE_OVERFLOW_WRAPS (type))
+	{
+	  /* Changing the order of operations changes the semantics.  */
+	  if (dump_enabled_p ())
+	    report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+			    "reduction: unsafe int math optimization"
+			    " (overflow doesn't wrap): ");
+	  return NULL;
+	}
     }
   else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
     {
@@ -2749,10 +2761,12 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
 
 static gimple
 vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
-                          bool check_reduction, bool *double_reduc)
+			  bool check_reduction, bool *double_reduc,
+			  bool need_wrapping_integral_overflow)
 {
   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
-				     double_reduc, false);
+				     double_reduc, false,
+				     need_wrapping_integral_overflow);
 }
 
 /* Wrapper around vect_is_simple_reduction_1, which will modify code
@@ -2761,10 +2775,12 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
 
 gimple
 vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
-                          bool check_reduction, bool *double_reduc)
+			     bool check_reduction, bool *double_reduc,
+			     bool need_wrapping_integral_overflow)
 {
   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
-				     double_reduc, true);
+				     double_reduc, true,
+				     need_wrapping_integral_overflow);
 }
 
 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
@@ -5074,7 +5090,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
     }
 
   gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
-					 !nested_cycle, &dummy);
+					 !nested_cycle, &dummy, false);
   if (orig_stmt)
     gcc_assert (tmp == orig_stmt
 		|| GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 48c1f8d..dfa8795 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1090,7 +1090,8 @@ extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *,
 /* In tree-vect-loop.c.  */
 /* FORNOW: Used in tree-parloops.c.  */
 extern void destroy_loop_vec_info (loop_vec_info, bool);
-extern gimple vect_force_simple_reduction (loop_vec_info, gimple, bool, bool *);
+extern gimple vect_force_simple_reduction (loop_vec_info, gimple, bool, bool *,
+					   bool);
 /* Drive for loop analysis stage.  */
 extern loop_vec_info vect_analyze_loop (struct loop *);
 /* Drive for loop transformation stage.  */
-- 
1.9.1


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]