This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[gomp4, committed] various parloops-related backports


Hi,

I've backported these parloops-related patches to gomp-4_0-branch.

- Ignore -ftree-parallelize-loops={0,1} using gt
- Handle unused reduction in create_loads_for_reductions
- Handle exit phi without header phi in create_parallel_loop
- Check TYPE_OVERFLOW_WRAPS for parloops reductions
- Remove xfail in autopar/uns-outer-4.c
- Add transform_to_exit_first_loop_alt dump success message
- Don't allow unsafe reductions in graphite
- Enable reductions without fassociative-math in graphite
- Fixup graphite/uns-*.c testcases

Thanks,
- Tom
Ignore -ftree-parallelize-loops={0,1} using gt

2015-07-27  Tom de Vries  <tom@codesourcery.com>

	backport from trunk:
	2015-07-14  Tom de Vries  <tom@codesourcery.com>

	* gcc.c (greater_than_spec_func): Declare forward.
	(LINK_COMMAND_SPEC, GOMP_SELF_SPECS): Use gt to ignore
	-ftree-parallelize-loops={0,1}.
	(static_spec_functions): Add greater_than_spec_func function with name
	"gt".
	(greater_than_spec_func): New function.
---
 gcc/gcc.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 47 insertions(+), 2 deletions(-)

diff --git a/gcc/gcc.c b/gcc/gcc.c
index 66a35b3..00680ac 100644
--- a/gcc/gcc.c
+++ b/gcc/gcc.c
@@ -274,6 +274,7 @@ static const char *compare_debug_self_opt_spec_function (int, const char **);
 static const char *compare_debug_auxbase_opt_spec_function (int, const char **);
 static const char *pass_through_libs_spec_func (int, const char **);
 static const char *replace_extension_spec_func (int, const char **);
+static const char *greater_than_spec_func (int, const char **);
 static char *convert_white_space (char *);
 
 /* The Specs Language
@@ -881,7 +882,8 @@ proper position among the other output files.  */
     %{s} %{t} %{u*} %{z} %{Z} %{!nostdlib:%{!nostartfiles:%S}} " VTABLE_VERIFICATION_SPEC " \
     %{static:} %{L*} %(mfwrap) %(link_libgcc) " SANITIZER_EARLY_SPEC " %o\
     " CHKP_SPEC " \
-    %{fopenacc|fopenmp|ftree-parallelize-loops=*:%:include(libgomp.spec)%(link_gomp)}\
+    %{fopenacc|fopenmp|%:gt(%{ftree-parallelize-loops=*} 1):\
+	%:include(libgomp.spec)%(link_gomp)}\
     %{fcilkplus:%:include(libcilkrts.spec)%(link_cilkrts)}\
     %{fgnu-tm:%:include(libitm.spec)%(link_itm)}\
     %(mflib) " STACK_SPLIT_SPEC "\
@@ -1042,7 +1044,8 @@ static const char *const multilib_defaults_raw[] = MULTILIB_DEFAULTS;
 /* Linking to libgomp implies pthreads.  This is particularly important
    for targets that use different start files and suchlike.  */
 #ifndef GOMP_SELF_SPECS
-#define GOMP_SELF_SPECS "%{fopenacc|fopenmp|ftree-parallelize-loops=*: " \
+#define GOMP_SELF_SPECS \
+  "%{fopenacc|fopenmp|%:gt(%{ftree-parallelize-loops=*} 1): " \
   "-pthread}"
 #endif
 
@@ -1487,6 +1490,7 @@ static const struct spec_function static_spec_functions[] =
   { "compare-debug-auxbase-opt", compare_debug_auxbase_opt_spec_function },
   { "pass-through-libs",	pass_through_libs_spec_func },
   { "replace-extension",	replace_extension_spec_func },
+  { "gt",			greater_than_spec_func },
 #ifdef EXTRA_SPEC_FUNCTIONS
   EXTRA_SPEC_FUNCTIONS
 #endif
@@ -9462,6 +9466,47 @@ replace_extension_spec_func (int argc, const char **argv)
   return result;
 }
 
+/* Returns "" if the n in ARGV[1] == -opt=<n> is greater than ARGV[2].
+   Otherwise, return NULL.  */
+
+static const char *
+greater_than_spec_func (int argc, const char **argv)
+{
+  char *converted;
+
+  if (argc == 1)
+    return NULL;
+
+  gcc_assert (argc == 3);
+  gcc_assert (argv[0][0] == '-');
+  gcc_assert (argv[0][1] == '\0');
+
+  /* Point p to <n> in in -opt=<n>.  */
+  const char *p = argv[1];
+  while (true)
+    {
+      char c = *p;
+      if (c == '\0')
+	gcc_unreachable ();
+
+      ++p;
+
+      if (c == '=')
+	break;
+    }
+
+  long arg = strtol (p, &converted, 10);
+  gcc_assert (converted != p);
+
+  long lim = strtol (argv[2], &converted, 10);
+  gcc_assert (converted != argv[2]);
+
+  if (arg > lim)
+    return "";
+
+  return NULL;
+}
+
 /* Insert backslash before spaces in ORIG (usually a file path), to 
    avoid being broken by spec parser.
 
-- 
1.9.1

Handle unused reduction in create_loads_for_reductions

2015-07-27  Tom de Vries  <tom@codesourcery.com>

	backport from trunk:
	2015-07-16  Tom de Vries  <tom@codesourcery.com>

	* tree-parloops.c (create_loads_for_reductions): Handle case that
	reduction is unused.
---
 gcc/tree-parloops.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 44a3816..f791731 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -1166,6 +1166,10 @@ create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
   tree name;
   tree x;
 
+  /* If there's no exit phi, the result of the reduction is unused.  */
+  if (red->keep_res == NULL)
+    return 1;
+
   gsi = gsi_after_labels (clsn_data->load_bb);
   load_struct = build_simple_mem_ref (clsn_data->load);
   load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
-- 
1.9.1

Handle exit phi without header phi in create_parallel_loop

2015-07-27  Tom de Vries  <tom@codesourcery.com>

	backport from trunk:
	2015-07-16  Tom de Vries  <tom@codesourcery.com>

	* tree-parloops.c (create_parallel_loop): Handle case that exit phi does
	not have a corresponding loop header phi.
---
 gcc/tree-parloops.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index f791731..3708565 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -2167,13 +2167,17 @@ create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
        !gsi_end_p (gpi); gsi_next (&gpi))
     {
       source_location locus;
-      tree def;
       gphi *phi = gpi.phi ();
-      gphi *stmt;
+      tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
+      gimple def_stmt = SSA_NAME_DEF_STMT (def);
 
-      stmt = as_a <gphi *> (
-	       SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit)));
+      /* If the exit phi is not connected to a header phi in the same loop, this
+	 value is not modified in the loop, and we're done with this phi.  */
+      if (!(gimple_code (def_stmt) == GIMPLE_PHI
+	    && gimple_bb (def_stmt) == loop->header))
+	continue;
 
+      gphi *stmt = as_a <gphi *> (def_stmt);
       def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
       locus = gimple_phi_arg_location_from_edge (stmt,
 						 loop_preheader_edge (loop));
-- 
1.9.1

Check TYPE_OVERFLOW_WRAPS for parloops reductions

2015-07-27  Tom de Vries  <tom@codesourcery.com>

	backport from trunk:
	2015-07-23  Tom de Vries  <tom@codesourcery.com>

	* tree-parloops.c (gather_scalar_reductions): Add arg to call to
	vect_force_simple_reduction.
	* tree-vect-loop.c (vect_analyze_scalar_cycles_1): Same.
	(vect_is_simple_reduction_1): Add and handle
	need_wrapping_integral_overflow parameter.
	(vect_is_simple_reduction, vect_force_simple_reduction): Add and pass
	need_wrapping_integral_overflow parameter.
	(vectorizable_reduction): Add arg to call to vect_is_simple_reduction.
	* tree-vectorizer.h (vect_force_simple_reduction): Add parameter to decl.

	* gcc.dg/autopar/outer-4.c: Add xfail.
	* gcc.dg/autopar/outer-5.c: Same.
	* gcc.dg/autopar/outer-6.c: Same.
	* gcc.dg/autopar/reduc-2.c: Same.
	* gcc.dg/autopar/reduc-2char.c: Same.
	* gcc.dg/autopar/reduc-2short.c: Same.
	* gcc.dg/autopar/reduc-8.c: Same.
	* gcc.dg/autopar/uns-outer-4.c: New test.
	* gcc.dg/autopar/uns-outer-5.c: New test.
	* gcc.dg/autopar/uns-outer-6.c: New test.
---
 gcc/testsuite/gcc.dg/autopar/outer-4.c      |  2 +-
 gcc/testsuite/gcc.dg/autopar/outer-5.c      |  2 +-
 gcc/testsuite/gcc.dg/autopar/outer-6.c      |  4 +--
 gcc/testsuite/gcc.dg/autopar/reduc-2.c      |  4 +--
 gcc/testsuite/gcc.dg/autopar/reduc-2char.c  |  4 +--
 gcc/testsuite/gcc.dg/autopar/reduc-2short.c |  4 +--
 gcc/testsuite/gcc.dg/autopar/reduc-8.c      |  4 +--
 gcc/testsuite/gcc.dg/autopar/uns-outer-4.c  | 36 ++++++++++++++++++++
 gcc/testsuite/gcc.dg/autopar/uns-outer-5.c  | 49 +++++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/autopar/uns-outer-6.c  | 51 +++++++++++++++++++++++++++++
 gcc/tree-parloops.c                         |  6 ++--
 gcc/tree-vect-loop.c                        | 44 +++++++++++++++++--------
 gcc/tree-vectorizer.h                       |  3 +-
 13 files changed, 183 insertions(+), 30 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/autopar/uns-outer-4.c
 create mode 100644 gcc/testsuite/gcc.dg/autopar/uns-outer-5.c
 create mode 100644 gcc/testsuite/gcc.dg/autopar/uns-outer-6.c

diff --git a/gcc/testsuite/gcc.dg/autopar/outer-4.c b/gcc/testsuite/gcc.dg/autopar/outer-4.c
index 6fd37c5..2027499 100644
--- a/gcc/testsuite/gcc.dg/autopar/outer-4.c
+++ b/gcc/testsuite/gcc.dg/autopar/outer-4.c
@@ -32,4 +32,4 @@ int main(void)
 
 
 /* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/autopar/outer-5.c b/gcc/testsuite/gcc.dg/autopar/outer-5.c
index 6a0ae91..d6e0dd3 100644
--- a/gcc/testsuite/gcc.dg/autopar/outer-5.c
+++ b/gcc/testsuite/gcc.dg/autopar/outer-5.c
@@ -45,4 +45,4 @@ int main(void)
 }
 
 /* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/autopar/outer-6.c b/gcc/testsuite/gcc.dg/autopar/outer-6.c
index 6bef7cc..726794c 100644
--- a/gcc/testsuite/gcc.dg/autopar/outer-6.c
+++ b/gcc/testsuite/gcc.dg/autopar/outer-6.c
@@ -44,6 +44,6 @@ int main(void)
 
 
 /* Check that outer loop is parallelized.  */
-/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
 /* { dg-final { scan-tree-dump-times "parallelizing inner loop" 0 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-2.c b/gcc/testsuite/gcc.dg/autopar/reduc-2.c
index 3ad16e4..2f4883d 100644
--- a/gcc/testsuite/gcc.dg/autopar/reduc-2.c
+++ b/gcc/testsuite/gcc.dg/autopar/reduc-2.c
@@ -63,6 +63,6 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 "parloops" { xfail *-*-* } } } */
 
diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-2char.c b/gcc/testsuite/gcc.dg/autopar/reduc-2char.c
index 072489f..14867f3 100644
--- a/gcc/testsuite/gcc.dg/autopar/reduc-2char.c
+++ b/gcc/testsuite/gcc.dg/autopar/reduc-2char.c
@@ -60,7 +60,7 @@ int main (void)
 }
 
 
-/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */
 
 
diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-2short.c b/gcc/testsuite/gcc.dg/autopar/reduc-2short.c
index 4dbbc8a..7c19cc5 100644
--- a/gcc/testsuite/gcc.dg/autopar/reduc-2short.c
+++ b/gcc/testsuite/gcc.dg/autopar/reduc-2short.c
@@ -59,6 +59,6 @@ int main (void)
 }
 
 
-/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */
 
diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-8.c b/gcc/testsuite/gcc.dg/autopar/reduc-8.c
index 16fb954..1d05c48 100644
--- a/gcc/testsuite/gcc.dg/autopar/reduc-8.c
+++ b/gcc/testsuite/gcc.dg/autopar/reduc-8.c
@@ -84,5 +84,5 @@ int main (void)
 }
 
 
-/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/autopar/uns-outer-4.c b/gcc/testsuite/gcc.dg/autopar/uns-outer-4.c
new file mode 100644
index 0000000..ef9fc2a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/autopar/uns-outer-4.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */
+
+void abort (void);
+
+unsigned int g_sum=0;
+unsigned int x[500][500];
+
+void __attribute__((noinline))
+parloop (int N)
+{
+  int i, j;
+  unsigned int sum;
+
+  /* Double reduction is currently not supported, outer loop is not
+     parallelized.  Inner reduction is detected, inner loop is
+     parallelized.  */
+  sum = 0;
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      sum += x[i][j];
+
+  g_sum = sum;
+}
+
+int
+main (void)
+{
+  parloop (500);
+
+  return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/autopar/uns-outer-5.c b/gcc/testsuite/gcc.dg/autopar/uns-outer-5.c
new file mode 100644
index 0000000..a929e5d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/autopar/uns-outer-5.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */
+
+void abort (void);
+
+unsigned int x[500][500];
+unsigned int y[500];
+unsigned int g_sum=0;
+
+void __attribute__((noinline))
+init (int i, int j)
+{
+  x[i][j]=1;
+}
+
+void __attribute__((noinline))
+parloop (int N)
+{
+  int i, j;
+  unsigned int sum;
+
+  /* Inner cycle is currently not supported, outer loop is not
+     parallelized.  Inner reduction is detected, inner loop is
+     parallelized.  */
+  for (i = 0; i < N; i++)
+    {
+      sum = 0;
+      for (j = 0; j < N; j++)
+	sum += x[i][j];
+      y[i]=sum;
+    }
+  g_sum = sum;
+}
+
+int
+main (void)
+{
+  int i, j;
+  for (i = 0; i < 500; i++)
+    for (j = 0; j < 500; j++)
+      init (i, j);
+
+  parloop (500);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/autopar/uns-outer-6.c b/gcc/testsuite/gcc.dg/autopar/uns-outer-6.c
new file mode 100644
index 0000000..5c745f8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/autopar/uns-outer-6.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */
+
+void abort (void);
+
+unsigned int x[500][500];
+unsigned int y[500];
+unsigned int g_sum=0;
+
+
+void __attribute__((noinline))
+init (int i, int j)
+{
+  x[i][j]=1;
+}
+
+void __attribute__((noinline))
+parloop (int N)
+{
+  int i, j;
+  unsigned int sum;
+
+  /* Outer loop reduction, outerloop is parallelized.  */
+  sum=0;
+  for (i = 0; i < N; i++)
+    {
+      for (j = 0; j < N; j++)
+	y[i]=x[i][j];
+      sum += y[i];
+    }
+  g_sum = sum;
+}
+
+int
+main (void)
+{
+  int i, j;
+  for (i = 0; i < 500; i++)
+    for (j = 0; j < 500; j++)
+      init (i, j);
+
+  parloop (500);
+
+  return 0;
+}
+
+
+/* Check that outer loop is parallelized.  */
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "parallelizing inner loop" 0 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 3708565..9e49944 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -2519,9 +2519,9 @@ gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list
       if (!simple_iv (loop, loop, res, &iv, true)
 	&& simple_loop_info)
 	{
-           gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
-							    phi, true,
-							    &double_reduc);
+	   gimple reduc_stmt
+	     = vect_force_simple_reduction (simple_loop_info, phi, true,
+					    &double_reduc, true);
 	   if (reduc_stmt && !double_reduc)
               build_new_reduction (reduction_list, reduc_stmt, phi);
         }
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 83a993f..65d7bdd 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -714,7 +714,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
 
       nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
       reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
-						&double_reduc);
+						&double_reduc, false);
       if (reduc_stmt)
         {
           if (double_reduc)
@@ -2338,7 +2338,7 @@ vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
 static gimple
 vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
 			    bool check_reduction, bool *double_reduc,
-			    bool modify)
+			    bool modify, bool need_wrapping_integral_overflow)
 {
   struct loop *loop = (gimple_bb (phi))->loop_father;
   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
@@ -2612,14 +2612,26 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
 			"reduction: unsafe fp math optimization: ");
       return NULL;
     }
-  else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
-	   && check_reduction)
+  else if (INTEGRAL_TYPE_P (type) && check_reduction)
     {
-      /* Changing the order of operations changes the semantics.  */
-      if (dump_enabled_p ())
-	report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-			"reduction: unsafe int math optimization: ");
-      return NULL;
+      if (TYPE_OVERFLOW_TRAPS (type))
+	{
+	  /* Changing the order of operations changes the semantics.  */
+	  if (dump_enabled_p ())
+	    report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+			    "reduction: unsafe int math optimization"
+			    " (overflow traps): ");
+	  return NULL;
+	}
+      if (need_wrapping_integral_overflow && !TYPE_OVERFLOW_WRAPS (type))
+	{
+	  /* Changing the order of operations changes the semantics.  */
+	  if (dump_enabled_p ())
+	    report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+			    "reduction: unsafe int math optimization"
+			    " (overflow doesn't wrap): ");
+	  return NULL;
+	}
     }
   else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
     {
@@ -2748,10 +2760,12 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
 
 static gimple
 vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
-                          bool check_reduction, bool *double_reduc)
+			  bool check_reduction, bool *double_reduc,
+			  bool need_wrapping_integral_overflow)
 {
   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
-				     double_reduc, false);
+				     double_reduc, false,
+				     need_wrapping_integral_overflow);
 }
 
 /* Wrapper around vect_is_simple_reduction_1, which will modify code
@@ -2760,10 +2774,12 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
 
 gimple
 vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
-                          bool check_reduction, bool *double_reduc)
+			     bool check_reduction, bool *double_reduc,
+			     bool need_wrapping_integral_overflow)
 {
   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
-				     double_reduc, true);
+				     double_reduc, true,
+				     need_wrapping_integral_overflow);
 }
 
 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
@@ -5073,7 +5089,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
     }
 
   gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
-					 !nested_cycle, &dummy);
+					 !nested_cycle, &dummy, false);
   if (orig_stmt)
     gcc_assert (tmp == orig_stmt
 		|| GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 48c1f8d..dfa8795 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1090,7 +1090,8 @@ extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *,
 /* In tree-vect-loop.c.  */
 /* FORNOW: Used in tree-parloops.c.  */
 extern void destroy_loop_vec_info (loop_vec_info, bool);
-extern gimple vect_force_simple_reduction (loop_vec_info, gimple, bool, bool *);
+extern gimple vect_force_simple_reduction (loop_vec_info, gimple, bool, bool *,
+					   bool);
 /* Drive for loop analysis stage.  */
 extern loop_vec_info vect_analyze_loop (struct loop *);
 /* Drive for loop transformation stage.  */
-- 
1.9.1

Remove xfail in autopar/uns-outer-4.c

2015-07-27  Tom de Vries  <tom@codesourcery.com>

	backport from trunk:
	2015-07-24  Tom de Vries  <tom@codesourcery.com>

	* gcc.dg/autopar/uns-outer-4.c: Remove loopfn xfail.
---
 gcc/testsuite/gcc.dg/autopar/uns-outer-4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/autopar/uns-outer-4.c b/gcc/testsuite/gcc.dg/autopar/uns-outer-4.c
index ef9fc2a..8365a89 100644
--- a/gcc/testsuite/gcc.dg/autopar/uns-outer-4.c
+++ b/gcc/testsuite/gcc.dg/autopar/uns-outer-4.c
@@ -33,4 +33,4 @@ main (void)
 
 
 /* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
-- 
1.9.1

Add transform_to_exit_first_loop_alt dump success message

2015-07-27  Tom de Vries  <tom@codesourcery.com>

	backport from trunk:
	2015-07-24  Tom de Vries  <tom@codesourcery.com>

	* tree-parloops.c (gen_parallel_loop): Add debug print for alternative
	exit-first loop transform.

	* gcc.dg/parloops-exit-first-loop-alt-2.c: Use debug print for
	alternative exit-first loop transform.
	* gcc.dg/parloops-exit-first-loop-alt-3.c: Same.
	* gcc.dg/parloops-exit-first-loop-alt-4.c: Same.
	* gcc.dg/parloops-exit-first-loop-alt-5.c: Same.
	* gcc.dg/parloops-exit-first-loop-alt-6.c: Same.
	* gcc.dg/parloops-exit-first-loop-alt-7.c: Same.
	* gcc.dg/parloops-exit-first-loop-alt-pr66652.c: Same.
	* gcc.dg/parloops-exit-first-loop-alt.c: Same.
	* gfortran.dg/parloops-exit-first-loop-alt-2.f95: Same.
	* gfortran.dg/parloops-exit-first-loop-alt.f95: Same.
---
 gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-2.c        |  9 ++-------
 gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-3.c        |  9 ++-------
 gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-4.c        |  9 ++-------
 gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-5.c        |  9 ++-------
 gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-6.c        |  9 ++-------
 gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-7.c        |  9 ++-------
 gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-pr66652.c  | 11 +++--------
 gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt.c          | 10 +++-------
 gcc/testsuite/gfortran.dg/parloops-exit-first-loop-alt-2.f95 |  9 ++-------
 gcc/testsuite/gfortran.dg/parloops-exit-first-loop-alt.f95   | 10 +++-------
 gcc/tree-parloops.c                                          | 10 +++++++++-
 11 files changed, 32 insertions(+), 72 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-2.c b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-2.c
index 24e605a..f1cf75f 100644
--- a/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-2.c
+++ b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-2.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target pthread } */
-/* { dg-options "-O2 -ftree-parallelize-loops=2 -fdump-tree-parloops" } */
+/* { dg-options "-O2 -ftree-parallelize-loops=2 -fdump-tree-parloops-details" } */
 
 /* Constant bound, vector addition.  */
 
@@ -19,9 +19,4 @@ f (void)
       c[i] = a[i] + b[i];
 }
 
-/* Three times three array accesses:
-   - three in f._loopfn.0
-   - three in the parallel
-   - three in the low iteration count loop
-   Crucially, none for a peeled off last iteration following the parallel.  */
-/* { dg-final { scan-tree-dump-times "(?n)\\\[i" 9 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "alternative exit-first loop transform succeeded" 1 "parloops" } } */
diff --git a/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-3.c b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-3.c
index fec53a1..c7154ba 100644
--- a/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-3.c
+++ b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-3.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target pthread } */
-/* { dg-options "-O2 -ftree-parallelize-loops=2 -fdump-tree-parloops" } */
+/* { dg-options "-O2 -ftree-parallelize-loops=2 -fdump-tree-parloops-details" } */
 
 /* Variable bound, reduction.  */
 
@@ -18,9 +18,4 @@ f (unsigned int n, unsigned int *__restrict__ a)
   return sum;
 }
 
-/* Three array accesses:
-   - one in f._loopfn.0
-   - one in the parallel
-   - one in the low iteration count loop
-   Crucially, none for a peeled off last iteration following the parallel.  */
-/* { dg-final { scan-tree-dump-times "(?n)\\\* 4" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "alternative exit-first loop transform succeeded" 1 "parloops" } } */
diff --git a/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-4.c b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-4.c
index 2b8d289..5f7fe68 100644
--- a/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-4.c
+++ b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-4.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target pthread } */
-/* { dg-options "-O2 -ftree-parallelize-loops=2 -fdump-tree-parloops" } */
+/* { dg-options "-O2 -ftree-parallelize-loops=2 -fdump-tree-parloops-details" } */
 
 /* Constant bound, reduction.  */
 
@@ -20,9 +20,4 @@ f (void)
   return sum;
 }
 
-/* Three array accesses:
-   - one in f._loopfn.0
-   - one in the parallel
-   - one in the low iteration count loop
-   Crucially, none for a peeled off last iteration following the parallel.  */
-/* { dg-final { scan-tree-dump-times "(?n)\\\* 4" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "alternative exit-first loop transform succeeded" 1 "parloops" } } */
diff --git a/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-5.c b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-5.c
index 3f799cf..3c1e99b 100644
--- a/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-5.c
+++ b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-5.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target pthread } */
-/* { dg-options "-O2 -ftree-parallelize-loops=2 -fdump-tree-parloops" } */
+/* { dg-options "-O2 -ftree-parallelize-loops=2 -fdump-tree-parloops-details" } */
 
 /* Variable bound, vector addition, unsigned loop counter, unsigned bound.  */
 
@@ -14,9 +14,4 @@ f (unsigned int n, unsigned int *__restrict__ a, unsigned int *__restrict__ b,
     c[i] = a[i] + b[i];
 }
 
-/* Three times a store:
-   - one in f._loopfn.0
-   - one in the parallel
-   - one in the low iteration count loop
-   Crucially, none for a peeled off last iteration following the parallel.  */
-/* { dg-final { scan-tree-dump-times "(?n)^  \\*_\[0-9\]*" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "alternative exit-first loop transform succeeded" 1 "parloops" } } */
diff --git a/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-6.c b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-6.c
index ee19a55..edc60ba 100644
--- a/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-6.c
+++ b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-6.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target pthread } */
-/* { dg-options "-O2 -ftree-parallelize-loops=2 -fdump-tree-parloops" } */
+/* { dg-options "-O2 -ftree-parallelize-loops=2 -fdump-tree-parloops-details" } */
 
 /* Variable bound, vector addition, unsigned loop counter, signed bound.  */
 
@@ -14,9 +14,4 @@ f (int n, unsigned int *__restrict__ a, unsigned int *__restrict__ b,
     c[i] = a[i] + b[i];
 }
 
-/* Three times a store:
-   - one in f._loopfn.0
-   - one in the parallel
-   - one in the low iteration count loop
-   Crucially, none for a peeled off last iteration following the parallel.  */
-/* { dg-final { scan-tree-dump-times "(?n)^  \\*_\[0-9\]*" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "alternative exit-first loop transform succeeded" 1 "parloops" } } */
diff --git a/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-7.c b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-7.c
index c337342..38be2e8 100644
--- a/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-7.c
+++ b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-7.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target pthread } */
-/* { dg-options "-O2 -ftree-parallelize-loops=2 -fdump-tree-parloops" } */
+/* { dg-options "-O2 -ftree-parallelize-loops=2 -fdump-tree-parloops-details" } */
 
 /* Variable bound, vector addition, signed loop counter, signed bound.  */
 
@@ -14,9 +14,4 @@ f (int n, unsigned int *__restrict__ a, unsigned int *__restrict__ b,
     c[i] = a[i] + b[i];
 }
 
-/* Three times a store:
-   - one in f._loopfn.0
-   - one in the parallel
-   - one in the low iteration count loop
-   Crucially, none for a peeled off last iteration following the parallel.  */
-/* { dg-final { scan-tree-dump-times "(?n)^  \\*_\[0-9\]*" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "alternative exit-first loop transform succeeded" 1 "parloops" } } */
diff --git a/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-pr66652.c b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-pr66652.c
index 2ea097d..6f3ece5 100644
--- a/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-pr66652.c
+++ b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-pr66652.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target pthread } */
-/* { dg-options "-O2 -ftree-parallelize-loops=2 -fdump-tree-parloops" } */
+/* { dg-options "-O2 -ftree-parallelize-loops=2 -fdump-tree-parloops-details" } */
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -22,10 +22,5 @@ f (unsigned int n, unsigned int sum)
   return sum;
 }
 
-/* Four times % 13:
-   - once in f._loopfn.0
-   - once in the parallel
-   - once in the low iteration count loop
-   - once for a peeled off last iteration following the parallel.
-   In other words, we want try_transform_to_exit_first_loop_alt to fail.  */
-/* { dg-final { scan-tree-dump-times "(?n)% 13" 4 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "parallelizing inner loop" 1 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "alternative exit-first loop transform succeeded" 0 "parloops" } } */
diff --git a/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt.c b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt.c
index 0b69165..44596e3 100644
--- a/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt.c
+++ b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target pthread } */
-/* { dg-options "-O2 -ftree-parallelize-loops=2 -fdump-tree-parloops" } */
+/* { dg-options "-O2 -ftree-parallelize-loops=2 -fdump-tree-parloops-details" } */
 
 /* Variable bound, vector addition, signed loop counter, unsigned bound.  */
 
@@ -14,9 +14,5 @@ f (unsigned int n, unsigned int *__restrict__ a, unsigned int *__restrict__ b,
     c[i] = a[i] + b[i];
 }
 
-/* Three times a store:
-   - one in f._loopfn.0
-   - one in the parallel
-   - one in the low iteration count loop
-   Crucially, none for a peeled off last iteration following the parallel.  */
-/* { dg-final { scan-tree-dump-times "(?n)^  \\*_\[0-9\]*" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "alternative exit-first loop transform succeeded" 1 "parloops" } } */
+
diff --git a/gcc/testsuite/gfortran.dg/parloops-exit-first-loop-alt-2.f95 b/gcc/testsuite/gfortran.dg/parloops-exit-first-loop-alt-2.f95
index f26a6e3..52434f2 100644
--- a/gcc/testsuite/gfortran.dg/parloops-exit-first-loop-alt-2.f95
+++ b/gcc/testsuite/gfortran.dg/parloops-exit-first-loop-alt-2.f95
@@ -1,7 +1,7 @@
 ! { dg-additional-options "-O2" }
 ! { dg-require-effective-target pthread }
 ! { dg-additional-options "-ftree-parallelize-loops=2" }
-! { dg-additional-options "-fdump-tree-parloops" }
+! { dg-additional-options "-fdump-tree-parloops-details" }
 
 ! Constant bound, vector addition.
 
@@ -16,9 +16,4 @@ subroutine foo ()
   end do
 end subroutine foo
 
-! Three times plus 25:
-! - once in f._loopfn.0
-! - once in the parallel
-! - once in the low iteration count loop
-! Crucially, none for a peeled off last iteration following the parallel.
-! { dg-final { scan-tree-dump-times "(?n) \\+ 25;" 3 "parloops" } }
+! { dg-final { scan-tree-dump-times "alternative exit-first loop transform succeeded" 1 "parloops" } }
diff --git a/gcc/testsuite/gfortran.dg/parloops-exit-first-loop-alt.f95 b/gcc/testsuite/gfortran.dg/parloops-exit-first-loop-alt.f95
index 6dc8a38..1eb9dfd 100644
--- a/gcc/testsuite/gfortran.dg/parloops-exit-first-loop-alt.f95
+++ b/gcc/testsuite/gfortran.dg/parloops-exit-first-loop-alt.f95
@@ -1,7 +1,7 @@
 ! { dg-additional-options "-O2" }
 ! { dg-require-effective-target pthread }
 ! { dg-additional-options "-ftree-parallelize-loops=2" }
-! { dg-additional-options "-fdump-tree-parloops" }
+! { dg-additional-options "-fdump-tree-parloops-details" }
 
 ! Variable bound, vector addition.
 
@@ -17,9 +17,5 @@ subroutine foo (nr)
   end do
 end subroutine foo
 
-! Three times plus 25:
-! - once in f._loopfn.0
-! - once in the parallel
-! - once in the low iteration count loop
-! Crucially, none for a peeled off last iteration following the parallel.
-! { dg-final { scan-tree-dump-times "(?n) \\+ 25;" 3 "parloops" } }
+! { dg-final { scan-tree-dump-times "alternative exit-first loop transform succeeded" 1 "parloops" } }
+
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 9e49944..05508e7 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -2375,7 +2375,15 @@ gen_parallel_loop (struct loop *loop,
      increment) and immediately follows the loop exit test.  Attempt to move the
      entry of the loop directly before the exit check and increase the number of
      iterations of the loop by one.  */
-  if (!try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
+  if (try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
+    {
+      if (dump_file
+	  && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file,
+		 "alternative exit-first loop transform succeeded"
+		 " for loop %d\n", loop->num);
+    }
+  else
     {
       if (oacc_kernels_p)
 	n_threads = 1;
-- 
1.9.1

Don't allow unsafe reductions in graphite

2015-07-27  Tom de Vries  <tom@codesourcery.com>

	backport from trunk:
	2015-07-24  Tom de Vries  <tom@codesourcery.com>

	* graphite-sese-to-poly.c (is_reduction_operation_p): Limit
	flag_associative_math to FLOAT_TYPE_P.  Honour
	TYPE_OVERFLOW_WRAPS for INTEGRAL_TYPE_P. Don't allow any other types.

	* gcc.dg/graphite/block-1.c: Xfail scan.
	* gcc.dg/graphite/interchange-12.c: Same.
	* gcc.dg/graphite/interchange-14.c: Same.
	* gcc.dg/graphite/interchange-15.c: Same.
	* gcc.dg/graphite/interchange-9.c: Same.
	* gcc.dg/graphite/interchange-mvt.c: Same.
	* gcc.dg/graphite/uns-block-1.c: New test.
	* gcc.dg/graphite/uns-interchange-12.c: New test.
	* gcc.dg/graphite/uns-interchange-14.c: New test.
	* gcc.dg/graphite/uns-interchange-15.c: New test.
	* gcc.dg/graphite/uns-interchange-9.c: New test.
	* gcc.dg/graphite/uns-interchange-mvt.c: New test.
---
 gcc/graphite-sese-to-poly.c                        | 14 +++--
 gcc/testsuite/gcc.dg/graphite/block-1.c            |  2 +-
 gcc/testsuite/gcc.dg/graphite/interchange-12.c     |  2 +-
 gcc/testsuite/gcc.dg/graphite/interchange-14.c     |  2 +-
 gcc/testsuite/gcc.dg/graphite/interchange-15.c     |  2 +-
 gcc/testsuite/gcc.dg/graphite/interchange-9.c      |  2 +-
 gcc/testsuite/gcc.dg/graphite/interchange-mvt.c    |  2 +-
 gcc/testsuite/gcc.dg/graphite/uns-block-1.c        | 48 +++++++++++++++++
 gcc/testsuite/gcc.dg/graphite/uns-interchange-12.c | 56 +++++++++++++++++++
 gcc/testsuite/gcc.dg/graphite/uns-interchange-14.c | 58 ++++++++++++++++++++
 gcc/testsuite/gcc.dg/graphite/uns-interchange-15.c | 53 ++++++++++++++++++
 gcc/testsuite/gcc.dg/graphite/uns-interchange-9.c  | 47 ++++++++++++++++
 .../gcc.dg/graphite/uns-interchange-mvt.c          | 63 ++++++++++++++++++++++
 13 files changed, 342 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/graphite/uns-block-1.c
 create mode 100644 gcc/testsuite/gcc.dg/graphite/uns-interchange-12.c
 create mode 100644 gcc/testsuite/gcc.dg/graphite/uns-interchange-14.c
 create mode 100644 gcc/testsuite/gcc.dg/graphite/uns-interchange-15.c
 create mode 100644 gcc/testsuite/gcc.dg/graphite/uns-interchange-9.c
 create mode 100644 gcc/testsuite/gcc.dg/graphite/uns-interchange-mvt.c

diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c
index 9195592..24c4864 100644
--- a/gcc/graphite-sese-to-poly.c
+++ b/gcc/graphite-sese-to-poly.c
@@ -2612,9 +2612,17 @@ is_reduction_operation_p (gimple stmt)
   gcc_assert (is_gimple_assign (stmt));
   code = gimple_assign_rhs_code (stmt);
 
-  return flag_associative_math
-    && commutative_tree_code (code)
-    && associative_tree_code (code);
+  if (!commutative_tree_code (code)
+      || !associative_tree_code (code))
+    return false;
+
+  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
+
+  if (FLOAT_TYPE_P (type))
+    return flag_associative_math;
+
+  return (INTEGRAL_TYPE_P (type)
+	  && TYPE_OVERFLOW_WRAPS (type));
 }
 
 /* Returns true when PHI contains an argument ARG.  */
diff --git a/gcc/testsuite/gcc.dg/graphite/block-1.c b/gcc/testsuite/gcc.dg/graphite/block-1.c
index a73c20f..2208eb9 100644
--- a/gcc/testsuite/gcc.dg/graphite/block-1.c
+++ b/gcc/testsuite/gcc.dg/graphite/block-1.c
@@ -45,4 +45,4 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "will be loop blocked" 3 "graphite" } } */
+/* { dg-final { scan-tree-dump-times "will be loop blocked" 3 "graphite" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-12.c b/gcc/testsuite/gcc.dg/graphite/interchange-12.c
index 41a8882..bf95fdd 100644
--- a/gcc/testsuite/gcc.dg/graphite/interchange-12.c
+++ b/gcc/testsuite/gcc.dg/graphite/interchange-12.c
@@ -53,4 +53,4 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" } } */
+/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-14.c b/gcc/testsuite/gcc.dg/graphite/interchange-14.c
index 36990ab..46f6a6d 100644
--- a/gcc/testsuite/gcc.dg/graphite/interchange-14.c
+++ b/gcc/testsuite/gcc.dg/graphite/interchange-14.c
@@ -55,4 +55,4 @@ main (void)
 }
 
 /* PRE destroys the perfect nest and we can't cope with that yet.  */
-/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" } } */
+/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-15.c b/gcc/testsuite/gcc.dg/graphite/interchange-15.c
index 3ddb74f..9f6b7ae 100644
--- a/gcc/testsuite/gcc.dg/graphite/interchange-15.c
+++ b/gcc/testsuite/gcc.dg/graphite/interchange-15.c
@@ -49,5 +49,5 @@ main (void)
 }
 
 /* PRE destroys the perfect nest and we can't cope with that yet.  */
-/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" } } */
+/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" { xfail *-*-* } } } */
 
diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-9.c b/gcc/testsuite/gcc.dg/graphite/interchange-9.c
index cfec110..b023ea8 100644
--- a/gcc/testsuite/gcc.dg/graphite/interchange-9.c
+++ b/gcc/testsuite/gcc.dg/graphite/interchange-9.c
@@ -44,4 +44,4 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" } } */
+/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-mvt.c b/gcc/testsuite/gcc.dg/graphite/interchange-mvt.c
index 4b8f264..8c00f80 100644
--- a/gcc/testsuite/gcc.dg/graphite/interchange-mvt.c
+++ b/gcc/testsuite/gcc.dg/graphite/interchange-mvt.c
@@ -59,5 +59,5 @@ main (void)
 }
 
 /* PRE destroys the perfect nest and we can't cope with that yet.  */
-/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" } } */
+/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" { xfail *-*-* } } } */
 
diff --git a/gcc/testsuite/gcc.dg/graphite/uns-block-1.c b/gcc/testsuite/gcc.dg/graphite/uns-block-1.c
new file mode 100644
index 0000000..57d522b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/graphite/uns-block-1.c
@@ -0,0 +1,48 @@
+/* { dg-require-effective-target size32plus } */
+
+#define DEBUG 0
+#if DEBUG
+#include <stdio.h>
+#endif
+
+#define MAX 100
+
+extern void abort ();
+
+int
+main (void)
+{
+  int i, j;
+  int sum = 0;
+  int A[MAX * MAX];
+  int B[MAX * MAX];
+
+  /* These loops should be loop blocked.  */
+  for (i = 0; i < MAX; i++)
+    for (j = 0; j < MAX; j++)
+      {
+	A[i*MAX + j] = j;
+	B[i*MAX + j] = j;
+      }
+
+  /* These loops should be loop blocked.  */
+  for (i = 0; i < MAX; i++)
+    for (j = 0; j < MAX; j++)
+      A[i*MAX + j] += B[j*MAX + i];
+
+  /* These loops should be loop blocked.  */
+  for (i = 0; i < MAX; i++)
+    for (j = 0; j < MAX; j++)
+      sum += A[i*MAX + j];
+
+#if DEBUG
+  fprintf (stderr, "sum = %d \n", sum);
+#endif
+
+  if (sum != 990000)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "will be loop blocked" 3 "graphite" } } */
diff --git a/gcc/testsuite/gcc.dg/graphite/uns-interchange-12.c b/gcc/testsuite/gcc.dg/graphite/uns-interchange-12.c
new file mode 100644
index 0000000..dc26926
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/graphite/uns-interchange-12.c
@@ -0,0 +1,56 @@
+/* { dg-require-effective-target size32plus } */
+
+#define DEBUG 0
+#if DEBUG
+#include <stdio.h>
+#endif
+
+#define N 200
+
+int A[N][N], B[N][N], C[N][N];
+
+static int __attribute__((noinline))
+matmult (void)
+{
+  int i, j, k;
+
+  /* Loops J and K should be interchanged.  */
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      {
+	A[i][j] = 0;
+	for (k = 0; k < N; k++)
+	  A[i][j] += B[i][k] * C[k][j];
+      }
+
+  return A[0][0] + A[N-1][N-1];
+}
+
+extern void abort ();
+
+int
+main (void)
+{
+  int i, j, res;
+
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      {
+	A[i][j] = 0;
+	B[i][j] = i - j;
+	C[i][j] = i + j;
+      }
+
+  res = matmult ();
+
+#if DEBUG
+  fprintf (stderr, "res = %d \n", res);
+#endif
+
+  if (res != 2626800)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" } } */
diff --git a/gcc/testsuite/gcc.dg/graphite/uns-interchange-14.c b/gcc/testsuite/gcc.dg/graphite/uns-interchange-14.c
new file mode 100644
index 0000000..36990ab
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/graphite/uns-interchange-14.c
@@ -0,0 +1,58 @@
+/* { dg-require-effective-target size32plus } */
+
+#define DEBUG 0
+#if DEBUG
+#include <stdio.h>
+#endif
+
+#define N 200
+
+int A[N][N], B[N][N], C[N][N];
+
+static void __attribute__((noinline))
+matmult (void)
+{
+  int i, j, k;
+
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      A[i][j] = 0;
+
+  /* Loops J and K should be interchanged.  */
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      for (k = 0; k < N; k++)
+	A[i][j] += B[i][k] * C[k][j];
+}
+
+extern void abort ();
+
+int
+main (void)
+{
+  int i, j, res = 0;
+
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      {
+	B[i][j] = j;
+	C[i][j] = i;
+      }
+
+  matmult ();
+
+  for (i = 0; i < N; i++)
+    res += A[i][i];
+
+#if DEBUG
+  fprintf (stderr, "res = %d \n", res);
+#endif
+
+  if (res != 529340000)
+    abort ();
+
+  return 0;
+}
+
+/* PRE destroys the perfect nest and we can't cope with that yet.  */
+/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" } } */
diff --git a/gcc/testsuite/gcc.dg/graphite/uns-interchange-15.c b/gcc/testsuite/gcc.dg/graphite/uns-interchange-15.c
new file mode 100644
index 0000000..3ddb74f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/graphite/uns-interchange-15.c
@@ -0,0 +1,53 @@
+/* { dg-require-effective-target size32plus } */
+
+#define DEBUG 0
+#if DEBUG
+#include <stdio.h>
+#endif
+
+#define NMAX 2000
+
+static int x[NMAX], a[NMAX][NMAX];
+
+static int __attribute__((noinline))
+mvt (long N)
+{
+  int i,j;
+
+  /* These two loops should be interchanged.  */
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      x[i] += a[j][i];
+
+  return x[1];
+}
+
+extern void abort ();
+
+int
+main (void)
+{
+  int i, j, res;
+
+  for (i = 0; i < NMAX; i++)
+    for (j = 0; j < NMAX; j++)
+      a[i][j] = j;
+
+  for (i = 0; i < NMAX; i++)
+    x[i] = i;
+
+  res = mvt (NMAX);
+
+#if DEBUG
+  fprintf (stderr, "res = %d \n", res);
+#endif
+
+  if (res != 2001)
+    abort ();
+
+  return 0;
+}
+
+/* PRE destroys the perfect nest and we can't cope with that yet.  */
+/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" } } */
+
diff --git a/gcc/testsuite/gcc.dg/graphite/uns-interchange-9.c b/gcc/testsuite/gcc.dg/graphite/uns-interchange-9.c
new file mode 100644
index 0000000..cfec110
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/graphite/uns-interchange-9.c
@@ -0,0 +1,47 @@
+/* { dg-require-effective-target size32plus } */
+
+#define DEBUG 0
+#if DEBUG
+#include <stdio.h>
+#endif
+
+#define N 111
+#define M 111
+
+static int __attribute__((noinline))
+foo (int *x)
+{
+  int i, j;
+  int sum = 0;
+
+  for (j = 0; j < M; ++j)
+    for (i = 0;  i < N; ++i)
+      sum += x[M * i + j];
+
+  return sum;
+}
+
+extern void abort ();
+
+int
+main (void)
+{
+  int A[N*M];
+  int i, res;
+
+  for (i = 0; i < N*M; i++)
+    A[i] = 2;
+
+  res = foo (A);
+
+#if DEBUG
+  fprintf (stderr, "res = %d \n", res);
+#endif
+
+  if (res != 24642)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" } } */
diff --git a/gcc/testsuite/gcc.dg/graphite/uns-interchange-mvt.c b/gcc/testsuite/gcc.dg/graphite/uns-interchange-mvt.c
new file mode 100644
index 0000000..4b8f264
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/graphite/uns-interchange-mvt.c
@@ -0,0 +1,63 @@
+/* { dg-require-effective-target size32plus } */
+
+#define DEBUG 0
+#if DEBUG
+#include <stdio.h>
+#endif
+
+#define NMAX 2000
+
+static int x1[NMAX], x2[NMAX], a[NMAX][NMAX], y1[NMAX], y2[NMAX];
+
+static int __attribute__((noinline))
+mvt (long N)
+{
+
+  int i,j;
+
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      x1[i] = x1[i] + a[i][j] * y1[j];
+
+  /* These two loops should be interchanged.  */
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      x2[i] = x2[i] + a[j][i] * y2[j];
+
+  return x1[0] + x2[0];
+}
+
+extern void abort ();
+
+int
+main (void)
+{
+  int i, j, res;
+
+  for (i = 0; i < NMAX; i++)
+    for (j = 0; j < NMAX; j++)
+      a[i][j] = i + j;
+
+  for (i = 0; i < NMAX; i++)
+    {
+      x1[i] = 0;
+      x2[i] = 2*i;
+      y1[i] = 100 - i;
+      y2[i] = i;
+    }
+
+  res = mvt (NMAX);
+
+#if DEBUG
+  fprintf (stderr, "res = %d \n", res);
+#endif
+
+  if (res != 199900000)
+    abort ();
+
+  return 0;
+}
+
+/* PRE destroys the perfect nest and we can't cope with that yet.  */
+/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" } } */
+
-- 
1.9.1

Enable reductions without fassociative-math in graphite

2015-07-27  Tom de Vries  <tom@codesourcery.com>

	backport from trunk:
	2015-07-24  Tom de Vries  <tom@codesourcery.com>

	* graphite-sese-to-poly.c (build_poly_scop): Always call
	rewrite_commutative_reductions_out_of_ssa.
---
 gcc/graphite-sese-to-poly.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c
index 24c4864..0a47629 100644
--- a/gcc/graphite-sese-to-poly.c
+++ b/gcc/graphite-sese-to-poly.c
@@ -3163,8 +3163,7 @@ build_poly_scop (scop_p scop)
   if (!scop_ivs_can_be_represented (scop))
     return;
 
-  if (flag_associative_math)
-    rewrite_commutative_reductions_out_of_ssa (scop);
+  rewrite_commutative_reductions_out_of_ssa (scop);
 
   build_sese_loop_nests (region);
   /* Record all conditions in REGION.  */
-- 
1.9.1

Fixup graphite/uns-*.c testcases

2015-07-27  Tom de Vries  <tom@codesourcery.com>

	backport from trunk:
	2015-07-25  Tom de Vries  <tom@codesourcery.com>

	* gcc.dg/graphite/graphite.exp: Include uns-*.c files in
	interchange_files and block_files variables.
	* gcc.dg/graphite/uns-block-1.c (main): Change signed into unsigned
	arithmetic.
	* gcc.dg/graphite/uns-interchange-12.c: Same.
	* gcc.dg/graphite/uns-interchange-14.c: Same.
	* gcc.dg/graphite/uns-interchange-15.c: Same.
	* gcc.dg/graphite/uns-interchange-9.c (foo): Same.
	* gcc.dg/graphite/uns-interchange-mvt.c: Same.
---
 gcc/testsuite/gcc.dg/graphite/graphite.exp          |  6 ++++--
 gcc/testsuite/gcc.dg/graphite/uns-block-1.c         |  6 +++---
 gcc/testsuite/gcc.dg/graphite/uns-interchange-12.c  |  7 ++++---
 gcc/testsuite/gcc.dg/graphite/uns-interchange-14.c  |  5 +++--
 gcc/testsuite/gcc.dg/graphite/uns-interchange-15.c  |  7 ++++---
 gcc/testsuite/gcc.dg/graphite/uns-interchange-9.c   | 11 ++++++-----
 gcc/testsuite/gcc.dg/graphite/uns-interchange-mvt.c |  7 ++++---
 7 files changed, 28 insertions(+), 21 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/graphite/graphite.exp b/gcc/testsuite/gcc.dg/graphite/graphite.exp
index 9dba5d6..9e7ede6 100644
--- a/gcc/testsuite/gcc.dg/graphite/graphite.exp
+++ b/gcc/testsuite/gcc.dg/graphite/graphite.exp
@@ -41,8 +41,10 @@ set wait_to_run_files [lsort [glob -nocomplain $srcdir/$subdir/*.c ] ]
 set scop_files        [lsort [glob -nocomplain $srcdir/$subdir/scop-*.c ] ]
 set id_files          [lsort [glob -nocomplain $srcdir/$subdir/id-*.c ] ]
 set run_id_files      [lsort [glob -nocomplain $srcdir/$subdir/run-id-*.c ] ]
-set interchange_files [lsort [glob -nocomplain $srcdir/$subdir/interchange-*.c ] ]
-set block_files       [lsort [glob -nocomplain $srcdir/$subdir/block-*.c ] ]
+set interchange_files [lsort [glob -nocomplain $srcdir/$subdir/interchange-*.c \
+			      $srcdir/$subdir/uns-interchange-*.c ] ]
+set block_files       [lsort [glob -nocomplain $srcdir/$subdir/block-*.c \
+			      $srcdir/$subdir/uns-block-*.c ] ]
 set vect_files        [lsort [glob -nocomplain $srcdir/$subdir/vect-*.c ] ]
 
 # Tests to be compiled.
diff --git a/gcc/testsuite/gcc.dg/graphite/uns-block-1.c b/gcc/testsuite/gcc.dg/graphite/uns-block-1.c
index 57d522b..c50b770 100644
--- a/gcc/testsuite/gcc.dg/graphite/uns-block-1.c
+++ b/gcc/testsuite/gcc.dg/graphite/uns-block-1.c
@@ -13,9 +13,9 @@ int
 main (void)
 {
   int i, j;
-  int sum = 0;
-  int A[MAX * MAX];
-  int B[MAX * MAX];
+  unsigned int sum = 0;
+  unsigned int A[MAX * MAX];
+  unsigned int B[MAX * MAX];
 
   /* These loops should be loop blocked.  */
   for (i = 0; i < MAX; i++)
diff --git a/gcc/testsuite/gcc.dg/graphite/uns-interchange-12.c b/gcc/testsuite/gcc.dg/graphite/uns-interchange-12.c
index dc26926..bd21ba9 100644
--- a/gcc/testsuite/gcc.dg/graphite/uns-interchange-12.c
+++ b/gcc/testsuite/gcc.dg/graphite/uns-interchange-12.c
@@ -7,9 +7,9 @@
 
 #define N 200
 
-int A[N][N], B[N][N], C[N][N];
+unsigned int A[N][N], B[N][N], C[N][N];
 
-static int __attribute__((noinline))
+static unsigned int __attribute__((noinline))
 matmult (void)
 {
   int i, j, k;
@@ -31,7 +31,8 @@ extern void abort ();
 int
 main (void)
 {
-  int i, j, res;
+  int i, j;
+  unsigned int res;
 
   for (i = 0; i < N; i++)
     for (j = 0; j < N; j++)
diff --git a/gcc/testsuite/gcc.dg/graphite/uns-interchange-14.c b/gcc/testsuite/gcc.dg/graphite/uns-interchange-14.c
index 36990ab..b1abd13 100644
--- a/gcc/testsuite/gcc.dg/graphite/uns-interchange-14.c
+++ b/gcc/testsuite/gcc.dg/graphite/uns-interchange-14.c
@@ -7,7 +7,7 @@
 
 #define N 200
 
-int A[N][N], B[N][N], C[N][N];
+unsigned int A[N][N], B[N][N], C[N][N];
 
 static void __attribute__((noinline))
 matmult (void)
@@ -30,7 +30,8 @@ extern void abort ();
 int
 main (void)
 {
-  int i, j, res = 0;
+  int i, j;
+  unsigned res = 0;
 
   for (i = 0; i < N; i++)
     for (j = 0; j < N; j++)
diff --git a/gcc/testsuite/gcc.dg/graphite/uns-interchange-15.c b/gcc/testsuite/gcc.dg/graphite/uns-interchange-15.c
index 3ddb74f..a5a2e27 100644
--- a/gcc/testsuite/gcc.dg/graphite/uns-interchange-15.c
+++ b/gcc/testsuite/gcc.dg/graphite/uns-interchange-15.c
@@ -7,9 +7,9 @@
 
 #define NMAX 2000
 
-static int x[NMAX], a[NMAX][NMAX];
+static unsigned int x[NMAX], a[NMAX][NMAX];
 
-static int __attribute__((noinline))
+static unsigned int __attribute__((noinline))
 mvt (long N)
 {
   int i,j;
@@ -27,7 +27,8 @@ extern void abort ();
 int
 main (void)
 {
-  int i, j, res;
+  int i, j;
+  unsigned int res;
 
   for (i = 0; i < NMAX; i++)
     for (j = 0; j < NMAX; j++)
diff --git a/gcc/testsuite/gcc.dg/graphite/uns-interchange-9.c b/gcc/testsuite/gcc.dg/graphite/uns-interchange-9.c
index cfec110..6bfd3d6 100644
--- a/gcc/testsuite/gcc.dg/graphite/uns-interchange-9.c
+++ b/gcc/testsuite/gcc.dg/graphite/uns-interchange-9.c
@@ -8,11 +8,11 @@
 #define N 111
 #define M 111
 
-static int __attribute__((noinline))
-foo (int *x)
+static unsigned int __attribute__((noinline))
+foo (unsigned int *x)
 {
   int i, j;
-  int sum = 0;
+  unsigned int sum = 0;
 
   for (j = 0; j < M; ++j)
     for (i = 0;  i < N; ++i)
@@ -26,8 +26,9 @@ extern void abort ();
 int
 main (void)
 {
-  int A[N*M];
-  int i, res;
+  unsigned int A[N*M];
+  int i;
+  unsigned int res;
 
   for (i = 0; i < N*M; i++)
     A[i] = 2;
diff --git a/gcc/testsuite/gcc.dg/graphite/uns-interchange-mvt.c b/gcc/testsuite/gcc.dg/graphite/uns-interchange-mvt.c
index 4b8f264..80f6789 100644
--- a/gcc/testsuite/gcc.dg/graphite/uns-interchange-mvt.c
+++ b/gcc/testsuite/gcc.dg/graphite/uns-interchange-mvt.c
@@ -7,9 +7,9 @@
 
 #define NMAX 2000
 
-static int x1[NMAX], x2[NMAX], a[NMAX][NMAX], y1[NMAX], y2[NMAX];
+static unsigned int x1[NMAX], x2[NMAX], a[NMAX][NMAX], y1[NMAX], y2[NMAX];
 
-static int __attribute__((noinline))
+static unsigned int __attribute__((noinline))
 mvt (long N)
 {
 
@@ -32,7 +32,8 @@ extern void abort ();
 int
 main (void)
 {
-  int i, j, res;
+  int i, j;
+  unsigned int res;
 
   for (i = 0; i < NMAX; i++)
     for (j = 0; j < NMAX; j++)
-- 
1.9.1


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]