[gcc(refs/users/guojiufu/heads/personal-branch)] openmp: Optimize triangular loop logical iterator to actual iterators computation using search for q

Jiu Fu Guo guojiufu@gcc.gnu.org
Mon Aug 10 06:50:13 GMT 2020


https://gcc.gnu.org/g:5acef69f9d3d9f3c537b5e5157519edf02f86c4d

commit 5acef69f9d3d9f3c537b5e5157519edf02f86c4d
Author: Jakub Jelinek <jakub@redhat.com>
Date:   Thu Jul 9 12:07:17 2020 +0200

    openmp: Optimize triangular loop logical iterator to actual iterators computation using search for quadratic equation root(s)
    
    This patch implements the optimized logical to actual iterators
    computation for triangular loops.
    
    I have a rough implementation using integers, but this one uses floating
    point.  There is a small problem that -fopenmp programs aren't linked with
    -lm, so it does it only if the hw has sqrt optab (and uses ifn rather than
    __builtin_sqrt because it obviously doesn't need errno handling etc.).
    
    Do you think it is ok this way, or should I use the integral computation
    using inlined isqrt (we have inequation of the form
    start >= x * t10 + t11 * (((x - 1) * x) / 2)
    where t10 and t11 are signed long long values and start unsigned long long,
    and the division by 2 actually is a problem for accuracy in some cases, so
    if we do it in integral, we need to do actually
          long long t12 = 2 * t10 - t11;
          unsigned long long t13 = t12 * t12 + start * 8 * t11;
          unsigned long long isqrt_ = isqrtull (t13);
          long long x = (((long long) isqrt_ - t12) / t11) >> 1;
    with careful overflow checking on all the computations before isqrtull
    (and on overflows use the fallback implementation).
    
    2020-07-09  Jakub Jelinek  <jakub@redhat.com>
    
            * omp-general.h (struct omp_for_data): Add min_inner_iterations
            and factor members.
            * omp-general.c (omp_extract_for_data): Initialize them and remember
            them in OMP_CLAUSE_COLLAPSE_COUNT if needed and restore from there.
            * omp-expand.c (expand_omp_for_init_counts): Fix up computation of
            counts[fd->last_nonrect] if fd->loop.n2 is INTEGER_CST.
            (expand_omp_for_init_vars): For
            fd->first_nonrect + 1 == fd->last_nonrect loops with for now
            INTEGER_CST fd->loop.n2 find quadratic equation roots instead of
            using fallback method when possible.
    
            * testsuite/libgomp.c/loop-19.c: New test.
            * testsuite/libgomp.c/loop-20.c: New test.

Diff:
---
 gcc/omp-expand.c                      | 211 +++++++++++++++++++++++++++++++++-
 gcc/omp-general.c                     |  23 +++-
 gcc/omp-general.h                     |   7 ++
 libgomp/testsuite/libgomp.c/loop-19.c |  86 ++++++++++++++
 libgomp/testsuite/libgomp.c/loop-20.c |  84 ++++++++++++++
 5 files changed, 404 insertions(+), 7 deletions(-)

diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c
index b1349d8f088..c3b8820e213 100644
--- a/gcc/omp-expand.c
+++ b/gcc/omp-expand.c
@@ -2137,7 +2137,7 @@ expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
       int non_rect_referenced = 0, non_rect = 0;
       for (i = 0; i < fd->collapse; i++)
 	{
-	  if ((i < fd->first_nonrect || fd->last_nonrect)
+	  if ((i < fd->first_nonrect || i > fd->last_nonrect)
 	      && !integer_zerop (counts[i]))
 	    t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
 	  if (fd->loops[i].non_rect_referenced)
@@ -2249,17 +2249,208 @@ expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
 	t = tem;
       if (i == fd->last_nonrect)
 	{
-	  /* Fallback implementation.  Evaluate the loops in between
-	     (inclusive) fd->first_nonrect and fd->last_nonrect at
-	     runtime unsing temporaries instead of the original iteration
-	     variables, in the body just bump the counter and compare
-	     with the desired value.  */
 	  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
 					false, GSI_CONTINUE_LINKING);
 	  tree stopval = t;
 	  tree idx = create_tmp_reg (type, ".count");
 	  expand_omp_build_assign (gsi, idx,
 				   build_zero_cst (type), true);
+	  basic_block bb_triang = NULL;
+	  if (fd->first_nonrect + 1 == fd->last_nonrect
+	      /* For now.  */
+	      && TREE_CODE (fd->loop.n2) == INTEGER_CST
+	      && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
+		  != CODE_FOR_nothing))
+	    {
+	      tree itype = TREE_TYPE (fd->loops[i].v);
+	      tree min_inner_iterations = fd->min_inner_iterations;
+	      tree factor = fd->factor;
+	      gcond *cond_stmt
+		= gimple_build_cond (NE_EXPR, factor,
+				     build_zero_cst (TREE_TYPE (factor)),
+				     NULL_TREE, NULL_TREE);
+	      gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
+	      edge e = split_block (gsi_bb (*gsi), cond_stmt);
+	      basic_block bb0 = e->src;
+	      e->flags = EDGE_TRUE_VALUE;
+	      e->probability = profile_probability::likely ();
+	      *gsi = gsi_after_labels (e->dest);
+	      tree slltype = long_long_integer_type_node;
+	      tree ulltype = long_long_unsigned_type_node;
+	      tree stopvalull = fold_convert (ulltype, stopval);
+	      stopvalull
+		= force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
+					    false, GSI_CONTINUE_LINKING);
+	      min_inner_iterations
+		= fold_convert (slltype, min_inner_iterations);
+	      min_inner_iterations
+		= force_gimple_operand_gsi (gsi, min_inner_iterations, true,
+					    NULL_TREE, false,
+					    GSI_CONTINUE_LINKING);
+	      factor = fold_convert (slltype, factor);
+	      factor
+		= force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
+					    false, GSI_CONTINUE_LINKING);
+	      tree min_inner_iterationsd
+		= fold_build1 (FLOAT_EXPR, double_type_node,
+			       min_inner_iterations);
+	      min_inner_iterationsd
+		= force_gimple_operand_gsi (gsi, min_inner_iterationsd, true,
+					    NULL_TREE, false,
+					    GSI_CONTINUE_LINKING);
+	      tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
+					  factor);
+	      factord = force_gimple_operand_gsi (gsi, factord, true,
+						  NULL_TREE, false,
+						  GSI_CONTINUE_LINKING);
+	      tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
+					   stopvalull);
+	      stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
+						   NULL_TREE, false,
+						   GSI_CONTINUE_LINKING);
+	      /* Temporarily disable flag_rounding_math, values will be
+		 decimal numbers divided by 2 and worst case imprecisions
+		 due to too large values ought to be caught later by the
+		 checks for fallback.  */
+	      int save_flag_rounding_math = flag_rounding_math;
+	      flag_rounding_math = 0;
+	      t = fold_build2 (RDIV_EXPR, double_type_node, factord,
+			       build_real (double_type_node, dconst2));
+	      tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
+				     min_inner_iterationsd, t);
+	      t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
+					     GSI_CONTINUE_LINKING);
+	      t = fold_build2 (MULT_EXPR, double_type_node, factord,
+			       build_real (double_type_node, dconst2));
+	      t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
+	      t = fold_build2 (PLUS_EXPR, double_type_node, t,
+			       fold_build2 (MULT_EXPR, double_type_node,
+					    t3, t3));
+	      flag_rounding_math = save_flag_rounding_math;
+	      t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
+					    GSI_CONTINUE_LINKING);
+	      cond_stmt
+		= gimple_build_cond (LT_EXPR, t,
+				     build_zero_cst (double_type_node),
+				     NULL_TREE, NULL_TREE);
+	      gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
+	      e = split_block (gsi_bb (*gsi), cond_stmt);
+	      basic_block bb1 = e->src;
+	      e->flags = EDGE_FALSE_VALUE;
+	      e->probability = profile_probability::very_likely ();
+	      *gsi = gsi_after_labels (e->dest);
+	      gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
+	      tree sqrtr = create_tmp_var (double_type_node);
+	      gimple_call_set_lhs (call, sqrtr);
+	      gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
+	      t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
+	      t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
+	      t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
+	      tree c = create_tmp_var (ulltype);
+	      tree d = create_tmp_var (ulltype);
+	      expand_omp_build_assign (gsi, c, t, true);
+	      t = fold_build2 (MINUS_EXPR, ulltype, c,
+			       build_one_cst (ulltype));
+	      t = fold_build2 (MULT_EXPR, ulltype, c, t);
+	      t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
+	      t = fold_build2 (MULT_EXPR, ulltype, fd->factor, t);
+	      tree t2 = fold_build2 (MULT_EXPR, ulltype, c,
+				     fd->min_inner_iterations);
+	      t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
+	      expand_omp_build_assign (gsi, d, t, true);
+	      t = fold_build2 (MULT_EXPR, ulltype, fd->factor, c);
+	      t = fold_build2 (PLUS_EXPR, ulltype,
+			       t, fd->min_inner_iterations);
+	      t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
+					     GSI_CONTINUE_LINKING);
+	      cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
+					     NULL_TREE, NULL_TREE);
+	      gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
+	      e = split_block (gsi_bb (*gsi), cond_stmt);
+	      basic_block bb2 = e->src;
+	      e->flags = EDGE_TRUE_VALUE;
+	      e->probability = profile_probability::very_likely ();
+	      *gsi = gsi_after_labels (e->dest);
+	      t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
+	      t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
+					    GSI_CONTINUE_LINKING);
+	      cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
+					     NULL_TREE, NULL_TREE);
+	      gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
+	      e = split_block (gsi_bb (*gsi), cond_stmt);
+	      basic_block bb3 = e->src;
+	      e->flags = EDGE_FALSE_VALUE;
+	      e->probability = profile_probability::very_likely ();
+	      *gsi = gsi_after_labels (e->dest);
+	      t = fold_convert (itype, c);
+	      t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
+	      t = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t);
+	      t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
+					    GSI_CONTINUE_LINKING);
+	      expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
+	      t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
+	      t2 = fold_convert (itype, t2);
+	      t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
+	      t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
+	      if (fd->loops[i].m1)
+		{
+		  t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
+		  t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
+		}
+	      expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
+	      e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
+	      bb_triang = e->src;
+	      *gsi = gsi_after_labels (e->dest);
+	      remove_edge (e);
+	      e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
+	      e->probability = profile_probability::very_unlikely ();
+	      e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
+	      e->probability = profile_probability::very_unlikely ();
+	      e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
+	      e->probability = profile_probability::very_unlikely ();
+
+	      basic_block bb4 = create_empty_bb (bb0);
+	      add_bb_to_loop (bb4, bb0->loop_father);
+	      e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
+	      e->probability = profile_probability::unlikely ();
+	      make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
+	      set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
+	      set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
+	      gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
+	      t2 = fold_build2 (TRUNC_DIV_EXPR, type,
+				counts[i], counts[i - 1]);
+	      t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
+					     GSI_CONTINUE_LINKING);
+	      t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
+	      t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
+	      t = fold_convert (itype, t);
+	      t2 = fold_convert (itype, t2);
+	      t = fold_build2 (MULT_EXPR, itype, t,
+			       fold_convert (itype, fd->loops[i].step));
+	      t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
+	      t2 = fold_build2 (MULT_EXPR, itype, t2,
+				fold_convert (itype, fd->loops[i - 1].step));
+	      t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
+	      t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
+					     false, GSI_CONTINUE_LINKING);
+	      stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
+	      gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
+	      if (fd->loops[i].m1)
+		{
+		  t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
+				    fd->loops[i - 1].v);
+		  t = fold_build2 (PLUS_EXPR, itype, t, t2);
+		}
+	      t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
+					    false, GSI_CONTINUE_LINKING);
+	      stmt = gimple_build_assign (fd->loops[i].v, t);
+	      gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
+	    }
+	  /* Fallback implementation.  Evaluate the loops in between
+	     (inclusive) fd->first_nonrect and fd->last_nonrect at
+	     runtime unsing temporaries instead of the original iteration
+	     variables, in the body just bump the counter and compare
+	     with the desired value.  */
 	  gimple_stmt_iterator gsi2 = *gsi;
 	  basic_block entry_bb = gsi_bb (gsi2);
 	  edge e = split_block (entry_bb, gsi_stmt (gsi2));
@@ -2455,6 +2646,14 @@ expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
 	    *gsi = gsi_last_bb (gsi_bb (*gsi));
 	  else
 	    gsi_prev (gsi);
+	  if (bb_triang)
+	    {
+	      e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
+	      make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
+	      *gsi = gsi_after_labels (e->dest);
+	      if (!gsi_end_p (*gsi))
+		gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
+	    }
 	}
       else
 	{
diff --git a/gcc/omp-general.c b/gcc/omp-general.c
index 2a47466f897..c6878cfec66 100644
--- a/gcc/omp-general.c
+++ b/gcc/omp-general.c
@@ -212,6 +212,8 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
   fd->sched_modifiers = 0;
   fd->chunk_size = NULL_TREE;
   fd->simd_schedule = false;
+  fd->min_inner_iterations = NULL_TREE;
+  fd->factor = NULL_TREE;
   collapse_iter = NULL;
   collapse_count = NULL;
 
@@ -653,6 +655,8 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
 		  else
 		    t2 = fold_build2 (TRUNC_DIV_EXPR, itype, t2, step);
 		  t2 = fold_convert (llutype, t2);
+		  fd->min_inner_iterations = t;
+		  fd->factor = t2;
 		  t = fold_build2 (MULT_EXPR, llutype, t,
 				   single_nonrect_count);
 		  tree t3 = fold_build2 (MINUS_EXPR, llutype,
@@ -707,7 +711,17 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
   if (collapse_count && *collapse_count == NULL)
     {
       if (count)
-	*collapse_count = fold_convert_loc (loc, iter_type, count);
+	{
+	  *collapse_count = fold_convert_loc (loc, iter_type, count);
+	  if (fd->min_inner_iterations && fd->factor)
+	    {
+	      t = make_tree_vec (3);
+	      TREE_VEC_ELT (t, 0) = *collapse_count;
+	      TREE_VEC_ELT (t, 1) = fd->min_inner_iterations;
+	      TREE_VEC_ELT (t, 2) = fd->factor;
+	      *collapse_count = t;
+	    }
+	}
       else
 	*collapse_count = create_tmp_var (iter_type, ".count");
     }
@@ -717,6 +731,13 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
       fd->loop.v = *collapse_iter;
       fd->loop.n1 = build_int_cst (TREE_TYPE (fd->loop.v), 0);
       fd->loop.n2 = *collapse_count;
+      if (TREE_CODE (fd->loop.n2) == TREE_VEC)
+	{
+	  gcc_assert (fd->non_rect);
+	  fd->min_inner_iterations = TREE_VEC_ELT (fd->loop.n2, 1);
+	  fd->factor = TREE_VEC_ELT (fd->loop.n2, 2);
+	  fd->loop.n2 = TREE_VEC_ELT (fd->loop.n2, 0);
+	}
       fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1);
       fd->loop.m1 = NULL_TREE;
       fd->loop.m2 = NULL_TREE;
diff --git a/gcc/omp-general.h b/gcc/omp-general.h
index a76396577b9..ec0f2a4becb 100644
--- a/gcc/omp-general.h
+++ b/gcc/omp-general.h
@@ -78,6 +78,13 @@ struct omp_for_data
   unsigned char sched_modifiers;
   enum omp_clause_schedule_kind sched_kind;
   struct omp_for_data_loop *loops;
+  /* The following are relevant only for non-rectangular loops
+     where only a single loop depends on an outer loop iterator.  */
+  tree min_inner_iterations; /* Number of iterations of the inner
+				loop with either the first or last
+				outer iterator, depending on which
+				results in fewer iterations.  */
+  tree factor; /* (m2 - m1) * outer_step / inner_step.  */
 };
 
 #define OACC_FN_ATTRIB "oacc function"
diff --git a/libgomp/testsuite/libgomp.c/loop-19.c b/libgomp/testsuite/libgomp.c/loop-19.c
new file mode 100644
index 00000000000..732ebb04153
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/loop-19.c
@@ -0,0 +1,86 @@
+/* { dg-do run } */
+
+extern void abort (void);
+
+int x, i, j;
+volatile int a, b, c, d, e, f, g, h;
+int k[16][67];
+
+int
+main ()
+{
+  int niters;
+  for (i = 0; i < 16; i++)
+    for (j = i * 2 + 1; j < 4 * i + 3; j++)
+      k[i][j] = 1;
+  a = 0; b = 16; c = 1; d = 2; e = 1; f = 4; g = 3; h = 1;
+  niters = 0; i = -100; j = -100; x = -100;
+  #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+  for (i = 0; i < 16; i++)
+    for (j = i * 2 + 1; j < 4 * i + 3; j++)
+      {
+	if (i < 0 || i >= 16 || j < 2 * i + 1 || j >= 3 + i * 4 || k[i][j] != 1)
+	  abort ();
+	k[i][j]++;
+	x = i * 1024 + (j & 1023);
+	niters++;
+      }
+  if (i != 16 || j != 63 || x != 15422 || niters != 272)
+    abort ();
+  niters = 0; i = -100; j = -100; x = -100;
+  #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+  for (i = a; i < b; i += c)
+    for (j = d * i + e; j < g + i * f; j += h)
+      {
+	if (i < 0 || i >= 16 || j < 2 * i + 1 || j >= 3 + i * 4 || k[i][j] != 2)
+	  abort ();
+	k[i][j]++;
+	x = i * 1024 + (j & 1023);
+	niters++;
+      }
+  if (i != 16 || j != 63 || x != 15422 || niters != 272)
+    abort ();
+  for (i = 0; i < 16; i++)
+    for (j = i * 2 + 1; j < 4 * i + 3; j++)
+      if (k[i][j] == 3)
+	k[i][j] = 0;
+      else
+	abort ();
+  for (i = 0; i < 16; i++)
+    for (j = i * 2 + 1; j < 2 * i + 7; j++)
+      k[i][j] = 1;
+  a = 0; b = 16; c = 1; d = 2; e = 1; f = 2; g = 7; h = 1;
+  niters = 0; i = -100; j = -100; x = -100;
+  #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+  for (i = 0; i < 16; i++)
+    for (j = i * 2 + 1; j < 2 * i + 7; j++)
+      {
+	if (i < 0 || i >= 16 || j < 2 * i + 1 || j >= 7 + i * 2 || k[i][j] != 1)
+	  abort ();
+	k[i][j]++;
+	x = i * 1024 + (j & 1023);
+	niters++;
+      }
+  if (i != 16 || j != 37 || x != 15396 || niters != 96)
+    abort ();
+  niters = 0; i = -100; j = -100; x = -100;
+  #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+  for (i = a; i < b; i += c)
+    for (j = d * i + e; j < g + i * f; j += h)
+      {
+	if (i < 0 || i >= 16 || j < 2 * i + 1 || j >= 7 + i * 2 || k[i][j] != 2)
+	  abort ();
+	k[i][j]++;
+	x = i * 1024 + (j & 1023);
+	niters++;
+      }
+  if (i != 16 || j != 37 || x != 15396 || niters != 96)
+    abort ();
+  for (i = 0; i < 16; i++)
+    for (j = i * 2 + 1; j < 2 * i + 7; j++)
+      if (k[i][j] == 3)
+	k[i][j] = 0;
+      else
+	abort ();
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/loop-20.c b/libgomp/testsuite/libgomp.c/loop-20.c
new file mode 100644
index 00000000000..c3265ac0470
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/loop-20.c
@@ -0,0 +1,84 @@
+/* { dg-do run } */
+
+extern void abort (void);
+
+unsigned long long int x, i, j;
+volatile unsigned long long int a, b, c, d, e, f, g, h;
+int k[4][206];
+
+int
+main ()
+{
+  long long int niters;
+  for (j = ~0ULL / 2 - 32; j < ((~0ULL / 2) + 6); j++)
+    k[0][j - ~0ULL / 2 + 64] = 1;
+  a = 1; b = 2; c = 1; d = 0; e = ~0ULL / 2 - 32; f = ((~0ULL / 2) + 6); g = 0; h = 1;
+  niters = 0; i = -100; j = -100; x = -100;
+  #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+  for (i = 1; i < 2; i++)
+    for (j = ~0ULL / 2 - 32; j < i * ((~0ULL / 2) + 6); j++)
+      {
+	if (i != 1 || j < ~0ULL / 2 - 32 || j >= ((~0ULL / 2) + 6) || k[0][j - ~0ULL / 2 + 64] != 1)
+	  abort ();
+	k[0][j - ~0ULL / 2 + 64]++;
+	x = i * 1024 + (j & 1023);
+	niters++;
+      }
+  if (i != 2 || j != ((~0ULL / 2) + 6) || x != 1028 || niters != 38)
+    abort ();
+  niters = 0; i = -100; j = -100; x = -100;
+  #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+  for (i = a; i < b; i += c)
+    for (j = d * i + e; j < g + i * f; j += h)
+      {
+	if (i != 1 || j < ~0ULL / 2 - 32 || j >= ((~0ULL / 2) + 6) || k[0][j - ~0ULL / 2 + 64] != 2)
+	  abort ();
+	k[0][j - ~0ULL / 2 + 64]++;
+	x = i * 1024 + (j & 1023);
+	niters++;
+      }
+  if (i != 2 || j != ((~0ULL / 2) + 6) || x != 1028 || niters != 38)
+    abort ();
+  for (j = ~0ULL / 2 - 32; j < ((~0ULL / 2) + 6); j++)
+    if (k[0][j - ~0ULL / 2 + 64] == 3)
+      k[0][j - ~0ULL / 2 + 64] = 0;
+    else
+      abort ();
+  for (i = 1; i < 4; i++)
+    for (j = 64ULL * i; j < i * 32ULL + 110; j++)
+      k[i][j] = 1;
+  a = 1; b = 4; c = 1; d = 64ULL; e = 0; f = 32ULL; g = 110ULL; h = 1;
+  niters = 0; i = -100; j = -100; x = -100;
+  #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+  for (i = 1; i < 4; i++)
+    for (j = 64ULL * i; j < i * 32ULL + 110; j++)
+      {
+	if (i < 1 || i >= 4 || j < 64ULL * i || j >= i * 32ULL + 110 || k[i][j] != 1)
+	  abort ();
+	k[i][j]++;
+	x = i * 1024 + (j & 1023);
+	niters++;
+      }
+  if (i != 4 || j != 206 || x != 3277 || niters != 138)
+    abort ();
+  niters = 0; i = -100; j = -100; x = -100;
+  #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+  for (i = a; i < b; i += c)
+    for (j = d * i + e; j < g + i * f; j += h)
+      {
+	if (i < 1 || i >= 4 || j < 64ULL * i || j >= i * 32ULL + 110 || k[i][j] != 2)
+	  abort ();
+	k[i][j]++;
+	x = i * 1024 + (j & 1023);
+	niters++;
+      }
+  if (i != 4 || j != 206 || x != 3277 || niters != 138)
+    abort ();
+  for (i = 1; i < 4; i++)
+    for (j = 64ULL * i; j < i * 32ULL + 110; j++)
+      if (k[i][j] == 3)
+	k[i][j] = 0;
+      else
+	abort ();
+  return 0;
+}


More information about the Gcc-cvs mailing list