[PATCH V2] Split loop for NE condition.

guojiufu guojiufu@linux.ibm.com
Tue Jun 1 03:28:16 GMT 2021


On 2021-05-26 17:50, Richard Biener wrote:
> On Mon, 17 May 2021, Jiufu Guo wrote:
> 
...
>> 
>>   while (++k > n)
>>     a[k] = b[k]  + 1;
>> 
>> then for the second loop, it could be optimized.
> 
> Btw, I think even the first loop should be vectorized.  I see we do
> not handle it in niter analysis:
> 
> Analyzing loop at t.c:3
> t.c:3:14: note:  === analyze_loop_nest ===
> t.c:3:14: note:   === vect_analyze_loop_form ===
> t.c:3:14: note:    === get_loop_niters ===
> t.c:3:14: missed:   not vectorized: number of iterations cannot be
> computed.
> 
> but the number of iterations should be UINT_MAX - k (unless I'm
> missing sth), may_be_zero would be sth like k < n.  It would be
> nice to not split this into loops that niter analysis cannot handle ...
> 
For this case on the first loop, it is not vectorized by trunk gcc.
While since we know the type of 'k' and 'n' is unsigned, the umber of 
iterations
would be computed.  I'm wondering about enhancing 
'number_of_iterations_cond' may
able to handle this.

...
>> +
>> +/* { dg-final { scan-tree-dump-times "Loop split" 9 "lsplit" } } */
> 
> Please consider making the testcase execute ones, verifying computation
> results.
Thanks!
> 
>> diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
>> index 3a09bbc39e5..5c1742b5ff4 100644
>> --- a/gcc/tree-ssa-loop-split.c
>> +++ b/gcc/tree-ssa-loop-split.c
>> @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.  If not see
>>  #include "cfghooks.h"
>>  #include "gimple-fold.h"
>>  #include "gimplify-me.h"
>> +#include "tree-ssa-loop-ivopts.h"
>> 
>>  /* This file implements two kinds of loop splitting.
>> 
>> @@ -233,7 +234,8 @@ easy_exit_values (class loop *loop)
>>     this.  The loops need to fulfill easy_exit_values().  */
>> 
> 
> please document use_prev
Thanks.
> 
>>  static void
>> -connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
>> +connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e,
>> +		   bool use_prev = false)
>>  {
>>    basic_block rest = loop_preheader_edge (loop2)->src;
>>    gcc_assert (new_e->dest == rest);
>> @@ -279,7 +281,8 @@ connect_loop_phis (class loop *loop1, class loop 
>> *loop2, edge new_e)
>> 
>>        gphi * newphi = create_phi_node (new_init, rest);
>>        add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
>> -      add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);
>> +      add_phi_arg (newphi, use_prev ? PHI_RESULT (phi_first) : next, 
>> new_e,
>> +		   UNKNOWN_LOCATION);
>>        SET_USE (op, new_init);
>>      }
>>  }
>> @@ -1593,6 +1596,184 @@ split_loop_on_cond (struct loop *loop)
>>    return do_split;
>>  }
>> 
>> +/* Check if the LOOP exit branch likes "if (idx != bound)",
> 
> is like
Thanks.
> 
>> +   Return the branch edge which exit loop, if overflow/wrap
>> +   may happen on "idx".  */
> 
> I think we only want to handle wrapping (thus not undefined overflow).
Yes, you are right.
> 
>> +
>> +static edge
>> +get_ne_cond_branch (struct loop *loop)
>> +{
>> +  int i;
>> +  edge e;
>> +
>> +  auto_vec<edge> edges = get_loop_exit_edges (loop);
>> +  FOR_EACH_VEC_ELT (edges, i, e)
>> +    {
>> +      /* Check if there is possible wrap/overflow.  */
>> +      class tree_niter_desc niter;
>> +      if (!number_of_iterations_exit (loop, e, &niter, false, false))
>> +	continue;
>> +      if (niter.control.no_overflow)
>> +	return NULL;
>> +      if (niter.cmp != NE_EXPR)
>> +	continue;
>> +
>> +      /* Check loop is simple to split.  */
> 
> it seems like the following and below condition mean "simple"
> is either all code is before or after the exit in question,
> please improve the comment to explain the two cases.
> 
>> +      if (single_pred_p (loop->latch)
>> +	  && single_pred_edge (loop->latch)->src == e->src
>> +	  && (gsi_end_p (gsi_start_nondebug_bb (loop->latch))))
> 
> split_loop uses empty_block_p (loop->latch).
yes, empty_block_p also filter label/debug insts.
> 
>> +	return e;
>> +
>> +      /* Simple header.  */
>> +      if (e->src == loop->header)
>> +	{
>> +	  if (get_virtual_phi (e->src))
>> +	    continue;
> 
> So this disqualifies all loops which store to memory?
We do not need to check this condition since here allows only the 
i++/++i header.
> 
>> +
>> +	  /* Only one phi.  */
>> +	  gphi_iterator psi = gsi_start_phis (e->src);
>> +	  if (gsi_end_p (psi))
>> +	    continue;
>> +	  gsi_next (&psi);
>> +	  if (!gsi_end_p (psi))
>> +	    continue;
>> +
>> +	  /* ++i or ++i */
>> +	  gimple_stmt_iterator gsi = gsi_start_bb (e->src);
> 
> I think you want gsi_start_nondebug_after_labels_bb (e->src)
Thanks.
> 
>> +
>> +	  gimple *gc = last_stmt (e->src);
>> +	  tree idx = gimple_cond_lhs (gc);
> 
> you have to check the last stmt is a GIMPLE_COND, we have
> recorded exits that exit on EH for example.

Above checking "number_of_iterations_exit" could make sure
it is a GIMPLE_COND.  I would add gcc_assert to check/hit this.
> 
>> +	  if (expr_invariant_in_loop_p (loop, idx))
>> +	    idx = gimple_cond_rhs (gc);
>> +
>> +	  gimple *s1 = gsi_stmt (gsi);
>> +	  if (!(is_gimple_assign (s1) && idx
>> +		&& (idx == gimple_assign_lhs (s1)
>> +		    || idx == gimple_assign_rhs1 (s1))))
>> +	    continue;
> 
> I think you want to allow a IV PHI plus an IV increment
> in the block and nothing else.  The pattern matching isn't
> exact though and could be fooled by, say
> 
>  header:
>    i_2 = PHI <i_5(D), i_3(latch)>
>    _3 = i_2 + 5;
>    if (i_2 != n_4(D))
>      goto exit;
> 
>  latch:
>    i_3 = i_2 + 1;
>    foo (_3);
>    goto header;
> 
> that is, the increment you matched is not the new iterator value
> but instead a derived value used in a stmt in the latch.  So
> I think if you really cannot allow any code besides the IV
> increment then you need to actually get the PHI node and
> match the only real stmt with the definition of the PHIs
> latch edge value.
You are right!  Checking PHI's result/latch-edge value would be better.
Thanks.

> 
> Or relax all this, of course.
It would easy to handle the above cases: e->src before latch, or simple 
header.
To relax this, we may need to peel (partial peel) one loop between the 
first loop
and the second loop, or jump into the middle of the second loop.  I had 
a quick
try to implement this, but not find a good way.
Thanks for any suggestions!

> 
>> +	  gsi_next (&gsi);
>> +	  if (!gsi_end_p (gsi) && gsi_stmt (gsi) == gc)
>> +	    return e;
>> +	}
>> +    }
>> +
>> +  return NULL;
>> +}
>> +

Below is an updated patch.  Thanks again for your comments!


diff --git a/gcc/testsuite/g++.dg/vect/pr98064.cc 
b/gcc/testsuite/g++.dg/vect/pr98064.cc
index 74043ce7725..dcb2985d05a 100644
--- a/gcc/testsuite/g++.dg/vect/pr98064.cc
+++ b/gcc/testsuite/g++.dg/vect/pr98064.cc
@@ -1,5 +1,7 @@
  // { dg-do compile }
-// { dg-additional-options "-O3" }
+// { dg-additional-options "-O3 -Wno-stringop-overflow" }
+/* There is warning message when "short g = var_8; g; g++"
+   is optimized/analyzed as string operation,e.g. memset.  */

  const long long &min(const long long &__a, long long &__b) {
    if (__b < __a)
diff --git a/gcc/testsuite/gcc.dg/loop-split1.c 
b/gcc/testsuite/gcc.dg/loop-split1.c
new file mode 100644
index 00000000000..dd2d03a7b96
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-split1.c
@@ -0,0 +1,101 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */
+
+void
+foo (int *a, int *b, unsigned l, unsigned n)
+{
+  while (++l != n)
+    a[l] = b[l] + 1;
+}
+void
+foo_1 (int *a, int *b, unsigned n)
+{
+  unsigned l = 0;
+  while (++l != n)
+    a[l] = b[l] + 1;
+}
+
+void
+foo1 (int *a, int *b, unsigned l, unsigned n)
+{
+  while (l++ != n)
+    a[l] = b[l] + 1;
+}
+
+/* No wrap.  */
+void
+foo1_1 (int *a, int *b, unsigned n)
+{
+  unsigned l = 0;
+  while (l++ != n)
+    a[l] = b[l] + 1;
+}
+
+unsigned
+foo2 (char *a, char *b, unsigned l, unsigned n)
+{
+  while (++l != n)
+    if (a[l] != b[l])
+      break;
+
+  return l;
+}
+
+unsigned
+foo2_1 (char *a, char *b, unsigned l, unsigned n)
+{
+  l = 0;
+  while (++l != n)
+    if (a[l] != b[l])
+      break;
+
+  return l;
+}
+
+unsigned
+foo3 (char *a, char *b, unsigned l, unsigned n)
+{
+  while (l++ != n)
+    if (a[l] != b[l])
+      break;
+
+  return l;
+}
+
+/* No wrap.  */
+unsigned
+foo3_1 (char *a, char *b, unsigned l, unsigned n)
+{
+  l = 0;
+  while (l++ != n)
+    if (a[l] != b[l])
+      break;
+
+  return l;
+}
+
+void
+bar ();
+void
+foo4 (unsigned n, unsigned i)
+{
+  do
+    {
+      if (i == n)
+	return;
+      bar ();
+      ++i;
+    }
+  while (1);
+}
+
+unsigned
+find_skip_diff (char *p, char *q, unsigned n, unsigned i)
+{
+  while (p[i] == q[i] && ++i != n)
+    p++, q++;
+
+  return i;
+}
+
+/* { dg-final { scan-tree-dump-times "Loop split" 8 "lsplit" } } */
diff --git a/gcc/testsuite/gcc.dg/loop-split2.c 
b/gcc/testsuite/gcc.dg/loop-split2.c
new file mode 100644
index 00000000000..0d3fded3f61
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-split2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+extern void abort (void);
+extern void exit (int);
+
+#define NI __attribute__ ((noinline))
+
+void NI
+foo (int *a, int *b, unsigned char l, unsigned char n)
+{
+  while (++l != n)
+    a[l] = b[l] + 1;
+}
+
+unsigned NI
+bar (int *a, int *b, unsigned char l, unsigned char n)
+{
+  while (l++ != n)
+    if (a[l] != b[l])
+      break;
+
+  return l;
+}
+
+int a[258];
+int b[258];
+
+int main()
+{
+  __builtin_memcpy (b, a, sizeof (a));
+
+  if (bar (a, b, 3, 8) != 9)
+    abort ();
+
+  if (bar (a, b, 8, 3) != 4)
+    abort ();
+
+  b[100] += 1;
+  if (bar (a, b, 90, 110) != 100)
+    abort ();
+
+  if (bar (a, b, 110, 105) != 100)
+    abort ();
+
+  foo (a, b, 99, 99);
+  a[99] = b[99] + 1;
+  for (int i = 0; i < 256; i++)
+    if (a[i] != b[i] + 1)
+      abort();
+
+  exit (0);
+}
+
diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
index 3a09bbc39e5..0428b0abea6 100644
--- a/gcc/tree-ssa-loop-split.c
+++ b/gcc/tree-ssa-loop-split.c
@@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.  If not see
  #include "cfghooks.h"
  #include "gimple-fold.h"
  #include "gimplify-me.h"
+#include "tree-ssa-loop-ivopts.h"

  /* This file implements two kinds of loop splitting.

@@ -229,11 +230,14 @@ easy_exit_values (class loop *loop)
     conditional).  I.e. the second loop can now be entered either
     via the original entry or via NEW_E, so the entry values of LOOP2
     phi nodes are either the original ones or those at the exit
-   of LOOP1.  Insert new phi nodes in LOOP2 pre-header reflecting
-   this.  The loops need to fulfill easy_exit_values().  */
+   of LOOP1.  Selecting the previous value instead next value as the
+   exit value of LOOP1 if USE_PREV is true.  Insert new phi nodes in
+   LOOP2 pre-header reflecting this.  The loops need to fulfill
+   easy_exit_values().  */

  static void
-connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
+connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e,
+		   bool use_prev = false)
  {
    basic_block rest = loop_preheader_edge (loop2)->src;
    gcc_assert (new_e->dest == rest);
@@ -279,7 +283,8 @@ connect_loop_phis (class loop *loop1, class loop 
*loop2, edge new_e)

        gphi * newphi = create_phi_node (new_init, rest);
        add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
-      add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);
+      add_phi_arg (newphi, use_prev ? PHI_RESULT (phi_first) : next, 
new_e,
+		   UNKNOWN_LOCATION);
        SET_USE (op, new_init);
      }
  }
@@ -1593,6 +1598,199 @@ split_loop_on_cond (struct loop *loop)
    return do_split;
  }

+/* Check if the LOOP exit branch is like "if (idx != bound)",
+   Return the branch edge which exit loop, if wrap may happen on "idx". 
  */
+
+static edge
+get_ne_cond_branch (struct loop *loop)
+{
+  int i;
+  edge e;
+
+  auto_vec<edge> edges = get_loop_exit_edges (loop);
+  FOR_EACH_VEC_ELT (edges, i, e)
+    {
+      /* Check if there is possible wrap.  */
+      class tree_niter_desc niter;
+      if (!number_of_iterations_exit (loop, e, &niter, false, false))
+	continue;
+      if (niter.control.no_overflow)
+	return NULL;
+      if (niter.cmp != NE_EXPR)
+	continue;
+
+      /* If exit edge is just before the empty latch, it is easy to 
link
+	 the split loops: just jump from the exit edge of one loop to the
+	 header of new loop.  */
+      if (single_pred_p (loop->latch)
+	  && single_pred_edge (loop->latch)->src == e->src
+	  && empty_block_p (loop->latch))
+	return e;
+
+      /* If exit edge is at end of header, and header contains i++ or 
++i,
+	 only, it is simple to link the split loops:  jump from the end of
+	 one loop header to the new loop header, and use unchanged PHI
+	 result of first loop as the entry PHI value of the second loop.  */
+      if (e->src == loop->header)
+	{
+	  /* Only one phi.  */
+	  gphi_iterator psi = gsi_start_phis (e->src);
+	  if (gsi_end_p (psi))
+	    continue;
+	  gphi *phi = psi.phi ();
+	  gsi_next (&psi);
+	  if (!gsi_end_p (psi))
+	    continue;
+
+	  /* Get the idx from last stmt (the gcond) of e->src.  */
+	  gimple *gc = last_stmt (e->src);
+	  gcc_assert (gimple_code (gc) == GIMPLE_COND);
+	  tree idx = gimple_cond_lhs (gc);
+	  if (expr_invariant_in_loop_p (loop, idx))
+	    idx = gimple_cond_rhs (gc);
+
+	  tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
+	  tree prev = PHI_RESULT (phi);
+	  if (idx != prev && idx != next)
+	    continue;
+
+	  /* ++i or ++i */
+	  gimple_stmt_iterator gsi
+	    = gsi_start_nondebug_after_labels_bb (e->src);
+	  if (gsi_end_p (gsi))
+	    continue;
+
+	  gimple *s1 = gsi_stmt (gsi);
+	  if (!is_gimple_assign (s1) || gimple_assign_lhs (s1) != next
+	      || gimple_assign_rhs1 (s1) != prev)
+	    continue;
+
+	  gsi_next (&gsi);
+	  if (!gsi_end_p (gsi) && gsi_stmt (gsi) == gc)
+	    return e;
+	}
+    }
+
+  return NULL;
+}
+
+/* Split the LOOP with NE_EXPR into two loops with GT_EXPR and LT_EXPR. 
  */
+
+static bool
+split_ne_loop (struct loop *loop, edge cond_e)
+{
+  initialize_original_copy_tables ();
+
+  struct loop *loop2 = loop_version (loop, boolean_true_node, NULL,
+				     profile_probability::always (),
+				     profile_probability::never (),
+				     profile_probability::always (),
+				     profile_probability::always (), true);
+
+  gcc_assert (loop2);
+  update_ssa (TODO_update_ssa);
+
+  basic_block loop2_cond_exit_bb = get_bb_copy (cond_e->src);
+  free_original_copy_tables ();
+
+  gcond *gc = as_a<gcond *> (last_stmt (cond_e->src));
+  gcond *dup_gc = as_a<gcond *> (last_stmt (loop2_cond_exit_bb));
+
+  /* Change if (i != n) to LOOP1:if (i > n) and LOOP2:if (i < n) */
+  bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc));
+  enum tree_code up_code = inv ? LT_EXPR : GT_EXPR;
+  enum tree_code down_code = inv ? GT_EXPR : LT_EXPR;
+
+  gimple_cond_set_code (gc, up_code);
+  gimple_cond_set_code (dup_gc, down_code);
+
+  /* Link the exit cond edge to new loop.  */
+  gcond *break_cond = as_a<gcond *> (gimple_copy (gc));
+  edge pred_e = single_pred_edge (loop->latch);
+  bool simple_loop = pred_e && pred_e->src == cond_e->src
+		     && (gsi_end_p (gsi_start_nondebug_bb (loop->latch)));
+  if (simple_loop)
+    gimple_cond_set_code (break_cond, down_code);
+  else
+    gimple_cond_make_true (break_cond);
+
+  basic_block break_bb = split_edge (cond_e);
+  gimple_stmt_iterator gsi = gsi_last_bb (break_bb);
+  gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);
+
+  edge to_exit = single_succ_edge (break_bb);
+  edge to_new_loop = make_edge (break_bb, loop_preheader_edge 
(loop2)->src, 0);
+  to_new_loop->flags |= EDGE_TRUE_VALUE;
+  to_exit->flags |= EDGE_FALSE_VALUE;
+  to_exit->flags &= ~EDGE_FALLTHRU;
+  to_exit->probability = cond_e->probability;
+  to_new_loop->probability = to_exit->probability.invert ();
+
+  update_ssa (TODO_update_ssa);
+
+  connect_loop_phis (loop, loop2, to_new_loop, !simple_loop);
+
+  rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop);
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, ";; Loop split on wrap index.\n");
+
+  return true;
+}
+
+/* Checks if LOOP contains a suitable NE_EXPR conditional block to 
split.
+L_H:
+ if (i!=N)
+   S;
+ else
+   goto EXIT;
+ i++;
+ goto L_H;
+
+The "i!=N" is like "i>N || i<N", then it could be transform to:
+
+L_H:
+ if (i>N)
+   S;
+ else
+   goto EXIT;
+ i++;
+ goto L_H;
+L1_H:
+ if (i<N)
+   S;
+ else
+   goto EXIT;
+ i++;
+ goto L1_H;
+
+The loop with "i<N" is in favor both GIMPLE and RTL passes.  */
+
+static bool
+split_loop_on_ne_cond (class loop *loop)
+{
+  int num = 0;
+  basic_block *bbs = get_loop_body (loop);
+
+  if (!can_copy_bbs_p (bbs, loop->num_nodes))
+    {
+      free (bbs);
+      return false;
+    }
+
+  for (unsigned i = 0; i < loop->num_nodes; i++)
+    num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
+  free (bbs);
+
+  if (num > param_max_peeled_insns)
+    return false;
+
+  edge branch_edge = get_ne_cond_branch (loop);
+  if (branch_edge && split_ne_loop (loop, branch_edge))
+    return true;
+
+  return false;
+}
+
  /* Main entry point.  Perform loop splitting on all suitable loops.  */

  static unsigned int
@@ -1622,7 +1820,8 @@ tree_ssa_split_loops (void)
        if (optimize_loop_for_size_p (loop))
  	continue;

-      if (split_loop (loop) || split_loop_on_cond (loop))
+      if (split_loop (loop) || split_loop_on_cond (loop)
+	  || split_loop_on_ne_cond (loop))
  	{
  	  /* Mark our containing loop as having had some split inner loops.  
*/
  	  loop_outer (loop)->aux = loop;


More information about the Gcc-patches mailing list