Unreviewed 3.0 patch to fix loop unrolling

Zoltan Hidvegi hzoli@hzoli.2y.net
Mon Jul 16 14:16:00 GMT 2001


Bernd Schmidt wrote:
> On Mon, 16 Jul 2001, Zoltan Hidvegi wrote:
> 
> > I've sent some patches to fix some loop unrolling bugs, that are most
> > serious on architectures that use -fbranch-count-reg, e.g. PowerPC.
> > Could someone please review it, and approve it or suggest some
> > alternative fix?  The patch is
> > http://gcc.gnu.org/ml/gcc-patches/2001-07/msg00391.html , fixes PR
> > 3384.
> 
> There isn't really enough information there to understand what the
> patch is doing.

The patch to doloop.c is really the same as your patch, except that you
calculate diff/(abs_inc*unroll) then increment it if the modulo is less
than abs_ins * (unroll-1), I instead calculate
(diff+abs_inc-1)/(abs_inc*unroll).  Even though the conditional increment
gets optimized to straight-line code, the later is a bit better, at least
on PowerPC.

One can say that (diff+abs_inc-1) can overflow.  It can overflow only if
diff > UINT_MAX-abs_inc+1.  But the difference between the final value of
the loop variable and the initial value is at least diff, and it is a
multiple of abs_inc, and also bigger than UINT_MAX-abs_inc+1, this can
only be UINT_MAX+1 since abs_inc is a power of 2.  This means that the
loop is an infinite loop, since the loop variable will overflow before
reaching the final value.  In case of overflow the count register will be
initialized to 0, which will execute the loop HOST_WIDE_UINT_MAX+1 times.
So strictly speaking, we would generate incorrect code for that case,
even with your patch, but it is almost correct, since we run the loop
many many times.

I had one more modification to doloop.c that is not related to
unroll-loops, that dealt with do-while loops with != exit condition.
There is a pre-test if the exit condition is true when you enter the
loops, that should not be done for do-while loops.  E.g.  do { m[i] = 0;
} while (++i != c); did only one iterations if i==c at the start, that is
even without unroll-loops.

In unroll.c, I fix problems with pathologic for (i = 0; --i < 6;) loops.
The last hunk of the unroll.c patch fixes the test when the end condition
is true at the start of the loop.  The test used to use signed compare
even in unsigned loops, that was obviously wrong.  Also the direction of
the compare used to be decided by the direction of the increment, which
fails for for (i = 0; --i < 6;) type loops.  I've added some comments.
Here is the patch again, relative to the 3.0 cvs.

See my previous mail for testcases:
http://gcc.gnu.org/ml/gcc-patches/2001-07/msg00391.html
http://gcc.gnu.org/ml/gcc-patches/2001-06/msg01726.html
PR 3384

Zoli

 Index: unroll.c
 ===================================================================
 RCS file: /cvs/gcc/gcc/gcc/unroll.c,v
 retrieving revision 1.125.4.1
 diff -u -r1.125.4.1 unroll.c
 --- unroll.c	2001/04/21 18:43:40	1.125.4.1
 +++ unroll.c	2001/07/16 21:09:42
 @@ -900,6 +900,9 @@
 	   register rtx diff;
 	   rtx *labels;
 	   int abs_inc, neg_inc;
 +	  enum rtx_code cc = loop_info->comparison_code;
 +	  int less_p     = (cc == LE  || cc == LEU || cc == LT  || cc == LTU);
 +	  int unsigned_p = (cc == LEU || cc == GEU || cc == LTU || cc == GTU);
  
 	   map->reg_map = (rtx *) xmalloc (maxregnum * sizeof (rtx));
  
 @@ -934,9 +937,23 @@
 	      We must copy the final and initial values here to avoid
 	      improperly shared rtl.  */
  
 -	  diff = expand_binop (mode, sub_optab, copy_rtx (final_value),
 -			       copy_rtx (initial_value), NULL_RTX, 0,
 -			       OPTAB_LIB_WIDEN);
 +	  /* We have to deal with for (i = 0; --i < 6;) type loops.
 +	     For such loops the real final value is the first time the
 +	     loop variable overflows, so the diff we calculate is the
 +	     distance from the overflow value.  This is 0 or ~0 for
 +	     unsigned loops depending on the direction, or INT_MAX,
 +	     INT_MAX+1 for signed loops.  We really do not need the
 +	     exact value, since we are only interested in the diff
 +	     modulo the increment, and the increment is a power of 2,
 +	     so we can pretent that the overflow value is 0/~0 */
 +
 +	  if (cc == NE || less_p != neg_inc)
 +	    diff = expand_binop (mode, sub_optab, copy_rtx (final_value),
 +				 copy_rtx (initial_value), NULL_RTX, 0,
 +				 OPTAB_LIB_WIDEN);
 +	  else
 +	    diff = expand_unop (mode, neg_inc ? one_cmpl_optab : neg_optab,
 +				copy_rtx (initial_value), NULL_RTX, 0);
  
 	   /* Now calculate (diff % (unroll * abs (increment))) by using an
 	      and instruction.  */
 @@ -957,11 +974,11 @@
 	      case.  This check does not apply if the loop has a NE
 	      comparison at the end.  */
  
 -	  if (loop_info->comparison_code != NE)
 +	  if (cc != NE)
 	     {
 	       emit_cmp_and_jump_insns (initial_value, final_value,
 -				       neg_inc ? LE : GE,
 -				       NULL_RTX, mode, 0, 0, labels[1]);
 +				       less_p ? GE : LE, NULL_RTX,
 +				       mode, unsigned_p, 0, labels[1]);
 	       JUMP_LABEL (get_last_insn ()) = labels[1];
 	       LABEL_NUSES (labels[1])++;
 	     }
 Index: doloop.c
 ===================================================================
 RCS file: /cvs/gcc/gcc/gcc/doloop.c,v
 retrieving revision 1.3.4.1
 diff -u -r1.3.4.1 doloop.c
 --- doloop.c	2001/05/12 20:32:26	1.3.4.1
 +++ doloop.c	2001/07/16 21:09:42
 @@ -590,67 +590,40 @@
 			copy_rtx (neg_inc ? final_value : initial_value),
 			NULL_RTX, unsigned_p, OPTAB_LIB_WIDEN);
  
 -  if (loop_info->unroll_number == 1)
 +  if (abs_inc * loop_info->unroll_number != 1)
      {
 -      if (abs_inc != 1)
 -	{
 -	  int shift_count;
 -	  rtx extra;
 -	  rtx label;
 +      int shift_count;
  
 -	  shift_count = exact_log2 (abs_inc);
 -	  if (shift_count < 0)
 -	    abort ();
 +      shift_count = exact_log2 (abs_inc * loop_info->unroll_number);
 +      if (shift_count < 0)
 +	abort ();
  
 -	  /* abs (final - initial) / abs_inc  */
 -	  iterations = expand_binop (GET_MODE (diff), lshr_optab,
 -				     diff, GEN_INT (shift_count),
 +      if (abs_inc != 1)
 +        {
 +	  /* abs (final - initial + abs_inc - 1) / abs_inc
 +	     If we get an overflow, the loop is infinite, and
 +	     the loop count is set to zero.  That is not really
 +	     infinite but it will run UINT_MAX+1 times */
 +	  iterations = expand_binop (GET_MODE (diff), add_optab,
 +				     diff, GEN_INT (abs_inc - 1),
 				      NULL_RTX, 1,
 				      OPTAB_LIB_WIDEN);
 -
 -	  /* abs (final - initial) % abs_inc  */
 -	  extra = expand_binop (GET_MODE (iterations), and_optab,
 -				diff, GEN_INT (abs_inc - 1),
 -				NULL_RTX, 1,
 -				OPTAB_LIB_WIDEN);
 -
 -	  /* If (abs (final - initial) % abs_inc == 0) jump past
 -	     following increment instruction.  */
 -	  label = gen_label_rtx();
 -	  emit_cmp_and_jump_insns (extra, const0_rtx, EQ, NULL_RTX,
 -				   GET_MODE (extra), 0, 0, label);
 -	  JUMP_LABEL (get_last_insn ()) = label;
 -	  LABEL_NUSES (label)++;
 -
 -	  /* Increment the iteration count by one.  */
 -	  iterations = expand_binop (GET_MODE (iterations), add_optab,
 -				     iterations, GEN_INT (1),
 +	  iterations = expand_binop (GET_MODE (iterations), lshr_optab,
 +				     iterations, GEN_INT (shift_count),
 				      iterations, 1,
 				      OPTAB_LIB_WIDEN);
 -
 -	  emit_label (label);
 	 }
 	else
 -	iterations = diff;
 +	{
 +	  /* abs (final - initial) / abs_inc  */
 +	  iterations = expand_binop (GET_MODE (diff), lshr_optab,
 +				     diff, GEN_INT (shift_count),
 +				     NULL_RTX, 1,
 +				     OPTAB_LIB_WIDEN);
 +	}
      }
    else
 -    {
 -      int shift_count;
 -
 -      /* precondition_loop_p has preconditioned the loop so that the
 -	 iteration count of the loop body is always a power of 2.
 -	 Since we won't get an overflow calculating the loop count,
 -	 the code we emit is simpler.  */
 -      shift_count = exact_log2 (loop_info->unroll_number * abs_inc);
 -      if (shift_count < 0)
 -	abort ();
 -
 -      iterations = expand_binop (GET_MODE (diff), lshr_optab,
 -				 diff, GEN_INT (shift_count),
 -				 NULL_RTX, 1,
 -				 OPTAB_LIB_WIDEN);
 -    }
 -
 +    iterations = diff;
  
    /* If there is a NOTE_INSN_LOOP_VTOP, we have a `for' or `while'
       style loop, with a loop exit test at the start.  Thus, we can
 @@ -661,7 +634,7 @@
       not executed before the start of the loop.  We need to determine
       if the loop will terminate after the first pass and to limit the
       iteration count to one if necessary.  */
 -  if (! loop->vtop)
 +  if (! loop->vtop && comparison_code != NE)
      {
 	rtx label;
  



More information about the Gcc-patches mailing list