This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
[cfg-branch] Iteration number counting using symbolic simplifications

From: Jan Hubicka <jh at suse dot cz>
To: gcc-patches at gcc dot gnu dot org, gcc-pdo at atrey dot karlin dot mff dot cuni dot cz,Zdenek Dvorak <rakdver at atrey dot karlin dot mff dot cuni dot cz>
Date: Thu, 2 May 2002 16:06:37 +0200
Subject: [cfg-branch] Iteration number counting using symbolic simplifications
Hi,
this patch adds the symbolic way of predicting number of iterations we've
talked about.  I now have count_loop_iterations that count number of iterations
in the stupid way - assuming that the loop is not paradoxical and
test_for_iteration returning RTX expression of the comparison at given
iteration count.

Working the symbolic way appears to be really fruitfull.  For instance
following loops:

t1(char *a)
{
	char *p;
	for (p=a+100;p>a;p--) t();
}
t2(char *a)
{
	char *p;
	for (p=a+100;p!=a;p--) t();
}

Gets analyzed as

; Simple loop 1
;  Induction variable:(reg 60 [ p ])
;  Counter decreases
;  Initial value:(plus (reg 59 [ a ]) (100))
;  Compared with:(reg 59 [ a ])
;  Exit condtion:gtu

;  Number of iterations: (99)
;  Conditional to continue loop at 0 th iteration: (gtu (plus (reg 59 [ a ]) (99)) (reg 59 [ a ]))
;  Number of iterations: (99)
; Simple loop 1
;  Induction variable:(reg 60 [ p ])
;  Counter decreases
;  Initial value:(plus (reg 59 [ a ]) (100))
;  Compared with:(reg 59 [ a ])
;  Exit condtion:ne

;  Number of iterations: (99)
;  Conditional to continue loop at 0 th iteration: (1)
;  Number of iterations: (99)

In the first case we compute the number of iterations to be limited by 99, but
we don't know whether the loop terminate earlier.  We use the
runtime_iterations unrolling.  Perhaps we can even teach it to bypass the
peeling (peel exactly once) in such a case.
I also guess it is safe to test pointer flag and make simplify_relational
to prove the conditional as true.

In the second case we know it won't and and proceed via simple unrolling.
Something old unroller was never able to do.

The bootstrap with unrolling is in progress, please let me know what do you
think.  I no longer feel certain enought to commit :)

Index: unroll-new.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/Attic/unroll-new.c,v
retrieving revision 1.1.2.17
diff -c -3 -p -r1.1.2.17 unroll-new.c
*** unroll-new.c	1 May 2002 20:45:45 -0000	1.1.2.17
--- unroll-new.c	2 May 2002 13:58:24 -0000
*************** static basic_block simple_increment PARA
*** 52,59 ****
  static rtx variable_initial_value PARAMS ((struct loop *, rtx));
  static bool simple_loop_p PARAMS ((struct loops *, struct loop *,
  				   struct loop_desc *));
! static bool count_loop_iterations PARAMS ((struct loop_desc *,
! 					   unsigned HOST_WIDE_INT *, rtx *));
  static void unroll_or_peel_loop PARAMS ((struct loops *, struct loop *, int));
  static bool peel_loop_simple PARAMS ((struct loops *, struct loop *, int));
  static bool peel_loop_completely PARAMS ((struct loops *, struct loop *,
--- 52,58 ----
  static rtx variable_initial_value PARAMS ((struct loop *, rtx));
  static bool simple_loop_p PARAMS ((struct loops *, struct loop *,
  				   struct loop_desc *));
! static rtx count_loop_iterations PARAMS ((struct loop_desc *));
  static void unroll_or_peel_loop PARAMS ((struct loops *, struct loop *, int));
  static bool peel_loop_simple PARAMS ((struct loops *, struct loop *, int));
  static bool peel_loop_completely PARAMS ((struct loops *, struct loop *,
*************** static bool unroll_loop_constant_iterati
*** 65,70 ****
--- 64,71 ----
  static bool unroll_loop_runtime_iterations PARAMS ((struct loops *,
  						    struct loop *, int,
  						    struct loop_desc *));
+ static rtx test_for_iteration PARAMS ((struct loop_desc *desc,
+ 				       unsigned HOST_WIDE_INT));
  
  /* Unroll and peel (depending on FLAGS) LOOPS.  */
  void
*************** simple_increment (loops, loop, body, des
*** 223,229 ****
       struct loop_desc *desc;
  {
    rtx mod_insn, insn, set, set_src, set_add;
!   basic_block mod_bb;
    int i;
  
    /* Find insn that modifies var.  */
--- 224,230 ----
       struct loop_desc *desc;
  {
    rtx mod_insn, insn, set, set_src, set_add;
!   basic_block mod_bb = NULL;
    int i;
  
    /* Find insn that modifies var.  */
*************** simple_loop_p (loops, loop, desc)
*** 419,567 ****
    return false;
  }
  
! /* Counts number of iterations described by DESC and store it into NITER
!    or emit sequence to count it at runtime and store to register RNITER.
!    Return true if it is possible to determine number of iterations.  */
! static bool
! count_loop_iterations (desc, niter, rniter)
       struct loop_desc *desc;
-      unsigned HOST_WIDE_INT *niter;
-      rtx *rniter;
  {
    int delta;
-   unsigned HOST_WIDE_INT abs_diff = 0;
    enum rtx_code cond = desc->cond;
  
    /* Ensure that we always handle the condition to stay inside loop.  */
    if (desc->neg)
!     {
!       cond = reverse_condition (cond);
!       if (cond == UNKNOWN)
! 	return false;
!     }
  
    if (desc->grow)
      {
        /* Bypass nonsential tests.  */
!       if (cond == EQ || cond == GE || cond == GT || cond == GEU || cond == GTU)
! 	return false;
!       if (rniter)
! 	{
! 	  *rniter = expand_simple_binop (GET_MODE (desc->var), MINUS,
! 			  copy_rtx (desc->lim), copy_rtx (desc->var),
! 			  NULL_RTX, 0, OPTAB_LIB_WIDEN);
! 	}
!       if (niter)
! 	{
!           abs_diff = desc->lim_n - desc->init_n;
! 	}
      }
    else
      {
        /* Bypass nonsential tests.  */
!       if (cond == EQ || cond == LE || cond == LT || cond == LEU || cond == LTU)
! 	return false;
!       if (rniter)
! 	*rniter = expand_simple_binop (GET_MODE (desc->var), MINUS,
! 			copy_rtx (desc->var), copy_rtx (desc->lim),
! 			NULL_RTX, 0, OPTAB_LIB_WIDEN);
!       if (niter)
! 	{
! 	  abs_diff = desc->init_n - desc->lim_n;
! 	}
      }
  
-   /* Given that iteration_var is going to iterate over its own mode,
-      not HOST_WIDE_INT, disregard higher bits that might have come
-      into the picture due to sign extension of initial and final
-      values.  */
-   if (GET_MODE_BITSIZE (GET_MODE (desc->var)) < HOST_BITS_PER_WIDE_INT)
-     abs_diff &= ((unsigned HOST_WIDE_INT) 1
- 		 << (GET_MODE_BITSIZE (GET_MODE (desc->var)) - 1)
- 		 << 1) - 1;
-       
    delta = 0;
    if (!desc->postincr)
      delta--;
  
!   /* Determine delta caused by exit condition.
!      Also handle properly paradoxical loops iterating 0 times.  For runtime
!      cases this is handled later similar way.  */
    switch (cond)
      {
!       case NE:
! 	if (desc->const_iter
! 	    && (GET_MODE_BITSIZE (GET_MODE (desc->var))
! 	        >= HOST_BITS_PER_WIDE_INT))
! 	  {
! 	    /* We would need HOST_BITS_PER_WIDE_INT + 1 bits.  */
! 	    if (abs_diff == 0 && delta < 0)
! 	      return false;
! 	    /* Similary we would overflow for loops wrapping around in wide
! 	       mode.  */
! 	    if (GET_MODE_BITSIZE (GET_MODE (desc->var))
! 	        > HOST_BITS_PER_WIDE_INT
! 		&& ((desc->init_n >= desc->lim_n && desc->grow)
! 		    || (desc->init_n <= desc->lim_n && !desc->grow)))
! 	      return false;
! 	  }
! 	break;
!       case LT:
! 	if (desc->init_n + !desc->postincr >= desc->lim_n)
! 	  abs_diff = 0;
! 	break;
!       case LE:
! 	if (desc->init_n + !desc->postincr > desc->lim_n)
! 	  abs_diff = -1;
! 	delta++;
! 	break;
!       case GE:
! 	if (desc->init_n - !desc->postincr <= desc->lim_n)
! 	  abs_diff = 0;
! 	delta++;
!       case GT:
! 	if (desc->init_n - !desc->postincr < desc->lim_n)
! 	  abs_diff = -1;
! 	break;
!       case LTU:
! 	if ((unsigned HOST_WIDE_INT)(desc->init_n + !desc->postincr)
! 	    >= (unsigned HOST_WIDE_INT)desc->lim_n)
! 	  abs_diff = 0;
! 	break;
!       case LEU:
! 	if ((unsigned HOST_WIDE_INT)(desc->init_n + !desc->postincr)
! 	    > desc->lim_n)
! 	  abs_diff = -1;
! 	delta++;
! 	break;
!       case GEU:
! 	if ((unsigned HOST_WIDE_INT)(desc->init_n - !desc->postincr)
! 	    <= (unsigned HOST_WIDE_INT)desc->lim_n)
! 	  abs_diff = 0;
! 	delta++;
!       case GTU:
! 	if ((unsigned HOST_WIDE_INT)(desc->init_n - !desc->postincr)
! 	    < (unsigned HOST_WIDE_INT)desc->lim_n)
! 	  abs_diff = -1;
! 	break;
!       default:
! 	abort ();
      }
  
!   if (niter)
      {
!       *niter = abs_diff + delta;
!       if (rtl_dump_file)
! 	fprintf (rtl_dump_file, ";  Number of iterations: %i\n", (int) *niter);
      }
  
!   if (rniter && delta)
!     *rniter = expand_simple_binop (GET_MODE (desc->var), PLUS,
! 		*rniter,
! 		GEN_INT (delta),
! 		NULL_RTX, 0, OPTAB_LIB_WIDEN);
  
!   return true;
  }
  
  /* Peel NPEEL iterations from LOOP, remove exit edges (and cancel the loop
--- 420,549 ----
    return false;
  }
  
! /* Return RTX expression representing number of iterations of loop as bounded
!    by test described by DESC (in the case loop really has multiple exit
!    edges, fewer iterations may happen in the practice).  
! 
!    Return NULL if it is unknown.  Additionally the value may be invalid for
!    paradoxical loop (lets define paradoxical loops as loops whose test is
!    failing at -1th iteration, for instance "for (i=5;i<1;i++);").
!    
!    These cases needs to be eighter cared by copying the loop test in the front
!    of loop or keeping the test in first iteration of loop.  */
! static rtx
! count_loop_iterations (desc)
       struct loop_desc *desc;
  {
    int delta;
    enum rtx_code cond = desc->cond;
+   rtx exp = desc->init ? copy_rtx (desc->init) : desc->var;
+ 
+   /* Give up on floating point modes and friends.  It can be possible to do
+      the job for constant loop bounds, but it is probably not worthwhile.  */
+   if (!INTEGRAL_MODE_P (GET_MODE (desc->var)))
+     return NULL;
  
    /* Ensure that we always handle the condition to stay inside loop.  */
    if (desc->neg)
!     cond = reverse_condition (cond);
  
+   /* Compute absolute value of the difference of initial and final value.  */
    if (desc->grow)
      {
        /* Bypass nonsential tests.  */
!       if (cond == EQ || cond == GE || cond == GT || cond == GEU
! 	  || cond == GTU)
! 	return NULL;
!       exp = simplify_gen_binary (MINUS, GET_MODE (desc->var),
! 				 copy_rtx (desc->lim), exp);
      }
    else
      {
        /* Bypass nonsential tests.  */
!       if (cond == EQ || cond == LE || cond == LT || cond == LEU
! 	  || cond == LTU)
! 	return NULL;
!       exp = simplify_gen_binary (MINUS, GET_MODE (desc->var),
! 				 exp, copy_rtx (desc->lim));
      }
  
    delta = 0;
    if (!desc->postincr)
      delta--;
  
!   /* Determine delta caused by exit condition.  */
    switch (cond)
      {
!     case NE:
!     case LT:
!     case GT:
!     case LTU:
!     case GTU:
!       break;
!     case LE:
!     case GE:
!     case LEU:
!     case GEU:
!       delta++;
!       break;
!     default:
!       abort ();
      }
  
!   if (delta)
!     exp = simplify_gen_binary (PLUS, GET_MODE (desc->var),
! 			       exp, GEN_INT (delta));
! 
!   if (rtl_dump_file)
      {
!       fprintf (rtl_dump_file, ";  Number of iterations: ");
!       print_simple_rtl (rtl_dump_file, exp);
!       fprintf (rtl_dump_file, "\n");
      }
  
!   return exp;
! }
  
! /* Return simplified RTX expression representing the value of test
!    described of DESC at given iteration of loop.  */
! 
! static rtx
! test_for_iteration (desc, iter)
!      struct loop_desc *desc;
!      unsigned HOST_WIDE_INT iter;
! {
!   enum rtx_code cond = desc->cond;
!   rtx exp = desc->init ? copy_rtx (desc->init) : desc->var;
!   rtx addval;
! 
!   /* Give up on floating point modes and friends.  It can be possible to do
!      the job for constant loop bounds, but it is probably not worthwhile.  */
!   if (!INTEGRAL_MODE_P (GET_MODE (desc->var)))
!     return NULL;
! 
!   /* Ensure that we always handle the condition to stay inside loop.  */
!   if (desc->neg)
!     cond = reverse_condition (cond);
! 
!   /* Compute the value of induction variable.  */
!   addval = simplify_gen_binary (MULT, GET_MODE (desc->var),
! 				desc->grow ? const1_rtx : constm1_rtx,
! 				gen_int_mode (desc->postincr
! 					      ? iter : iter + 1,
! 					      GET_MODE (desc->var)));
!   exp = simplify_gen_binary (PLUS, GET_MODE (desc->var), exp, addval);
!   /* Test at given condtion.  */
!   exp = simplify_gen_relational (cond, SImode,
! 				 GET_MODE (desc->var), exp, desc->lim);
! 
!   if (rtl_dump_file)
!     {
!       fprintf (rtl_dump_file,
! 	       ";  Conditional to continue loop at %i th iteration: ", iter);
!       print_simple_rtl (rtl_dump_file, exp);
!       fprintf (rtl_dump_file, "\n");
!     }
!   return exp;
  }
  
  /* Peel NPEEL iterations from LOOP, remove exit edges (and cancel the loop
*************** peel_loop_completely (loops, loop, desc)
*** 575,583 ****
    sbitmap wont_exit;
    unsigned HOST_WIDE_INT npeel;
    edge e;
  
!   if (!count_loop_iterations (desc, &npeel, NULL))
!     abort ();  /* Tested already.  */
  
    wont_exit = sbitmap_alloc (npeel + 2);
    sbitmap_ones (wont_exit);
--- 557,568 ----
    sbitmap wont_exit;
    unsigned HOST_WIDE_INT npeel;
    edge e;
+   rtx exp;
  
!   exp = count_loop_iterations (desc);
!   if (exp || GET_CODE (exp) != CONST_INT)
!     abort ();
!   npeel = INTVAL (exp);
  
    wont_exit = sbitmap_alloc (npeel + 2);
    sbitmap_ones (wont_exit);
*************** unroll_loop_constant_iterations (loops, 
*** 614,623 ****
  {
    unsigned HOST_WIDE_INT niter, exit_mod;
    sbitmap wont_exit;
  
    /* Normalization.  */
!   if (!count_loop_iterations (desc, &niter, NULL))
!     abort ();  /* Tested already.  */
  
    if (niter <= (unsigned) max_unroll)
      abort ();  /* Should get into peeling instead.  */
--- 599,611 ----
  {
    unsigned HOST_WIDE_INT niter, exit_mod;
    sbitmap wont_exit;
+   rtx exp;
  
    /* Normalization.  */
!   exp = count_loop_iterations (desc);
!   if (!exp || GET_CODE (exp) != CONST_INT)
!     abort ();
!   niter = INTVAL (exp);
  
    if (niter <= (unsigned) max_unroll)
      abort ();  /* Should get into peeling instead.  */
*************** unroll_loop_runtime_iterations (loops, l
*** 702,709 ****
  
    /* Normalization.  */
    start_sequence ();
!   if (!count_loop_iterations (desc, NULL, &niter))
!     abort ();			/* Tested already.  */
  
    /* Count modulo by ANDing it with max_unroll.  */
    niter = expand_simple_binop (GET_MODE (desc->var), AND,
--- 690,699 ----
  
    /* Normalization.  */
    start_sequence ();
!   niter = count_loop_iterations (desc);
!   if (!niter)
!     abort ();
!   niter = force_operand (niter, NULL);
  
    /* Count modulo by ANDing it with max_unroll.  */
    niter = expand_simple_binop (GET_MODE (desc->var), AND,
*************** unroll_or_peel_loop (loops, loop, flags)
*** 894,900 ****
       int flags;
  {
    int ninsns;
!   unsigned HOST_WIDE_INT nunroll, npeel, niter;
    struct loop_desc desc;
    bool simple, exact;
  
--- 884,890 ----
       int flags;
  {
    int ninsns;
!   unsigned HOST_WIDE_INT nunroll, npeel, niter = 0;
    struct loop_desc desc;
    bool simple, exact;
  
*************** unroll_or_peel_loop (loops, loop, flags)
*** 969,979 ****
    exact = false;
    if (simple)
      {
        /* Loop with constant number of iterations?  */
!       if (count_loop_iterations (&desc, &niter, NULL))
! 	exact = desc.const_iter;
!       else
  	simple = false;
      }
  
    if (!exact)
--- 959,985 ----
    exact = false;
    if (simple)
      {
+       rtx exp = count_loop_iterations (&desc);
+       rtx test = test_for_iteration (&desc, 0);
+ 
+       /* Bypass loops iterating 0 times.  These should really be
+          elliminated earlier, but we may create them by other transformations.
+          CSE will kill them later.  */
+ 
+       if (test && test == const0_rtx)
+ 	{
+ 	  if ((flags & UAP_UNROLL) && rtl_dump_file)
+ 	    fprintf (rtl_dump_file, ";;  Not unrolling nor peeling loop, iterates 0 times\n");
+ 	}
+ 
        /* Loop with constant number of iterations?  */
!       if (!exp)
  	simple = false;
+       else if (GET_CODE (exp) == CONST_INT
+ 	       && test && test == const_true_rtx)
+ 	exact = true, niter = INTVAL (exp);
+       else
+ 	exact = false;
      }
  
    if (!exact)
Follow-Ups:
- Re: [cfg-branch] Iteration number counting using symbolic simplifications
  - From: Jan Hubicka
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]