This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Tweak loop peeling limits


> ...  And to mention, I also experimented with teaching loop unrolling to not
> give up and rather unroll the loop with smaller number of iterations so
> the expected number of iterations is roughly 3*n_unrollings.  This
> didn't score well for SPEC (resulting in both slower code and bigger
> binaries), so I went for this patch instead.
The patch is attached.  The main idea is that, for loops controlled by an
iteration counter, unrolling wins because it reduces the number of exit
tests actually executed.

Honza

Index: cfgloop.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cfgloop.c,v
retrieving revision 1.29
diff -c -3 -p -r1.29 cfgloop.c
*** cfgloop.c	2 Jan 2004 22:44:27 -0000	1.29
--- cfgloop.c	18 Jan 2004 15:28:16 -0000
*************** flow_loop_dump (const struct loop *loop,
*** 128,133 ****
--- 128,134 ----
  
    flow_edge_list_print (";;  entry edges", loop->entry_edges,
  			loop->num_entries, file);
+   fprintf (file, ";;  expected number of iterations %d\n", expected_loop_iterations (loop));
    fprintf (file, ";;  nodes:");
    bbs = get_loop_body (loop);
    for (i = 0; i < loop->num_nodes; i++)
Index: loop-unroll.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/loop-unroll.c,v
retrieving revision 1.13
diff -c -3 -p -r1.13 loop-unroll.c
*** loop-unroll.c	30 Dec 2003 10:40:54 -0000	1.13
--- loop-unroll.c	18 Jan 2004 15:28:16 -0000
*************** decide_unroll_runtime_iterations (struct
*** 654,662 ****
        return;
      }
  
-   if (rtl_dump_file)
-     fprintf (rtl_dump_file, ";; Considering unrolling loop with runtime computable number of iterations\n");
- 
    /* nunroll = total number of copies of the original loop body in
       unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
--- 654,659 ----
*************** decide_unroll_runtime_iterations (struct
*** 666,671 ****
--- 663,672 ----
    if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
      nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
  
+   if (rtl_dump_file)
+     fprintf (rtl_dump_file, ";; Considering unrolling loop %i times with "
+ 	     "runtime computable number of iterations\n", nunroll);
+ 
    /* Skip big loops.  */
    if (nunroll <= 1)
      {
*************** decide_unroll_runtime_iterations (struct
*** 697,707 ****
      }
  
    /* If we have profile feedback, check whether the loop rolls.  */
!   if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
      {
!       if (rtl_dump_file)
! 	fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
!       return;
      }
  
    /* Success; now force nunroll to be power of 2, as we are unable to
--- 698,760 ----
      }
  
    /* If we have profile feedback, check whether the loop rolls.  */
!   if (loop->header->count)
      {
!        unsigned int expected_iterations = expected_loop_iterations (loop);
! 
!        /* Very low iterations are subject to peeling.  */
!        if (expected_iterations < 4)
! 	 {
! 	    if (rtl_dump_file)
! 	      fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
! 	    return;
! 	 }
! 
!        /* For loop iterating more than 5 times, we may do better job than peeling.
! 	  Peeling a loop rolling 6 times would result in code like:
! 
! 	   if (!cond)
! 	     goto away;
! 	   body;
! 	   if (!cond)
! 	     goto away;
! 	   body;
! 	   if (!cond)
! 	     goto away;
! 	   body;
! 	   if (!cond)
! 	     goto away;
! 	   body;
! 	   if (!cond)
! 	     goto away;
! 	   body;
! 	   if (!cond)
! 	     goto away;
! 	   body;
! 	   while (cond)
! 	     body;
! 	   away:
! 
! 	  while unrolling the same loop twice produces:
! 	   if (niters % 2)
! 	     body
! 	   while (cond)
! 	     {
! 	       body;
! 	       body;
! 	     }
! 
! 	  which actually reduces loop overhead better.
! 	  ??? We may further consider peeling the loop but it is probably not
! 	  big deal.  */
! 	 
!        if (expected_iterations < 3 * nunroll)
! 	 {
! 	    nunroll = (expected_iterations + 2) / 3;
! 	    if (rtl_dump_file)
! 	      fprintf (rtl_dump_file, ";; Reducing number of unrolling to %i "
! 		       "(loop has too few iterations)\n", nunroll);
! 	 }
      }
  
    /* Success; now force nunroll to be power of 2, as we are unable to
*************** decide_unroll_stupid (struct loop *loop,
*** 1064,1072 ****
        return;
      }
  
-   if (rtl_dump_file)
-     fprintf (rtl_dump_file, ";; Considering unrolling loop stupidly\n");
- 
    /* nunroll = total number of copies of the original loop body in
       unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
--- 1117,1122 ----
*************** decide_unroll_stupid (struct loop *loop,
*** 1075,1080 ****
--- 1125,1134 ----
      nunroll = nunroll_by_av;
    if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
      nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
+ 
+   if (rtl_dump_file)
+     fprintf (rtl_dump_file,
+ 	     ";; Considering unrolling loop stupidly %i times\n", nunroll);
  
    /* Skip big loops.  */
    if (nunroll <= 1)


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]