[patch, committed] Do not prefetch loops that do not roll

Zdenek Dvorak rakdver@atrey.karlin.mff.cuni.cz
Thu Mar 1 21:58:00 GMT 2007


Hello,

this patch makes us avoid issuing prefetches in loops that finish
in less time than the prefetch instruction takes to complete (such prefetches
are obviously useless, and would only introduce unnecessary overhead).

Bootstrapped & regtested on i686 (without -fprefetch-loop-arrays, as
the bootstrap with -fprefetch-loop-arrays currently fails because of the
warnings introduced by the "compile time array subscript checking" patch),
committed.

Zdenek

	* tree-ssa-loop-prefetch.c (determine_unroll_factor):  Bound the unroll
	factor by the estimated number of iterations.
	(loop_prefetch_arrays): Do not prefetch in loops that iterate less than
	prefetch latency.

	* gcc.dg/tree-ssa/prefetch-4.c: New test.

Index: gcc/testsuite/gcc.dg/tree-ssa/prefetch-4.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/prefetch-4.c	(revision 0)
--- gcc/testsuite/gcc.dg/tree-ssa/prefetch-4.c	(revision 0)
***************
*** 0 ****
--- 1,18 ----
+ /* The loop rolls too little, hence the prefetching would not be useful.  */
+ 
+ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+ /* { dg-require-effective-target ilp32 } */
+ /* { dg-options "-O2 -fprefetch-loop-arrays -march=athlon -fdump-tree-final_cleanup" } */
+ 
+ int xxx[20];
+ 
+ void foo (int n)
+ {
+   int i;
+ 
+   for (i = 0; i < n; i++)
+     xxx[i] = i;
+ }
+ 
+ /* { dg-final { scan-tree-dump-times "prefetch" 0 "final_cleanup" } } */
+ /* { dg-final { cleanup-tree-dump "final_cleanup" } } */
Index: gcc/tree-ssa-loop-prefetch.c
===================================================================
*** gcc/tree-ssa-loop-prefetch.c	(revision 122428)
--- gcc/tree-ssa-loop-prefetch.c	(working copy)
*************** should_unroll_loop_p (struct loop *loop,
*** 885,897 ****
  
  /* Determine the coefficient by that unroll LOOP, from the information
     contained in the list of memory references REFS.  Description of
!    umber of iterations of LOOP is stored to DESC.  AHEAD is the number
!    of iterations ahead that we need to prefetch.  NINSNS is number of
!    insns of the LOOP.  */
  
  static unsigned
  determine_unroll_factor (struct loop *loop, struct mem_ref_group *refs,
! 			 unsigned ninsns, struct tree_niter_desc *desc)
  {
    unsigned upper_bound;
    unsigned nfactor, factor, mod_constraint;
--- 885,898 ----
  
  /* Determine the coefficient by that unroll LOOP, from the information
     contained in the list of memory references REFS.  Description of
!    number of iterations of LOOP is stored to DESC.  NINSNS is the number of
!    insns of the LOOP.  EST_NITER is the estimated number of iterations of
!    the loop, or -1 if no estimate is available.  */
  
  static unsigned
  determine_unroll_factor (struct loop *loop, struct mem_ref_group *refs,
! 			 unsigned ninsns, struct tree_niter_desc *desc,
! 			 HOST_WIDE_INT est_niter)
  {
    unsigned upper_bound;
    unsigned nfactor, factor, mod_constraint;
*************** determine_unroll_factor (struct loop *lo
*** 906,911 ****
--- 907,918 ----
       gains from better scheduling and decreasing loop overhead, which is not
       the case here.  */
    upper_bound = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / ninsns;
+ 
+   /* If we unrolled the loop more times than it iterates, the unrolled version
+      of the loop would be never entered.  */
+   if (est_niter >= 0 && est_niter < (HOST_WIDE_INT) upper_bound)
+     upper_bound = est_niter;
+ 
    if (upper_bound <= 1)
      return 1;
  
*************** static bool
*** 935,941 ****
  loop_prefetch_arrays (struct loop *loop)
  {
    struct mem_ref_group *refs;
!   unsigned ahead, ninsns, unroll_factor;
    struct tree_niter_desc desc;
    bool unrolled = false;
  
--- 942,949 ----
  loop_prefetch_arrays (struct loop *loop)
  {
    struct mem_ref_group *refs;
!   unsigned ahead, ninsns, time, unroll_factor;
!   HOST_WIDE_INT est_niter;
    struct tree_niter_desc desc;
    bool unrolled = false;
  
*************** loop_prefetch_arrays (struct loop *loop)
*** 950,970 ****
  
    /* Step 3: determine the ahead and unroll factor.  */
  
!   /* FIXME: We should use not size of the loop, but the average number of
!      instructions executed per iteration of the loop.  */
!   ninsns = tree_num_loop_insns (loop, &eni_time_weights);
!   ahead = (PREFETCH_LATENCY + ninsns - 1) / ninsns;
!   unroll_factor = determine_unroll_factor (loop, refs, ninsns, &desc);
    if (dump_file && (dump_flags & TDF_DETAILS))
      fprintf (dump_file, "Ahead %d, unroll factor %d\n", ahead, unroll_factor);
  
-   /* If the loop rolls less than the required unroll factor, prefetching
-      is useless.  */
-   if (unroll_factor > 1
-       && cst_and_fits_in_hwi (desc.niter)
-       && (unsigned HOST_WIDE_INT) int_cst_value (desc.niter) < unroll_factor)
-     goto fail;
- 
    /* Step 4: what to prefetch?  */
    if (!schedule_prefetches (refs, unroll_factor, ahead))
      goto fail;
--- 958,981 ----
  
    /* Step 3: determine the ahead and unroll factor.  */
  
!   /* FIXME: the time should be weighted by the probabilities of the blocks in
!      the loop body.  */
!   time = tree_num_loop_insns (loop, &eni_time_weights);
!   ahead = (PREFETCH_LATENCY + time - 1) / time;
!   est_niter = estimated_loop_iterations_int (loop, false);
! 
!   /* The prefetches will run for AHEAD iterations of the original loop.  Unless
!      the loop rolls at least AHEAD times, prefetching the references does not
!      make sense.  */
!   if (est_niter >= 0 && est_niter <= (HOST_WIDE_INT) ahead)
!     goto fail;
! 
!   ninsns = tree_num_loop_insns (loop, &eni_size_weights);
!   unroll_factor = determine_unroll_factor (loop, refs, ninsns, &desc,
! 					   est_niter);
    if (dump_file && (dump_flags & TDF_DETAILS))
      fprintf (dump_file, "Ahead %d, unroll factor %d\n", ahead, unroll_factor);
  
    /* Step 4: what to prefetch?  */
    if (!schedule_prefetches (refs, unroll_factor, ahead))
      goto fail;



More information about the Gcc-patches mailing list