This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[killloop] Removal of redundant prefetches during unrolling


Hello,

this patch (partially based on Steven's patch
http://gcc.gnu.org/ml/gcc-patches/2005-08/msg00291.html) removes
prefetches that are made redundant by unrolling.  Only the prefetches
that fit into the same cache line as one of the previous prefetches
are removed.

Zdenek

	* rtlanal.c (may_trap_p): Handle PREFETCH.
	* loop-unroll.c (struct opt_info): Add prefetches_to_remove field.
	(analyze_prefetch_to_remove, remove_redundant_prefetch): New functions.
	(analyze_insns_in_loop): Call analyze_prefetch_to_remove.
	(apply_opt_in_copies): Call remove_redundant_prefetch.
	(free_opt_info): Free prefetches_to_remove field.

Index: rtlanal.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/rtlanal.c,v
retrieving revision 1.218
diff -c -3 -p -r1.218 rtlanal.c
*** rtlanal.c	28 Jun 2005 22:24:12 -0000	1.218
--- rtlanal.c	30 Aug 2005 09:02:01 -0000
*************** may_trap_p (rtx x)
*** 2102,2107 ****
--- 2102,2111 ----
      case ASM_OPERANDS:
        return MEM_VOLATILE_P (x);
  
+       /* Prefetch instructions cannot trap, by definition.  */
+     case PREFETCH:
+       return 0;
+ 
        /* Memory ref can trap unless it's a static var or a stack slot.  */
      case MEM:
        if (MEM_NOTRAP_P (x))
Index: loop-unroll.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/loop-unroll.c,v
retrieving revision 1.36
diff -c -3 -p -r1.36 loop-unroll.c
*** loop-unroll.c	25 Jun 2005 02:00:35 -0000	1.36
--- loop-unroll.c	30 Aug 2005 09:02:01 -0000
*************** Software Foundation, 51 Franklin Street,
*** 70,75 ****
--- 70,81 ----
     showed that this choice may affect performance in order of several %.
     */
  
+ /* Provide a default value for prefetch block size, in case md does not.  */
+ 
+ #ifndef PREFETCH_BLOCK
+ #define PREFETCH_BLOCK 32
+ #endif
+ 
  /* Information about induction variables to split.  */
  
  struct iv_to_split
*************** struct opt_info
*** 112,117 ****
--- 118,125 ----
    htab_t insns_to_split;           /* A hashtable of insns to split.  */
    htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
                                        to expand.  */
+   htab_t prefetches_to_remove;     /* A hashtable of prefetch insns that may
+ 				      need to be removed after unrolling.  */
    unsigned first_new_block;        /* The first basic block that was
                                        duplicated.  */
    basic_block loop_exit;           /* The loop exit basic block.  */
*************** static void free_opt_info (struct opt_in
*** 138,143 ****
--- 146,152 ----
  static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
  static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
  static struct iv_to_split *analyze_iv_to_split_insn (rtx);
+ static struct iv_to_split *analyze_prefetch_to_remove (rtx);
  static void expand_var_during_unrolling (struct var_to_expand *, rtx);
  static int insert_var_expansion_initialization (void **, void *);
  static int combine_var_copies_in_loop_exit (void **, void *);
*************** analyze_iv_to_split_insn (rtx insn)
*** 1675,1680 ****
--- 1684,1731 ----
    return ivts;
  }
  
+ /* Determines whether INSN is a prefetch whose unrolled copies may become
+    redundant.  The prefetched address must be an induction variable with a
+    constant step smaller in magnitude than PREFETCH_BLOCK.  Returns a newly
+    allocated structure recording INSN and the step, or NULL otherwise.  */
+ 
+ static struct iv_to_split *
+ analyze_prefetch_to_remove (rtx insn)
+ {
+   rtx patt = PATTERN (insn);
+   rtx addr;
+   HOST_WIDE_INT step;
+   struct iv_to_split *ivts;
+   struct rtx_iv iv;
+ 
+   if (GET_CODE (patt) != PREFETCH)
+     return NULL;
+ 
+   addr = XEXP (patt, 0);
+   if (!iv_analyze_expr (insn, addr, Pmode, &iv))
+     return NULL;
+ 
+   if (GET_CODE (iv.step) != CONST_INT)
+     return NULL;
+ 
+   /* We do not want to remove prefetches in unrolled copies if each of them
+      hits its own cache line.  */
+   step = INTVAL (iv.step);
+   if (step < 0)
+     step = -step;
+   if (step >= PREFETCH_BLOCK)
+     return NULL;
+ 
+   /* Record the prefetch to remove.  */
+   ivts = xmalloc (sizeof (struct iv_to_split));
+   ivts->insn = insn;
+   ivts->base_var = NULL_RTX;
+   ivts->step = iv.step;
+   ivts->n_loc = 0;
+   
+   return ivts;
+ }
+ 
  /* Determines which of insns in LOOP can be optimized.
     Return a OPT_INFO struct with the relevant hash tables filled
     with all insns to be optimized.  The FIRST_NEW_BLOCK field
*************** analyze_insns_in_loop (struct loop *loop
*** 1701,1707 ****
    if (flag_split_ivs_in_unroller)
      opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
                                              si_info_hash, si_info_eq, free);
!   
    /* Record the loop exit bb and loop preheader before the unrolling.  */
    if (!loop_preheader_edge (loop)->src)
      {
--- 1752,1760 ----
    if (flag_split_ivs_in_unroller)
      opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
                                              si_info_hash, si_info_eq, free);
!   opt_info->prefetches_to_remove
! 	  = htab_create (5 * loop->num_nodes, si_info_hash, si_info_eq, free);
! 
    /* Record the loop exit bb and loop preheader before the unrolling.  */
    if (!loop_preheader_edge (loop)->src)
      {
*************** analyze_insns_in_loop (struct loop *loop
*** 1743,1749 ****
--- 1796,1813 ----
              *slot1 = ivts;
              continue;
            }
+        
+         if (opt_info->prefetches_to_remove)
+           ivts = analyze_prefetch_to_remove (insn);
          
+         if (ivts)
+           {
+             slot1 = htab_find_slot (opt_info->prefetches_to_remove, 
+ 				    ivts, INSERT);
+             *slot1 = ivts;
+             continue;
+           }
+ 
          if (opt_info->insns_with_var_to_expand)
            ves = analyze_insn_to_expand_var (loop, insn);
          
*************** split_iv (struct iv_to_split *ivts, rtx 
*** 1903,1908 ****
--- 1967,1994 ----
    delete_insn (insn);
  }
  
+ /* Remove the prefetch INSN if it is redundant.  The step of the prefetched
+    address is taken from IVTS, and DELTA determines which of the unrolled
+    copies is considered; the prefetch with DELTA equal to zero is kept.  */
+ 
+ static void
+ remove_redundant_prefetch (struct iv_to_split *ivts, rtx insn, unsigned delta)
+ {
+   HOST_WIDE_INT step = INTVAL (ivts->step);
+   if (step < 0)
+     step  = - step;
+  
+   if (delta == 0)
+     return;
+ 
+   /* If the prefetch is the first one to access a new cache line, we want to
+      keep it.  We assume that the first access in the loop is cache line
+      aligned, which may cause this heuristic to be slightly wrong.  */
+   if (step * delta / PREFETCH_BLOCK != step * (delta - 1) / PREFETCH_BLOCK)
+     return;
+ 
+   delete_insn (insn);
+ }
  
  /* Return one expansion of the accumulator recorded in struct VE.  */
  
*************** apply_opt_in_copies (struct opt_info *op
*** 2114,2119 ****
--- 2200,2221 ----
                    split_iv (ivts, insn, delta);
                  }
              }
+ 
+ 	  /* Remove redundant prefetches.  */
+           if (opt_info->prefetches_to_remove)
+             {
+               ivts = htab_find (opt_info->prefetches_to_remove, &ivts_templ);
+               
+               if (ivts)
+                 {
+ #ifdef ENABLE_CHECKING
+ 		  gcc_assert (rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)));
+ #endif
+                   
+                   remove_redundant_prefetch (ivts, insn, delta);
+                 }
+             }
+ 
            /* Apply variable expansion optimization.  */
            if (unrolling && opt_info->insns_with_var_to_expand)
              {
*************** free_opt_info (struct opt_info *opt_info
*** 2203,2208 ****
--- 2305,2312 ----
  {
    if (opt_info->insns_to_split)
      htab_delete (opt_info->insns_to_split);
+   if (opt_info->prefetches_to_remove)
+     htab_delete (opt_info->prefetches_to_remove);
    if (opt_info->insns_with_var_to_expand)
      {
        htab_traverse (opt_info->insns_with_var_to_expand, 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]