This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[killloop] Removal of redundant prefetches during unrolling
- From: Zdenek Dvorak <rakdver at atrey dot karlin dot mff dot cuni dot cz>
- To: gcc-patches at gcc dot gnu dot org
- Cc: stevenb at suse dot de
- Date: Tue, 30 Aug 2005 11:39:41 +0200
- Subject: [killloop] Removal of redundant prefetches during unrolling
Hello,
this patch (partially based on Steven's patch
http://gcc.gnu.org/ml/gcc-patches/2005-08/msg00291.html) removes
prefetches that are made redundant by unrolling. Only the prefetches
that fit into the same cache line as one of the previous prefetches
are removed.
Zdenek
* rtlanal.c (may_trap_p): Handle PREFETCH.
* loop-unroll.c (struct opt_info): Add prefetches_to_remove field.
(analyze_prefetch_to_remove, remove_redundant_prefetch): New functions.
(analyze_insns_in_loop): Call analyze_prefetch_to_remove.
(apply_opt_in_copies): Call remove_redundant_prefetch.
(free_opt_info): Free prefetches_to_remove field.
Index: rtlanal.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/rtlanal.c,v
retrieving revision 1.218
diff -c -3 -p -r1.218 rtlanal.c
*** rtlanal.c 28 Jun 2005 22:24:12 -0000 1.218
--- rtlanal.c 30 Aug 2005 09:02:01 -0000
*************** may_trap_p (rtx x)
*** 2102,2107 ****
--- 2102,2111 ----
case ASM_OPERANDS:
return MEM_VOLATILE_P (x);
+ /* Prefetch instructions cannot trap, by definition. */
+ case PREFETCH:
+ return 0;
+
/* Memory ref can trap unless it's a static var or a stack slot. */
case MEM:
if (MEM_NOTRAP_P (x))
Index: loop-unroll.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/loop-unroll.c,v
retrieving revision 1.36
diff -c -3 -p -r1.36 loop-unroll.c
*** loop-unroll.c 25 Jun 2005 02:00:35 -0000 1.36
--- loop-unroll.c 30 Aug 2005 09:02:01 -0000
*************** Software Foundation, 51 Franklin Street,
*** 70,75 ****
--- 70,81 ----
showed that this choice may affect performance in order of several %.
*/
+ /* Provide a default value for prefetch block size, in case md does not. */
+
+ #ifndef PREFETCH_BLOCK
+ #define PREFETCH_BLOCK 32
+ #endif
+
/* Information about induction variables to split. */
struct iv_to_split
*************** struct opt_info
*** 112,117 ****
--- 118,125 ----
htab_t insns_to_split; /* A hashtable of insns to split. */
htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
to expand. */
+ htab_t prefetches_to_remove; /* A hashtable of prefetch insns that may
+ need to be removed after unrolling. */
unsigned first_new_block; /* The first basic block that was
duplicated. */
basic_block loop_exit; /* The loop exit basic block. */
*************** static void free_opt_info (struct opt_in
*** 138,143 ****
--- 146,152 ----
static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
static struct iv_to_split *analyze_iv_to_split_insn (rtx);
+ static struct iv_to_split *analyze_prefetch_to_remove (rtx);
static void expand_var_during_unrolling (struct var_to_expand *, rtx);
static int insert_var_expansion_initialization (void **, void *);
static int combine_var_copies_in_loop_exit (void **, void *);
*************** analyze_iv_to_split_insn (rtx insn)
*** 1675,1680 ****
--- 1684,1731 ----
return ivts;
}
+ /* Determines whether INSN is a prefetch instruction that may be useful to
+ remove when the loop is unrolled. The address that is prefetched must be
+ an induction variable with a constant step. Returns a structure describing
+ the prefetch and the step, or NULL if the insn is not suitable. */
+
+ static struct iv_to_split *
+ analyze_prefetch_to_remove (rtx insn)
+ {
+ rtx patt = PATTERN (insn);
+ rtx addr;
+ HOST_WIDE_INT step;
+ struct iv_to_split *ivts;
+ struct rtx_iv iv;
+
+ if (GET_CODE (patt) != PREFETCH)
+ return NULL;
+
+ addr = XEXP (patt, 0);
+ if (!iv_analyze_expr (insn, addr, Pmode, &iv))
+ return NULL;
+
+ if (GET_CODE (iv.step) != CONST_INT)
+ return NULL;
+
+ /* We do not want to remove prefetches in unrolled copies if each of them
+ hits its own cache line. */
+ step = INTVAL (iv.step);
+ if (step < 0)
+ step = -step;
+ if (step >= PREFETCH_BLOCK)
+ return NULL;
+
+ /* Record the prefetch to remove. */
+ ivts = xmalloc (sizeof (struct iv_to_split));
+ ivts->insn = insn;
+ ivts->base_var = NULL_RTX;
+ ivts->step = iv.step;
+ ivts->n_loc = 0;
+
+ return ivts;
+ }
+
/* Determines which of insns in LOOP can be optimized.
Return a OPT_INFO struct with the relevant hash tables filled
with all insns to be optimized. The FIRST_NEW_BLOCK field
*************** analyze_insns_in_loop (struct loop *loop
*** 1701,1707 ****
if (flag_split_ivs_in_unroller)
opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
si_info_hash, si_info_eq, free);
!
/* Record the loop exit bb and loop preheader before the unrolling. */
if (!loop_preheader_edge (loop)->src)
{
--- 1752,1760 ----
if (flag_split_ivs_in_unroller)
opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
si_info_hash, si_info_eq, free);
! opt_info->prefetches_to_remove
! = htab_create (5 * loop->num_nodes, si_info_hash, si_info_eq, free);
!
/* Record the loop exit bb and loop preheader before the unrolling. */
if (!loop_preheader_edge (loop)->src)
{
*************** analyze_insns_in_loop (struct loop *loop
*** 1743,1749 ****
--- 1796,1813 ----
*slot1 = ivts;
continue;
}
+
+ if (opt_info->prefetches_to_remove)
+ ivts = analyze_prefetch_to_remove (insn);
+ if (ivts)
+ {
+ slot1 = htab_find_slot (opt_info->prefetches_to_remove,
+ ivts, INSERT);
+ *slot1 = ivts;
+ continue;
+ }
+
if (opt_info->insns_with_var_to_expand)
ves = analyze_insn_to_expand_var (loop, insn);
*************** split_iv (struct iv_to_split *ivts, rtx
*** 1903,1908 ****
--- 1967,1994 ----
delete_insn (insn);
}
+ /* Remove the prefetch INSN if redundant. The step of the address induction
+ variable is taken from IVTS, and DELTA determines which of
+ the unrolled copies is considered. */
+
+ static void
+ remove_redundant_prefetch (struct iv_to_split *ivts, rtx insn, unsigned delta)
+ {
+ HOST_WIDE_INT step = INTVAL (ivts->step);
+ if (step < 0)
+ step = - step;
+
+ if (delta == 0)
+ return;
+
+ /* If the prefetch is the first one to access a new cache line, we want to
+ keep it. We assume that the first access in the loop is cache line
+ aligned, which may cause this heuristic to be slightly wrong. */
+ if (step * delta / PREFETCH_BLOCK != step * (delta - 1) / PREFETCH_BLOCK)
+ return;
+
+ delete_insn (insn);
+ }
/* Return one expansion of the accumulator recorded in struct VE. */
*************** apply_opt_in_copies (struct opt_info *op
*** 2114,2119 ****
--- 2200,2221 ----
split_iv (ivts, insn, delta);
}
}
+
+ /* Remove redundant prefetches. */
+ if (opt_info->prefetches_to_remove)
+ {
+ ivts = htab_find (opt_info->prefetches_to_remove, &ivts_templ);
+
+ if (ivts)
+ {
+ #ifdef ENABLE_CHECKING
+ gcc_assert (rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)));
+ #endif
+
+ remove_redundant_prefetch (ivts, insn, delta);
+ }
+ }
+
/* Apply variable expansion optimization. */
if (unrolling && opt_info->insns_with_var_to_expand)
{
*************** free_opt_info (struct opt_info *opt_info
*** 2203,2208 ****
--- 2305,2312 ----
{
if (opt_info->insns_to_split)
htab_delete (opt_info->insns_to_split);
+ if (opt_info->prefetches_to_remove)
+ htab_delete (opt_info->prefetches_to_remove);
if (opt_info->insns_with_var_to_expand)
{
htab_traverse (opt_info->insns_with_var_to_expand,