This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH, RFA] Remove LABEL_NEXTREF and TARGET_ADJUST_UNROLL_MAX from the SH backend
- From: Steven Bosscher <stevenb dot gcc at gmail dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Joern RENNECKE <joern dot rennecke at st dot com>
- Date: Sun, 26 Feb 2006 22:05:11 +0100
- Subject: [PATCH, RFA] Remove LABEL_NEXTREF and TARGET_ADJUST_UNROLL_MAX from the SH backend
- References: <200602261725.30716.steven@gcc.gnu.org> <200602261816.35048.steven@gcc.gnu.org>
So here we go again.
I built an x86_64-linux x sh-elf cross-compiler with this patch
applied, and tested it (C only) on sh-sim, with no new failures. Is
this OK to apply?
Gr.
Steven
* config/sh/sh.c: Include alloc-pool.h.
(sh_adjust_unroll_max, TARGET_ADJUST_UNROLL_MAX): Remove.
(label_ref_list_pool): New alloc pool.
(label_ref_list_t): New type for lists of labels.
(struct pool_node): Make wend a label_ref_list_t.
(add_constant): Use the new type, eradicate LABEL_NEXTREF
references.
(dump_table): Likewise.
(sh_reorg): Set up the alloc pool before using add_constant and
dump_table, and destroy it when finishing.
* doc/invoke.texi (-madjust-unroll): Remove documentation.
Index: doc/invoke.texi
===================================================================
--- doc/invoke.texi (revision 111452)
+++ doc/invoke.texi (working copy)
@@ -12121,12 +12121,6 @@ Set the name of the library function use
division strategies, and the compiler will still expect the same
sets of input/output/clobbered registers as if this option was not present.
-@item -madjust-unroll
-@opindex madjust-unroll
-Throttle unrolling to avoid thrashing target registers.
-This option only has an effect if the gcc code base supports the
-TARGET_ADJUST_UNROLL_MAX target hook.
-
@item -mindexed-addressing
@opindex mindexed-addressing
Enable the use of the indexed addressing mode for SHmedia32/SHcompact.
Index: config/sh/sh.c
===================================================================
--- config/sh/sh.c (revision 111452)
+++ config/sh/sh.c (working copy)
@@ -53,6 +53,7 @@ Boston, MA 02110-1301, USA. */
#include "ggc.h"
#include "tree-gimple.h"
#include "cfgloop.h"
+#include "alloc-pool.h"
int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
@@ -241,9 +242,6 @@ static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *);
static int sh_address_cost (rtx);
-#ifdef TARGET_ADJUST_UNROLL_MAX
-static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
-#endif
static int sh_pr_n_sets (void);
static rtx sh_allocate_initial_value (rtx);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
@@ -468,11 +466,6 @@ static int hard_regs_intersect_p (HARD_R
#endif /* SYMBIAN */
-#ifdef TARGET_ADJUST_UNROLL_MAX
-#undef TARGET_ADJUST_UNROLL_MAX
-#define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
-#endif
-
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sh_secondary_reload
@@ -2964,6 +2957,14 @@ gen_datalabel_ref (rtx sym)
}
+static alloc_pool label_ref_list_pool;
+
+typedef struct label_ref_list_d
+{
+ rtx label;
+ struct label_ref_list_d *next;
+} *label_ref_list_t;
+
/* The SH cannot load a large constant into a register, constants have to
come from a pc relative load. The reference of a pc relative load
instruction must be less than 1k in front of the instruction. This
@@ -3021,7 +3022,7 @@ typedef struct
{
rtx value; /* Value in table. */
rtx label; /* Label of value. */
- rtx wend; /* End of window. */
+ label_ref_list_t wend; /* End of window. */
enum machine_mode mode; /* Mode of value. */
/* True if this constant is accessed as part of a post-increment
@@ -3055,7 +3056,8 @@ static rtx
add_constant (rtx x, enum machine_mode mode, rtx last_value)
{
int i;
- rtx lab, new, ref, newref;
+ rtx lab, new;
+ label_ref_list_t ref, newref;
/* First see if we've already got it. */
for (i = 0; i < pool_size; i++)
@@ -3081,9 +3083,10 @@ add_constant (rtx x, enum machine_mode m
}
if (lab && pool_window_label)
{
- newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
+ newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
+ newref->label = pool_window_label;
ref = pool_vector[pool_window_last].wend;
- LABEL_NEXTREF (newref) = ref;
+ newref->next = ref;
pool_vector[pool_window_last].wend = newref;
}
if (new)
@@ -3105,13 +3108,14 @@ add_constant (rtx x, enum machine_mode m
lab = gen_label_rtx ();
pool_vector[pool_size].mode = mode;
pool_vector[pool_size].label = lab;
- pool_vector[pool_size].wend = NULL_RTX;
+ pool_vector[pool_size].wend = NULL;
pool_vector[pool_size].part_of_sequence_p = (lab == 0);
if (lab && pool_window_label)
{
- newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
+ newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
+ newref->label = pool_window_label;
ref = pool_vector[pool_window_last].wend;
- LABEL_NEXTREF (newref) = ref;
+ newref->next = ref;
pool_vector[pool_window_last].wend = newref;
}
if (lab)
@@ -3133,7 +3137,8 @@ dump_table (rtx start, rtx barrier)
rtx scan = barrier;
int i;
int need_align = 1;
- rtx lab, ref;
+ rtx lab;
+ label_ref_list_t ref;
int have_df = 0;
/* Do two passes, first time dump out the HI sized constants. */
@@ -3153,9 +3158,9 @@ dump_table (rtx start, rtx barrier)
scan = emit_label_after (lab, scan);
scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
scan);
- for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
+ for (ref = p->wend; ref; ref = ref->next)
{
- lab = XEXP (ref, 0);
+ lab = ref->label;
scan = emit_insn_after (gen_consttable_window_end (lab), scan);
}
}
@@ -3203,9 +3208,9 @@ dump_table (rtx start, rtx barrier)
emit_label_before (lab, align_insn);
emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
align_insn);
- for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
+ for (ref = p->wend; ref; ref = ref->next)
{
- lab = XEXP (ref, 0);
+ lab = ref->label;
emit_insn_before (gen_consttable_window_end (lab),
align_insn);
}
@@ -3241,9 +3246,9 @@ dump_table (rtx start, rtx barrier)
if (p->mode != HImode)
{
- for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
+ for (ref = p->wend; ref; ref = ref->next)
{
- lab = XEXP (ref, 0);
+ lab = ref->label;
scan = emit_insn_after (gen_consttable_window_end (lab),
scan);
}
@@ -3293,9 +3298,9 @@ dump_table (rtx start, rtx barrier)
if (p->mode != HImode)
{
- for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
+ for (ref = p->wend; ref; ref = ref->next)
{
- lab = XEXP (ref, 0);
+ lab = ref->label;
scan = emit_insn_after (gen_consttable_window_end (lab), scan);
}
}
@@ -4517,9 +4522,12 @@ sh_reorg (void)
mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
shorten_branches (first);
}
+
/* Scan the function looking for move instructions which have to be
changed to pc-relative loads and insert the literal tables. */
-
+ label_ref_list_pool = create_alloc_pool ("label references list",
+ sizeof (struct label_ref_list_d),
+ 30);
mdep_reorg_phase = SH_FIXUP_PCLOAD;
for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
{
@@ -4700,7 +4708,8 @@ sh_reorg (void)
insn = barrier;
}
}
-
+ free_alloc_pool (label_ref_list_pool);
+
mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
INSN_ADDRESSES_FREE ();
split_branches (first);
@@ -10174,275 +10183,6 @@ lose:
return 0;
}
-#ifdef TARGET_ADJUST_UNROLL_MAX
-static int
-sh_adjust_unroll_max (struct loop * loop, int insn_count,
- int max_unrolled_insns, int strength_reduce_p,
- int unroll_type)
-{
-/* This doesn't work in 4.0 because the old unroller & loop.h is gone. */
- if (TARGET_ADJUST_UNROLL && TARGET_SHMEDIA)
- {
- /* Throttle back loop unrolling so that the costs of using more
- targets than the eight target register we have don't outweigh
- the benefits of unrolling. */
- rtx insn;
- int n_labels = 0, n_calls = 0, n_exit_dest = 0, n_inner_loops = -1;
- int n_barriers = 0;
- rtx dest;
- int i;
- rtx exit_dest[8];
- int threshold;
- int unroll_benefit = 0, mem_latency = 0;
- int base_cost, best_cost, cost;
- int factor, best_factor;
- int n_dest;
- unsigned max_iterations = 32767;
- int n_iterations;
- int need_precond = 0, precond = 0;
- basic_block * bbs = get_loop_body (loop);
- struct niter_desc *desc;
-
- /* Assume that all labels inside the loop are used from inside the
- loop. If the loop has multiple entry points, it is unlikely to
- be unrolled anyways.
- Also assume that all calls are to different functions. That is
- somewhat pessimistic, but if you have lots of calls, unrolling the
- loop is not likely to gain you much in the first place. */
- i = loop->num_nodes - 1;
- for (insn = BB_HEAD (bbs[i]); ; )
- {
- if (GET_CODE (insn) == CODE_LABEL)
- n_labels++;
- else if (GET_CODE (insn) == CALL_INSN)
- n_calls++;
- else if (GET_CODE (insn) == NOTE
- && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
- n_inner_loops++;
- else if (GET_CODE (insn) == BARRIER)
- n_barriers++;
- if (insn != BB_END (bbs[i]))
- insn = NEXT_INSN (insn);
- else if (--i >= 0)
- insn = BB_HEAD (bbs[i]);
- else
- break;
- }
- free (bbs);
- /* One label for the loop top is normal, and it won't be duplicated by
- unrolling. */
- if (n_labels <= 1)
- return max_unrolled_insns;
- if (n_inner_loops > 0)
- return 0;
- for (dest = loop->exit_labels; dest && n_exit_dest < 8;
- dest = LABEL_NEXTREF (dest))
- {
- for (i = n_exit_dest - 1;
- i >= 0 && XEXP (dest, 0) != XEXP (exit_dest[i], 0); i--);
- if (i < 0)
- exit_dest[n_exit_dest++] = dest;
- }
- /* If the loop top and call and exit destinations are enough to fill up
- the target registers, we're unlikely to do any more damage by
- unrolling. */
- if (n_calls + n_exit_dest >= 7)
- return max_unrolled_insns;
-
- /* ??? In the new loop unroller, there is no longer any strength
- reduction information available. Thus, when it comes to unrolling,
- we know the cost of everything, but we know the value of nothing. */
-#if 0
- if (strength_reduce_p
- && (unroll_type == LPT_UNROLL_RUNTIME
- || unroll_type == LPT_UNROLL_CONSTANT
- || unroll_type == LPT_PEEL_COMPLETELY))
- {
- struct loop_ivs *ivs = LOOP_IVS (loop);
- struct iv_class *bl;
-
- /* We'll save one compare-and-branch in each loop body copy
- but the last one. */
- unroll_benefit = 1;
- /* Assess the benefit of removing biv & giv updates. */
- for (bl = ivs->list; bl; bl = bl->next)
- {
- rtx increment = biv_total_increment (bl);
- struct induction *v;
-
- if (increment && GET_CODE (increment) == CONST_INT)
- {
- unroll_benefit++;
- for (v = bl->giv; v; v = v->next_iv)
- {
- if (! v->ignore && v->same == 0
- && GET_CODE (v->mult_val) == CONST_INT)
- unroll_benefit++;
- /* If this giv uses an array, try to determine
- a maximum iteration count from the size of the
- array. This need not be correct all the time,
- but should not be too far off the mark too often. */
- while (v->giv_type == DEST_ADDR)
- {
- rtx mem = PATTERN (v->insn);
- tree mem_expr, type, size_tree;
-
- if (GET_CODE (SET_SRC (mem)) == MEM)
- mem = SET_SRC (mem);
- else if (GET_CODE (SET_DEST (mem)) == MEM)
- mem = SET_DEST (mem);
- else
- break;
- mem_expr = MEM_EXPR (mem);
- if (! mem_expr)
- break;
- type = TREE_TYPE (mem_expr);
- if (TREE_CODE (type) != ARRAY_TYPE
- || ! TYPE_SIZE (type) || ! TYPE_SIZE_UNIT (type))
- break;
- size_tree = fold_build2 (TRUNC_DIV_EXPR,
- bitsizetype,
- TYPE_SIZE (type),
- TYPE_SIZE_UNIT (type));
- if (TREE_CODE (size_tree) == INTEGER_CST
- && ! TREE_INT_CST_HIGH (size_tree)
- && TREE_INT_CST_LOW (size_tree) < max_iterations)
- max_iterations = TREE_INT_CST_LOW (size_tree);
- break;
- }
- }
- }
- }
- }
-#else /* 0 */
- /* Assume there is at least some benefit. */
- unroll_benefit = 1;
-#endif /* 0 */
-
- desc = get_simple_loop_desc (loop);
- n_iterations = desc->const_iter ? desc->niter : 0;
- max_iterations
- = max_iterations < desc->niter_max ? max_iterations : desc->niter_max;
-
- if (! strength_reduce_p || ! n_iterations)
- need_precond = 1;
- if (! n_iterations)
- {
- n_iterations
- = max_iterations < 3 ? max_iterations : max_iterations * 3 / 4;
- if (! n_iterations)
- return 0;
- }
-#if 0 /* ??? See above - missing induction variable information. */
- while (unroll_benefit > 1) /* no loop */
- {
- /* We include the benefit of biv/ giv updates. Check if some or
- all of these updates are likely to fit into a scheduling
- bubble of a load.
- We check for the following case:
- - All the insns leading to the first JUMP_INSN are in a strict
- dependency chain.
- - there is at least one memory reference in them.
-
- When we find such a pattern, we assume that we can hide as many
- updates as the total of the load latency is, if we have an
- unroll factor of at least two. We might or might not also do
- this without unrolling, so rather than considering this as an
- extra unroll benefit, discount it in the unroll benefits of unroll
- factors higher than two. */
-
- rtx set, last_set;
-
- insn = next_active_insn (loop->start);
- last_set = single_set (insn);
- if (! last_set)
- break;
- if (GET_CODE (SET_SRC (last_set)) == MEM)
- mem_latency += 2;
- for (insn = NEXT_INSN (insn); insn != end; insn = NEXT_INSN (insn))
- {
- if (! INSN_P (insn))
- continue;
- if (GET_CODE (insn) == JUMP_INSN)
- break;
- if (! reg_referenced_p (SET_DEST (last_set), PATTERN (insn)))
- {
- /* Check if this is a to-be-reduced giv insn. */
- struct loop_ivs *ivs = LOOP_IVS (loop);
- struct iv_class *bl;
- struct induction *v;
- for (bl = ivs->list; bl; bl = bl->next)
- {
- if (bl->biv->insn == insn)
- goto is_biv;
- for (v = bl->giv; v; v = v->next_iv)
- if (v->insn == insn)
- goto is_giv;
- }
- mem_latency--;
- is_biv:
- is_giv:
- continue;
- }
- set = single_set (insn);
- if (! set)
- continue;
- if (GET_CODE (SET_SRC (set)) == MEM)
- mem_latency += 2;
- last_set = set;
- }
- if (mem_latency < 0)
- mem_latency = 0;
- else if (mem_latency > unroll_benefit - 1)
- mem_latency = unroll_benefit - 1;
- break;
- }
-#endif /* 0 */
- if (n_labels + (unroll_benefit + n_labels * 8) / n_iterations
- <= unroll_benefit)
- return max_unrolled_insns;
-
- n_dest = n_labels + n_calls + n_exit_dest;
- base_cost = n_dest <= 8 ? 0 : n_dest - 7;
- best_cost = 0;
- best_factor = 1;
- if (n_barriers * 2 > n_labels - 1)
- n_barriers = (n_labels - 1) / 2;
- for (factor = 2; factor <= 8; factor++)
- {
- /* Bump up preconditioning cost for each power of two. */
- if (! (factor & (factor-1)))
- precond += 4;
- /* When preconditioning, only powers of two will be considered. */
- else if (need_precond)
- continue;
- n_dest = ((unroll_type != LPT_PEEL_COMPLETELY)
- + (n_labels - 1) * factor + n_calls + n_exit_dest
- - (n_barriers * factor >> 1)
- + need_precond);
- cost
- = ((n_dest <= 8 ? 0 : n_dest - 7)
- - base_cost * factor
- - ((factor > 2 ? unroll_benefit - mem_latency : unroll_benefit)
- * (factor - (unroll_type != LPT_PEEL_COMPLETELY)))
- + ((unroll_benefit + 1 + (n_labels - 1) * factor)
- / n_iterations));
- if (need_precond)
- cost += (precond + unroll_benefit * factor / 2) / n_iterations;
- if (cost < best_cost)
- {
- best_cost = cost;
- best_factor = factor;
- }
- }
- threshold = best_factor * insn_count;
- if (max_unrolled_insns > threshold)
- max_unrolled_insns = threshold;
- }
- return max_unrolled_insns;
-}
-#endif /* TARGET_ADJUST_UNROLL_MAX */
-
/* Replace any occurrence of FROM(n) in X with TO(n). The function does
not enter into CONST_DOUBLE for the replace.