This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: target register load optimizations (Was: Re: Porting gcc for F-CPU without direct JMP)
- From: Joern Rennecke <joern dot rennecke at superh dot com>
- To: "Joseph S. Myers" <jsm28 at cam dot ac dot uk>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Tue, 04 Mar 2003 17:48:08 +0000
- Subject: Re: target register load optimizations (Was: Re: Porting gcc for F-CPU without direct JMP)
- Organization: SuperH UK Ltd.
- References: <Pine.LNX.4.33.0302220040170.12800-100000@kern.srcf.societies.cam.ac.uk>
"Joseph S. Myers" wrote:
> This patch includes bogus changes to invoke.texi inside @math.
Sorry about that. I now have makeinfo 4.5 available on my test systems, and excised
these patches (which were workarounds for problems with an earlier makeinfo version)
from my sources.
--
--------------------------
SuperH (UK) Ltd.
2410 Aztec West / Almondsbury / BRISTOL / BS32 4QX
T:+44 1454 465658
Fri Feb 21 21:02:01 2003 Stephen Clarke <stephen dot clarke at superh dot com>
Jörn Rennecke <joern dot rennecke at superh dot com>
* late-loop.c: New file.
* Makefile.in (OBJS): Include late-loop.o
(late-loop.o): Add dependencies.
* flags.h (flag_optimize_target_registers): Declare.
(flag_optimize_target_registers_2): Likewise.
* hooks.c (hook_reg_class_void_no_regs): New function.
(hook_bool_bool_false): Likewise.
* hooks.h (hook_reg_class_void_no_regs, hook_bool_bool_false): Declare.
* rtl.h (target_registers_optimize): Declare.
* target-def.h (TARGET_OPTIMIZE_TARGET_REGISTER_CLASS): Define.
(TARGET_OPTIMIZE_TARGET_REGISTER_CALLEE_SAVED): Likewise.
(TARGET_INITIALIZER): Include these.
* target.h (struct gcc_target): Add optimize_target_register_class
and optimize_target_register_callee_saved members.
* toplev.c (enum dump_file_index): Add DFI_targetregs.
(dump_file) Add "tars" entry.
(flag_optimize_target_registers): New variable.
(flag_optimize_target_registers_2): Likewise.
(lang_independent_options): Add entries for new options.
(rest_of_compilation): Call target_registers_optimize.
* doc/tm.texi (TARGET_OPTIMIZE_TARGET_REGISTER_CLASS): Document.
(TARGET_OPTIMIZE_TARGET_REGISTER_CALLEE_SAVED): Likewise.
* doc/invoke.texi: Document -foptimize-target-registers and
-foptimize-target-registers-2.
* sh.c (shmedia_space_reserved_for_target_registers): New variable.
(sh_target_reg_class): New function.
(sh_optimize_target_register_callee_saved): Likewise.
(shmedia_target_regs_stack_space): Likewise.
(shmedia_reserve_space_for_target_registers_p): Likewise.
(shmedia_target_regs_stack_adjust): Likewise.
(TARGET_OPTIMIZE_TARGET_REGISTER_CLASS): Override.
(TARGET_OPTIMIZE_TARGET_REGISTER_CALLEE_SAVED): Likewise.
(calc_live_regs): If flag_optimize_target_registers_2 and
TARGET_SAVE_ALL_TARGET_REGS is enabled, and we have space reserved
for target registers, make sure that we save all target registers.
(sh_expand_prologue, sh_expand_epilogue): Take target register
optimizations into account. Collapse stack adjustments if that
is beneficial.
(initial_elimination_offset): Reserve space for target registers
if necessary.
* sh.h (SAVE_ALL_TR_BIT, TARGET_SAVE_ALL_TARGET_REGS): Define.
(OPTIMIZATION_OPTIONS): Enable flag_optimize_target_registers.
Index: Makefile.in
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Makefile.in,v
retrieving revision 1.998
diff -p -r1.998 Makefile.in
*** Makefile.in 19 Feb 2003 18:03:02 -0000 1.998
--- Makefile.in 4 Mar 2003 16:55:00 -0000
*************** OBJS = alias.o bb-reorder.o bitmap.o bui
*** 785,791 ****
sibcall.o simplify-rtx.o sreal.o ssa.o ssa-ccp.o ssa-dce.o stmt.o \
stor-layout.o stringpool.o timevar.o toplev.o tracer.o tree.o tree-dump.o \
tree-inline.o unroll.o varasm.o varray.o version.o vmsdbgout.o xcoffout.o \
! alloc-pool.o et-forest.o cgraph.o \
$(GGC) $(out_object_file) $(EXTRA_OBJS) $(host_hook_obj)
BACKEND = main.o libbackend.a
--- 785,791 ----
sibcall.o simplify-rtx.o sreal.o ssa.o ssa-ccp.o ssa-dce.o stmt.o \
stor-layout.o stringpool.o timevar.o toplev.o tracer.o tree.o tree-dump.o \
tree-inline.o unroll.o varasm.o varray.o version.o vmsdbgout.o xcoffout.o \
! alloc-pool.o et-forest.o cgraph.o late-loop.o \
$(GGC) $(out_object_file) $(EXTRA_OBJS) $(host_hook_obj)
BACKEND = main.o libbackend.a
*************** loop.o : loop.c $(CONFIG_H) $(SYSTEM_H)
*** 1572,1577 ****
--- 1572,1580 ----
insn-config.h $(REGS_H) hard-reg-set.h $(RECOG_H) $(EXPR_H) \
real.h $(PREDICT_H) $(BASIC_BLOCK_H) function.h cfgloop.h \
toplev.h varray.h except.h cselib.h $(OPTABS_H) $(TM_P_H)
+ late-loop.o : late-loop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(BASIC_BLOCK_H) $(RTL_H) hard-reg-set.h $(REGS_H) $(OBSTACK_H) \
+ $(FIBHEAP_H) output.h $(TARGET_H) $(EXPR_H) flags.h $(INSN_ATTR_H)
doloop.o : doloop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) flags.h \
$(LOOP_H) $(EXPR_H) hard-reg-set.h $(BASIC_BLOCK_H) $(TM_P_H) toplev.h \
cfgloop.h
Index: flags.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/flags.h,v
retrieving revision 1.99
diff -p -r1.99 flags.h
*** flags.h 20 Feb 2003 20:56:51 -0000 1.99
--- flags.h 4 Mar 2003 16:55:00 -0000
*************** extern int flag_gcse_lm;
*** 636,641 ****
--- 636,651 ----
extern int flag_gcse_sm;
+ /* Perform target register optimization before prologue / epilogue
+ threading. */
+
+ extern int flag_optimize_target_registers;
+
+ /* Perform target register optimization after prologue / epilogue
+ threading and jump2. */
+
+ extern int flag_optimize_target_registers_2;
+
/* Nonzero means we should do dwarf2 duplicate elimination. */
Index: hooks.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/hooks.c,v
retrieving revision 1.12
diff -p -r1.12 hooks.c
*** hooks.c 3 Feb 2003 19:40:57 -0000 1.12
--- hooks.c 4 Mar 2003 16:55:00 -0000
*************** hook_bool_void_false ()
*** 41,46 ****
--- 41,62 ----
return false;
}
+ /* The same, but formally returning an enum reg_class. */
+ enum reg_class
+ hook_reg_class_void_no_regs ()
+ {
+ return NO_REGS;
+ }
+
+ /* Generic hook that takes (bool) and returns false. */
+ bool
+ hook_bool_bool_false (a)
+ bool a ATTRIBUTE_UNUSED;
+ {
+ return false;
+ }
+
+
/* Generic hook that takes (tree, int) and does nothing. */
void
hook_void_tree_int (a, b)
Index: hooks.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/hooks.h,v
retrieving revision 1.12
diff -p -r1.12 hooks.h
*** hooks.h 3 Feb 2003 19:40:57 -0000 1.12
--- hooks.h 4 Mar 2003 16:55:00 -0000
*************** Foundation, 59 Temple Place - Suite 330,
*** 23,28 ****
--- 23,30 ----
#define GCC_HOOKS_H
bool hook_bool_void_false PARAMS ((void));
+ enum reg_class hook_reg_class_void_no_regs PARAMS ((void));
+ bool hook_bool_bool_false PARAMS ((bool));
bool hook_bool_tree_false PARAMS ((tree));
bool hook_bool_tree_hwi_hwi_tree_false
PARAMS ((tree, HOST_WIDE_INT, HOST_WIDE_INT, tree));
Index: rtl.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/rtl.h,v
retrieving revision 1.385
diff -p -r1.385 rtl.h
*** rtl.h 20 Feb 2003 20:56:52 -0000 1.385
--- rtl.h 4 Mar 2003 16:55:00 -0000
*************** extern rtx libcall_other_reg PARAMS ((r
*** 2058,2063 ****
--- 2058,2064 ----
#ifdef BUFSIZ
extern void loop_optimize PARAMS ((rtx, FILE *, int));
#endif
+ extern void target_registers_optimize PARAMS ((rtx, bool));
extern void record_excess_regs PARAMS ((rtx, rtx, rtx *));
/* In function.c */
Index: target-def.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/target-def.h,v
retrieving revision 1.45
diff -p -r1.45 target-def.h
*** target-def.h 19 Feb 2003 00:51:16 -0000 1.45
--- target-def.h 4 Mar 2003 16:55:00 -0000
*************** Foundation, 59 Temple Place - Suite 330,
*** 262,267 ****
--- 262,269 ----
/* In hook.c. */
#define TARGET_CANNOT_MODIFY_JUMPS_P hook_bool_void_false
+ #define TARGET_OPTIMIZE_TARGET_REGISTER_CLASS hook_reg_class_void_no_regs
+ #define TARGET_OPTIMIZE_TARGET_REGISTER_CALLEE_SAVED hook_bool_bool_false
#define TARGET_CANNOT_FORCE_CONST_MEM hook_bool_rtx_false
#define TARGET_CANNOT_COPY_INSN_P NULL
#define TARGET_DELEGITIMIZE_ADDRESS hook_rtx_rtx_identity
*************** Foundation, 59 Temple Place - Suite 330,
*** 298,303 ****
--- 300,307 ----
TARGET_EXPAND_BUILTIN, \
TARGET_SECTION_TYPE_FLAGS, \
TARGET_CANNOT_MODIFY_JUMPS_P, \
+ TARGET_OPTIMIZE_TARGET_REGISTER_CLASS, \
+ TARGET_OPTIMIZE_TARGET_REGISTER_CALLEE_SAVED, \
TARGET_CANNOT_FORCE_CONST_MEM, \
TARGET_CANNOT_COPY_INSN_P, \
TARGET_DELEGITIMIZE_ADDRESS, \
Index: target.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/target.h,v
retrieving revision 1.51
diff -p -r1.51 target.h
*** target.h 19 Feb 2003 00:51:16 -0000 1.51
--- target.h 4 Mar 2003 16:55:00 -0000
*************** struct gcc_target
*** 289,294 ****
--- 289,297 ----
not, at the current point in the compilation. */
bool (* cannot_modify_jumps_p) PARAMS ((void));
+ enum reg_class (* optimize_target_register_class) PARAMS ((void));
+ bool (* optimize_target_register_callee_saved) PARAMS ((bool));
+
/* True if the constant X cannot be placed in the constant pool. */
bool (* cannot_force_const_mem) PARAMS ((rtx));
Index: toplev.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/toplev.c,v
retrieving revision 1.714
diff -p -r1.714 toplev.c
*** toplev.c 21 Feb 2003 09:40:24 -0000 1.714
--- toplev.c 4 Mar 2003 16:55:00 -0000
*************** enum dump_file_index
*** 253,258 ****
--- 253,259 ----
DFI_flow2,
DFI_peephole2,
DFI_rnreg,
+ DFI_targetregs,
DFI_ce3,
DFI_bbro,
DFI_sched2,
*************** enum dump_file_index
*** 268,274 ****
Remaining -d letters:
" o q "
! " H JK OPQ TUV YZ"
*/
static struct dump_file_info dump_file[DFI_MAX] =
--- 269,275 ----
Remaining -d letters:
" o q "
! " H JK OPQ UV YZ"
*/
static struct dump_file_info dump_file[DFI_MAX] =
*************** static struct dump_file_info dump_file[D
*** 304,309 ****
--- 305,311 ----
{ "flow2", 'w', 1, 0, 0 },
{ "peephole2", 'z', 1, 0, 0 },
{ "rnreg", 'n', 1, 0, 0 },
+ { "tars", 'T', 1, 0, 0 },
{ "ce3", 'E', 1, 0, 0 },
{ "bbro", 'B', 1, 0, 0 },
{ "sched2", 'R', 1, 0, 0 },
*************** int flag_gcse_lm = 1;
*** 654,659 ****
--- 656,671 ----
int flag_gcse_sm = 1;
+ /* Perform target register optimization before prologue / epilogue
+ threading. */
+
+ int flag_optimize_target_registers = 0;
+
+ /* Perform target register optimization after prologue / epilogue
+ threading and jump2. */
+
+ int flag_optimize_target_registers_2 = 0;
+
/* Nonzero means to rerun cse after loop optimization. This increases
compilation time about 20% and picks up a few more common expressions. */
*************** static const lang_independent_options f_
*** 1060,1065 ****
--- 1072,1081 ----
N_("Perform enhanced load motion during global subexpression elimination") },
{"gcse-sm", &flag_gcse_sm, 1,
N_("Perform store motion after global subexpression elimination") },
+ {"optimize-target-registers", &flag_optimize_target_registers, 1,
+ N_("Perform target register optimization before prologue / epilogue threading") },
+ {"optimize-target-registers-2", &flag_optimize_target_registers_2, 1,
+ N_("Perform target register optimization after prologue / epilogue threading") },
{"loop-optimize", &flag_loop_optimize, 1,
N_("Perform the loop optimizations") },
{"crossjumping", &flag_crossjumping, 1,
*************** rest_of_compilation (decl)
*** 3428,3433 ****
--- 3444,3460 ----
if (optimize > 0)
split_all_insns (0);
+ if (flag_optimize_target_registers)
+ {
+ open_dump_file (DFI_targetregs, decl);
+
+ target_registers_optimize (insns, false);
+
+ close_dump_file (DFI_targetregs, print_rtl_with_bb, insns);
+
+ ggc_collect ();
+ }
+
cleanup_cfg (optimize ? CLEANUP_EXPENSIVE : 0);
/* On some machines, the prologue and epilogue code, or parts thereof,
*************** rest_of_compilation (decl)
*** 3496,3501 ****
--- 3523,3540 ----
close_dump_file (DFI_ce3, print_rtl_with_bb, insns);
timevar_pop (TV_IFCVT2);
}
+
+ if (flag_optimize_target_registers_2)
+ {
+ open_dump_file (DFI_targetregs, decl);
+
+ target_registers_optimize (insns, true);
+
+ close_dump_file (DFI_targetregs, print_rtl_with_bb, insns);
+
+ ggc_collect ();
+ }
+
#ifdef STACK_REGS
if (optimize)
split_all_insns (1);
Index: config/sh/sh.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/sh/sh.c,v
retrieving revision 1.199
diff -p -r1.199 sh.c
*** config/sh/sh.c 21 Feb 2003 20:51:32 -0000 1.199
--- config/sh/sh.c 4 Mar 2003 16:55:01 -0000
*************** enum reg_class reg_class_from_letter[] =
*** 172,177 ****
--- 172,179 ----
int assembler_dialect;
+ static bool shmedia_space_reserved_for_target_registers;
+
static void split_branches PARAMS ((rtx));
static int branch_dest PARAMS ((rtx));
static void force_into PARAMS ((rtx, rtx));
*************** static int sh_issue_rate PARAMS ((void))
*** 205,210 ****
--- 207,214 ----
static bool sh_function_ok_for_sibcall PARAMS ((tree, tree));
static bool sh_cannot_modify_jumps_p PARAMS ((void));
+ static enum reg_class sh_target_reg_class PARAMS ((void));
+ static bool sh_optimize_target_register_callee_saved PARAMS ((bool));
static bool sh_ms_bitfield_layout_p PARAMS ((tree));
static void sh_encode_section_info PARAMS ((tree, int));
*************** static bool unspec_caller_rtx_p PARAMS (
*** 222,227 ****
--- 226,235 ----
static bool sh_cannot_copy_insn_p PARAMS ((rtx));
static bool sh_rtx_costs PARAMS ((rtx, int, int, int *));
static int sh_address_cost PARAMS ((rtx));
+ static int shmedia_target_regs_stack_space PARAMS ((HOST_WIDE_INT *));
+ static int shmedia_reserve_space_for_target_registers_p
+ PARAMS ((int, HOST_WIDE_INT *));
+ static int shmedia_target_regs_stack_adjust PARAMS ((HOST_WIDE_INT *));
/* Initialize the GCC target structure. */
#undef TARGET_ATTRIBUTE_TABLE
*************** static int sh_address_cost PARAMS ((rtx)
*** 256,261 ****
--- 264,274 ----
#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
+ #undef TARGET_OPTIMIZE_TARGET_REGISTER_CLASS
+ #define TARGET_OPTIMIZE_TARGET_REGISTER_CLASS sh_target_reg_class
+ #undef TARGET_OPTIMIZE_TARGET_REGISTER_CALLEE_SAVED
+ #define TARGET_OPTIMIZE_TARGET_REGISTER_CALLEE_SAVED \
+ sh_optimize_target_register_callee_saved
#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
*************** push_regs (mask)
*** 4587,4592 ****
--- 4600,4655 ----
push (PR_REG);
}
+ /* Calculate how much extra space is needed to save all callee-saved
+ target registers.
+ LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
+
+ static int
+ shmedia_target_regs_stack_space (live_regs_mask)
+ HOST_WIDE_INT *live_regs_mask;
+ {
+ int reg;
+ int stack_space = 0;
+ int interrupt_handler = sh_cfun_interrupt_handler_p ();
+
+ for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
+ if ((! call_used_regs[reg] || interrupt_handler)
+ && !(live_regs_mask[reg / 32] & (1 << (reg % 32))))
+ /* Leave space to save this target register on the stack,
+ in case target register allocation wants to use it. */
+ stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
+ return stack_space;
+ }
+
+ /* Decide whether we should reserve space for callee-save target registers,
+ in case target register allocation wants to use them. REGS_SAVED is
+ the space, in bytes, that is already required for register saves.
+ LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
+
+ static int
+ shmedia_reserve_space_for_target_registers_p (regs_saved, live_regs_mask)
+ int regs_saved;
+ HOST_WIDE_INT *live_regs_mask;
+ {
+ if (optimize_size)
+ return 0;
+ return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
+ }
+
+ /* Decide how much space to reserve for callee-save target registers
+ in case target register allocation wants to use them.
+ LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
+
+ static int
+ shmedia_target_regs_stack_adjust (live_regs_mask)
+ HOST_WIDE_INT *live_regs_mask;
+ {
+ if (shmedia_space_reserved_for_target_registers)
+ return shmedia_target_regs_stack_space (live_regs_mask);
+ else
+ return 0;
+ }
+
/* Work out the registers which need to be saved, both as a mask and a
count of saved words.
*************** calc_live_regs (count_ptr, live_regs_mas
*** 4688,4693 ****
--- 4751,4769 ----
}
}
}
+ /* If we have a target register optimization pass after prologue / epilogue
+ threading, we need to assume all target registers will be live even if
+ they aren't now. */
+ if (flag_optimize_target_registers_2
+ && TARGET_SAVE_ALL_TARGET_REGS
+ && shmedia_space_reserved_for_target_registers)
+ for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
+ if ((! call_used_regs[reg] || interrupt_handler)
+ && !(live_regs_mask[reg / 32] & (1 << (reg % 32))))
+ {
+ live_regs_mask[reg / 32] |= 1 << (reg % 32);
+ count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
+ }
*count_ptr = count;
}
*************** sh_expand_prologue ()
*** 4837,4849 ****
rtx r0 = gen_rtx_REG (Pmode, R0_REG);
int offset_in_r0 = -1;
int sp_in_r0 = 0;
! if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
! - d % (STACK_BOUNDARY / BITS_PER_UNIT));
offset = d + d_rounding;
! output_stack_adjust (-offset, stack_pointer_rtx, 1, frame_insn);
/* We loop twice: first, we save 8-byte aligned registers in the
higher addresses, that are known to be aligned. Then, we
--- 4913,4949 ----
rtx r0 = gen_rtx_REG (Pmode, R0_REG);
int offset_in_r0 = -1;
int sp_in_r0 = 0;
+ int tregs_space = shmedia_target_regs_stack_adjust (live_regs_mask);
+ int total_size, save_size;
! /* D is the actual number of bytes that we need for saving registers,
! however, in initial_elimination_offset we have committed to using
! an additional TREGS_SPACE amount of bytes - in order to keep both
! addresses to arguments supplied by the caller and local variables
! valid, we must keep this gap. Place it between the incoming
! arguments and the actually saved registers in a bid to optimize
! locality of reference. */
! total_size = d + tregs_space;
! total_size += rounded_frame_size (total_size);
! save_size = total_size - rounded_frame_size (d);
! if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
! - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
!
! /* If adjusting the stack in a single step costs nothing extra, do so.
! I.e. either if a single addi is enough, or we need a movi anyway,
! and we don't exceed the maximum offset range (the test for the
! latter is conservative for simplicity). */
! if (TARGET_SHMEDIA
! && (CONST_OK_FOR_P (-total_size)
! || (! CONST_OK_FOR_P (-(save_size + d_rounding))
! && total_size <= 2044)))
! d_rounding = total_size - save_size;
offset = d + d_rounding;
!
! output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
! 1, frame_insn);
/* We loop twice: first, we save 8-byte aligned registers in the
higher addresses, that are known to be aligned. Then, we
*************** sh_expand_epilogue ()
*** 5057,5071 ****
int d_rounding = 0;
int save_flags = target_flags;
! int frame_size;
calc_live_regs (&d, live_regs_mask);
! if (TARGET_SH5 && d % (STACK_BOUNDARY / BITS_PER_UNIT))
! d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
! - d % (STACK_BOUNDARY / BITS_PER_UNIT));
- frame_size = rounded_frame_size (d) - d_rounding;
if (frame_pointer_needed)
{
--- 5157,5195 ----
int d_rounding = 0;
int save_flags = target_flags;
! int frame_size, save_size;
calc_live_regs (&d, live_regs_mask);
! save_size = d;
! frame_size = rounded_frame_size (d);
!
! if (TARGET_SH5)
! {
! int tregs_space = shmedia_target_regs_stack_adjust (live_regs_mask);
! int total_size;
! if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
! d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
! - d % (STACK_BOUNDARY / BITS_PER_UNIT));
!
! total_size = d + tregs_space;
! total_size += rounded_frame_size (total_size);
! save_size = total_size - frame_size;
!
! /* If adjusting the stack in a single step costs nothing extra, do so.
! I.e. either if a single addi is enough, or we need a movi anyway,
! and we don't exceed the maximum offset range (the test for the
! latter is conservative for simplicity). */
! if (TARGET_SHMEDIA
! && ! frame_pointer_needed
! && (CONST_OK_FOR_P (total_size)
! || (! CONST_OK_FOR_P (save_size + d_rounding)
! && total_size <= 2044)))
! d_rounding = frame_size;
!
! frame_size -= d_rounding;
! }
if (frame_pointer_needed)
{
*************** sh_expand_epilogue ()
*** 5239,5265 ****
if (offset != d + d_rounding)
abort ();
-
- goto finish;
}
! else
! d = 0;
! if (live_regs_mask[PR_REG / 32] & (1 << (PR_REG % 32)))
! pop (PR_REG);
! for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
{
! int j = (FIRST_PSEUDO_REGISTER - 1) - i;
! if (j != PR_REG && live_regs_mask[j / 32] & (1 << (j % 32)))
! pop (j);
}
! finish:
if (target_flags != save_flags)
emit_insn (gen_toggle_sz ());
target_flags = save_flags;
output_stack_adjust (extra_push + current_function_pretend_args_size
! + d + d_rounding
+ current_function_args_info.stack_regs * 8,
stack_pointer_rtx, 7, emit_insn);
--- 5363,5389 ----
if (offset != d + d_rounding)
abort ();
}
! else /* ! TARGET_SH5 */
{
! save_size = 0;
! if (live_regs_mask[PR_REG / 32] & (1 << (PR_REG % 32)))
! pop (PR_REG);
! for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
! {
! int j = (FIRST_PSEUDO_REGISTER - 1) - i;
! if (j != PR_REG && live_regs_mask[j / 32] & (1 << (j % 32)))
! pop (j);
! }
}
!
if (target_flags != save_flags)
emit_insn (gen_toggle_sz ());
target_flags = save_flags;
output_stack_adjust (extra_push + current_function_pretend_args_size
! + save_size + d_rounding
+ current_function_args_info.stack_regs * 8,
stack_pointer_rtx, 7, emit_insn);
*************** initial_elimination_offset (from, to)
*** 5840,5847 ****
--- 5964,5979 ----
int copy_flags;
HOST_WIDE_INT live_regs_mask[(FIRST_PSEUDO_REGISTER + 31) / 32];
+ shmedia_space_reserved_for_target_registers = false;
calc_live_regs (&regs_saved, live_regs_mask);
regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
+
+ if (shmedia_reserve_space_for_target_registers_p (regs_saved, live_regs_mask))
+ {
+ shmedia_space_reserved_for_target_registers = true;
+ regs_saved += shmedia_target_regs_stack_adjust (live_regs_mask);
+ }
+
if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
- regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
*************** static bool
*** 7523,7528 ****
--- 7655,7674 ----
sh_cannot_modify_jumps_p ()
{
return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
+ }
+
+ static enum reg_class
+ sh_target_reg_class ()
+ {
+ return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
+ }
+
+ static bool
+ sh_optimize_target_register_callee_saved (after_prologue_epilogue_gen)
+ bool after_prologue_epilogue_gen;
+ {
+ return (shmedia_space_reserved_for_target_registers
+ && (! after_prologue_epilogue_gen || TARGET_SAVE_ALL_TARGET_REGS));
}
static bool
Index: config/sh/sh.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/sh/sh.h,v
retrieving revision 1.190
diff -p -r1.190 sh.h
*** config/sh/sh.h 21 Feb 2003 20:51:33 -0000 1.190
--- config/sh/sh.h 4 Mar 2003 16:55:02 -0000
*************** extern int target_flags;
*** 151,156 ****
--- 151,157 ----
#define PADSTRUCT_BIT (1<<28)
#define LITTLE_ENDIAN_BIT (1<<29)
#define IEEE_BIT (1<<30)
+ #define SAVE_ALL_TR_BIT (1<<2)
/* Nonzero if we should dump out instruction size info. */
#define TARGET_DUMPISIZE (target_flags & ISIZE_BIT)
*************** extern int target_flags;
*** 258,263 ****
--- 259,266 ----
/* Nonzero if we should prefer @GOT calls when generating PIC. */
#define TARGET_PREFERGOT (target_flags & PREFERGOT_BIT)
+ #define TARGET_SAVE_ALL_TARGET_REGS (target_flags & SAVE_ALL_TR_BIT)
+
#define SELECT_SH1 (SH1_BIT)
#define SELECT_SH2 (SH2_BIT | SELECT_SH1)
#define SELECT_SH2E (SH_E_BIT | SH2_BIT | SH1_BIT | FPU_SINGLE_BIT)
*************** do { \
*** 404,409 ****
--- 407,418 ----
flag_omit_frame_pointer = -1; \
if (SIZE) \
target_flags |= SPACE_BIT; \
+ if (TARGET_SHMEDIA && LEVEL > 1) \
+ { \
+ flag_optimize_target_registers = 1; \
+ if (! (SIZE)) \
+ target_flags |= SAVE_ALL_TR_BIT; \
+ } \
} while (0)
#define ASSEMBLER_DIALECT assembler_dialect
Index: doc/invoke.texi
===================================================================
RCS file: /cvs/gcc/gcc/gcc/doc/invoke.texi,v
retrieving revision 1.244
diff -p -r1.244 invoke.texi
*** doc/invoke.texi 20 Feb 2003 20:56:53 -0000 1.244
--- doc/invoke.texi 4 Mar 2003 16:55:02 -0000
*************** in the following sections.
*** 281,287 ****
-funsafe-math-optimizations -ffinite-math-only @gol
-fno-trapping-math -fno-zero-initialized-in-bss @gol
-fomit-frame-pointer -foptimize-register-move @gol
! -foptimize-sibling-calls -fprefetch-loop-arrays @gol
-freduce-all-givs -fregmove -frename-registers @gol
-freorder-blocks -freorder-functions @gol
-frerun-cse-after-loop -frerun-loop-opt @gol
--- 281,288 ----
-funsafe-math-optimizations -ffinite-math-only @gol
-fno-trapping-math -fno-zero-initialized-in-bss @gol
-fomit-frame-pointer -foptimize-register-move @gol
! -foptimize-sibling-calls -foptimize-target-registers @gol
! -foptimize-target-registers-2 -fprefetch-loop-arrays @gol
-freduce-all-givs -fregmove -frename-registers @gol
-freorder-blocks -freorder-functions @gol
-frerun-cse-after-loop -frerun-loop-opt @gol
*************** Like @option{-fssa}, this is an experime
*** 4357,4362 ****
--- 4358,4374 ----
+
+ @item -foptimize-target-registers
+ @opindex foptimize-target-registers
+ Perform target register optimization before prologue / epilogue threading.
+ The use of target registers can typically be exposed only during reload,
+ thus hoisting loads out of loops and doing inter-block scheduling needs
+ a separate optimization pass.
+
+ @item -foptimize-target-registers-2
+ @opindex foptimize-target-registers-2
+ Perform target register optimization after prologue / epilogue threading.
@item --param @var{name}=@var{value}
@opindex param
Index: doc/tm.texi
===================================================================
RCS file: /cvs/gcc/gcc/gcc/doc/tm.texi,v
retrieving revision 1.203
diff -p -r1.203 tm.texi
*** doc/tm.texi 19 Feb 2003 21:55:22 -0000 1.203
--- doc/tm.texi 4 Mar 2003 16:55:03 -0000
*************** cannot_modify_jumps_past_reload_p ()
*** 9282,9284 ****
--- 9282,9305 ----
@}
@end smallexample
@end deftypefn
+
+ @deftypefn {Target Hook} enum reg_class TARGET_OPTIMIZE_TARGET_REGISTER_CLASS (void)
+ This target hook returns a register class for which target register
+ optimizations should be applied. All registers in this class should be
+ usable interchangeably. After reload, registers in this class will be
+ re-allocated and loads will be hoisted out of loops and be subjected
+ to inter-block scheduling.
+ @end deftypefn
+
+ @deftypefn {Target Hook} bool TARGET_OPTIMIZE_TARGET_REGISTER_CALLEE_SAVED (bool @var{after_prologue_epilogue_gen})
+ Target register optimization will by default exclude callee-saved registers
+ that are not already live during the current function; if this target hook
+ returns true, they will be included. The target code must then make sure
+ that all target registers in the class returned by
+ @samp{TARGET_OPTIMIZE_TARGET_REGISTER_CLASS} that might need saving are
+ saved. @var{after_prologue_epilogue_gen} indicates if prologues and
+ epilogues have already been generated. Note, even if you only return
+ true when @var{after_prologue_epilogue_gen} is false, you still are likely
+ to have to make special provisions in @code{INITIAL_ELIMINATION_OFFSET}
+ to reserve space for caller-saved target registers.
+ @end deftypefn
*** /dev/null Thu Aug 30 21:30:55 2001
--- late-loop.c Fri Feb 21 21:02:52 2003
***************
*** 0 ****
--- 1,1461 ----
+ /* Perform post-reload loop optimizations.
+ Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2, or (at your option) any later
+ version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING. If not, write to the Free
+ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA. */
+
+ #include "config.h"
+ #include "system.h"
+ #include "coretypes.h"
+ #include "tm.h"
+ #include "bitmap.h"
+ #include "sbitmap.h"
+ #include "rtl.h"
+ #include "hard-reg-set.h"
+ #include "basic-block.h"
+ #include "regs.h"
+ #include "obstack.h"
+ #include "fibheap.h"
+ #include "output.h"
+ #include "target.h"
+ #include "expr.h"
+ #include "flags.h"
+ #include "insn-attr.h"
+ #include "function.h"
+
+ /* Target register optimizations - these are performed after reload. */
+
+ typedef struct tr_def_group_s
+ {
+ struct tr_def_group_s *next;
+ rtx src;
+ struct tr_def_s *members;
+ } *tr_def_group;
+
+ typedef struct tr_user_s
+ {
+ struct tr_user_s *next;
+ basic_block bb;
+ int luid;
+ rtx insn;
+ /* If INSN has a single use of a single branch register, then
+ USE points to it within INSN. If there is more than
+ one branch register use, or the use is in some way ambiguous,
+ then USE is NULL. */
+ rtx use;
+ int n_reaching_defs;
+ int first_reaching_def;
+ char other_use_this_block;
+ } *tr_user;
+
+ /* tr_def structs appear on three lists:
+ 1. A list of all tr_def structures (head is
+ ALL_TR_DEFS, linked by the NEXT field).
+ 2. A list of branch reg definitions per basic block (head is
+ BB_TR_DEFS[i], linked by the NEXT_THIS_BB field).
+ 3. A list of all branch reg definitions belonging to the same
+ group (head is in a TR_DEF_GROUP struct, linked by
+ NEXT_THIS_GROUP field). */
+
+ typedef struct tr_def_s
+ {
+ struct tr_def_s *next_this_bb;
+ struct tr_def_s *next_this_group;
+ basic_block bb;
+ int luid;
+ rtx insn;
+ int tr;
+ int cost;
+ /* For a branch register setting insn that has a constant
+ source (i.e. a label), group links together all the
+ insns with the same source. For other branch register
+ setting insns, group is NULL. */
+ tr_def_group group;
+ tr_user uses;
+ /* If this def has a reaching use which is not a simple use
+ in a branch instruction, then has_ambiguous_use will be true,
+ and we will not attempt to migrate this definition. */
+ char has_ambiguous_use;
+ /* live_range is an approximation to the true live range for this
+ def/use web, because it records the set of blocks that contain
+ the live range. There could be other live ranges for the same
+ branch register in that set of blocks, either in the block
+ containing the def (before the def), or in a block containing
+ a use (after the use). If there are such other live ranges, then
+ other_tr_uses_before_def or other_tr_uses_after_use must be set true
+ as appropriate. */
+ char other_tr_uses_before_def;
+ char other_tr_uses_after_use;
+ bitmap live_range;
+ } *tr_def;
+
+ static int issue_rate;
+
+ static int basic_block_freq PARAMS ((basic_block));
+ static int insn_sets_tr_p PARAMS ((rtx, int, int *));
+ static rtx *find_tr_use PARAMS ((rtx));
+ static int tr_referenced_p PARAMS ((rtx, rtx *));
+ static int find_tr_reference PARAMS ((rtx *, void *));
+ static void find_tr_def_group PARAMS ((tr_def_group *, tr_def));
+ static tr_def add_tr_def PARAMS ((fibheap_t, basic_block, int, rtx,
+ unsigned int, int, tr_def_group *));
+ static tr_user new_tr_user PARAMS ((basic_block, int, rtx));
+ static void dump_hard_reg_set PARAMS ((HARD_REG_SET));
+ static void dump_trs_live PARAMS ((int));
+ static void note_other_use_this_block PARAMS((unsigned int, tr_user));
+ static void compute_defs_uses_and_gen PARAMS((fibheap_t, tr_def *,tr_user *,
+ sbitmap *, sbitmap *, HARD_REG_SET *));
+ static void compute_kill PARAMS((sbitmap *, sbitmap *, HARD_REG_SET *));
+ static void compute_out PARAMS((sbitmap *bb_out, sbitmap *, sbitmap *, int));
+ static void link_tr_uses PARAMS((tr_def *, tr_user *, sbitmap *, sbitmap *,
+ int));
+ static void build_tr_def_use_webs PARAMS ((fibheap_t));
+ static int block_at_edge_of_live_range_p PARAMS ((int, tr_def));
+ static void clear_tr_from_live_range PARAMS ((tr_def def));
+ static void add_tr_to_live_range PARAMS ((tr_def));
+ static void augment_live_range PARAMS ((bitmap, HARD_REG_SET *, basic_block,
+ basic_block));
+ static int choose_tr PARAMS ((HARD_REG_SET));
+ static void combine_tr_defs PARAMS ((tr_def, HARD_REG_SET *));
+ static void tr_def_live_range PARAMS ((tr_def,
+ HARD_REG_SET *));
+ static void move_tr_def PARAMS ((basic_block, int, tr_def, bitmap,
+ HARD_REG_SET *));
+ static int migrate_tr_def PARAMS ((tr_def, int));
+ static void migrate_tr_defs PARAMS ((enum reg_class, int));
+ static int flowgraph_contains_abnormal_edges_p PARAMS ((void));
+ static int can_move_up PARAMS ((basic_block, rtx, int));
+ static void note_tr_set PARAMS ((rtx, rtx, void *));
+
+ /* The following code performs code motion of PT instructions
+ (instructions that set target registers), to move them
+ forward away from the branch instructions and out of loops (or, more
+ generally, from a more frequently executed place to a less
+ frequently executed place).
+ Moving PT instructions further in front of the branch instruction
+ that uses the target register value means that the hardware
+ has a better chance of preloading the instructions at the branch
+ target by the time the branch is reached. This avoids bubbles
+ when a taken branch needs to flush out the pipeline.
+ Moving PT instructions out of loops means they are executed
+ less frequently. */
+
+ /* An obstack to hold the def-use web data structures built up for
+ migrating PT instructions. */
+ static struct obstack migrate_pts_obstack;
+
+ /* Basic block dominator information used when migrating PT instructions */
+ static dominance_info dom;
+
+ /* Array indexed by basic block number, giving the set of registers
+ live in that block. */
+ static HARD_REG_SET *trs_live;
+
+ /* Set of all target registers that we are willing to allocate. */
+ static HARD_REG_SET all_trs;
+
+ /* Provide lower and upper bounds for target register numbers, so that
+ we don't need to search through all the hard registers all the time. */
+ static int first_tr, last_tr;
+
+
+
+ /* Return an estimate of the frequency of execution of block bb.
+ If we have a profiling count available, we could use it here. */
+ static int
+ basic_block_freq (bb)
+ basic_block bb;
+ {
+ int loop_depth = MIN (10, (bb->loop_depth));
+ return 1 << (loop_depth * 3);
+ }
+
+ static rtx *tr_reference_found;
+
+ /* A subroutine of tr_referenced_p, called through for_each_rtx.
+ PREG is a pointer to an rtx that is to be excluded from the
+ traversal. If we find a reference to a target register anywhere
+ else, return 1, and put a pointer to it into tr_reference_found. */
+ static int
+ find_tr_reference (px, preg)
+ rtx *px;
+ void *preg;
+ {
+ rtx x;
+ int regno, i;
+
+ if (px == preg)
+ return -1;
+ x = *px;
+ if (GET_CODE (x) != REG)
+ return 0;
+ regno = REGNO (x);
+ for (i = HARD_REGNO_NREGS (regno, GET_MODE (x)) - 1; i >= 0; i--)
+ if (TEST_HARD_REG_BIT (all_trs, regno+i))
+ {
+ tr_reference_found = px;
+ return 1;
+ }
+ return -1;
+ }
+
+ /* Return nonzero if X references (sets or reads) any target register.
+ If EXCLUDEP is set, disregard any references within the rtx pointed to
+ by it. If returning nonzero, also set tr_reference_found as above. */
+ static int
+ tr_referenced_p (x, excludep)
+ rtx x;
+ rtx *excludep;
+ {
+ return for_each_rtx (&x, find_tr_reference, excludep);
+ }
+
+ /* Return true if insn is an instruction that sets a target register. */
+ static int
+ insn_sets_tr_p (insn, check_const, regno)
+ rtx insn;
+ int check_const;
+ int *regno;
+ {
+ rtx set;
+
+ if (GET_CODE (insn) == INSN
+ && (set = single_set (insn)))
+ {
+ rtx dest = SET_DEST (set);
+ rtx src = SET_SRC (set);
+
+ if (GET_CODE (dest) == SUBREG)
+ dest = XEXP (dest, 0);
+
+ if (GET_CODE (dest) == REG
+ && TEST_HARD_REG_BIT (all_trs, REGNO (dest)))
+ {
+ if (tr_referenced_p (src, NULL))
+ abort();
+ if (!check_const || CONSTANT_P (src))
+ {
+ if (regno)
+ *regno = REGNO (dest);
+ return 1;
+ }
+ }
+ }
+ return 0;
+ }
+
+ /* Find and return a use of a target register within an instruction. */
+ static rtx *
+ find_tr_use (insn)
+ rtx insn;
+ {
+ return tr_referenced_p (insn, NULL) ? tr_reference_found : NULL;
+ }
+
+ /* Find the group that the target register definition DEF belongs
+ to. If no such group exists, create one. Add def to the group. */
+ static void
+ find_tr_def_group (all_tr_def_groups, def)
+ tr_def_group *all_tr_def_groups;
+ tr_def def;
+ {
+ if (insn_sets_tr_p (def->insn, 1, NULL))
+ {
+ tr_def_group this_group;
+ rtx def_src = SET_SRC (single_set (def->insn));
+
+ /* ?? This linear search is an efficiency concern, particularly
+ as the search will almost always fail to find a match. */
+ for (this_group = *all_tr_def_groups;
+ this_group != NULL;
+ this_group = this_group->next)
+ if (rtx_equal_p (def_src, this_group->src))
+ break;
+
+ if (!this_group)
+ {
+ this_group = (tr_def_group)
+ obstack_alloc (&migrate_pts_obstack,
+ sizeof (struct tr_def_group_s));
+ this_group->src = def_src;
+ this_group->members = NULL;
+ this_group->next = *all_tr_def_groups;
+ *all_tr_def_groups = this_group;
+ }
+ def->group = this_group;
+ def->next_this_group = this_group->members;
+ this_group->members = def;
+ }
+ else
+ def->group = NULL;
+ }
+
+ /* Create a new target register definition structure, for a definition in
+ block BB, instruction INSN. Return the new definition. */
+ static tr_def
+ add_tr_def (all_tr_defs, bb, insn_luid, insn,
+ dest_reg,
+ other_tr_uses_before_def,
+ all_tr_def_groups)
+ fibheap_t all_tr_defs;
+ basic_block bb;
+ int insn_luid;
+ rtx insn;
+ unsigned int dest_reg;
+ int other_tr_uses_before_def;
+ tr_def_group *all_tr_def_groups;
+ {
+ tr_def this = (tr_def)
+ obstack_alloc (&migrate_pts_obstack, sizeof (struct tr_def_s));
+ this->bb = bb;
+ this->luid = insn_luid;
+ this->insn = insn;
+ this->tr = dest_reg;
+ this->cost = basic_block_freq (bb);
+ this->has_ambiguous_use = 0;
+ this->other_tr_uses_before_def = other_tr_uses_before_def;
+ this->other_tr_uses_after_use = 0;
+ this->next_this_bb = NULL;
+ this->next_this_group = NULL;
+ this->uses = NULL;
+ this->live_range = NULL;
+ find_tr_def_group (all_tr_def_groups, this);
+
+ fibheap_insert (all_tr_defs, -this->cost, this);
+
+ if (rtl_dump_file)
+ fprintf (rtl_dump_file,
+ "Found target reg definition: sets %u { bb %d, insn %d }%s priority %d\n",
+ dest_reg, bb->index, INSN_UID (insn), (this->group ? "" : ":not const"),
+ this->cost);
+
+ return this;
+ }
+
+ /* Create a new target register user structure, for a use in block BB,
+ instruction INSN. Return the new user. */
+ static tr_user
+ new_tr_user (bb, insn_luid, insn)
+ basic_block bb;
+ int insn_luid;
+ rtx insn;
+ {
+ /* This instruction reads target registers. We need
+ to decide whether we can replace all target register
+ uses easily.
+ */
+ rtx *usep = find_tr_use (PATTERN (insn));
+ rtx use;
+ tr_user user = NULL;
+
+ if (usep)
+ {
+ int unambiguous_single_use;
+
+ /* We want to ensure that USE is the only use of a target
+ register in INSN, so that we know that to rewrite INSN to use
+ a different target register, all we have to do is replace USE. */
+ unambiguous_single_use = !tr_referenced_p (PATTERN (insn), usep);
+ if (!unambiguous_single_use)
+ usep = NULL;
+ }
+ use = usep ? *usep : NULL_RTX;
+ user = (tr_user)
+ obstack_alloc (&migrate_pts_obstack, sizeof (struct tr_user_s));
+ user->bb = bb;
+ user->luid = insn_luid;
+ user->insn = insn;
+ user->use = use;
+ user->other_use_this_block = 0;
+ user->next = NULL;
+ user->n_reaching_defs = 0;
+ user->first_reaching_def = -1;
+
+ if (rtl_dump_file)
+ {
+ fprintf (rtl_dump_file, "Uses target reg: { bb %d, insn %d }",
+ bb->index, INSN_UID (insn));
+
+ if (user->use)
+ fprintf (rtl_dump_file, ": unambiguous use of reg %d\n",
+ REGNO (user->use));
+ }
+
+ return user;
+ }
+
+ /* Write the contents of a hard reg set to the dump file. */
+ static void
+ dump_hard_reg_set (s)
+ HARD_REG_SET s;
+ {
+ int reg;
+ for (reg = 0; reg < FIRST_PSEUDO_REGISTER; reg++)
+ if (TEST_HARD_REG_BIT (s, reg))
+ fprintf (rtl_dump_file, " %d", reg);
+ }
+
+ /* Write the set of target regs live in block BB to the dump file. */
+ static void
+ dump_trs_live (bb)
+ int bb;
+ {
+ fprintf (rtl_dump_file, "BB%d live:", bb);
+ dump_hard_reg_set (trs_live[bb]);
+ fprintf (rtl_dump_file, "\n");
+ }
+
+ static void
+ note_other_use_this_block (regno, users_this_bb)
+ unsigned int regno;
+ tr_user users_this_bb;
+ {
+ tr_user user;
+
+ for (user = users_this_bb; user != NULL; user = user->next)
+ if (user->use && REGNO (user->use) == regno)
+ user->other_use_this_block = 1;
+ }
+
+ typedef struct {
+ tr_user users_this_bb;
+ HARD_REG_SET trs_written_in_block;
+ HARD_REG_SET trs_live_in_block;
+ sbitmap bb_gen;
+ sbitmap *tr_defset;
+ } defs_uses_info;
+
+ static void
+ note_tr_set (dest, set, data)
+ rtx dest, set ATTRIBUTE_UNUSED;
+ void *data;
+ {
+ defs_uses_info *info = data;
+ int regno, end_regno;
+
+ if (GET_CODE (dest) != REG)
+ return;
+ regno = REGNO (dest);
+ end_regno = regno + HARD_REGNO_NREGS (regno, GET_MODE (dest));
+ for (; regno < end_regno; regno++)
+ if (TEST_HARD_REG_BIT (all_trs, regno))
+ {
+ note_other_use_this_block (regno, info->users_this_bb);
+ SET_HARD_REG_BIT (info->trs_written_in_block, regno);
+ SET_HARD_REG_BIT (info->trs_live_in_block, regno);
+ sbitmap_difference (info->bb_gen, info->bb_gen,
+ info->tr_defset[regno - first_tr]);
+ }
+ }
+
+ static void
+ compute_defs_uses_and_gen (all_tr_defs, def_array, use_array, tr_defset,
+ bb_gen, trs_written)
+ fibheap_t all_tr_defs;
+ tr_def *def_array;
+ tr_user *use_array;
+ sbitmap *tr_defset;
+ sbitmap *bb_gen;
+ HARD_REG_SET *trs_written;
+ {
+ /* Scan the code building up the set of all defs and all uses.
+ For each target register, build the set of defs of that register.
+ For each block, calculate the set of target registers
+ written in that block.
+ Also calculate the set of trs ever live in that block.
+ */
+ int i;
+ int insn_luid = 0;
+ tr_def_group all_tr_def_groups = NULL;
+ defs_uses_info info;
+
+ sbitmap_vector_zero (bb_gen, n_basic_blocks);
+ for (i = 0; i < n_basic_blocks; i++)
+ {
+ basic_block bb = BASIC_BLOCK (i);
+ int reg;
+ tr_def defs_this_bb = NULL;
+ rtx insn;
+ rtx last;
+
+ info.users_this_bb = NULL;
+ info.bb_gen = bb_gen[i];
+ info.tr_defset = tr_defset;
+
+ CLEAR_HARD_REG_SET (info.trs_live_in_block);
+ CLEAR_HARD_REG_SET (info.trs_written_in_block);
+ for (reg = first_tr; reg <= last_tr; reg++)
+ if (TEST_HARD_REG_BIT (all_trs, reg)
+ && REGNO_REG_SET_P (bb->global_live_at_start, reg))
+ SET_HARD_REG_BIT (info.trs_live_in_block, reg);
+
+ for (insn = bb->head, last = NEXT_INSN (bb->end);
+ insn != last;
+ insn = NEXT_INSN (insn), insn_luid++)
+ {
+ if (INSN_P (insn))
+ {
+ int regno;
+ int insn_uid = INSN_UID (insn);
+
+ if (insn_sets_tr_p (insn, 0, &regno))
+ {
+ tr_def def = add_tr_def (
+ all_tr_defs, bb, insn_luid, insn, regno,
+ TEST_HARD_REG_BIT (info.trs_live_in_block, regno),
+ &all_tr_def_groups);
+
+ def_array[insn_uid] = def;
+ SET_HARD_REG_BIT (info.trs_written_in_block, regno);
+ SET_HARD_REG_BIT (info.trs_live_in_block, regno);
+ sbitmap_difference (bb_gen[i], bb_gen[i],
+ tr_defset[regno - first_tr]);
+ SET_BIT (bb_gen[i], insn_uid);
+ def->next_this_bb = defs_this_bb;
+ defs_this_bb = def;
+ SET_BIT (tr_defset[regno - first_tr], insn_uid);
+ note_other_use_this_block (regno, info.users_this_bb);
+ }
+ else
+ {
+ if (tr_referenced_p (PATTERN (insn), NULL))
+ {
+ tr_user user = new_tr_user (bb, insn_luid, insn);
+
+ use_array[insn_uid] = user;
+ if (user->use)
+ SET_HARD_REG_BIT (info.trs_live_in_block,
+ REGNO (user->use));
+ else
+ {
+ int reg;
+ for (reg = first_tr; reg <= last_tr; reg++)
+ if (TEST_HARD_REG_BIT (all_trs, reg)
+ && refers_to_regno_p (reg, reg + 1, user->insn,
+ NULL))
+ {
+ note_other_use_this_block (reg,
+ info.users_this_bb);
+ SET_HARD_REG_BIT (info.trs_live_in_block, reg);
+ }
+ note_stores (PATTERN (insn), note_tr_set, &info);
+ }
+ user->next = info.users_this_bb;
+ info.users_this_bb = user;
+ }
+ if (GET_CODE (insn) == CALL_INSN)
+ {
+ for (regno = first_tr; regno <= last_tr; regno++)
+ if (TEST_HARD_REG_BIT (call_used_reg_set, regno))
+ note_tr_set (regno_reg_rtx[regno], NULL_RTX, &info);
+ }
+ }
+ }
+ }
+
+ COPY_HARD_REG_SET (trs_live[i], info.trs_live_in_block);
+ COPY_HARD_REG_SET (trs_written[i], info.trs_written_in_block);
+ if (rtl_dump_file)
+ dump_trs_live(i);
+ }
+ }
+
+ static void
+ compute_kill (bb_kill, tr_defset, trs_written)
+ sbitmap *bb_kill;
+ sbitmap *tr_defset;
+ HARD_REG_SET *trs_written;
+ {
+ int i;
+ int regno;
+
+ /* For each basic block, form the set BB_KILL - the set
+ of definitions that the block kills. */
+ sbitmap_vector_zero (bb_kill, n_basic_blocks);
+ for (i = 0; i < n_basic_blocks; i++)
+ {
+ for (regno = first_tr; regno <= last_tr; regno++)
+ if (TEST_HARD_REG_BIT (all_trs, regno)
+ && TEST_HARD_REG_BIT (trs_written[i], regno))
+ sbitmap_a_or_b (bb_kill[i], bb_kill[i],
+ tr_defset[regno - first_tr]);
+ }
+ }
+
+ static void
+ compute_out (bb_out, bb_gen, bb_kill, max_uid)
+ sbitmap *bb_out;
+ sbitmap *bb_gen;
+ sbitmap *bb_kill;
+ int max_uid;
+ {
+ /* Perform iterative dataflow:
+ Initially, for all blocks, BB_OUT = BB_GEN.
+ For each block,
+ BB_IN = union over predecessors of BB_OUT(pred)
+ BB_OUT = (BB_IN - BB_KILL) + BB_GEN
+ Iterate until the bb_out sets stop growing. */
+ int i;
+ int changed;
+ sbitmap bb_in = sbitmap_alloc (max_uid);
+
+ for (i = 0; i < n_basic_blocks; i++)
+ sbitmap_copy (bb_out[i], bb_gen[i]);
+
+ changed = 1;
+ while (changed)
+ {
+ changed = 0;
+ for (i = 0; i < n_basic_blocks; i++)
+ {
+ sbitmap_union_of_preds (bb_in, bb_out, i);
+ changed |= sbitmap_union_of_diff_cg (bb_out[i], bb_gen[i],
+ bb_in, bb_kill[i]);
+ }
+ }
+ sbitmap_free (bb_in);
+ }
+
+ static void
+ link_tr_uses (def_array, use_array, bb_out, tr_defset, max_uid)
+ tr_def *def_array;
+ tr_user *use_array;
+ sbitmap *bb_out;
+ sbitmap *tr_defset;
+ int max_uid;
+ {
+ int i;
+ sbitmap reaching_defs = sbitmap_alloc (max_uid);
+
+ /* Link uses to the uses lists of all of their reaching defs.
+ Count up the number of reaching defs of each use. */
+ for (i = 0; i < n_basic_blocks; i++)
+ {
+ basic_block bb = BASIC_BLOCK (i);
+ rtx insn;
+ rtx last;
+
+ sbitmap_union_of_preds (reaching_defs, bb_out, i);
+ for (insn = bb->head, last = NEXT_INSN (bb->end);
+ insn != last;
+ insn = NEXT_INSN (insn))
+ {
+ if (INSN_P (insn))
+ {
+ int insn_uid = INSN_UID (insn);
+
+ tr_def def = def_array[insn_uid];
+ tr_user user = use_array[insn_uid];
+ if (def != NULL)
+ {
+ /* Remove all reaching defs of regno except
+ for this one. */
+ sbitmap_difference (reaching_defs, reaching_defs,
+ tr_defset[def->tr - first_tr]);
+ SET_BIT(reaching_defs, insn_uid);
+ }
+
+ if (user != NULL)
+ {
+ /* Find all the reaching defs for this use */
+ sbitmap reaching_defs_of_reg = sbitmap_alloc(max_uid);
+ int uid;
+
+ if (user->use)
+ sbitmap_a_and_b (
+ reaching_defs_of_reg,
+ reaching_defs,
+ tr_defset[REGNO (user->use) - first_tr]);
+ else
+ {
+ int reg;
+
+ sbitmap_zero (reaching_defs_of_reg);
+ for (reg = first_tr; reg <= last_tr; reg++)
+ if (TEST_HARD_REG_BIT (all_trs, reg)
+ && refers_to_regno_p (reg, reg + 1, user->insn,
+ NULL))
+ sbitmap_a_or_b_and_c (reaching_defs_of_reg,
+ reaching_defs_of_reg,
+ reaching_defs,
+ tr_defset[reg - first_tr]);
+ }
+ EXECUTE_IF_SET_IN_SBITMAP (reaching_defs_of_reg, 0, uid,
+ {
+ tr_def def = def_array[uid];
+
+ /* We now know that def reaches user */
+
+ if (rtl_dump_file)
+ fprintf (rtl_dump_file,
+ "Def in insn %d reaches use in insn %d\n",
+ uid, insn_uid);
+
+ user->n_reaching_defs++;
+ if (!user->use)
+ def->has_ambiguous_use = 1;
+ if (user->first_reaching_def != -1)
+ { /* There is more than one reaching def. This is
+ a rare case, so just give up on this def/use
+ web when it occurs. */
+ def->has_ambiguous_use = 1;
+ def_array[user->first_reaching_def]
+ ->has_ambiguous_use = 1;
+ if (rtl_dump_file)
+ fprintf (rtl_dump_file,
+ "(use %d has multiple reaching defs)\n",
+ insn_uid);
+ }
+ else
+ user->first_reaching_def = uid;
+ if (user->other_use_this_block)
+ def->other_tr_uses_after_use = 1;
+ user->next = def->uses;
+ def->uses = user;
+ });
+ sbitmap_free (reaching_defs_of_reg);
+ }
+
+ if (GET_CODE (insn) == CALL_INSN)
+ {
+ int regno;
+
+ for (regno = first_tr; regno <= last_tr; regno++)
+ if (TEST_HARD_REG_BIT (all_trs, regno)
+ && TEST_HARD_REG_BIT (call_used_reg_set, regno))
+ sbitmap_difference (reaching_defs, reaching_defs,
+ tr_defset[regno - first_tr]);
+ }
+ }
+ }
+ }
+ sbitmap_free (reaching_defs);
+ }
+
+ static void
+ build_tr_def_use_webs (all_tr_defs)
+ fibheap_t all_tr_defs;
+ {
+ const int max_uid = get_max_uid ();
+ tr_def *def_array = xcalloc (max_uid, sizeof (tr_def));
+ tr_user *use_array = xcalloc (max_uid, sizeof (tr_user));
+ sbitmap *tr_defset = sbitmap_vector_alloc (
+ (last_tr - first_tr) + 1, max_uid);
+ sbitmap *bb_gen = sbitmap_vector_alloc (n_basic_blocks, max_uid);
+ HARD_REG_SET *trs_written = (HARD_REG_SET *) xcalloc (
+ n_basic_blocks, sizeof (HARD_REG_SET));
+ sbitmap *bb_kill;
+ sbitmap *bb_out;
+
+ sbitmap_vector_zero (tr_defset, (last_tr - first_tr) + 1);
+
+ compute_defs_uses_and_gen (all_tr_defs, def_array, use_array, tr_defset,
+ bb_gen, trs_written);
+
+ bb_kill = sbitmap_vector_alloc (n_basic_blocks, max_uid);
+ compute_kill (bb_kill, tr_defset, trs_written);
+ free (trs_written);
+
+ bb_out = sbitmap_vector_alloc (n_basic_blocks, max_uid);
+ compute_out (bb_out, bb_gen, bb_kill, max_uid);
+
+ sbitmap_vector_free (bb_gen);
+ sbitmap_vector_free (bb_kill);
+
+ link_tr_uses (def_array, use_array, bb_out, tr_defset, max_uid);
+
+ sbitmap_vector_free (bb_out);
+ sbitmap_vector_free (tr_defset);
+ free (use_array);
+ free (def_array);
+ }
+
+ /* Return true if basic block BB contains the start or end of the
+ live range of the definition DEF, AND there are other live
+ ranges of the same target register that include BB. */
+ static int
+ block_at_edge_of_live_range_p (bb, def)
+ int bb;
+ tr_def def;
+ {
+ if (def->other_tr_uses_before_def && BASIC_BLOCK (bb) == def->bb)
+ return 1;
+ else if (def->other_tr_uses_after_use)
+ {
+ tr_user user;
+ for (user = def->uses; user != NULL; user = user->next)
+ if (BASIC_BLOCK (bb) == user->bb)
+ return 1;
+ }
+ return 0;
+ }
+
+ /* We are removing the def/use web DEF. The target register
+ used in this web is therefore no longer live in the live range
+ of this web, so remove it from the live set of all basic blocks
+ in the live range of the web.
+ Blocks at the boundary of the live range may contain other live
+ ranges for the same target register, so we have to be careful
+ to remove the target register from the live set of these blocks
+ only if they do not contain other live ranges for the same register. */
+ static void
+ clear_tr_from_live_range (def)
+ tr_def def;
+ {
+ int bb;
+
+ EXECUTE_IF_SET_IN_BITMAP
+ (def->live_range, 0, bb,
+ {
+ if ((!def->other_tr_uses_before_def
+ && !def->other_tr_uses_after_use)
+ || !block_at_edge_of_live_range_p (bb, def))
+ {
+ CLEAR_HARD_REG_BIT (trs_live[bb], def->tr);
+ if (rtl_dump_file)
+ dump_trs_live (bb);
+ }
+ });
+ }
+
+
+ /* We are adding the def/use web DEF. Add the target register used
+ in this web to the live set of all of the basic blocks that contain
+ the live range of the web. */
+ static void
+ add_tr_to_live_range (def)
+ tr_def def;
+ {
+ int bb;
+ EXECUTE_IF_SET_IN_BITMAP
+ (def->live_range, 0, bb,
+ {
+ SET_HARD_REG_BIT (trs_live[bb], def->tr);
+ if (rtl_dump_file)
+ dump_trs_live (bb);
+ });
+ }
+
+ /* Update a live range to contain the basic block NEW_BLOCK, and all
+ blocks on paths between the existing live range and NEW_BLOCK.
+ HEAD is a block contained in the existing live range that dominates
+ all other blocks in the existing live range.
+ Also add to the set TRS_LIVE_IN_RANGE all target registers that
+ are live in the blocks that we add to the live range.
+ It is a precondition that either NEW_BLOCK dominates HEAD, or
+ HEAD dom NEW_BLOCK. This is used to speed up the
+ implementation of this function. */
+ static void
+ augment_live_range (live_range, trs_live_in_range, head_bb, new_bb)
+ bitmap live_range;
+ HARD_REG_SET *trs_live_in_range;
+ basic_block head_bb;
+ basic_block new_bb;
+ {
+ basic_block *worklist, *tos;
+
+ tos = worklist =
+ (basic_block *) xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));
+
+ if (dominated_by_p (dom, new_bb, head_bb))
+ *tos++ = new_bb;
+ else if (dominated_by_p (dom, head_bb, new_bb))
+ {
+ edge e;
+ int new_block = new_bb->index;
+
+ bitmap_set_bit (live_range, new_block);
+ IOR_HARD_REG_SET (*trs_live_in_range, trs_live[new_block]);
+ if (rtl_dump_file)
+ {
+ fprintf (rtl_dump_file,
+ "Adding block %d to live range\n", new_block);
+ fprintf (rtl_dump_file,"Now live trs are ");
+ dump_hard_reg_set (*trs_live_in_range);
+ fprintf (rtl_dump_file, "\n");
+ }
+ for (e = head_bb->pred; e; e = e->pred_next)
+ *tos++ = e->src;
+ }
+ else
+ abort();
+
+ while (tos != worklist)
+ {
+ basic_block bb = *--tos;
+ if (!bitmap_bit_p (live_range, bb->index))
+ {
+ edge e;
+
+ bitmap_set_bit (live_range, bb->index);
+ IOR_HARD_REG_SET (*trs_live_in_range,
+ trs_live[bb->index]);
+ if (rtl_dump_file)
+ {
+ fprintf (rtl_dump_file,
+ "Adding block %d to live range\n", bb->index);
+ fprintf (rtl_dump_file,"Now live trs are ");
+ dump_hard_reg_set (*trs_live_in_range);
+ fprintf (rtl_dump_file, "\n");
+ }
+
+ for (e = bb->pred; e != NULL; e = e->pred_next)
+ {
+ basic_block pred = e->src;
+ if (!bitmap_bit_p (live_range, pred->index))
+ *tos++ = pred;
+ }
+ }
+ }
+
+ free (worklist);
+ }
+
+ /* Return the most desirable target register that is not in
+ the set USED_TRS. */
+ static int
+ choose_tr (used_trs)
+ HARD_REG_SET used_trs;
+ {
+ int i;
+ GO_IF_HARD_REG_SUBSET (all_trs, used_trs, give_up);
+
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ {
+ #ifdef REG_ALLOC_ORDER
+ int regno = reg_alloc_order[i];
+ #else
+ int regno = i;
+ #endif
+ if (TEST_HARD_REG_BIT (all_trs, regno)
+ && !TEST_HARD_REG_BIT (used_trs, regno))
+ return regno;
+ }
+ give_up:
+ return -1;
+ }
+
+ /* Calculate the set of basic blocks that contain the live range of
+ the def/use web DEF.
+ Also calculate the set of target registers that are live at time
+ in this live range, but ignore the live range represented by DEF
+ when calculating this set. */
+ static void
+ tr_def_live_range (def, trs_live_in_range)
+ tr_def def;
+ HARD_REG_SET *trs_live_in_range;
+ {
+ if (!def->live_range)
+ {
+ tr_user user;
+
+ def->live_range = BITMAP_XMALLOC ();
+
+ bitmap_set_bit (def->live_range, def->bb->index);
+ COPY_HARD_REG_SET (*trs_live_in_range, trs_live[def->bb->index]);
+
+ for (user = def->uses; user != NULL; user = user->next)
+ augment_live_range (def->live_range, trs_live_in_range,
+ def->bb, user->bb);
+ }
+ else
+ {
+ /* def->live_range is accurate, but we need to recompute
+ the set of target registers live over it, because migration
+ of other PT instructions may have affected it.
+ */
+ int bb;
+
+ CLEAR_HARD_REG_SET (*trs_live_in_range);
+ EXECUTE_IF_SET_IN_BITMAP
+ (def->live_range, 0, bb,
+ {
+ IOR_HARD_REG_SET (*trs_live_in_range,
+ trs_live[bb]);
+ });
+ }
+ if (!def->other_tr_uses_before_def &&
+ !def->other_tr_uses_after_use)
+ CLEAR_HARD_REG_BIT (*trs_live_in_range, def->tr);
+ }
+
+ /* Merge into the def/use web DEF any other def/use webs in the same
+ group that are dominated by DEF, provided that there is a target
+ register available to allocate to the merged web. */
+ static void
+ combine_tr_defs (def, trs_live_in_range)
+ tr_def def;
+ HARD_REG_SET *trs_live_in_range;
+ {
+ tr_def other_def;
+
+ for (other_def = def->group->members;
+ other_def != NULL;
+ other_def = other_def->next_this_group)
+ {
+ if (other_def != def
+ && other_def->uses != NULL
+ && ! other_def->has_ambiguous_use
+ && dominated_by_p (dom, other_def->bb, def->bb))
+ {
+ /* def->bb dominates the other def, so def and other_def could
+ be combined. */
+ /* Merge their live ranges, and get the set of
+ target registers live over the merged range. */
+ int tr;
+ HARD_REG_SET combined_trs_live;
+ bitmap combined_live_range = BITMAP_XMALLOC ();
+ tr_user user;
+
+ if (other_def->live_range == NULL)
+ {
+ HARD_REG_SET dummy_trs_live_in_range;
+ tr_def_live_range (other_def, &dummy_trs_live_in_range);
+ }
+ COPY_HARD_REG_SET (combined_trs_live, *trs_live_in_range);
+ bitmap_copy (combined_live_range, def->live_range);
+
+ for (user = other_def->uses; user != NULL; user = user->next)
+ augment_live_range (combined_live_range, &combined_trs_live,
+ def->bb, user->bb);
+
+ tr = choose_tr (combined_trs_live);
+ if (tr != -1)
+ {
+ /* We can combine them */
+ if (rtl_dump_file)
+ fprintf (rtl_dump_file,
+ "Combining def in insn %d with def in insn %d\n",
+ INSN_UID (other_def->insn), INSN_UID (def->insn));
+
+ def->tr = tr;
+ user = other_def->uses;
+ while (user != NULL)
+ {
+ tr_user next = user->next;
+
+ user->next = def->uses;
+ def->uses = user;
+ user = next;
+ }
+ /* Combining def/use webs can make target registers live
+ after uses where they previously were not. This means
+ some REG_DEAD notes may no longer be correct. We could
+ be more precise about this if we looked at the combined
+ live range, but here I just delete any REG_DEAD notes
+ in case they are no longer correct. */
+ for (user = def->uses; user != NULL; user = user->next)
+ remove_note (user->insn,
+ find_regno_note (user->insn, REG_DEAD,
+ REGNO (user->use)));
+ clear_tr_from_live_range (other_def);
+ other_def->uses = NULL;
+ bitmap_copy (def->live_range, combined_live_range);
+ if (other_def->other_tr_uses_after_use)
+ def->other_tr_uses_after_use = 1;
+ COPY_HARD_REG_SET (*trs_live_in_range, combined_trs_live);
+
+ /* Delete the old target register initialization */
+ delete_insn (other_def->insn);
+
+ }
+ BITMAP_XFREE (combined_live_range);
+ }
+ }
+ }
+
+ /* Move the definition DEF from its current position to basic
+ block NEW_DEF_BB, and modify it to use target register TR.
+ Delete the old defining insn, and insert a new one in NEW_DEF_BB.
+ Update all reaching uses of DEF in the RTL to use TR.
+ If this new position means that other defs in the
+ same group can be combined with DEF then combine them.
+ LIVE_RANGE is installed as DEF's new live range;
+ TRS_LIVE_IN_RANGE is the set of target registers live over that
+ range, and may be widened by combine_tr_defs. */
+ static void
+ move_tr_def (new_def_bb, tr, def, live_range, trs_live_in_range)
+ basic_block new_def_bb;
+ int tr;
+ tr_def def;
+ bitmap live_range;
+ HARD_REG_SET *trs_live_in_range;
+ {
+ /* We can move the instruction.
+ Set a target register in block NEW_DEF_BB to the value
+ needed for this target register definition.
+ Replace all uses of the old target register definition by
+ uses of the new definition. Delete the old definition. */
+ basic_block b = new_def_bb;
+ rtx insp = b->head;
+ rtx old_insn = def->insn;
+ rtx src;
+ rtx tr_rtx;
+ rtx new_insn;
+ enum machine_mode tr_mode;
+ tr_user user;
+ rtx set;
+
+ if (rtl_dump_file)
+ fprintf(rtl_dump_file, "migrating to basic block %d, using reg %d\n",
+ new_def_bb->index, tr);
+
+ /* Take DEF's old register out of the recorded live ranges before
+ rewriting DEF; the new register is re-added below once
+ combine_tr_defs has had a chance to change the choice. */
+ clear_tr_from_live_range (def);
+ def->tr = tr;
+ def->bb = new_def_bb;
+ /* The moved def is treated as the first insn of its new block. */
+ def->luid = 0;
+ def->cost = basic_block_freq (new_def_bb);
+ def->other_tr_uses_before_def = 0;
+ bitmap_copy (def->live_range, live_range);
+ combine_tr_defs (def, trs_live_in_range);
+ /* combine_tr_defs may itself have picked a different target
+ register, so re-fetch it. */
+ tr = def->tr;
+ add_tr_to_live_range (def);
+ if (GET_CODE (insp) == CODE_LABEL)
+ insp = NEXT_INSN (insp);
+ /* N.B.: insp is expected to be NOTE_INSN_BASIC_BLOCK now. Some
+ optimizations can result in insp being both first and last insn of
+ its basic block. */
+ /* ?? some assertions to check that insp is sensible? */
+
+ set = single_set (old_insn);
+ src = SET_SRC (set);
+ tr_mode = GET_MODE (SET_DEST (set));
+ tr_rtx = gen_rtx (REG, tr_mode, tr);
+
+ new_insn = gen_move_insn (tr_rtx, src);
+
+ /* Insert target register initialization at head of basic block. */
+ def->insn = emit_insn_after (new_insn, insp);
+
+ /* Record that TR is now used in this function, so later passes
+ know to preserve it where necessary. */
+ regs_ever_live[tr] = 1;
+
+ if (rtl_dump_file)
+ fprintf (rtl_dump_file, "New pt is insn %d, inserted after insn %d\n",
+ INSN_UID (def->insn), INSN_UID (insp));
+
+ /* Delete the old target register initialization */
+ delete_insn (old_insn);
+
+ /* Replace each use of the old target register by a use of the new target
+ register. */
+ for (user = def->uses; user != NULL; user = user->next)
+ {
+ /* Some extra work here to ensure consistent modes, because
+ it seems that a target register REG rtx can be given a different
+ mode depending on the context (surely that should not be
+ the case?). */
+ rtx replacement_rtx;
+ if (GET_MODE (user->use) == GET_MODE (tr_rtx)
+ || GET_MODE (user->use) == VOIDmode)
+ replacement_rtx = tr_rtx;
+ else
+ replacement_rtx = gen_rtx (REG, GET_MODE (user->use), tr);
+ replace_rtx (user->insn, user->use, replacement_rtx);
+ user->use = replacement_rtx;
+ }
+ }
+
+ /* We anticipate intra-block scheduling to be done. Return non-zero
+ if INSN could be moved up within BB by N_INSNS insns. */
+ static int
+ can_move_up (bb, insn, n_insns)
+ basic_block bb;
+ rtx insn;
+ int n_insns;
+ {
+ rtx scan;
+
+ /* Walk backwards from INSN towards the head of BB, spending one
+ unit of the N_INSNS budget for each real insn passed over. */
+ for (scan = insn; scan != bb->head && n_insns > 0; )
+ {
+ scan = PREV_INSN (scan);
+ /* ??? What if we have an anti-dependency that actually prevents the
+ scheduler from doing the move? We'd like to re-allocate the register,
+ but not necessarily put the load into another basic block. */
+ if (INSN_P (scan))
+ n_insns--;
+ }
+ return n_insns <= 0;
+ }
+
+ /* Attempt to migrate the target register definition DEF to an
+ earlier point in the flowgraph.
+
+ It is a precondition of this function that DEF is migratable:
+ i.e. it has a constant source, and all uses are unambiguous.
+
+ Only migrations that reduce the cost of DEF will be made.
+ MIN_COST is the lower bound on the cost of the DEF after migration.
+ If we migrate DEF so that its cost falls below MIN_COST,
+ then we do not attempt to migrate further. The idea is that
+ we migrate definitions in a priority order based on their cost,
+ when the cost of this definition falls below MIN_COST, then
+ there is another definition with cost == MIN_COST which now
+ has a higher priority than this definition.
+
+ Return non-zero if there may be benefit from attempting to
+ migrate this DEF further (i.e. we have reduced the cost below
+ MIN_COST, but we may be able to reduce it further).
+ Return zero if no further migration is possible. */
+ static int
+ migrate_tr_def (def, min_cost)
+ tr_def def;
+ int min_cost;
+ {
+ bitmap live_range;
+ HARD_REG_SET trs_live_in_range;
+ int tr_used_near_def = 0;
+ int def_basic_block_freq;
+ basic_block try;
+ int give_up = 0;
+ int def_moved = 0;
+ tr_user user;
+ int def_latency;
+
+ if (rtl_dump_file)
+ fprintf (rtl_dump_file,
+ "Attempting to migrate pt from insn %d (cost = %d, min_cost = %d) ... ",
+ INSN_UID (def->insn), def->cost, min_cost);
+
+ if (!def->group || def->has_ambiguous_use)
+ /* These defs are not migratable */
+ {
+ if (rtl_dump_file)
+ fprintf (rtl_dump_file, "it's not migratable\n");
+ return 0;
+ }
+
+ if (!def->uses)
+ /* We have combined this def with another in the same group, so
+ no need to consider it further.
+ */
+ {
+ if (rtl_dump_file)
+ fprintf (rtl_dump_file, "it's already combined with another pt\n");
+ return 0;
+ }
+
+ tr_def_live_range (def, &trs_live_in_range);
+ live_range = BITMAP_XMALLOC ();
+ bitmap_copy (live_range, def->live_range);
+
+ /* Obtain the latency of DEF's insn, then scale it by the issue rate
+ to approximate a distance in insn slots rather than cycles. */
+ if ((*targetm.sched.use_dfa_pipeline_interface) ())
+ def_latency = insn_default_latency (def->insn);
+ else
+ def_latency = result_ready_cost (def->insn);
+ def_latency *= issue_rate;
+
+ /* Look for a use in the same block that is so close after the def
+ that the load's latency may stall it, and that the scheduler
+ could not hide by moving the def up within the block. */
+ for (user = def->uses; user != NULL; user = user->next)
+ {
+ if (user->bb == def->bb
+ && user->luid > def->luid
+ && (def->luid + def_latency) > user->luid
+ && ! can_move_up (def->bb, def->insn,
+ (def->luid + def_latency) - user->luid))
+ {
+ tr_used_near_def = 1;
+ break;
+ }
+ }
+
+ def_basic_block_freq = basic_block_freq (def->bb);
+
+ /* Walk up the dominator tree from DEF's block, trying successively
+ more distant dominators as the new home for DEF, until its cost
+ drops below MIN_COST or no free target register remains. */
+ for (try = get_immediate_dominator (dom, def->bb);
+ !give_up && try && try != ENTRY_BLOCK_PTR && def->cost >= min_cost;
+ try = get_immediate_dominator (dom, try))
+ {
+ /* Try to move the instruction that sets the target register into
+ basic block TRY. */
+ int try_freq = basic_block_freq (try);
+
+ if (rtl_dump_file)
+ fprintf (rtl_dump_file, "trying block %d ...", try->index);
+
+ if (try_freq < def_basic_block_freq
+ || (try_freq == def_basic_block_freq && tr_used_near_def))
+ {
+ int tr;
+ augment_live_range (live_range, &trs_live_in_range, def->bb, try);
+ if (rtl_dump_file)
+ {
+ fprintf (rtl_dump_file, "Now trs live in range are: ");
+ dump_hard_reg_set (trs_live_in_range);
+ fprintf (rtl_dump_file, "\n");
+ }
+ tr = choose_tr (trs_live_in_range);
+ if (tr != -1)
+ {
+ move_tr_def (try, tr, def, live_range, &trs_live_in_range);
+ bitmap_copy(live_range, def->live_range);
+ tr_used_near_def = 0;
+ def_moved = 1;
+ def_basic_block_freq = basic_block_freq (def->bb);
+ }
+ else
+ {
+ /* There are no free target registers available to move
+ this far forward, so give up */
+ give_up = 1;
+ if (rtl_dump_file)
+ fprintf (rtl_dump_file,
+ "giving up because there are no free target registers\n");
+ }
+
+ }
+ }
+ if (!def_moved)
+ {
+ give_up = 1;
+ if (rtl_dump_file)
+ fprintf (rtl_dump_file, "failed to move\n");
+ }
+ BITMAP_XFREE (live_range);
+ return !give_up;
+ }
+
+ /* Attempt to move instructions that set target registers earlier
+ in the flowgraph, away from their corresponding uses.
+ TR_CLASS is the register class containing the target registers.
+ ALLOW_CALLEE_SAVE is non-zero if callee-saved registers that are
+ not already live in this function may also be allocated. */
+ static void
+ migrate_tr_defs (tr_class, allow_callee_save)
+ enum reg_class tr_class;
+ int allow_callee_save;
+ {
+ fibheap_t all_tr_defs = fibheap_new ();
+ int reg;
+
+ gcc_obstack_init (&migrate_pts_obstack);
+ if (rtl_dump_file)
+ {
+ int i;
+
+ for (i = 0; i < n_basic_blocks; i++)
+ {
+ basic_block bb = BASIC_BLOCK (i);
+ /* bb->count is a HOST_WIDEST_INT, which need not be long long,
+ so a hard-coded "%lld" is a format/argument mismatch on some
+ hosts (and unsupported by some host C libraries). Use the
+ HOST_WIDEST_INT_PRINT_DEC macro with an explicit cast. */
+ fprintf (rtl_dump_file,
+ "Basic block %d: count = " HOST_WIDEST_INT_PRINT_DEC
+ " loop-depth = %d idom = %d\n",
+ i, (HOST_WIDEST_INT) bb->count, bb->loop_depth,
+ get_immediate_dominator (dom, bb)->index);
+ }
+ }
+
+ /* Compute the set of allocatable target registers: members of
+ TR_CLASS that are call-used, already live somewhere in this
+ function, or (given ALLOW_CALLEE_SAVE) any member at all. */
+ CLEAR_HARD_REG_SET (all_trs);
+ for (first_tr = -1, reg = 0; reg < FIRST_PSEUDO_REGISTER; reg++)
+ if (TEST_HARD_REG_BIT (reg_class_contents[(int) tr_class], reg)
+ && (allow_callee_save || call_used_regs[reg] || regs_ever_live[reg]))
+ {
+ SET_HARD_REG_BIT (all_trs, reg);
+ last_tr = reg;
+ if (first_tr < 0)
+ first_tr = reg;
+ }
+
+ trs_live =
+ (HARD_REG_SET *) xcalloc (n_basic_blocks, sizeof (HARD_REG_SET));
+
+ build_tr_def_use_webs (all_tr_defs);
+
+ /* Process defs in decreasing order of cost (the fibheap is keyed on
+ negated cost). A def that was migrated successfully is requeued
+ in case it can be migrated further. */
+ while (!fibheap_empty (all_tr_defs))
+ {
+ tr_def def =
+ (tr_def) fibheap_extract_min (all_tr_defs);
+ int min_cost = -fibheap_min_key (all_tr_defs);
+ if (migrate_tr_def (def, min_cost))
+ {
+ fibheap_insert (all_tr_defs, -def->cost, (void *) def);
+ if (rtl_dump_file)
+ {
+ fprintf (rtl_dump_file,
+ "Putting insn %d back on queue with priority %d\n",
+ INSN_UID (def->insn), def->cost);
+ }
+ }
+ else
+ {
+ /* This def is done for good; release its live range bitmap. */
+ if (def->live_range)
+ BITMAP_XFREE (def->live_range);
+ }
+ }
+
+ free (trs_live);
+ obstack_free (&migrate_pts_obstack, NULL);
+ fibheap_delete (all_tr_defs);
+ }
+
+ /* Return non-zero if any basic block in the current function has an
+ outgoing edge flagged EDGE_ABNORMAL, zero otherwise. */
+ static int
+ flowgraph_contains_abnormal_edges_p ()
+ {
+ int bb_index;
+
+ for (bb_index = 0; bb_index < n_basic_blocks; ++bb_index)
+ {
+ edge succ_edge;
+
+ for (succ_edge = BASIC_BLOCK (bb_index)->succ;
+ succ_edge != NULL;
+ succ_edge = succ_edge->succ_next)
+ if ((succ_edge->flags & EDGE_ABNORMAL) != 0)
+ return 1;
+ }
+ return 0;
+ }
+
+ /* Main entry point of the target register load optimization.
+ INSNS is the insn chain of the current function.
+ AFTER_PROLOGUE_EPILOGUE_GEN is passed to the target hook that
+ decides whether callee-saved registers may be allocated.
+ Does nothing if the target reports no register class to optimize,
+ or if the flowgraph contains abnormal edges. */
+ void
+ target_registers_optimize (insns, after_prologue_epilogue_gen)
+ rtx insns;
+ bool after_prologue_epilogue_gen;
+ {
+ enum reg_class tr_class = (*targetm.optimize_target_register_class) ();
+
+ if (tr_class == NO_REGS)
+ return;
+
+ /* Initialize issue_rate. */
+ if (targetm.sched.issue_rate)
+ issue_rate = (*targetm.sched.issue_rate) ();
+ else
+ issue_rate = 1;
+
+ /* Build the CFG for migrate_tr_defs. */
+ #if 1
+ /* This may or may not be needed, depending on where we
+ run this phase. */
+ cleanup_cfg (optimize ? CLEANUP_EXPENSIVE : 0);
+ #endif
+
+ if (flowgraph_contains_abnormal_edges_p ())
+ /* If there are abnormal edges, we do not attempt
+ to optimize target register placement. The only
+ reason for this is that subsequent runs of
+ find_basic_blocks() get confused when an assignment
+ of a label to a target register is not adjacent to a
+ branch that uses that target register, and
+ introduce a whole lot _more_ abnormal edges. In
+ itself, this is pessimistic but harmless, but it can
+ introduce apparent control-paths that previously didn't
+ exist, causing basic block live-at-start sets to change,
+ and that causes an assertion in flow.c
+ (verify_local_live_at_start) to fail.
+ Abnormal edges are rare, so we just give up on target
+ register optimization for the current function if we see
+ any. */
+ {
+ if (rtl_dump_file)
+ fprintf (rtl_dump_file,
+ "Abandoning target register optimization, because there are abnormal edges");
+ return;
+ }
+
+ life_analysis (insns, NULL, 0);
+
+ /* Dominator info is also needed for migrate_pts. */
+ dom = calculate_dominance_info (CDI_DOMINATORS);
+ migrate_tr_defs (tr_class,
+ ((*targetm.optimize_target_register_callee_saved)
+ (after_prologue_epilogue_gen)));
+
+ free_dominance_info (dom);
+
+ update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES,
+ PROP_DEATH_NOTES | PROP_REG_INFO);
+ }