This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
rewrite ia64 call patterns
- From: Richard Henderson <rth at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 13 Mar 2003 01:04:49 -0800
- Subject: rewrite ia64 call patterns
Solves a theoretical problem with calls vs exceptions. We don't have a
test case that fails, but the problem is there nonetheless; I ran into
this on alpha, and David saw it on PA. The same solution is implemented
here -- split the call patterns after reload.
With a bit more work I think we'll be able to cse indirect calls to direct
calls, which doesn't happen at the moment because we _still_
expose too many architecture details too early.
r~
* emit-rtl.c (try_split): Handle 1-1 splits of call insns properly.
* config/ia64/ia64.c (TARGET_FUNCTION_OK_FOR_SIBCALL): New.
(ia64_gp_save_reg): Remove.
(struct ia64_frame_info): Move to the beginning of the file;
add reg_save_gp.
(ia64_expand_call): Rearrange for new call patterns.
(ia64_reload_gp): New.
(ia64_split_call): New.
(ia64_compute_frame_size): Allocate reg_save_gp.
(ia64_expand_prologue): Save reg_save_gp.
(ia64_expand_epilogue): Don't restore gp.
(ia64_hard_regno_rename_ok): Remove R4 hack.
(ia64_function_ok_for_sibcall): New.
(ia64_output_mi_thunk): Set reload_completed, no_new_pseudos;
call try_split on sibcall pattern.
* config/ia64/ia64-protos.h: Update.
* config/ia64/ia64.md (call_nogp, call_value_nogp, sibcall_nogp):
Rename from nopic versions. Confiscate 2nd argument to call as
a marker.
(call_pic, call_value_pic, sibcall_pic): Remove.
(call_gp, call_value_gp, sibcall_gp): New.
(builtin_setjmp_setup): Remove.
(builtin_setjmp_receiver): Call ia64_reload_gp.
Index: gcc/emit-rtl.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/emit-rtl.c,v
retrieving revision 1.315
diff -u -p -r1.315 emit-rtl.c
--- gcc/emit-rtl.c 8 Mar 2003 19:24:02 -0000 1.315
+++ gcc/emit-rtl.c 13 Mar 2003 08:52:21 -0000
@@ -3375,6 +3375,8 @@ try_split (pat, trial, last)
rtx tem;
rtx note, seq;
int probability;
+ rtx insn_last, insn;
+ int njumps = 0;
if (any_condjump_p (trial)
&& (note = find_reg_note (trial, REG_BR_PROB, 0)))
@@ -3393,172 +3395,147 @@ try_split (pat, trial, last)
after = NEXT_INSN (after);
}
- if (seq)
+ if (!seq)
+ return trial;
+
+ /* Avoid infinite loop if any insn of the result matches
+ the original pattern. */
+ insn_last = seq;
+ while (1)
{
- /* Sometimes there will be only one insn in that list, this case will
- normally arise only when we want it in turn to be split (SFmode on
- the 29k is an example). */
- if (NEXT_INSN (seq) != NULL_RTX)
- {
- rtx insn_last, insn;
- int njumps = 0;
+ if (INSN_P (insn_last)
+ && rtx_equal_p (PATTERN (insn_last), pat))
+ return trial;
+ if (!NEXT_INSN (insn_last))
+ break;
+ insn_last = NEXT_INSN (insn_last);
+ }
- /* Avoid infinite loop if any insn of the result matches
- the original pattern. */
- insn_last = seq;
- while (1)
+ /* Mark labels. */
+ for (insn = insn_last; insn ; insn = PREV_INSN (insn))
+ {
+ if (GET_CODE (insn) == JUMP_INSN)
+ {
+ mark_jump_label (PATTERN (insn), insn, 0);
+ njumps++;
+ if (probability != -1
+ && any_condjump_p (insn)
+ && !find_reg_note (insn, REG_BR_PROB, 0))
{
- if (INSN_P (insn_last)
- && rtx_equal_p (PATTERN (insn_last), pat))
- return trial;
- if (NEXT_INSN (insn_last) == NULL_RTX)
- break;
- insn_last = NEXT_INSN (insn_last);
+ /* We can preserve the REG_BR_PROB notes only if exactly
+ one jump is created, otherwise the machine description
+ is responsible for this step using
+ split_branch_probability variable. */
+ if (njumps != 1)
+ abort ();
+ REG_NOTES (insn)
+ = gen_rtx_EXPR_LIST (REG_BR_PROB,
+ GEN_INT (probability),
+ REG_NOTES (insn));
}
+ }
+ }
+
+ /* If we are splitting a CALL_INSN, look for the CALL_INSN
+ in SEQ and copy our CALL_INSN_FUNCTION_USAGE to it. */
+ if (GET_CODE (trial) == CALL_INSN)
+ {
+ for (insn = insn_last; insn ; insn = PREV_INSN (insn))
+ if (GET_CODE (insn) == CALL_INSN)
+ {
+ CALL_INSN_FUNCTION_USAGE (insn)
+ = CALL_INSN_FUNCTION_USAGE (trial);
+ SIBLING_CALL_P (insn) = SIBLING_CALL_P (trial);
+ }
+ }
- /* Mark labels. */
+ /* Copy notes, particularly those related to the CFG. */
+ for (note = REG_NOTES (trial); note; note = XEXP (note, 1))
+ {
+ switch (REG_NOTE_KIND (note))
+ {
+ case REG_EH_REGION:
insn = insn_last;
while (insn != NULL_RTX)
{
- if (GET_CODE (insn) == JUMP_INSN)
- {
- mark_jump_label (PATTERN (insn), insn, 0);
- njumps++;
- if (probability != -1
- && any_condjump_p (insn)
- && !find_reg_note (insn, REG_BR_PROB, 0))
- {
- /* We can preserve the REG_BR_PROB notes only if exactly
- one jump is created, otherwise the machine description
- is responsible for this step using
- split_branch_probability variable. */
- if (njumps != 1)
- abort ();
- REG_NOTES (insn)
- = gen_rtx_EXPR_LIST (REG_BR_PROB,
- GEN_INT (probability),
- REG_NOTES (insn));
- }
- }
-
+ if (GET_CODE (insn) == CALL_INSN
+ || (flag_non_call_exceptions
+ && may_trap_p (PATTERN (insn))))
+ REG_NOTES (insn)
+ = gen_rtx_EXPR_LIST (REG_EH_REGION,
+ XEXP (note, 0),
+ REG_NOTES (insn));
insn = PREV_INSN (insn);
}
+ break;
- /* If we are splitting a CALL_INSN, look for the CALL_INSN
- in SEQ and copy our CALL_INSN_FUNCTION_USAGE to it. */
- if (GET_CODE (trial) == CALL_INSN)
- {
- insn = insn_last;
- while (insn != NULL_RTX)
- {
- if (GET_CODE (insn) == CALL_INSN)
- CALL_INSN_FUNCTION_USAGE (insn)
- = CALL_INSN_FUNCTION_USAGE (trial);
-
- insn = PREV_INSN (insn);
- }
- }
-
- /* Copy notes, particularly those related to the CFG. */
- for (note = REG_NOTES (trial); note; note = XEXP (note, 1))
+ case REG_NORETURN:
+ case REG_SETJMP:
+ case REG_ALWAYS_RETURN:
+ insn = insn_last;
+ while (insn != NULL_RTX)
{
- switch (REG_NOTE_KIND (note))
- {
- case REG_EH_REGION:
- insn = insn_last;
- while (insn != NULL_RTX)
- {
- if (GET_CODE (insn) == CALL_INSN
- || (flag_non_call_exceptions
- && may_trap_p (PATTERN (insn))))
- REG_NOTES (insn)
- = gen_rtx_EXPR_LIST (REG_EH_REGION,
- XEXP (note, 0),
- REG_NOTES (insn));
- insn = PREV_INSN (insn);
- }
- break;
-
- case REG_NORETURN:
- case REG_SETJMP:
- case REG_ALWAYS_RETURN:
- insn = insn_last;
- while (insn != NULL_RTX)
- {
- if (GET_CODE (insn) == CALL_INSN)
- REG_NOTES (insn)
- = gen_rtx_EXPR_LIST (REG_NOTE_KIND (note),
- XEXP (note, 0),
- REG_NOTES (insn));
- insn = PREV_INSN (insn);
- }
- break;
-
- case REG_NON_LOCAL_GOTO:
- insn = insn_last;
- while (insn != NULL_RTX)
- {
- if (GET_CODE (insn) == JUMP_INSN)
- REG_NOTES (insn)
- = gen_rtx_EXPR_LIST (REG_NOTE_KIND (note),
- XEXP (note, 0),
- REG_NOTES (insn));
- insn = PREV_INSN (insn);
- }
- break;
-
- default:
- break;
- }
+ if (GET_CODE (insn) == CALL_INSN)
+ REG_NOTES (insn)
+ = gen_rtx_EXPR_LIST (REG_NOTE_KIND (note),
+ XEXP (note, 0),
+ REG_NOTES (insn));
+ insn = PREV_INSN (insn);
}
+ break;
- /* If there are LABELS inside the split insns increment the
- usage count so we don't delete the label. */
- if (GET_CODE (trial) == INSN)
+ case REG_NON_LOCAL_GOTO:
+ insn = insn_last;
+ while (insn != NULL_RTX)
{
- insn = insn_last;
- while (insn != NULL_RTX)
- {
- if (GET_CODE (insn) == INSN)
- mark_label_nuses (PATTERN (insn));
-
- insn = PREV_INSN (insn);
- }
+ if (GET_CODE (insn) == JUMP_INSN)
+ REG_NOTES (insn)
+ = gen_rtx_EXPR_LIST (REG_NOTE_KIND (note),
+ XEXP (note, 0),
+ REG_NOTES (insn));
+ insn = PREV_INSN (insn);
}
+ break;
- tem = emit_insn_after_scope (seq, trial, INSN_SCOPE (trial));
-
- delete_insn (trial);
- if (has_barrier)
- emit_barrier_after (tem);
-
- /* Recursively call try_split for each new insn created; by the
- time control returns here that insn will be fully split, so
- set LAST and continue from the insn after the one returned.
- We can't use next_active_insn here since AFTER may be a note.
- Ignore deleted insns, which can be occur if not optimizing. */
- for (tem = NEXT_INSN (before); tem != after; tem = NEXT_INSN (tem))
- if (! INSN_DELETED_P (tem) && INSN_P (tem))
- tem = try_split (PATTERN (tem), tem, 1);
+ default:
+ break;
}
- /* Avoid infinite loop if the result matches the original pattern. */
- else if (rtx_equal_p (PATTERN (seq), pat))
- return trial;
- else
+ }
+
+ /* If there are LABELS inside the split insns increment the
+ usage count so we don't delete the label. */
+ if (GET_CODE (trial) == INSN)
+ {
+ insn = insn_last;
+ while (insn != NULL_RTX)
{
- PATTERN (trial) = PATTERN (seq);
- INSN_CODE (trial) = -1;
- try_split (PATTERN (trial), trial, last);
- }
+ if (GET_CODE (insn) == INSN)
+ mark_label_nuses (PATTERN (insn));
- /* Return either the first or the last insn, depending on which was
- requested. */
- return last
- ? (after ? PREV_INSN (after) : last_insn)
- : NEXT_INSN (before);
+ insn = PREV_INSN (insn);
+ }
}
- return trial;
+ tem = emit_insn_after_scope (seq, trial, INSN_SCOPE (trial));
+
+ delete_insn (trial);
+ if (has_barrier)
+ emit_barrier_after (tem);
+
+ /* Recursively call try_split for each new insn created; by the
+ time control returns here that insn will be fully split, so
+ set LAST and continue from the insn after the one returned.
+ We can't use next_active_insn here since AFTER may be a note.
+ Ignore deleted insns, which can be occur if not optimizing. */
+ for (tem = NEXT_INSN (before); tem != after; tem = NEXT_INSN (tem))
+ if (! INSN_DELETED_P (tem) && INSN_P (tem))
+ tem = try_split (PATTERN (tem), tem, 1);
+
+ /* Return either the first or the last insn, depending on which was
+ requested. */
+ return last
+ ? (after ? PREV_INSN (after) : last_insn)
+ : NEXT_INSN (before);
}
/* Make and return an INSN rtx, initializing all its slots.
Index: gcc/config/ia64/ia64-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/ia64/ia64-protos.h,v
retrieving revision 1.51
diff -u -p -r1.51 ia64-protos.h
--- gcc/config/ia64/ia64-protos.h 9 Jan 2003 23:15:29 -0000 1.51
+++ gcc/config/ia64/ia64-protos.h 13 Mar 2003 08:52:21 -0000
@@ -77,11 +77,12 @@ extern int basereg_operand PARAMS((rtx,
extern rtx ia64_expand_move PARAMS ((rtx, rtx));
extern int ia64_move_ok PARAMS((rtx, rtx));
extern int ia64_depz_field_mask PARAMS((rtx, rtx));
-extern rtx ia64_gp_save_reg PARAMS((int));
extern rtx ia64_split_timode PARAMS((rtx[], rtx, rtx));
extern rtx spill_tfmode_operand PARAMS((rtx, int));
extern rtx ia64_expand_compare PARAMS((enum rtx_code, enum machine_mode));
extern void ia64_expand_call PARAMS((rtx, rtx, rtx, int));
+extern void ia64_split_call PARAMS((rtx, rtx, rtx, rtx, rtx, int, int));
+extern void ia64_reload_gp PARAMS((void));
extern HOST_WIDE_INT ia64_initial_elimination_offset PARAMS((int, int));
extern void ia64_expand_prologue PARAMS((void));
Index: gcc/config/ia64/ia64.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/ia64/ia64.c,v
retrieving revision 1.209
diff -u -p -r1.209 ia64.c
--- gcc/config/ia64/ia64.c 2 Mar 2003 22:15:51 -0000 1.209
+++ gcc/config/ia64/ia64.c 13 Mar 2003 08:52:21 -0000
@@ -123,6 +123,38 @@ unsigned int ia64_section_threshold;
TRUE if we do insn bundling instead of insn scheduling. */
int bundling_p = 0;
+/* Structure to be filled in by ia64_compute_frame_size with register
+ save masks and offsets for the current function. */
+
+struct ia64_frame_info
+{
+ HOST_WIDE_INT total_size; /* size of the stack frame, not including
+ the caller's scratch area. */
+ HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
+ HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
+ HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
+ HARD_REG_SET mask; /* mask of saved registers. */
+ unsigned int gr_used_mask; /* mask of registers in use as gr spill
+ registers or long-term scratches. */
+ int n_spilled; /* number of spilled registers. */
+ int reg_fp; /* register for fp. */
+ int reg_save_b0; /* save register for b0. */
+ int reg_save_pr; /* save register for prs. */
+ int reg_save_ar_pfs; /* save register for ar.pfs. */
+ int reg_save_ar_unat; /* save register for ar.unat. */
+ int reg_save_ar_lc; /* save register for ar.lc. */
+ int reg_save_gp; /* save register for gp. */
+ int n_input_regs; /* number of input registers used. */
+ int n_local_regs; /* number of local registers used. */
+ int n_output_regs; /* number of output registers used. */
+ int n_rotate_regs; /* number of rotating registers used. */
+
+ char need_regstk; /* true if a .regstk directive needed. */
+ char initialized; /* true if the data is finalized. */
+};
+
+/* Current frame information calculated by ia64_compute_frame_size. */
+static struct ia64_frame_info current_frame_info;
static int ia64_use_dfa_pipeline_interface PARAMS ((void));
static int ia64_first_cycle_multipass_dfa_lookahead PARAMS ((void));
@@ -147,6 +179,7 @@ static rtx gen_fr_spill_x PARAMS ((rtx,
static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));
static enum machine_mode hfa_element_mode PARAMS ((tree, int));
+static bool ia64_function_ok_for_sibcall PARAMS ((tree, tree));
static bool ia64_rtx_costs PARAMS ((rtx, int, int, int *));
static void fix_range PARAMS ((const char *));
static struct machine_function * ia64_init_machine_status PARAMS ((void));
@@ -313,6 +346,9 @@ static const struct attribute_spec ia64_
#define TARGET_HAVE_TLS true
#endif
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
+
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
@@ -1317,46 +1353,6 @@ ia64_expand_move (op0, op1)
return op1;
}
-rtx
-ia64_gp_save_reg (setjmp_p)
- int setjmp_p;
-{
- rtx save = cfun->machine->ia64_gp_save;
-
- if (save != NULL)
- {
- /* We can't save GP in a pseudo if we are calling setjmp, because
- pseudos won't be restored by longjmp. For now, we save it in r4. */
- /* ??? It would be more efficient to save this directly into a stack
- slot. Unfortunately, the stack slot address gets cse'd across
- the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
- place. */
-
- /* ??? Get the barf bag, Virginia. We've got to replace this thing
- in place, since this rtx is used in exception handling receivers.
- Moreover, we must get this rtx out of regno_reg_rtx or reload
- will do the wrong thing. */
- unsigned int old_regno = REGNO (save);
- if (setjmp_p && old_regno != GR_REG (4))
- {
- REGNO (save) = GR_REG (4);
- regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
- }
- }
- else
- {
- if (setjmp_p)
- save = gen_rtx_REG (DImode, GR_REG (4));
- else if (! optimize)
- save = gen_rtx_REG (DImode, LOC_REG (0));
- else
- save = gen_reg_rtx (DImode);
- cfun->machine->ia64_gp_save = save;
- }
-
- return save;
-}
-
/* Split a post-reload TImode reference into two DImode components. */
rtx
@@ -1494,67 +1490,148 @@ void
ia64_expand_call (retval, addr, nextarg, sibcall_p)
rtx retval;
rtx addr;
- rtx nextarg;
+ rtx nextarg ATTRIBUTE_UNUSED;
int sibcall_p;
{
- rtx insn, b0, pfs, gp_save, narg_rtx, dest;
- bool indirect_p;
- int narg;
+ rtx insn, b0;
addr = XEXP (addr, 0);
b0 = gen_rtx_REG (DImode, R_BR (0));
- pfs = gen_rtx_REG (DImode, AR_PFS_REGNUM);
-
- if (! nextarg)
- narg = 0;
- else if (IN_REGNO_P (REGNO (nextarg)))
- narg = REGNO (nextarg) - IN_REG (0);
- else
- narg = REGNO (nextarg) - OUT_REG (0);
- narg_rtx = GEN_INT (narg);
+ /* ??? Should do this for functions known to bind local too. */
if (TARGET_NO_PIC || TARGET_AUTO_PIC)
{
if (sibcall_p)
- insn = gen_sibcall_nopic (addr, narg_rtx, b0, pfs);
+ insn = gen_sibcall_nogp (addr);
else if (! retval)
- insn = gen_call_nopic (addr, narg_rtx, b0);
+ insn = gen_call_nogp (addr, b0);
else
- insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
- emit_call_insn (insn);
- return;
+ insn = gen_call_value_nogp (retval, addr, b0);
+ insn = emit_call_insn (insn);
}
-
- indirect_p = ! symbolic_operand (addr, VOIDmode);
-
- if (sibcall_p || (TARGET_CONST_GP && !indirect_p))
- gp_save = NULL_RTX;
else
- gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));
+ {
+ if (sibcall_p)
+ insn = gen_sibcall_gp (addr);
+ else if (! retval)
+ insn = gen_call_gp (addr, b0);
+ else
+ insn = gen_call_value_gp (retval, addr, b0);
+ insn = emit_call_insn (insn);
- if (gp_save)
- emit_move_insn (gp_save, pic_offset_table_rtx);
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
+ }
- /* If this is an indirect call, then we have the address of a descriptor. */
- if (indirect_p)
+ if (sibcall_p)
{
- dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
- emit_move_insn (pic_offset_table_rtx,
- gen_rtx_MEM (DImode, plus_constant (addr, 8)));
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
+ gen_rtx_REG (DImode, AR_PFS_REGNUM));
}
+}
+
+void
+ia64_reload_gp ()
+{
+ rtx tmp;
+
+ if (current_frame_info.reg_save_gp)
+ tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
else
- dest = addr;
+ {
+ HOST_WIDE_INT offset;
+
+ offset = (current_frame_info.spill_cfa_off
+ + current_frame_info.spill_size);
+ if (frame_pointer_needed)
+ {
+ tmp = hard_frame_pointer_rtx;
+ offset = -offset;
+ }
+ else
+ {
+ tmp = stack_pointer_rtx;
+ offset = current_frame_info.total_size - offset;
+ }
+
+ if (CONST_OK_FOR_I (offset))
+ emit_insn (gen_adddi3 (pic_offset_table_rtx,
+ tmp, GEN_INT (offset)));
+ else
+ {
+ emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
+ emit_insn (gen_adddi3 (pic_offset_table_rtx,
+ pic_offset_table_rtx, tmp));
+ }
+
+ tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
+ }
+
+ emit_move_insn (pic_offset_table_rtx, tmp);
+}
+
+void
+ia64_split_call (retval, addr, retaddr, scratch_r, scratch_b,
+ noreturn_p, sibcall_p)
+ rtx retval, addr, retaddr, scratch_r, scratch_b;
+ int noreturn_p, sibcall_p;
+{
+ rtx insn;
+ bool is_desc = false;
+
+ /* If we find we're calling through a register, then we're actually
+ calling through a descriptor, so load up the values. */
+ if (REG_P (addr))
+ {
+ rtx tmp;
+ bool addr_dead_p;
+
+ /* ??? We are currently constrained to *not* use peep2, because
+ we can legitimately change the global lifetime of the GP
+ (in the form of killing where previously live). This is
+ because a call through a descriptor doesn't use the previous
+ value of the GP, while a direct call does, and we do not
+ commit to either form until the split here.
+
+ That said, this means that we lack precise life info for
+ whether ADDR is dead after this call. This is not terribly
+ important, since we can fix things up essentially for free
+ with the POST_DEC below, but it's nice to not use it when we
+ can immediately tell it's not necessary. */
+ addr_dead_p = ((noreturn_p || sibcall_p
+ || TEST_HARD_REG_BIT (regs_invalidated_by_call,
+ REGNO (addr)))
+ && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
+
+ /* Load the code address into scratch_b. */
+ tmp = gen_rtx_POST_INC (Pmode, addr);
+ tmp = gen_rtx_MEM (Pmode, tmp);
+ emit_move_insn (scratch_r, tmp);
+ emit_move_insn (scratch_b, scratch_r);
+
+ /* Load the GP address. If ADDR is not dead here, then we must
+ revert the change made above via the POST_INCREMENT. */
+ if (!addr_dead_p)
+ tmp = gen_rtx_POST_DEC (Pmode, addr);
+ else
+ tmp = addr;
+ tmp = gen_rtx_MEM (Pmode, tmp);
+ emit_move_insn (pic_offset_table_rtx, tmp);
+
+ is_desc = true;
+ addr = scratch_b;
+ }
if (sibcall_p)
- insn = gen_sibcall_pic (dest, narg_rtx, b0, pfs);
- else if (! retval)
- insn = gen_call_pic (dest, narg_rtx, b0);
+ insn = gen_sibcall_nogp (addr);
+ else if (retval)
+ insn = gen_call_value_nogp (retval, addr, retaddr);
else
- insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
+ insn = gen_call_nogp (addr, retaddr);
emit_call_insn (insn);
- if (gp_save)
- emit_move_insn (pic_offset_table_rtx, gp_save);
+ if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
+ ia64_reload_gp ();
}
/* Begin the assembly file. */
@@ -1593,39 +1670,6 @@ emit_safe_across_calls (f)
fputc ('\n', f);
}
-
-/* Structure to be filled in by ia64_compute_frame_size with register
- save masks and offsets for the current function. */
-
-struct ia64_frame_info
-{
- HOST_WIDE_INT total_size; /* size of the stack frame, not including
- the caller's scratch area. */
- HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
- HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
- HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
- HARD_REG_SET mask; /* mask of saved registers. */
- unsigned int gr_used_mask; /* mask of registers in use as gr spill
- registers or long-term scratches. */
- int n_spilled; /* number of spilled registers. */
- int reg_fp; /* register for fp. */
- int reg_save_b0; /* save register for b0. */
- int reg_save_pr; /* save register for prs. */
- int reg_save_ar_pfs; /* save register for ar.pfs. */
- int reg_save_ar_unat; /* save register for ar.unat. */
- int reg_save_ar_lc; /* save register for ar.lc. */
- int n_input_regs; /* number of input registers used. */
- int n_local_regs; /* number of local registers used. */
- int n_output_regs; /* number of output registers used. */
- int n_rotate_regs; /* number of rotating registers used. */
-
- char need_regstk; /* true if a .regstk directive needed. */
- char initialized; /* true if the data is finalized. */
-};
-
-/* Current frame information calculated by ia64_compute_frame_size. */
-static struct ia64_frame_info current_frame_info;
-
/* Helper function for ia64_compute_frame_size: find an appropriate general
register to spill some special register to. SPECIAL_SPILL_MASK contains
bits in GR0 to GR31 that have already been allocated by this routine.
@@ -1867,6 +1911,17 @@ ia64_compute_frame_size (size)
extra_spill_size += 8;
n_spilled += 1;
}
+
+ /* Similarly for gp. Note that if we're calling setjmp, the stacked
+ registers are clobbered, so we fall back to the stack. */
+ current_frame_info.reg_save_gp
+ = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
+ if (current_frame_info.reg_save_gp == 0)
+ {
+ SET_HARD_REG_BIT (mask, GR_REG (1));
+ spill_size += 8;
+ n_spilled += 1;
+ }
}
else
{
@@ -2570,6 +2625,19 @@ ia64_expand_prologue ()
}
}
+ if (current_frame_info.reg_save_gp)
+ {
+ insn = emit_move_insn (gen_rtx_REG (DImode,
+ current_frame_info.reg_save_gp),
+ pic_offset_table_rtx);
+ /* We don't know for sure yet if this is actually needed, since
+ we've not split the PIC call patterns. If all of the calls
+ are indirect, and not followed by any uses of the gp, then
+ this save is dead. Allow it to go away. */
+ REG_NOTES (insn)
+ = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
+ }
+
/* We should now be at the base of the gr/br/fr spill area. */
if (cfa_off != (current_frame_info.spill_cfa_off
+ current_frame_info.spill_size))
@@ -2751,8 +2819,13 @@ ia64_expand_epilogue (sibcall_p)
+ current_frame_info.spill_size))
abort ();
+ /* The GP may be stored on the stack in the prologue, but it's
+ never restored in the epilogue. Skip the stack slot. */
+ if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
+ cfa_off -= 8;
+
/* Restore all general registers. */
- for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
+ for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
{
reg = gen_rtx_REG (DImode, regno);
@@ -2940,10 +3013,6 @@ ia64_hard_regno_rename_ok (from, to)
if (PR_REGNO_P (from) && PR_REGNO_P (to))
return (from & 1) == (to & 1);
- /* Reg 4 contains the saved gp; we can't reliably rename this. */
- if (from == GR_REG (4) && current_function_calls_setjmp)
- return 0;
-
return 1;
}
@@ -3572,6 +3641,23 @@ ia64_function_arg_pass_by_reference (cum
{
return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
+
+/* True if it is OK to do sibling call optimization for the specified
+ call expression EXP. DECL will be the called function, or NULL if
+ this is an indirect call. */
+static bool
+ia64_function_ok_for_sibcall (decl, exp)
+ tree decl, exp;
+{
+ /* Direct calls are always ok. */
+ if (decl)
+ return true;
+
+ /* If TARGET_CONST_GP is in effect, then our caller expects us to
+ return with our current GP. This means that we'll always have
+ a GP reload after an indirect call. */
+ return !ia64_epilogue_uses (R_GR (1));
+}
/* Implement va_arg. */
@@ -8419,6 +8505,9 @@ ia64_output_mi_thunk (file, thunk, delta
{
rtx this, insn, funexp;
+ reload_completed = 1;
+ no_new_pseudos = 1;
+
/* Set things up as ia64_expand_prologue might. */
last_scratch_gr_reg = 15;
@@ -8481,18 +8570,27 @@ ia64_output_mi_thunk (file, thunk, delta
ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
insn = get_last_insn ();
SIBLING_CALL_P (insn) = 1;
+
+ /* Code generation for calls relies on splitting. */
+ reload_completed = 1;
+ try_split (PATTERN (insn), insn, 0);
+
emit_barrier ();
/* Run just enough of rest_of_compilation to get the insns emitted.
There's not really enough bulk here to make other passes such as
instruction scheduling worth while. Note that use_thunk calls
assemble_start_function and assemble_end_function. */
+
insn = get_insns ();
emit_all_insn_group_barriers (NULL, insn);
shorten_branches (insn);
final_start_function (insn, file, 1);
final (insn, file, 1, 0);
final_end_function ();
+
+ reload_completed = 0;
+ no_new_pseudos = 0;
}
#include "gt-ia64.h"
Index: gcc/config/ia64/ia64.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/ia64/ia64.md,v
retrieving revision 1.98
diff -u -p -r1.98 ia64.md
--- gcc/config/ia64/ia64.md 2 Mar 2003 22:15:51 -0000 1.98
+++ gcc/config/ia64/ia64.md 13 Mar 2003 08:52:21 -0000
@@ -4671,7 +4671,7 @@
(use (match_operand 3 "" ""))]
""
{
- ia64_expand_call (NULL_RTX, operands[0], operands[2], 0);
+ ia64_expand_call (NULL_RTX, operands[0], operands[2], false);
DONE;
})
@@ -4682,7 +4682,7 @@
(use (match_operand 3 "" ""))]
""
{
- ia64_expand_call (NULL_RTX, operands[0], operands[2], 1);
+ ia64_expand_call (NULL_RTX, operands[0], operands[2], true);
DONE;
})
@@ -4701,7 +4701,7 @@
(use (match_operand 4 "" ""))]
""
{
- ia64_expand_call (operands[0], operands[1], operands[3], 0);
+ ia64_expand_call (operands[0], operands[1], operands[3], false);
DONE;
})
@@ -4713,7 +4713,7 @@
(use (match_operand 4 "" ""))]
""
{
- ia64_expand_call (operands[0], operands[1], operands[3], 1);
+ ia64_expand_call (operands[0], operands[1], operands[3], true);
DONE;
})
@@ -4745,59 +4745,125 @@
DONE;
})
-(define_insn "call_nopic"
- [(call (mem:DI (match_operand:DI 0 "call_operand" "b,i"))
- (match_operand 1 "" ""))
- (clobber (match_operand:DI 2 "register_operand" "=b,b"))]
+(define_insn "call_nogp"
+ [(call (mem:DI (match_operand:DI 0 "call_operand" "?b,i"))
+ (const_int 0))
+ (clobber (match_operand:DI 1 "register_operand" "=b,b"))]
""
- "br.call%+.many %2 = %0"
+ "br.call%+.many %1 = %0"
[(set_attr "itanium_class" "br,scall")])
-(define_insn "call_value_nopic"
+(define_insn "call_value_nogp"
[(set (match_operand 0 "" "")
- (call (mem:DI (match_operand:DI 1 "call_operand" "b,i"))
- (match_operand 2 "" "")))
- (clobber (match_operand:DI 3 "register_operand" "=b,b"))]
+ (call (mem:DI (match_operand:DI 1 "call_operand" "?b,i"))
+ (const_int 0)))
+ (clobber (match_operand:DI 2 "register_operand" "=b,b"))]
""
- "br.call%+.many %3 = %1"
+ "br.call%+.many %2 = %1"
[(set_attr "itanium_class" "br,scall")])
-(define_insn "sibcall_nopic"
- [(call (mem:DI (match_operand:DI 0 "call_operand" "b,i"))
- (match_operand 1 "" ""))
- (use (match_operand:DI 2 "register_operand" "=b,b"))
- (use (match_operand:DI 3 "ar_pfs_reg_operand" ""))]
+(define_insn "sibcall_nogp"
+ [(call (mem:DI (match_operand:DI 0 "call_operand" "?b,i"))
+ (const_int 0))]
""
"br%+.many %0"
[(set_attr "itanium_class" "br,scall")])
-(define_insn "call_pic"
- [(call (mem (match_operand 0 "call_operand" "b,i"))
- (match_operand 1 "" ""))
- (use (unspec [(reg:DI 1)] UNSPEC_PIC_CALL))
- (clobber (match_operand:DI 2 "register_operand" "=b,b"))]
+(define_insn "call_gp"
+ [(call (mem (match_operand 0 "call_operand" "?r,i"))
+ (const_int 1))
+ (clobber (match_operand:DI 1 "register_operand" "=b,b"))
+ (clobber (match_scratch:DI 2 "=&r,X"))
+ (clobber (match_scratch:DI 3 "=b,X"))]
""
- "br.call%+.many %2 = %0"
+ "#"
[(set_attr "itanium_class" "br,scall")])
-(define_insn "call_value_pic"
+;; Irritatingly, we don't have access to INSN within the split body.
+;; See commentary in ia64_split_call as to why these aren't peep2.
+(define_split
+ [(call (mem (match_operand 0 "call_operand" ""))
+ (const_int 1))
+ (clobber (match_operand:DI 1 "register_operand" ""))
+ (clobber (match_scratch:DI 2 ""))
+ (clobber (match_scratch:DI 3 ""))]
+ "reload_completed && find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+ [(const_int 0)]
+{
+ ia64_split_call (NULL_RTX, operands[0], operands[1], operands[2],
+ operands[3], true, false);
+ DONE;
+})
+
+(define_split
+ [(call (mem (match_operand 0 "call_operand" ""))
+ (const_int 1))
+ (clobber (match_operand:DI 1 "register_operand" ""))
+ (clobber (match_scratch:DI 2 ""))
+ (clobber (match_scratch:DI 3 ""))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ ia64_split_call (NULL_RTX, operands[0], operands[1], operands[2],
+ operands[3], false, false);
+ DONE;
+})
+
+(define_insn "call_value_gp"
[(set (match_operand 0 "" "")
- (call (mem:DI (match_operand:DI 1 "call_operand" "b,i"))
- (match_operand 2 "" "")))
- (use (unspec [(reg:DI 1)] UNSPEC_PIC_CALL))
- (clobber (match_operand:DI 3 "register_operand" "=b,b"))]
+ (call (mem:DI (match_operand:DI 1 "call_operand" "?r,i"))
+ (const_int 1)))
+ (clobber (match_operand:DI 2 "register_operand" "=b,b"))
+ (clobber (match_scratch:DI 3 "=&r,X"))
+ (clobber (match_scratch:DI 4 "=b,X"))]
""
- "br.call%+.many %3 = %1"
+ "#"
[(set_attr "itanium_class" "br,scall")])
-(define_insn "sibcall_pic"
- [(call (mem:DI (match_operand:DI 0 "call_operand" "bi"))
- (match_operand 1 "" ""))
- (use (unspec [(reg:DI 1)] UNSPEC_PIC_CALL))
- (use (match_operand:DI 2 "register_operand" "=b"))
- (use (match_operand:DI 3 "ar_pfs_reg_operand" ""))]
+(define_split
+ [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand:DI 1 "call_operand" ""))
+ (const_int 1)))
+ (clobber (match_operand:DI 2 "register_operand" ""))
+ (clobber (match_scratch:DI 3 ""))
+ (clobber (match_scratch:DI 4 ""))]
+ "reload_completed && find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+ [(const_int 0)]
+{
+ ia64_split_call (operands[0], operands[1], operands[2], operands[3],
+ operands[4], true, false);
+ DONE;
+})
+
+(define_split
+ [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand:DI 1 "call_operand" ""))
+ (const_int 1)))
+ (clobber (match_operand:DI 2 "register_operand" ""))
+ (clobber (match_scratch:DI 3 ""))
+ (clobber (match_scratch:DI 4 ""))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ ia64_split_call (operands[0], operands[1], operands[2], operands[3],
+ operands[4], false, false);
+ DONE;
+})
+
+(define_insn_and_split "sibcall_gp"
+ [(call (mem:DI (match_operand:DI 0 "call_operand" "?r,i"))
+ (const_int 1))
+ (clobber (match_scratch:DI 1 "=&r,X"))
+ (clobber (match_scratch:DI 2 "=b,X"))]
""
- "br%+.many %0"
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ ia64_split_call (NULL_RTX, operands[0], NULL_RTX, operands[1],
+ operands[2], true, true);
+ DONE;
+}
[(set_attr "itanium_class" "br")])
(define_insn "return_internal"
@@ -5263,21 +5329,11 @@
DONE;
})
-;; The rest of the setjmp processing happens with the nonlocal_goto expander.
-;; ??? This is not tested.
-(define_expand "builtin_setjmp_setup"
- [(use (match_operand:DI 0 "" ""))]
- ""
-{
- emit_move_insn (ia64_gp_save_reg (0), gen_rtx_REG (DImode, GR_REG (1)));
- DONE;
-})
-
(define_expand "builtin_setjmp_receiver"
[(use (match_operand:DI 0 "" ""))]
""
{
- emit_move_insn (gen_rtx_REG (DImode, GR_REG (1)), ia64_gp_save_reg (0));
+ ia64_reload_gp ();
DONE;
})