This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
pa.c clean-up
- To: gcc-patches at gcc dot gnu dot org
- Subject: pa.c clean-up
- From: Alan Modra <alan at linuxcare dot com dot au>
- Date: Wed, 24 Jan 2001 21:43:39 +1100 (EST)
Besides some tidying of code, this patch fixes a couple of bugs
- VAL_14_BITS_P was called with negative of actual value used in
instructions. With exactly the right value of frame size (8k), this
could result in illegal instructions.
- Lack of a blockage after the stack pointer was adjusted in
hppa_expand_prologue resulted in register spills being scheduled before
the stack frame was valid. We hit this problem in the linux kernel.
gcc/ChangeLog
* config/pa/pa.c: (emit_move_sequence): Avoid compiler warnings.
(compute_movstrsi_length): Likewise.
(remove_useless_addtr_insns): Formatting fix.
* config/pa/pa.c (hppa_expand_prologue): Simplify code storing
return pointer. For large (> 8k) frames with a post_store, adjust
stack pointer by 8k-64 first rather than by 64. When testing with
VAL_14_BITS_P, always use the actual value rather than the value
negated. Add blockages to prevent scheduling of spills before
stack frame has been created.
(hppa_expand_epilogue): Simplify code loading return pointer.
Allow a slightly larger range for merge_sp_adjust_with_load case.
When testing with VAL_14_BITS_P, always use the actual value.
Bootstrapped on both hppa-linux and hppa2.0w-hpux11.00
Alan Modra
--
Linuxcare. Support for the Revolution.
diff -urp -xCVS -x*~ egcs/gcc/config/pa/pa.c newegcs/gcc/config/pa/pa.c
--- egcs/gcc/config/pa/pa.c Mon Jan 22 00:28:20 2001
+++ newegcs/gcc/config/pa/pa.c Wed Jan 24 11:43:37 2001
@@ -1603,13 +1603,14 @@ emit_move_sequence (operands, mode, scra
&& GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
{
HOST_WIDE_INT val = INTVAL (operand1);
- HOST_WIDE_INT nval = INTVAL (operand1);
+ HOST_WIDE_INT nval;
/* If the value is the same after a 32->64bit sign
extension, then we can use it as-is. Else we will
need to sign extend the constant from 32->64bits
then zero extend the result from 32->64bits. */
- nval = ((val & 0xffffffff) ^ (~0x7fffffff)) + 0x80000000;
+ nval = ((val & (((HOST_WIDE_INT) 2 << 31) - 1))
+ ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
if (val != nval)
{
need_zero_extend = 1;
@@ -2291,7 +2292,7 @@ compute_movstrsi_length (insn)
rtx insn;
{
rtx pat = PATTERN (insn);
- int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
+ unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0));
unsigned int n_insns = 0;
@@ -2682,8 +2683,8 @@ remove_useless_addtr_insns (insns, check
/* Reverse our condition. */
tmp = PATTERN (insn);
PUT_CODE (XEXP (tmp, 1),
- reverse_condition_maybe_unordered (GET_CODE (XEXP (tmp,
- 1))));
+ (reverse_condition_maybe_unordered
+ (GET_CODE (XEXP (tmp, 1)))));
}
}
}
@@ -2988,70 +2989,77 @@ hppa_expand_prologue()
/* Save RP first. The calling conventions manual states RP will
always be stored into the caller's frame at sp-20 or sp - 16
depending on which ABI is in use. */
- if ((regs_ever_live[2] || profile_flag) && TARGET_64BIT)
- store_reg (2, -16, STACK_POINTER_REGNUM);
-
- if ((regs_ever_live[2] || profile_flag) && ! TARGET_64BIT)
- store_reg (2, -20, STACK_POINTER_REGNUM);
+ if (regs_ever_live[2] || profile_flag)
+ store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
/* Allocate the local frame and set up the frame pointer if needed. */
- if (actual_fsize)
- {
- if (frame_pointer_needed)
- {
- /* Copy the old frame pointer temporarily into %r1. Set up the
- new stack pointer, then store away the saved old frame pointer
- into the stack at sp+actual_fsize and at the same time update
- the stack pointer by actual_fsize bytes. Two versions, first
- handles small (<8k) frames. The second handles large (>8k)
- frames. */
- emit_move_insn (tmpreg, frame_pointer_rtx);
- emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
- if (VAL_14_BITS_P (actual_fsize))
- emit_insn (gen_post_store (stack_pointer_rtx, tmpreg, size_rtx));
- else
- {
- /* It is incorrect to store the saved frame pointer at *sp,
- then increment sp (writes beyond the current stack boundary).
+ if (actual_fsize != 0)
+ {
+ if (frame_pointer_needed)
+ {
+ /* Copy the old frame pointer temporarily into %r1. Set up the
+ new stack pointer, then store away the saved old frame pointer
+ into the stack at sp+actual_fsize and at the same time update
+ the stack pointer by actual_fsize bytes. Two versions, first
+ handles small (<8k) frames. The second handles large (>8k)
+ frames. */
+ emit_move_insn (tmpreg, frame_pointer_rtx);
+ emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
+ if (VAL_14_BITS_P (actual_fsize))
+ emit_insn (gen_post_store (stack_pointer_rtx, tmpreg, size_rtx));
+ else
+ {
+ /* It is incorrect to store the saved frame pointer at *sp,
+ then increment sp (writes beyond the current stack boundary).
- So instead use stwm to store at *sp and post-increment the
- stack pointer as an atomic operation. Then increment sp to
- finish allocating the new frame. */
- emit_insn (gen_post_store (stack_pointer_rtx, tmpreg,
- GEN_INT (64)));
+ So instead use stwm to store at *sp and post-increment the
+ stack pointer as an atomic operation. Then increment sp to
+ finish allocating the new frame. */
+ int adjust1 = 8192 - 64;
+ int adjust2 = actual_fsize - adjust1;
+ rtx delta = GEN_INT (adjust1);
+ emit_insn (gen_post_store (stack_pointer_rtx, tmpreg, delta));
+ set_reg_plus_d (STACK_POINTER_REGNUM,
+ STACK_POINTER_REGNUM,
+ adjust2);
+ }
+ }
+ /* no frame pointer needed. */
+ else
+ {
+ /* In some cases we can perform the first callee register save
+ and allocating the stack frame at the same time. If so, just
+ make a note of it and defer allocating the frame until saving
+ the callee registers. */
+ if (VAL_14_BITS_P (actual_fsize)
+ && local_fsize == 0
+ && ! profile_flag
+ && ! flag_pic)
+ merge_sp_adjust_with_store = 1;
+ /* Can not optimize. Adjust the stack frame by actual_fsize
+ bytes. */
+ else
set_reg_plus_d (STACK_POINTER_REGNUM,
STACK_POINTER_REGNUM,
- actual_fsize - 64);
- }
- }
- /* no frame pointer needed. */
- else
- {
- /* In some cases we can perform the first callee register save
- and allocating the stack frame at the same time. If so, just
- make a note of it and defer allocating the frame until saving
- the callee registers. */
- if (VAL_14_BITS_P (-actual_fsize)
- && local_fsize == 0
- && ! profile_flag
- && ! flag_pic)
- merge_sp_adjust_with_store = 1;
- /* Can not optimize. Adjust the stack frame by actual_fsize bytes. */
- else if (actual_fsize != 0)
- set_reg_plus_d (STACK_POINTER_REGNUM,
- STACK_POINTER_REGNUM,
- actual_fsize);
- }
- }
+ actual_fsize);
+ }
- /* The hppa calling conventions say that %r19, the pic offset
- register, is saved at sp - 32 (in this function's frame) when
- generating PIC code. FIXME: What is the correct thing to do
- for functions which make no calls and allocate no frame? Do
- we need to allocate a frame, or can we just omit the save? For
- now we'll just omit the save. */
- if (actual_fsize != 0 && flag_pic && !TARGET_64BIT)
- store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);
+ if (! merge_sp_adjust_with_store)
+ {
+ /* Prevent register spills from being scheduled before the
+ stack pointer is raised. */
+ emit_insn (gen_blockage ());
+ }
+
+ /* The hppa calling conventions say that %r19, the pic offset
+ register, is saved at sp - 32 (in this function's frame)
+ when generating PIC code. FIXME: What is the correct thing
+ to do for functions which make no calls and allocate no
+ frame? Do we need to allocate a frame, or can we just omit
+ the save? For now we'll just omit the save. */
+ if (flag_pic && !TARGET_64BIT)
+ store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);
+ }
/* Profiling code.
@@ -3073,8 +3081,9 @@ hppa_expand_prologue()
pointer. Adjust the offset according to the frame size if
this function does not have a frame pointer. */
- basereg = frame_pointer_needed ? FRAME_POINTER_REGNUM
- : STACK_POINTER_REGNUM;
+ basereg = (frame_pointer_needed
+ ? FRAME_POINTER_REGNUM
+ : STACK_POINTER_REGNUM);
offsetadj = frame_pointer_needed ? 0 : actual_fsize;
/* Horrid hack. emit_function_prologue will modify this RTL in
@@ -3145,6 +3154,9 @@ hppa_expand_prologue()
emit_insn (gen_post_store (stack_pointer_rtx,
gen_rtx_REG (word_mode, i),
GEN_INT (-offset)));
+ /* Prevent register spills from being scheduled before
+ the stack pointer is raised. */
+ emit_insn (gen_blockage ());
}
else
store_reg (i, offset, STACK_POINTER_REGNUM);
@@ -3155,9 +3167,14 @@ hppa_expand_prologue()
/* If we wanted to merge the SP adjustment with a GR save, but we never
did any GR saves, then just emit the adjustment here. */
if (merge_sp_adjust_with_store)
- set_reg_plus_d (STACK_POINTER_REGNUM,
- STACK_POINTER_REGNUM,
- actual_fsize);
+ {
+ set_reg_plus_d (STACK_POINTER_REGNUM,
+ STACK_POINTER_REGNUM,
+ actual_fsize);
+ /* Prevent register spills from being scheduled before the
+ stack pointer is raised. */
+ emit_insn (gen_blockage ());
+ }
}
/* Align pointer properly (doubleword boundary). */
@@ -3244,8 +3261,9 @@ void
hppa_expand_epilogue ()
{
rtx tmpreg;
- int offset,i;
- int merge_sp_adjust_with_load = 0;
+ int offset, i;
+ int merge_sp_adjust_with_load = 0;
+ int ret_off = 0;
/* We will use this often. */
tmpreg = gen_rtx_REG (word_mode, 1);
@@ -3253,23 +3271,24 @@ hppa_expand_epilogue ()
/* Try to restore RP early to avoid load/use interlocks when
RP gets used in the return (bv) instruction. This appears to still
be necessary even when we schedule the prologue and epilogue. */
- if (frame_pointer_needed
- && !TARGET_64BIT
- && (regs_ever_live [2] || profile_flag))
- load_reg (2, -20, FRAME_POINTER_REGNUM);
- else if (TARGET_64BIT && frame_pointer_needed
- && (regs_ever_live[2] || profile_flag))
- load_reg (2, -16, FRAME_POINTER_REGNUM);
- else if (TARGET_64BIT
- && ! frame_pointer_needed
- && (regs_ever_live[2] || profile_flag)
- && VAL_14_BITS_P (actual_fsize + 20))
- load_reg (2, - (actual_fsize + 16), STACK_POINTER_REGNUM);
- /* No frame pointer, and stack is smaller than 8k. */
- else if (! frame_pointer_needed
- && VAL_14_BITS_P (actual_fsize + 20)
- && (regs_ever_live[2] || profile_flag))
- load_reg (2, - (actual_fsize + 20), STACK_POINTER_REGNUM);
+ if (regs_ever_live [2] || profile_flag)
+ {
+ ret_off = TARGET_64BIT ? -16 : -20;
+ if (frame_pointer_needed)
+ {
+ load_reg (2, ret_off, FRAME_POINTER_REGNUM);
+ ret_off = 0;
+ }
+ else
+ {
+ /* No frame pointer, and stack is smaller than 8k. */
+ if (VAL_14_BITS_P (ret_off - actual_fsize))
+ {
+ load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
+ ret_off = 0;
+ }
+ }
+ }
/* General register restores. */
if (frame_pointer_needed)
@@ -3290,9 +3309,9 @@ hppa_expand_epilogue ()
/* Only for the first load.
merge_sp_adjust_with_load holds the register load
with which we will merge the sp adjustment. */
- if (VAL_14_BITS_P (actual_fsize + 20)
+ if (merge_sp_adjust_with_load == 0
&& local_fsize == 0
- && ! merge_sp_adjust_with_load)
+ && VAL_14_BITS_P (-actual_fsize))
merge_sp_adjust_with_load = i;
else
load_reg (i, offset, STACK_POINTER_REGNUM);
@@ -3332,47 +3351,10 @@ hppa_expand_epilogue ()
This is necessary as we must not cut the stack back before all the
restores are finished. */
emit_insn (gen_blockage ());
- /* No frame pointer, but we have a stack greater than 8k. We restore
- %r2 very late in this case. (All other cases are restored as early
- as possible.) */
- if (! frame_pointer_needed
- && ! VAL_14_BITS_P (actual_fsize + 20)
- && ! TARGET_64BIT
- && (regs_ever_live[2] || profile_flag))
- {
- set_reg_plus_d (STACK_POINTER_REGNUM,
- STACK_POINTER_REGNUM,
- - actual_fsize);
-
- /* This used to try and be clever by not depending on the value in
- %r30 and instead use the value held in %r1 (so that the 2nd insn
- which sets %r30 could be put in the delay slot of the return insn).
-
- That won't work since if the stack is exactly 8k set_reg_plus_d
- doesn't set %r1, just %r30. */
- load_reg (2, - 20, STACK_POINTER_REGNUM);
- }
- else if (! frame_pointer_needed
- && ! VAL_14_BITS_P (actual_fsize + 20)
- && TARGET_64BIT
- && (regs_ever_live[2] || profile_flag))
- {
- set_reg_plus_d (STACK_POINTER_REGNUM,
- STACK_POINTER_REGNUM,
- - actual_fsize);
-
- /* This used to try and be clever by not depending on the value in
- %r30 and instead use the value held in %r1 (so that the 2nd insn
- which sets %r30 could be put in the delay slot of the return insn).
-
- That won't work since if the stack is exactly 8k set_reg_plus_d
- doesn't set %r1, just %r30. */
- load_reg (2, - 16, STACK_POINTER_REGNUM);
- }
/* Reset stack pointer (and possibly frame pointer). The stack
pointer is initially set to fp + 64 to avoid a race condition. */
- else if (frame_pointer_needed)
+ if (frame_pointer_needed)
{
set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64);
emit_insn (gen_pre_load (frame_pointer_rtx,
@@ -3380,14 +3362,25 @@ hppa_expand_epilogue ()
GEN_INT (-64)));
}
/* If we were deferring a callee register restore, do it now. */
- else if (! frame_pointer_needed && merge_sp_adjust_with_load)
- emit_insn (gen_pre_load (gen_rtx_REG (word_mode, merge_sp_adjust_with_load),
- stack_pointer_rtx,
- GEN_INT (- actual_fsize)));
+ else if (merge_sp_adjust_with_load)
+ {
+ rtx delta = GEN_INT (-actual_fsize);
+ emit_insn (gen_pre_load (gen_rtx_REG (word_mode,
+ merge_sp_adjust_with_load),
+ stack_pointer_rtx,
+ delta));
+ }
else if (actual_fsize != 0)
- set_reg_plus_d (STACK_POINTER_REGNUM,
- STACK_POINTER_REGNUM,
- - actual_fsize);
+ {
+ set_reg_plus_d (STACK_POINTER_REGNUM,
+ STACK_POINTER_REGNUM,
+ -actual_fsize);
+ }
+
+ /* If we haven't restored %r2 yet (no frame pointer, and a stack
+ frame greater than 8k), do so now. */
+ if (ret_off != 0)
+ load_reg (2, ret_off, STACK_POINTER_REGNUM);
}
/* Fetch the return address for the frame COUNT steps up from