This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[RS6000] Prologue error saving vector regs
- From: Alan Modra <amodra at gmail dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: David Edelsohn <edelsohn at gnu dot org>
- Date: Tue, 11 May 2010 23:20:06 +0930
- Subject: [RS6000] Prologue error saving vector regs
This patch started life as a tidy of rs6000_emit_allocate_stack
parameters, but then I noticed a bug in the vector reg save code.
As shown by the following testcase when compiled for linux with
-m32 -mabi=altivec -maltivec -O2
#define vector __attribute__ ((vector_size (16)))
vector int f5 (int n, vector int x)
{
vector int a[4096];
int i;
for (i = 0; i < n; i++)
a[i] = ((vector int){0,1,-1,i});
for (i = 0; i < n; i++)
x += a[i];
return x;
}
00000000 <f5>:
0: 3c 00 ff fe lis r0,-2
4: 60 00 ff c0 ori r0,r0,65472
8: 7c 21 01 6e stwux r1,r1,r0
c: 7c 00 42 a6 mfvrsave r0
10: 90 0c ff fc stw r0,-4(r12) <- r12 uninitialised
..
Finding that one led me to look carefully at uses of frame_reg_rtx,
which revealed a similar problem with functions that call
__builtin_eh_return. -m32 -O0 -fomit-frame-pointer with the following
long offset;
void *handler;
void foo (void)
{
int a[8188];
__builtin_eh_return (offset, handler);
}
gives
00000000 <foo>:
0: 3c 00 ff ff lis r0,-1
4: 60 00 7f f0 ori r0,r0,32752
8: 7c 21 01 6e stwux r1,r1,r0
c: 90 6c ff f0 stw r3,-16(r12) <- r12 uninitialised
..
These bugs were hidden by the fact that the logic to tell
rs6000_emit_allocate_stack to copy the old frame pointer was separate
from the logic to set up frame_reg_rtx. It wasn't even obvious to me
that the correct register received a copy of the old frame pointer.
Another small bug was that frame_reg_rtx was set up when a function
with a large frame saved no registers. This caused a stack tie to be
emitted unnecessarily.
Bootstrapped and regression tested powerpc-linux. OK to apply?
Active branches too?
* config/rs6000/rs6000.c (rs6000_emit_allocate_stack): Delete
unnecessary prototype. Replace copy_r12 and copy_r11 flag params
with copy_reg rtx param.
(rs6000_emit_prologue): Update rs6000_emit_allocate_stack calls.
Correct cases where code for ABI_V4 did not initialise the reg
used to access frame. Also leave frame_reg_rtx as sp for large
frames that save no regs.
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c (revision 159214)
+++ gcc/config/rs6000/rs6000.c (working copy)
@@ -881,7 +881,6 @@ static bool spe_func_has_64bit_regs_p (v
static void emit_frame_save (rtx, rtx, enum machine_mode, unsigned int,
int, HOST_WIDE_INT);
static rtx gen_frame_mem_offset (enum machine_mode, rtx, int);
-static void rs6000_emit_allocate_stack (HOST_WIDE_INT, int, int);
static unsigned rs6000_hash_constant (rtx);
static unsigned toc_hash_function (const void *);
static int toc_hash_eq (const void *, const void *);
@@ -18024,13 +18023,11 @@ rs6000_emit_stack_tie (void)
}
/* Emit the correct code for allocating stack space, as insns.
- If COPY_R12, make sure a copy of the old frame is left in r12.
- If COPY_R11, make sure a copy of the old frame is left in r11,
- in preference to r12 if COPY_R12.
+ If COPY_REG, make sure a copy of the old frame is left there.
The generated code may use hard register 0 as a temporary. */
static void
-rs6000_emit_allocate_stack (HOST_WIDE_INT size, int copy_r12, int copy_r11)
+rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg)
{
rtx insn;
rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
@@ -18073,11 +18070,8 @@ rs6000_emit_allocate_stack (HOST_WIDE_IN
warning (0, "stack limit expression is not supported");
}
- if (copy_r12 || copy_r11)
- emit_move_insn (copy_r11
- ? gen_rtx_REG (Pmode, 11)
- : gen_rtx_REG (Pmode, 12),
- stack_reg);
+ if (copy_reg)
+ emit_move_insn (copy_reg, stack_reg);
if (size > 32767)
{
@@ -18763,20 +18757,33 @@ rs6000_emit_prologue (void)
? (!saving_GPRs_inline
&& info->spe_64bit_regs_used == 0)
: (!saving_FPRs_inline || !saving_GPRs_inline));
+ rtx copy_reg = need_r11 ? gen_rtx_REG (Pmode, 11) : NULL;
+
if (info->total_size < 32767)
sp_offset = info->total_size;
+ else if (need_r11)
+ frame_reg_rtx = copy_reg;
+ else if (info->cr_save_p
+ || info->lr_save_p
+ || info->first_fp_reg_save < 64
+ || info->first_gp_reg_save < 32
+ || info->altivec_size != 0
+ || info->vrsave_mask != 0
+ || crtl->calls_eh_return)
+ {
+ copy_reg = frame_ptr_rtx;
+ frame_reg_rtx = copy_reg;
+ }
else
- frame_reg_rtx = (need_r11
- ? gen_rtx_REG (Pmode, 11)
- : frame_ptr_rtx);
- rs6000_emit_allocate_stack (info->total_size,
- (frame_reg_rtx != sp_reg_rtx
- && (info->cr_save_p
- || info->lr_save_p
- || info->first_fp_reg_save < 64
- || info->first_gp_reg_save < 32
- )),
- need_r11);
+ {
+ /* The prologue won't be saving any regs so there is no need
+ to set up a frame register to access any frame save area.
+ We also won't be using sp_offset anywhere below, but set
+ the correct value anyway to protect against future
+ changes to this function. */
+ sp_offset = info->total_size;
+ }
+ rs6000_emit_allocate_stack (info->total_size, copy_reg);
if (frame_reg_rtx != sp_reg_rtx)
rs6000_emit_stack_tie ();
}
@@ -19211,16 +19218,19 @@ rs6000_emit_prologue (void)
if (!WORLD_SAVE_P (info) && info->push_p
&& !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
{
+ rtx copy_reg = NULL;
+
if (info->total_size < 32767)
- sp_offset = info->total_size;
+ sp_offset = info->total_size;
+ else if (info->altivec_size != 0
+ || info->vrsave_mask != 0)
+ {
+ copy_reg = frame_ptr_rtx;
+ frame_reg_rtx = copy_reg;
+ }
else
- frame_reg_rtx = frame_ptr_rtx;
- rs6000_emit_allocate_stack (info->total_size,
- (frame_reg_rtx != sp_reg_rtx
- && ((info->altivec_size != 0)
- || (info->vrsave_mask != 0)
- )),
- FALSE);
+ sp_offset = info->total_size;
+ rs6000_emit_allocate_stack (info->total_size, copy_reg);
if (frame_reg_rtx != sp_reg_rtx)
rs6000_emit_stack_tie ();
}
--
Alan Modra
Australia Development Lab, IBM