This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[RS6000] Prologue error saving vector regs


This patch started life as a tidy of rs6000_emit_allocate_stack
parameters, but then I noticed a bug in the vector reg save code.
As shown by the following testcase when compiled for linux with
-m32 -mabi=altivec -maltivec -O2

#define vector __attribute__ ((vector_size (16)))

vector int f5 (int n, vector int x)
{
  vector int a[4096];
  int i;

  for (i = 0; i < n; i++)
    a[i] = ((vector int){0,1,-1,i});

  for (i = 0; i < n; i++)
    x += a[i];

  return x;
}

00000000 <f5>:
   0:	3c 00 ff fe 	lis     r0,-2
   4:	60 00 ff c0 	ori     r0,r0,65472
   8:	7c 21 01 6e 	stwux   r1,r1,r0
   c:	7c 00 42 a6 	mfvrsave r0
  10:	90 0c ff fc 	stw     r0,-4(r12)  <- r12 uninitialised
..

Finding that one led me to look carefully at uses of frame_reg_rtx,
which revealed a similar problem with functions that call
__builtin_eh_return.  -m32 -O0 -fomit-frame-pointer with the following

long offset;
void *handler;

void foo (void)
{
  int a[8188];
  __builtin_eh_return (offset, handler);
}

gives

00000000 <foo>:
   0:	3c 00 ff ff 	lis     r0,-1
   4:	60 00 7f f0 	ori     r0,r0,32752
   8:	7c 21 01 6e 	stwux   r1,r1,r0
   c:	90 6c ff f0 	stw     r3,-16(r12)  <- r12 uninitialised
..

These bugs were hidden by the fact that the logic to tell
rs6000_emit_allocate_stack to copy the old frame pointer was separate
from the logic to set up frame_reg_rtx.  It wasn't even obvious to me
that the correct register received a copy of the old frame pointer.

Another small bug was that frame_reg_rtx was set up when a function
with a large frame saved no registers.  This caused a stack tie to be
emitted unnecessarily.

Bootstrapped and regression tested powerpc-linux.  OK to apply?
Active branches too?

	* config/rs6000/rs6000.c (rs6000_emit_allocate_stack): Delete
	unnecessary prototype.  Replace copy_r12 and copy_r11 flag params
	with copy_reg rtx param.
	(rs6000_emit_prologue): Update rs6000_emit_allocate_stack calls.
	Correct cases where code for ABI_V4 did not initialise the reg
	used to access frame.  Also leave frame_reg_rtx as sp for large
	frames that save no regs.

Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 159214)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -881,7 +881,6 @@ static bool spe_func_has_64bit_regs_p (v
 static void emit_frame_save (rtx, rtx, enum machine_mode, unsigned int,
 			     int, HOST_WIDE_INT);
 static rtx gen_frame_mem_offset (enum machine_mode, rtx, int);
-static void rs6000_emit_allocate_stack (HOST_WIDE_INT, int, int);
 static unsigned rs6000_hash_constant (rtx);
 static unsigned toc_hash_function (const void *);
 static int toc_hash_eq (const void *, const void *);
@@ -18024,13 +18023,11 @@ rs6000_emit_stack_tie (void)
 }
 
 /* Emit the correct code for allocating stack space, as insns.
-   If COPY_R12, make sure a copy of the old frame is left in r12.
-   If COPY_R11, make sure a copy of the old frame is left in r11,
-   in preference to r12 if COPY_R12.
+   If COPY_REG, make sure a copy of the old frame is left there.
    The generated code may use hard register 0 as a temporary.  */
 
 static void
-rs6000_emit_allocate_stack (HOST_WIDE_INT size, int copy_r12, int copy_r11)
+rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg)
 {
   rtx insn;
   rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
@@ -18073,11 +18070,8 @@ rs6000_emit_allocate_stack (HOST_WIDE_IN
 	warning (0, "stack limit expression is not supported");
     }
 
-  if (copy_r12 || copy_r11)
-    emit_move_insn (copy_r11
-                    ? gen_rtx_REG (Pmode, 11)
-                    : gen_rtx_REG (Pmode, 12),
-                    stack_reg);
+  if (copy_reg)
+    emit_move_insn (copy_reg, stack_reg);
 
   if (size > 32767)
     {
@@ -18763,20 +18757,33 @@ rs6000_emit_prologue (void)
 		       ? (!saving_GPRs_inline
 			  && info->spe_64bit_regs_used == 0)
 		       : (!saving_FPRs_inline || !saving_GPRs_inline));
+      rtx copy_reg = need_r11 ? gen_rtx_REG (Pmode, 11) : NULL;
+
       if (info->total_size < 32767)
 	sp_offset = info->total_size;
+      else if (need_r11)
+	frame_reg_rtx = copy_reg;
+      else if (info->cr_save_p
+	       || info->lr_save_p
+	       || info->first_fp_reg_save < 64
+	       || info->first_gp_reg_save < 32
+	       || info->altivec_size != 0
+	       || info->vrsave_mask != 0
+	       || crtl->calls_eh_return)
+	{
+	  copy_reg = frame_ptr_rtx;
+	  frame_reg_rtx = copy_reg;
+	}
       else
-	frame_reg_rtx = (need_r11
-			 ? gen_rtx_REG (Pmode, 11)
-			 : frame_ptr_rtx);
-      rs6000_emit_allocate_stack (info->total_size,
-				  (frame_reg_rtx != sp_reg_rtx
-				   && (info->cr_save_p
-				       || info->lr_save_p
-				       || info->first_fp_reg_save < 64
-				       || info->first_gp_reg_save < 32
-				       )),
-				  need_r11);
+	{
+	  /* The prologue won't be saving any regs so there is no need
+	     to set up a frame register to access any frame save area.
+	     We also won't be using sp_offset anywhere below, but set
+	     the correct value anyway to protect against future
+	     changes to this function.  */
+	  sp_offset = info->total_size;
+	}
+      rs6000_emit_allocate_stack (info->total_size, copy_reg);
       if (frame_reg_rtx != sp_reg_rtx)
 	rs6000_emit_stack_tie ();
     }
@@ -19211,16 +19218,19 @@ rs6000_emit_prologue (void)
   if (!WORLD_SAVE_P (info) && info->push_p
       && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
     {
+      rtx copy_reg = NULL;
+
       if (info->total_size < 32767)
-      sp_offset = info->total_size;
+	sp_offset = info->total_size;
+      else if (info->altivec_size != 0
+	       || info->vrsave_mask != 0)
+	{
+	  copy_reg = frame_ptr_rtx;
+	  frame_reg_rtx = copy_reg;
+	}
       else
-	frame_reg_rtx = frame_ptr_rtx;
-      rs6000_emit_allocate_stack (info->total_size,
-				  (frame_reg_rtx != sp_reg_rtx
-				   && ((info->altivec_size != 0)
-				       || (info->vrsave_mask != 0)
-				       )),
-				  FALSE);
+	sp_offset = info->total_size;
+      rs6000_emit_allocate_stack (info->total_size, copy_reg);
       if (frame_reg_rtx != sp_reg_rtx)
 	rs6000_emit_stack_tie ();
     }

-- 
Alan Modra
Australia Development Lab, IBM


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]