This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Fix AMD64 handling of functions with huge stack frames


On Thu, Oct 23, 2003 at 03:21:04PM +0200, Jan Hubicka wrote:
> > 2003-10-23  Jakub Jelinek  <jakub@redhat.com>
> > 
> > 	* config/i386/i386.c (ix86_expand_prologue): Handle stack
> > 	adjustement bigger than 2GB.
> > 	(epilogue_adjust_stack): New function.
> > 	(ix86_expand_epilogue): Use it.
> 
> Actually hammer branch has similar patch.  I realize now that it appears
> to be missing on mainline for some reason.  Sorry for that, it would
> probably save some of your time.

Oops, didn't know that.

> The hammer branch version embeddes the register load in
> pro_epilogue_adjust_stack_expander, this seem to avoid new for
> duplicating the code..

But on hammer-3_3-branch register %r11 is not used by indirect sibcall.
Relying on somebody not forgetting to tweak i386 backend when sibcall.c
stops limiting sibcalls to !frame_size sounds too error-prone to me,
plus FRAME_RELATED_P is not set on the first instruction that way.

> I was trying to do the same on hammer branch and it did hit problems
> with unwind code not being able to realize that (plus (rsp) (reg2))
> is (plus (rsp) (big_constant)), so I had to create new pattern
> pro_epilogue_adjust_stack_rex64_2.  DOes this problem still remain in
> mainline or it has been fixed?

What was the difference in the unwind info?
Here is an updated patch which uses pro_epilogue_adjust_stack_rex64_2
from hammer-3_3-branch (fixed up a little bit and simplified),
but it generates identical assembly as when it was using just _rex64.
I get similar results on a backport to gcc-3_2-rhl8-branch, using
rex64_2 or just rex64 changes nothing.
This patch has the expander in i386.c instead of i386.md, so that epilogue
with indirect sibcall can be potentionally handled (as well as
FRAME_RELATED_P).
I have also noticed that stack_pointer_offset was computed wrongly for huge
stack usages (int offset variable doing += size where size is HOST_WIDE_INT
and can be > 4GB) and that we need to avoid using mov for pushes.

2003-10-23  Jakub Jelinek  <jakub@redhat.com>
	    Jan Hubicka  <jh@suse.cz>

	* config/i386/i386.c (ix86_compute_frame_layout): Change offset type
	to HOST_WIDE_INT.  Don't save regs using mov for huge frame sizes
	if TARGET_64BIT.
	(pro_epilogue_adjust_stack): New function.
	(ix86_expand_prologue, ix86_expand_epilogue): Use it.
	* config/i386/i386.md (pro_epilogue_adjust_stack): Remove.
	(pro_epilogue_adjust_stack_1): Remove * in front of name.
	(pro_epilogue_adjust_stack_rex64_2): New insn.

	* config/i386/i386.c (ix86_expand_epilogue): Fix comment typo.

	* config/i386/i386.c (ix86_expand_call): Replace 40 with
	FIRST_REX_INT_REG + 3 /* R11 */.

	* gcc.c-torture/compile/20031023-1.c: New test.
	* gcc.c-torture/compile/20031023-2.c: New test.
	* gcc.c-torture/compile/20031023-3.c: New test.
	* gcc.c-torture/compile/20031023-4.c: New test.

--- gcc/config/i386/i386.c.jj	2003-10-23 09:55:03.000000000 +0200
+++ gcc/config/i386/i386.c	2003-10-23 19:07:27.000000000 +0200
@@ -4841,7 +4841,7 @@ ix86_compute_frame_layout (struct ix86_f
 {
   HOST_WIDE_INT total_size;
   int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
-  int offset;
+  HOST_WIDE_INT offset;
   int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
   HOST_WIDE_INT size = get_frame_size ();
 
@@ -4957,7 +4957,8 @@ ix86_compute_frame_layout (struct ix86_f
     (size + frame->padding1 + frame->padding2
      + frame->outgoing_arguments_size + frame->va_arg_size);
 
-  if (!frame->to_allocate && frame->nregs <= 1)
+  if ((!frame->to_allocate && frame->nregs <= 1)
+      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
     frame->save_regs_using_mov = false;
 
   if (TARGET_RED_ZONE && current_function_sp_is_unchanging
@@ -5024,6 +5025,41 @@ ix86_emit_save_regs_using_mov (rtx point
       }
 }
 
+/* Expand prologue or epilogue stack adjustement.
+   The pattern exist to put a dependency on all ebp-based memory accesses.
+   STYLE should be negative if instructions should be marked as frame related,
+   zero if %r11 register is live and cannot be freely used and positive
+   otherwise.  */
+
+static void
+pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
+{
+  rtx insn;
+
+  if (! TARGET_64BIT)
+    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
+  else if (x86_64_immediate_operand (offset, DImode))
+    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
+  else
+    {
+      rtx r11;
+      /* r11 is used by indirect sibcall return as well, set before the
+	 epilogue and used after the epilogue.  ATM indirect sibcall
+	 shouldn't be used together with huge frame sizes in one
+	 function because of the frame_size check in sibcall.c.  */
+      if (style == 0)
+	abort ();
+      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
+      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
+      if (style < 0)
+	RTX_FRAME_RELATED_P (insn) = 1;
+      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
+							       offset));
+    }
+  if (style < 0)
+    RTX_FRAME_RELATED_P (insn) = 1;
+}
+
 /* Expand the prologue into a bunch of separate insns.  */
 
 void
@@ -5065,12 +5101,8 @@ ix86_expand_prologue (void)
   if (allocate == 0)
     ;
   else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
-    {
-      insn = emit_insn (gen_pro_epilogue_adjust_stack
-			(stack_pointer_rtx, stack_pointer_rtx,
-			 GEN_INT (-allocate)));
-      RTX_FRAME_RELATED_P (insn) = 1;
-    }
+    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+			       GEN_INT (-allocate), -1);
   else
     {
       /* Only valid for Win32 */
@@ -5213,8 +5245,8 @@ ix86_expand_epilogue (int style)
 	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
 	      emit_move_insn (hard_frame_pointer_rtx, tmp);
 
-	      emit_insn (gen_pro_epilogue_adjust_stack
-			 (stack_pointer_rtx, sa, const0_rtx));
+	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
+					 const0_rtx, style);
 	    }
 	  else
 	    {
@@ -5225,19 +5257,19 @@ ix86_expand_epilogue (int style)
 	    }
 	}
       else if (!frame_pointer_needed)
-	emit_insn (gen_pro_epilogue_adjust_stack
-		   (stack_pointer_rtx, stack_pointer_rtx,
-		    GEN_INT (frame.to_allocate
-			     + frame.nregs * UNITS_PER_WORD)));
+	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+				   GEN_INT (frame.to_allocate
+					    + frame.nregs * UNITS_PER_WORD),
+				   style);
       /* If not an i386, mov & pop is faster than "leave".  */
       else if (TARGET_USE_LEAVE || optimize_size
 	       || !cfun->machine->use_fast_prologue_epilogue)
 	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
       else
 	{
-	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
-						    hard_frame_pointer_rtx,
-						    const0_rtx));
+	  pro_epilogue_adjust_stack (stack_pointer_rtx,
+				     hard_frame_pointer_rtx,
+				     const0_rtx, style);
 	  if (TARGET_64BIT)
 	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
 	  else
@@ -5252,14 +5284,13 @@ ix86_expand_epilogue (int style)
 	{
 	  if (!frame_pointer_needed)
 	    abort ();
-          emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
-						    hard_frame_pointer_rtx,
-						    GEN_INT (offset)));
+	  pro_epilogue_adjust_stack (stack_pointer_rtx,
+				     hard_frame_pointer_rtx,
+				     GEN_INT (offset), style);
 	}
       else if (frame.to_allocate)
-	emit_insn (gen_pro_epilogue_adjust_stack
-		   (stack_pointer_rtx, stack_pointer_rtx,
-		    GEN_INT (frame.to_allocate)));
+	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+				   GEN_INT (frame.to_allocate), style);
 
       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
 	if (ix86_save_reg (regno, false))
@@ -5298,7 +5329,7 @@ ix86_expand_epilogue (int style)
 	{
 	  rtx ecx = gen_rtx_REG (SImode, 2);
 
-	  /* There are is no "pascal" calling convention in 64bit ABI.  */
+	  /* There is no "pascal" calling convention in 64bit ABI.  */
 	  if (TARGET_64BIT)
 	    abort ();
 
@@ -11541,7 +11572,7 @@ ix86_expand_call (rtx retval, rtx fnaddr
     {
       rtx addr;
       addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
-      fnaddr = gen_rtx_REG (Pmode, 40);
+      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
       emit_move_insn (fnaddr, addr);
       fnaddr = gen_rtx_MEM (QImode, fnaddr);
     }
--- gcc/config/i386/i386.md.jj	2003-10-23 09:55:07.000000000 +0200
+++ gcc/config/i386/i386.md	2003-10-23 16:14:47.000000000 +0200
@@ -17772,23 +17772,7 @@
 ;; [(set (mem (plus (reg ebp) (const_int -160000))) (const_int 0))]
 ;;
 ;; in proper program order.
-(define_expand "pro_epilogue_adjust_stack"
-  [(parallel [(set (match_operand:SI 0 "register_operand" "=r,r")
-		   (plus:SI (match_operand:SI 1 "register_operand" "0,r")
-			    (match_operand:SI 2 "immediate_operand" "i,i")))
-	      (clobber (reg:CC 17))
-	      (clobber (mem:BLK (scratch)))])]
- ""
-{
-  if (TARGET_64BIT)
-    {
-      emit_insn (gen_pro_epilogue_adjust_stack_rex64
-		 (operands[0], operands[1], operands[2]));
-      DONE;
-    }
-})
-
-(define_insn "*pro_epilogue_adjust_stack_1"
+(define_insn "pro_epilogue_adjust_stack_1"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
 	(plus:SI (match_operand:SI 1 "register_operand" "0,r")
 	         (match_operand:SI 2 "immediate_operand" "i,i")))
@@ -17870,6 +17854,33 @@
 	      (const_string "lea")))
    (set_attr "mode" "DI")])
 
+(define_insn "pro_epilogue_adjust_stack_rex64_2"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(plus:DI (match_operand:DI 1 "register_operand" "0,r")
+		 (match_operand:DI 3 "immediate_operand" "i,i")))
+   (use (match_operand:DI 2 "register_operand" "r,r"))
+   (clobber (reg:CC 17))
+   (clobber (mem:BLK (scratch)))]
+  "TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      return "add{q}\t{%2, %0|%0, %2}";
+
+    case TYPE_LEA:
+      operands[2] = gen_rtx_PLUS (DImode, operands[1], operands[2]);
+      return "lea{q}\t{%a2, %0|%0, %a2}";
+
+    default:
+      abort ();
+    }
+}
+  [(set (attr "type")
+	(if_then_else (eq_attr "alternative" "0")
+		      (const_string "alu")
+		      (const_string "lea")))
+   (set_attr "mode" "DI")])
 
 ;; Placeholder for the conditional moves.  This one is split either to SSE
 ;; based moves emulation or to usual cmove sequence.  Little bit unfortunate
--- gcc/testsuite/gcc.c-torture/compile/20031023-1.c.jj	2003-10-23 14:19:10.000000000 +0200
+++ gcc/testsuite/gcc.c-torture/compile/20031023-1.c	2003-10-23 15:50:14.000000000 +0200
@@ -0,0 +1,66 @@
+#ifndef ASIZE
+# define ASIZE 0x10000000000UL
+#endif
+
+#include <limits.h>
+
+#if LONG_MAX < 8 * ASIZE
+# undef ASIZE
+# define ASIZE 4096
+#endif
+
+extern void abort (void);
+
+int __attribute__((noinline))
+foo (const char *s)
+{
+  if (!s)
+    return 1;
+  if (s[0] != 'a')
+    abort ();
+  s += ASIZE - 1;
+  if (s[0] != 'b')
+    abort ();
+  return 0;
+}
+
+int (*fn) (const char *) = foo;
+
+int __attribute__((noinline))
+bar (void)
+{
+  char s[ASIZE];
+  s[0] = 'a';
+  s[ASIZE - 1] = 'b';
+  foo (s);
+  foo (s);
+  return 0;
+}
+
+int __attribute__((noinline))
+baz (long i)
+{
+  if (i)
+    return fn (0);
+  else
+    {
+      char s[ASIZE];
+      s[0] = 'a';
+      s[ASIZE - 1] = 'b';
+      foo (s);
+      foo (s);
+      return fn (0);
+    }
+}
+
+int
+main (void)
+{
+  if (bar ())
+    abort ();
+  if (baz (0) != 1)
+    abort ();
+  if (baz (1) != 1)
+    abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/compile/20031023-2.c.jj	2003-10-23 14:19:25.000000000 +0200
+++ gcc/testsuite/gcc.c-torture/compile/20031023-2.c	2003-10-23 14:19:49.000000000 +0200
@@ -0,0 +1,2 @@
+#define ASIZE 0x1000000000UL
+#include "20031023-1.c"
--- gcc/testsuite/gcc.c-torture/compile/20031023-3.c.jj	2003-10-23 14:19:25.000000000 +0200
+++ gcc/testsuite/gcc.c-torture/compile/20031023-3.c	2003-10-23 14:20:28.000000000 +0200
@@ -0,0 +1,2 @@
+#define ASIZE 0x100000000UL
+#include "20031023-1.c"
--- gcc/testsuite/gcc.c-torture/compile/20031023-4.c.jj	2003-10-23 14:19:25.000000000 +0200
+++ gcc/testsuite/gcc.c-torture/compile/20031023-4.c	2003-10-23 14:20:51.000000000 +0200
@@ -0,0 +1,2 @@
+#define ASIZE 0x80000000UL
+#include "20031023-1.c"


	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]