This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH][AArch64] Improve stack adjustment


Improve stack adjustment by reusing a temporary move immediate 
from the epilog if the register is still valid in the epilog.  This generates
smaller code for leaf functions:

        mov     x16, 40000
        sub     sp, sp, x16
        ldr     w0, [sp, w0, sxtw 2]
        add     sp, sp, x16
        ret

Passes GCC regression tests.

ChangeLog:
2016-08-04  Wilco Dijkstra  <wdijkstr@arm.com>

gcc/
	* config/aarch64/aarch64.c (aarch64_add_constant):
	Add extra argument to allow emitting the move immediate.
	Use add/sub with positive immediate.
	(aarch64_expand_epilogue): Decide when to leave out move.

testsuite/
	* gcc.target/aarch64/test_frame_17.c: New test.
--

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index ce2cc5ae3e1291f4ef4a8408461678c9397b06bd..5b59e4dd157351f301fc563a724cefe8a9be132c 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1941,15 +1941,21 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
 }
 
 /* Add DELTA to REGNUM in mode MODE.  SCRATCHREG can be used to held
-   intermediate value if necessary.
+   intermediate value if necessary.  FRAME_RELATED_P should be true if
+   the RTX_FRAME_RELATED flag should be set and CFA adjustments added
+   to the generated instructions.  If SCRATCHREG is known to hold
+   abs (delta), EMIT_MOVE_IMM can be set to false to avoid emitting the
+   immediate again.
 
-   This function is sometimes used to adjust the stack pointer, so we must
-   ensure that it can never cause transient stack deallocation by writing an
-   invalid value into REGNUM.  */
+   Since this function may be used to adjust the stack pointer, we must
+   ensure that it cannot cause transient stack deallocation (for example
+   by first incrementing SP and then decrementing when adjusting by a
+   large immediate).  */
 
 static void
 aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
-		      HOST_WIDE_INT delta, bool frame_related_p)
+		      HOST_WIDE_INT delta, bool frame_related_p,
+		      bool emit_move_imm = true)
 {
   HOST_WIDE_INT mdelta = abs_hwi (delta);
   rtx this_rtx = gen_rtx_REG (mode, regnum);
@@ -1967,11 +1973,11 @@ aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
       return;
     }
 
-  /* We need two add/sub instructions, each one performing part of the
-     calculation.  Don't do this if the addend can be loaded into register with
-     a single instruction, in that case we prefer a move to a scratch register
-     following by an addition.  */
-  if (mdelta < 0x1000000 && !aarch64_move_imm (delta, mode))
+  /* We need two add/sub instructions, each one perform part of the
+     addition/subtraction, but don't this if the addend can be loaded into
+     register by single instruction, in that case we prefer a move to scratch
+     register following by addition.  */
+  if (mdelta < 0x1000000 && !aarch64_move_imm (mdelta, mode))
     {
       HOST_WIDE_INT low_off = mdelta & 0xfff;
 
@@ -1985,8 +1991,10 @@ aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
 
   /* Otherwise use generic function to handle all other situations.  */
   rtx scratch_rtx = gen_rtx_REG (mode, scratchreg);
-  aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (delta), true, mode);
-  insn = emit_insn (gen_add2_insn (this_rtx, scratch_rtx));
+  if (emit_move_imm)
+    aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (mdelta), true, mode);
+  insn = emit_insn (delta < 0 ? gen_sub2_insn (this_rtx, scratch_rtx)
+			      : gen_add2_insn (this_rtx, scratch_rtx));
   if (frame_related_p)
     {
       RTX_FRAME_RELATED_P (insn) = frame_related_p;
@@ -3288,7 +3296,8 @@ aarch64_expand_epilogue (bool for_sibcall)
       RTX_FRAME_RELATED_P (insn) = callee_adjust == 0;
     }
   else
-    aarch64_add_constant (Pmode, SP_REGNUM, IP1_REGNUM, final_adjust, true);
+    aarch64_add_constant (Pmode, SP_REGNUM, IP1_REGNUM, final_adjust, true,
+			  df_regs_ever_live_p (IP1_REGNUM));
 
   aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
 				callee_adjust != 0, &cfi_ops);
@@ -3311,7 +3320,8 @@ aarch64_expand_epilogue (bool for_sibcall)
       cfi_ops = NULL;
     }
 
-  aarch64_add_constant (Pmode, SP_REGNUM, IP0_REGNUM, initial_adjust, true);
+  aarch64_add_constant (Pmode, SP_REGNUM, IP0_REGNUM, initial_adjust, true,
+			df_regs_ever_live_p (IP0_REGNUM));
 
   if (cfi_ops)
     {
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_17.c b/gcc/testsuite/gcc.target/aarch64/test_frame_17.c
new file mode 100644
index 0000000000000000000000000000000000000000..c214431999b60cce8a75204876a8c73ec6304128
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_17.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 --save-temps" } */
+
+/* Test reuse of stack adjustment temporaries.  */
+
+void foo ();
+
+int reuse_mov (int i)
+{
+  int arr[1025];
+  return arr[i];
+}
+
+int no_reuse_mov (int i)
+{
+  int arr[1025];
+  foo ();
+  return arr[i];
+}
+
+/* { dg-final { scan-assembler-times "mov\tx16, \[0-9\]+" 3 } } */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]