This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 1/4] aarch64: Improve epilogue unwind info


Delay CFI restore opcodes until the stack frame is deallocated.
This reduces the number of CFI advance opcodes required.

We perform a similar optimization in the x86_64 epilogue.


	* config/aarch64/aarch64.c (aarch64_popwb_single_reg): Remove.
	(aarch64_popwb_pair_reg): Remove.
	(aarch64_restore_callee_saves): Add CFI_OPS argument; fill it with
	the restore ops performed by the insns generated.
	(aarch64_expand_epilogue): Attach CFI_OPS to the stack deallocation
	insn.  Perform the calls_eh_return addition later; do not attempt to
	preserve the CFA in that case.  Don't use aarch64_set_frame_expr.
---
 gcc/config/aarch64/aarch64.c | 177 +++++++++++++------------------------------
 1 file changed, 52 insertions(+), 125 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index c3c871e..9a11e05 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1964,23 +1964,6 @@ aarch64_pushwb_single_reg (enum machine_mode mode, unsigned regno,
   RTX_FRAME_RELATED_P (insn) = 1;
 }
 
-static void
-aarch64_popwb_single_reg (enum machine_mode mode, unsigned regno,
-			  HOST_WIDE_INT adjustment)
-{
-  rtx base_rtx = stack_pointer_rtx;
-  rtx insn, reg, mem;
-
-  reg = gen_rtx_REG (mode, regno);
-  mem = gen_rtx_POST_MODIFY (Pmode, base_rtx,
-			     plus_constant (Pmode, base_rtx, adjustment));
-  mem = gen_rtx_MEM (mode, mem);
-
-  insn = emit_move_insn (reg, mem);
-  add_reg_note (insn, REG_CFA_RESTORE, reg);
-  RTX_FRAME_RELATED_P (insn) = 1;
-}
-
 static rtx
 aarch64_gen_storewb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
 			  HOST_WIDE_INT adjustment)
@@ -2011,7 +1994,6 @@ aarch64_pushwb_pair_reg (enum machine_mode mode, unsigned regno1,
   insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
 					      reg2, adjustment));
   RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
-
   RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
   RTX_FRAME_RELATED_P (insn) = 1;
 }
@@ -2033,29 +2015,6 @@ aarch64_gen_loadwb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
     }
 }
 
-static void
-aarch64_popwb_pair_reg (enum machine_mode mode, unsigned regno1,
-			unsigned regno2, HOST_WIDE_INT adjustment, rtx cfa)
-{
-  rtx insn;
-  rtx reg1 = gen_rtx_REG (mode, regno1);
-  rtx reg2 = gen_rtx_REG (mode, regno2);
-
-  insn = emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
-					     reg2, adjustment));
-  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
-  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
-  RTX_FRAME_RELATED_P (insn) = 1;
-
-  if (cfa)
-    add_reg_note (insn, REG_CFA_ADJUST_CFA,
-		  (gen_rtx_SET (Pmode, stack_pointer_rtx,
-				plus_constant (Pmode, cfa, adjustment))));
-
-  add_reg_note (insn, REG_CFA_RESTORE, reg1);
-  add_reg_note (insn, REG_CFA_RESTORE, reg2);
-}
-
 static rtx
 aarch64_gen_store_pair (enum machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
 			rtx reg2)
@@ -2151,9 +2110,8 @@ aarch64_save_callee_saves (enum machine_mode mode, HOST_WIDE_INT start_offset,
 static void
 aarch64_restore_callee_saves (enum machine_mode mode,
 			      HOST_WIDE_INT start_offset, unsigned start,
-			      unsigned limit, bool skip_wb)
+			      unsigned limit, bool skip_wb, rtx *cfi_ops)
 {
-  rtx insn;
   rtx base_rtx = stack_pointer_rtx;
   rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed
 						 ? gen_frame_mem : gen_rtx_MEM);
@@ -2187,25 +2145,14 @@ aarch64_restore_callee_saves (enum machine_mode mode,
 
 	  offset = start_offset + cfun->machine->frame.reg_offset[regno2];
 	  mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
-	  insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2,
-						   mem2));
-	  add_reg_note (insn, REG_CFA_RESTORE, reg);
-	  add_reg_note (insn, REG_CFA_RESTORE, reg2);
+	  emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
 
-	  /* The first part of a frame-related parallel insn is
-	     always assumed to be relevant to the frame
-	     calculations; subsequent parts, are only
-	     frame-related if explicitly marked.  */
-	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
+	  *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
 	  regno = regno2;
 	}
       else
-	{
-	  insn = emit_move_insn (reg, mem);
-	  add_reg_note (insn, REG_CFA_RESTORE, reg);
-	}
-
-      RTX_FRAME_RELATED_P (insn) = 1;
+	emit_move_insn (reg, mem);
+      *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
     }
 }
 
@@ -2418,7 +2365,6 @@ aarch64_expand_epilogue (bool for_sibcall)
   HOST_WIDE_INT frame_size, offset;
   HOST_WIDE_INT fp_offset;
   rtx insn;
-  rtx cfa_reg;
 
   aarch64_layout_frame ();
 
@@ -2426,8 +2372,6 @@ aarch64_expand_epilogue (bool for_sibcall)
   fp_offset = cfun->machine->frame.frame_size
 	      - cfun->machine->frame.hard_fp_offset;
 
-  cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
-
   /* Store pairs and load pairs have a range only -512 to 504.  */
   if (offset >= 512)
     {
@@ -2459,11 +2403,6 @@ aarch64_expand_epilogue (bool for_sibcall)
 				       hard_frame_pointer_rtx,
 				       GEN_INT (0)));
       offset = offset - fp_offset;
-      RTX_FRAME_RELATED_P (insn) = 1;
-      /* As SP is set to (FP - fp_offset), according to the rules in
-	 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
-	 from the value of SP from now on.  */
-      cfa_reg = stack_pointer_rtx;
     }
 
   if (offset > 0)
@@ -2471,6 +2410,7 @@ aarch64_expand_epilogue (bool for_sibcall)
       unsigned reg1 = cfun->machine->frame.wb_candidate1;
       unsigned reg2 = cfun->machine->frame.wb_candidate2;
       bool skip_wb = true;
+      rtx cfi_ops = NULL;
 
       if (frame_pointer_needed)
 	fp_offset = 0;
@@ -2481,99 +2421,86 @@ aarch64_expand_epilogue (bool for_sibcall)
 	skip_wb = false;
 
       aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
-				    skip_wb);
+				    skip_wb, &cfi_ops);
       aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
-				    skip_wb);
+				    skip_wb, &cfi_ops);
 
       if (skip_wb)
 	{
 	  enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
+	  rtx rreg1 = gen_rtx_REG (mode1, reg1);
 
+	  cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
 	  if (reg2 == FIRST_PSEUDO_REGISTER)
-	    aarch64_popwb_single_reg (mode1, reg1, offset);
+	    {
+	      rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
+	      mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
+	      mem = gen_rtx_MEM (mode1, mem);
+	      insn = emit_move_insn (rreg1, mem);
+	    }
 	  else
 	    {
-	      if (reg1 != HARD_FRAME_POINTER_REGNUM)
-		cfa_reg = NULL;
+	      rtx rreg2 = gen_rtx_REG (mode1, reg2);
 
-	      aarch64_popwb_pair_reg (mode1, reg1, reg2, offset, cfa_reg);
+	      cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
+	      insn = aarch64_gen_loadwb_pair (mode1, stack_pointer_rtx, rreg1,
+					      rreg2, offset);
+	      insn = emit_insn (insn);
 	    }
 	}
       else
 	{
 	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
 					   GEN_INT (offset)));
-	  RTX_FRAME_RELATED_P (insn) = 1;
 	}
-    }
-
-  /* Stack adjustment for exception handler.  */
-  if (crtl->calls_eh_return)
-    {
-      /* We need to unwind the stack by the offset computed by
-	 EH_RETURN_STACKADJ_RTX.  However, at this point the CFA is
-	 based on SP.  Ideally we would update the SP and define the
-	 CFA along the lines of:
-
-	 SP = SP + EH_RETURN_STACKADJ_RTX
-	 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
-
-	 However the dwarf emitter only understands a constant
-	 register offset.
-
-	 The solution chosen here is to use the otherwise unused IP0
-	 as a temporary register to hold the current SP value.  The
-	 CFA is described using IP0 then SP is modified.  */
 
-      rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
-
-      insn = emit_move_insn (ip0, stack_pointer_rtx);
-      add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
+      /* Reset the CFA to be SP + FRAME_SIZE.  */
+      rtx new_cfa = stack_pointer_rtx;
+      if (frame_size > 0)
+	new_cfa = plus_constant (Pmode, new_cfa, frame_size);
+      cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
+      REG_NOTES (insn) = cfi_ops;
       RTX_FRAME_RELATED_P (insn) = 1;
-
-      emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
-
-      /* Ensure the assignment to IP0 does not get optimized away.  */
-      emit_use (ip0);
     }
 
-  if (frame_size > -1)
+  if (frame_size > 0)
     {
       if (frame_size >= 0x1000000)
 	{
 	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
 	  emit_move_insn (op0, GEN_INT (frame_size));
-	  emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
-	  aarch64_set_frame_expr (gen_rtx_SET
-				  (Pmode, stack_pointer_rtx,
-				   plus_constant (Pmode,
-						  stack_pointer_rtx,
-						  frame_size)));
+	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
 	}
-      else if (frame_size > 0)
+      else
 	{
-	  if ((frame_size & 0xfff) != 0)
-	    {
-	      insn = emit_insn (gen_add2_insn
-				(stack_pointer_rtx,
-				 GEN_INT ((frame_size
-					   & (HOST_WIDE_INT) 0xfff))));
-	      RTX_FRAME_RELATED_P (insn) = 1;
-	    }
-	  if ((frame_size & 0xfff) != frame_size)
+          int hi_ofs = frame_size & 0xfff000;
+          int lo_ofs = frame_size & 0x000fff;
+
+	  if (hi_ofs && lo_ofs)
 	    {
 	      insn = emit_insn (gen_add2_insn
-				(stack_pointer_rtx,
-				 GEN_INT ((frame_size
-					   & ~ (HOST_WIDE_INT) 0xfff))));
+				(stack_pointer_rtx, GEN_INT (hi_ofs)));
 	      RTX_FRAME_RELATED_P (insn) = 1;
+	      frame_size = lo_ofs;
 	    }
+	  insn = emit_insn (gen_add2_insn
+			    (stack_pointer_rtx, GEN_INT (frame_size)));
 	}
 
-      aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
-					   plus_constant (Pmode,
-							  stack_pointer_rtx,
-							  offset)));
+      /* Reset the CFA to be SP + 0.  */
+      add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
+      RTX_FRAME_RELATED_P (insn) = 1;
+    }
+
+  /* Stack adjustment for exception handler.  */
+  if (crtl->calls_eh_return)
+    {
+      /* We need to unwind the stack by the offset computed by
+	 EH_RETURN_STACKADJ_RTX.  We have already reset the CFA
+	 to be SP; letting the CFA move during this adjustment
+	 is just as correct as retaining the CFA from the body
+	 of the function.  Therefore, do nothing special.  */
+      emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
     }
 
   emit_use (gen_rtx_REG (DImode, LR_REGNUM));
-- 
1.8.3.1


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]