[PATCH 1/4] aarch64: Improve epilogue unwind info
Jiong Wang
jiong.wang@arm.com
Tue Aug 26 13:37:00 GMT 2014
Thanks.
I have verified that there are no regressions on aarch64-none-elf (bare-metal) with check-gcc and check-gdb.
-- Jiong
On 22/08/14 23:05, Richard Henderson wrote:
> Delay cfi restore opcodes until the stack frame is deallocated.
> This reduces the number of cfi advance opcodes required.
>
> We perform a similar optimization in the x86_64 epilogue.
>
>
> * config/aarch64/aarch64.c (aarch64_popwb_single_reg): Remove.
> (aarch64_popwb_pair_reg): Remove.
> (aarch64_restore_callee_saves): Add CFI_OPS argument; fill it with
> the restore ops performed by the insns generated.
> (aarch64_expand_epilogue): Attach CFI_OPS to the stack deallocation
> insn. Perform the calls_eh_return addition later; do not attempt to
> preserve the CFA in that case. Don't use aarch64_set_frame_expr.
> ---
> gcc/config/aarch64/aarch64.c | 177 +++++++++++++------------------------------
> 1 file changed, 52 insertions(+), 125 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index c3c871e..9a11e05 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -1964,23 +1964,6 @@ aarch64_pushwb_single_reg (enum machine_mode mode, unsigned regno,
> RTX_FRAME_RELATED_P (insn) = 1;
> }
>
> -static void
> -aarch64_popwb_single_reg (enum machine_mode mode, unsigned regno,
> - HOST_WIDE_INT adjustment)
> -{
> - rtx base_rtx = stack_pointer_rtx;
> - rtx insn, reg, mem;
> -
> - reg = gen_rtx_REG (mode, regno);
> - mem = gen_rtx_POST_MODIFY (Pmode, base_rtx,
> - plus_constant (Pmode, base_rtx, adjustment));
> - mem = gen_rtx_MEM (mode, mem);
> -
> - insn = emit_move_insn (reg, mem);
> - add_reg_note (insn, REG_CFA_RESTORE, reg);
> - RTX_FRAME_RELATED_P (insn) = 1;
> -}
> -
> static rtx
> aarch64_gen_storewb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
> HOST_WIDE_INT adjustment)
> @@ -2011,7 +1994,6 @@ aarch64_pushwb_pair_reg (enum machine_mode mode, unsigned regno1,
> insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
> reg2, adjustment));
> RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
> -
> RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
> RTX_FRAME_RELATED_P (insn) = 1;
> }
> @@ -2033,29 +2015,6 @@ aarch64_gen_loadwb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
> }
> }
>
> -static void
> -aarch64_popwb_pair_reg (enum machine_mode mode, unsigned regno1,
> - unsigned regno2, HOST_WIDE_INT adjustment, rtx cfa)
> -{
> - rtx insn;
> - rtx reg1 = gen_rtx_REG (mode, regno1);
> - rtx reg2 = gen_rtx_REG (mode, regno2);
> -
> - insn = emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
> - reg2, adjustment));
> - RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
> - RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
> - RTX_FRAME_RELATED_P (insn) = 1;
> -
> - if (cfa)
> - add_reg_note (insn, REG_CFA_ADJUST_CFA,
> - (gen_rtx_SET (Pmode, stack_pointer_rtx,
> - plus_constant (Pmode, cfa, adjustment))));
> -
> - add_reg_note (insn, REG_CFA_RESTORE, reg1);
> - add_reg_note (insn, REG_CFA_RESTORE, reg2);
> -}
> -
> static rtx
> aarch64_gen_store_pair (enum machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
> rtx reg2)
> @@ -2151,9 +2110,8 @@ aarch64_save_callee_saves (enum machine_mode mode, HOST_WIDE_INT start_offset,
> static void
> aarch64_restore_callee_saves (enum machine_mode mode,
> HOST_WIDE_INT start_offset, unsigned start,
> - unsigned limit, bool skip_wb)
> + unsigned limit, bool skip_wb, rtx *cfi_ops)
> {
> - rtx insn;
> rtx base_rtx = stack_pointer_rtx;
> rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed
> ? gen_frame_mem : gen_rtx_MEM);
> @@ -2187,25 +2145,14 @@ aarch64_restore_callee_saves (enum machine_mode mode,
>
> offset = start_offset + cfun->machine->frame.reg_offset[regno2];
> mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
> - insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2,
> - mem2));
> - add_reg_note (insn, REG_CFA_RESTORE, reg);
> - add_reg_note (insn, REG_CFA_RESTORE, reg2);
> + emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
>
> - /* The first part of a frame-related parallel insn is
> - always assumed to be relevant to the frame
> - calculations; subsequent parts, are only
> - frame-related if explicitly marked. */
> - RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
> + *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
> regno = regno2;
> }
> else
> - {
> - insn = emit_move_insn (reg, mem);
> - add_reg_note (insn, REG_CFA_RESTORE, reg);
> - }
> -
> - RTX_FRAME_RELATED_P (insn) = 1;
> + emit_move_insn (reg, mem);
> + *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
> }
> }
>
> @@ -2418,7 +2365,6 @@ aarch64_expand_epilogue (bool for_sibcall)
> HOST_WIDE_INT frame_size, offset;
> HOST_WIDE_INT fp_offset;
> rtx insn;
> - rtx cfa_reg;
>
> aarch64_layout_frame ();
>
> @@ -2426,8 +2372,6 @@ aarch64_expand_epilogue (bool for_sibcall)
> fp_offset = cfun->machine->frame.frame_size
> - cfun->machine->frame.hard_fp_offset;
>
> - cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
> -
> /* Store pairs and load pairs have a range only -512 to 504. */
> if (offset >= 512)
> {
> @@ -2459,11 +2403,6 @@ aarch64_expand_epilogue (bool for_sibcall)
> hard_frame_pointer_rtx,
> GEN_INT (0)));
> offset = offset - fp_offset;
> - RTX_FRAME_RELATED_P (insn) = 1;
> - /* As SP is set to (FP - fp_offset), according to the rules in
> - dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
> - from the value of SP from now on. */
> - cfa_reg = stack_pointer_rtx;
> }
>
> if (offset > 0)
> @@ -2471,6 +2410,7 @@ aarch64_expand_epilogue (bool for_sibcall)
> unsigned reg1 = cfun->machine->frame.wb_candidate1;
> unsigned reg2 = cfun->machine->frame.wb_candidate2;
> bool skip_wb = true;
> + rtx cfi_ops = NULL;
>
> if (frame_pointer_needed)
> fp_offset = 0;
> @@ -2481,99 +2421,86 @@ aarch64_expand_epilogue (bool for_sibcall)
> skip_wb = false;
>
> aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
> - skip_wb);
> + skip_wb, &cfi_ops);
> aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
> - skip_wb);
> + skip_wb, &cfi_ops);
>
> if (skip_wb)
> {
> enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
> + rtx rreg1 = gen_rtx_REG (mode1, reg1);
>
> + cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
> if (reg2 == FIRST_PSEUDO_REGISTER)
> - aarch64_popwb_single_reg (mode1, reg1, offset);
> + {
> + rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
> + mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
> + mem = gen_rtx_MEM (mode1, mem);
> + insn = emit_move_insn (rreg1, mem);
> + }
> else
> {
> - if (reg1 != HARD_FRAME_POINTER_REGNUM)
> - cfa_reg = NULL;
> + rtx rreg2 = gen_rtx_REG (mode1, reg2);
>
> - aarch64_popwb_pair_reg (mode1, reg1, reg2, offset, cfa_reg);
> + cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
> + insn = aarch64_gen_loadwb_pair (mode1, stack_pointer_rtx, rreg1,
> + rreg2, offset);
> + insn = emit_insn (insn);
> }
> }
> else
> {
> insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
> GEN_INT (offset)));
> - RTX_FRAME_RELATED_P (insn) = 1;
> }
> - }
> -
> - /* Stack adjustment for exception handler. */
> - if (crtl->calls_eh_return)
> - {
> - /* We need to unwind the stack by the offset computed by
> - EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
> - based on SP. Ideally we would update the SP and define the
> - CFA along the lines of:
> -
> - SP = SP + EH_RETURN_STACKADJ_RTX
> - (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
> -
> - However the dwarf emitter only understands a constant
> - register offset.
> -
> - The solution chosen here is to use the otherwise unused IP0
> - as a temporary register to hold the current SP value. The
> - CFA is described using IP0 then SP is modified. */
>
> - rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
> -
> - insn = emit_move_insn (ip0, stack_pointer_rtx);
> - add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
> + /* Reset the CFA to be SP + FRAME_SIZE. */
> + rtx new_cfa = stack_pointer_rtx;
> + if (frame_size > 0)
> + new_cfa = plus_constant (Pmode, new_cfa, frame_size);
> + cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
> + REG_NOTES (insn) = cfi_ops;
> RTX_FRAME_RELATED_P (insn) = 1;
> -
> - emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
> -
> - /* Ensure the assignment to IP0 does not get optimized away. */
> - emit_use (ip0);
> }
>
> - if (frame_size > -1)
> + if (frame_size > 0)
> {
> if (frame_size >= 0x1000000)
> {
> rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
> emit_move_insn (op0, GEN_INT (frame_size));
> - emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
> - aarch64_set_frame_expr (gen_rtx_SET
> - (Pmode, stack_pointer_rtx,
> - plus_constant (Pmode,
> - stack_pointer_rtx,
> - frame_size)));
> + insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
> }
> - else if (frame_size > 0)
> + else
> {
> - if ((frame_size & 0xfff) != 0)
> - {
> - insn = emit_insn (gen_add2_insn
> - (stack_pointer_rtx,
> - GEN_INT ((frame_size
> - & (HOST_WIDE_INT) 0xfff))));
> - RTX_FRAME_RELATED_P (insn) = 1;
> - }
> - if ((frame_size & 0xfff) != frame_size)
> + int hi_ofs = frame_size & 0xfff000;
> + int lo_ofs = frame_size & 0x000fff;
> +
> + if (hi_ofs && lo_ofs)
> {
> insn = emit_insn (gen_add2_insn
> - (stack_pointer_rtx,
> - GEN_INT ((frame_size
> - & ~ (HOST_WIDE_INT) 0xfff))));
> + (stack_pointer_rtx, GEN_INT (hi_ofs)));
> RTX_FRAME_RELATED_P (insn) = 1;
> + frame_size = lo_ofs;
> }
> + insn = emit_insn (gen_add2_insn
> + (stack_pointer_rtx, GEN_INT (frame_size)));
> }
>
> - aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
> - plus_constant (Pmode,
> - stack_pointer_rtx,
> - offset)));
> + /* Reset the CFA to be SP + 0. */
> + add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
> + RTX_FRAME_RELATED_P (insn) = 1;
> + }
> +
> + /* Stack adjustment for exception handler. */
> + if (crtl->calls_eh_return)
> + {
> + /* We need to unwind the stack by the offset computed by
> + EH_RETURN_STACKADJ_RTX. We have already reset the CFA
> + to be SP; letting the CFA move during this adjustment
> + is just as correct as retaining the CFA from the body
> + of the function. Therefore, do nothing special. */
> + emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
> }
>
> emit_use (gen_rtx_REG (DImode, LR_REGNUM));
> --
> 1.8.3.1
>
>
>
More information about the Gcc-patches
mailing list