[PATCH 1/4] aarch64: Improve epilogue unwind info

Jiong Wang jiong.wang@arm.com
Tue Aug 26 13:37:00 GMT 2014


Thanks,

I verified that this patch causes no regressions on aarch64-none-elf (bare-metal) with check-gcc and check-gdb.

-- Jiong

On 22/08/14 23:05, Richard Henderson wrote:
> Delay cfi restore opcodes until the stack frame is deallocated.
> This reduces the number of cfi advance opcodes required.
>
> We perform a similar optimization in the x86_64 epilogue.
>
>
>          * config/aarch64/aarch64.c (aarch64_popwb_single_reg): Remove.
>          (aarch64_popwb_pair_reg): Remove.
>          (aarch64_restore_callee_saves): Add CFI_OPS argument; fill it with
>          the restore ops performed by the insns generated.
>          (aarch64_expand_epilogue): Attach CFI_OPS to the stack deallocation
>          insn.  Perform the calls_eh_return addition later; do not attempt to
>          preserve the CFA in that case.  Don't use aarch64_set_frame_expr.
> ---
>   gcc/config/aarch64/aarch64.c | 177 +++++++++++++------------------------------
>   1 file changed, 52 insertions(+), 125 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index c3c871e..9a11e05 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -1964,23 +1964,6 @@ aarch64_pushwb_single_reg (enum machine_mode mode, unsigned regno,
>     RTX_FRAME_RELATED_P (insn) = 1;
>   }
>
> -static void
> -aarch64_popwb_single_reg (enum machine_mode mode, unsigned regno,
> -                         HOST_WIDE_INT adjustment)
> -{
> -  rtx base_rtx = stack_pointer_rtx;
> -  rtx insn, reg, mem;
> -
> -  reg = gen_rtx_REG (mode, regno);
> -  mem = gen_rtx_POST_MODIFY (Pmode, base_rtx,
> -                            plus_constant (Pmode, base_rtx, adjustment));
> -  mem = gen_rtx_MEM (mode, mem);
> -
> -  insn = emit_move_insn (reg, mem);
> -  add_reg_note (insn, REG_CFA_RESTORE, reg);
> -  RTX_FRAME_RELATED_P (insn) = 1;
> -}
> -
>   static rtx
>   aarch64_gen_storewb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
>                            HOST_WIDE_INT adjustment)
> @@ -2011,7 +1994,6 @@ aarch64_pushwb_pair_reg (enum machine_mode mode, unsigned regno1,
>     insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
>                                                reg2, adjustment));
>     RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
> -
>     RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
>     RTX_FRAME_RELATED_P (insn) = 1;
>   }
> @@ -2033,29 +2015,6 @@ aarch64_gen_loadwb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
>       }
>   }
>
> -static void
> -aarch64_popwb_pair_reg (enum machine_mode mode, unsigned regno1,
> -                       unsigned regno2, HOST_WIDE_INT adjustment, rtx cfa)
> -{
> -  rtx insn;
> -  rtx reg1 = gen_rtx_REG (mode, regno1);
> -  rtx reg2 = gen_rtx_REG (mode, regno2);
> -
> -  insn = emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
> -                                            reg2, adjustment));
> -  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
> -  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
> -  RTX_FRAME_RELATED_P (insn) = 1;
> -
> -  if (cfa)
> -    add_reg_note (insn, REG_CFA_ADJUST_CFA,
> -                 (gen_rtx_SET (Pmode, stack_pointer_rtx,
> -                               plus_constant (Pmode, cfa, adjustment))));
> -
> -  add_reg_note (insn, REG_CFA_RESTORE, reg1);
> -  add_reg_note (insn, REG_CFA_RESTORE, reg2);
> -}
> -
>   static rtx
>   aarch64_gen_store_pair (enum machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
>                          rtx reg2)
> @@ -2151,9 +2110,8 @@ aarch64_save_callee_saves (enum machine_mode mode, HOST_WIDE_INT start_offset,
>   static void
>   aarch64_restore_callee_saves (enum machine_mode mode,
>                                HOST_WIDE_INT start_offset, unsigned start,
> -                             unsigned limit, bool skip_wb)
> +                             unsigned limit, bool skip_wb, rtx *cfi_ops)
>   {
> -  rtx insn;
>     rtx base_rtx = stack_pointer_rtx;
>     rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed
>                                                   ? gen_frame_mem : gen_rtx_MEM);
> @@ -2187,25 +2145,14 @@ aarch64_restore_callee_saves (enum machine_mode mode,
>
>            offset = start_offset + cfun->machine->frame.reg_offset[regno2];
>            mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
> -         insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2,
> -                                                  mem2));
> -         add_reg_note (insn, REG_CFA_RESTORE, reg);
> -         add_reg_note (insn, REG_CFA_RESTORE, reg2);
> +         emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
>
> -         /* The first part of a frame-related parallel insn is
> -            always assumed to be relevant to the frame
> -            calculations; subsequent parts, are only
> -            frame-related if explicitly marked.  */
> -         RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
> +         *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
>            regno = regno2;
>          }
>         else
> -       {
> -         insn = emit_move_insn (reg, mem);
> -         add_reg_note (insn, REG_CFA_RESTORE, reg);
> -       }
> -
> -      RTX_FRAME_RELATED_P (insn) = 1;
> +       emit_move_insn (reg, mem);
> +      *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
>       }
>   }
>
> @@ -2418,7 +2365,6 @@ aarch64_expand_epilogue (bool for_sibcall)
>     HOST_WIDE_INT frame_size, offset;
>     HOST_WIDE_INT fp_offset;
>     rtx insn;
> -  rtx cfa_reg;
>
>     aarch64_layout_frame ();
>
> @@ -2426,8 +2372,6 @@ aarch64_expand_epilogue (bool for_sibcall)
>     fp_offset = cfun->machine->frame.frame_size
>                - cfun->machine->frame.hard_fp_offset;
>
> -  cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
> -
>     /* Store pairs and load pairs have a range only -512 to 504.  */
>     if (offset >= 512)
>       {
> @@ -2459,11 +2403,6 @@ aarch64_expand_epilogue (bool for_sibcall)
>                                         hard_frame_pointer_rtx,
>                                         GEN_INT (0)));
>         offset = offset - fp_offset;
> -      RTX_FRAME_RELATED_P (insn) = 1;
> -      /* As SP is set to (FP - fp_offset), according to the rules in
> -        dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
> -        from the value of SP from now on.  */
> -      cfa_reg = stack_pointer_rtx;
>       }
>
>     if (offset > 0)
> @@ -2471,6 +2410,7 @@ aarch64_expand_epilogue (bool for_sibcall)
>         unsigned reg1 = cfun->machine->frame.wb_candidate1;
>         unsigned reg2 = cfun->machine->frame.wb_candidate2;
>         bool skip_wb = true;
> +      rtx cfi_ops = NULL;
>
>         if (frame_pointer_needed)
>          fp_offset = 0;
> @@ -2481,99 +2421,86 @@ aarch64_expand_epilogue (bool for_sibcall)
>          skip_wb = false;
>
>         aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
> -                                   skip_wb);
> +                                   skip_wb, &cfi_ops);
>         aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
> -                                   skip_wb);
> +                                   skip_wb, &cfi_ops);
>
>         if (skip_wb)
>          {
>            enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
> +         rtx rreg1 = gen_rtx_REG (mode1, reg1);
>
> +         cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
>            if (reg2 == FIRST_PSEUDO_REGISTER)
> -           aarch64_popwb_single_reg (mode1, reg1, offset);
> +           {
> +             rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
> +             mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
> +             mem = gen_rtx_MEM (mode1, mem);
> +             insn = emit_move_insn (rreg1, mem);
> +           }
>            else
>              {
> -             if (reg1 != HARD_FRAME_POINTER_REGNUM)
> -               cfa_reg = NULL;
> +             rtx rreg2 = gen_rtx_REG (mode1, reg2);
>
> -             aarch64_popwb_pair_reg (mode1, reg1, reg2, offset, cfa_reg);
> +             cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
> +             insn = aarch64_gen_loadwb_pair (mode1, stack_pointer_rtx, rreg1,
> +                                             rreg2, offset);
> +             insn = emit_insn (insn);
>              }
>          }
>         else
>          {
>            insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
>                                             GEN_INT (offset)));
> -         RTX_FRAME_RELATED_P (insn) = 1;
>          }
> -    }
> -
> -  /* Stack adjustment for exception handler.  */
> -  if (crtl->calls_eh_return)
> -    {
> -      /* We need to unwind the stack by the offset computed by
> -        EH_RETURN_STACKADJ_RTX.  However, at this point the CFA is
> -        based on SP.  Ideally we would update the SP and define the
> -        CFA along the lines of:
> -
> -        SP = SP + EH_RETURN_STACKADJ_RTX
> -        (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
> -
> -        However the dwarf emitter only understands a constant
> -        register offset.
> -
> -        The solution chosen here is to use the otherwise unused IP0
> -        as a temporary register to hold the current SP value.  The
> -        CFA is described using IP0 then SP is modified.  */
>
> -      rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
> -
> -      insn = emit_move_insn (ip0, stack_pointer_rtx);
> -      add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
> +      /* Reset the CFA to be SP + FRAME_SIZE.  */
> +      rtx new_cfa = stack_pointer_rtx;
> +      if (frame_size > 0)
> +       new_cfa = plus_constant (Pmode, new_cfa, frame_size);
> +      cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
> +      REG_NOTES (insn) = cfi_ops;
>         RTX_FRAME_RELATED_P (insn) = 1;
> -
> -      emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
> -
> -      /* Ensure the assignment to IP0 does not get optimized away.  */
> -      emit_use (ip0);
>       }
>
> -  if (frame_size > -1)
> +  if (frame_size > 0)
>       {
>         if (frame_size >= 0x1000000)
>          {
>            rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
>            emit_move_insn (op0, GEN_INT (frame_size));
> -         emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
> -         aarch64_set_frame_expr (gen_rtx_SET
> -                                 (Pmode, stack_pointer_rtx,
> -                                  plus_constant (Pmode,
> -                                                 stack_pointer_rtx,
> -                                                 frame_size)));
> +         insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
>          }
> -      else if (frame_size > 0)
> +      else
>          {
> -         if ((frame_size & 0xfff) != 0)
> -           {
> -             insn = emit_insn (gen_add2_insn
> -                               (stack_pointer_rtx,
> -                                GEN_INT ((frame_size
> -                                          & (HOST_WIDE_INT) 0xfff))));
> -             RTX_FRAME_RELATED_P (insn) = 1;
> -           }
> -         if ((frame_size & 0xfff) != frame_size)
> +          int hi_ofs = frame_size & 0xfff000;
> +          int lo_ofs = frame_size & 0x000fff;
> +
> +         if (hi_ofs && lo_ofs)
>              {
>                insn = emit_insn (gen_add2_insn
> -                               (stack_pointer_rtx,
> -                                GEN_INT ((frame_size
> -                                          & ~ (HOST_WIDE_INT) 0xfff))));
> +                               (stack_pointer_rtx, GEN_INT (hi_ofs)));
>                RTX_FRAME_RELATED_P (insn) = 1;
> +             frame_size = lo_ofs;
>              }
> +         insn = emit_insn (gen_add2_insn
> +                           (stack_pointer_rtx, GEN_INT (frame_size)));
>          }
>
> -      aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
> -                                          plus_constant (Pmode,
> -                                                         stack_pointer_rtx,
> -                                                         offset)));
> +      /* Reset the CFA to be SP + 0.  */
> +      add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
> +      RTX_FRAME_RELATED_P (insn) = 1;
> +    }
> +
> +  /* Stack adjustment for exception handler.  */
> +  if (crtl->calls_eh_return)
> +    {
> +      /* We need to unwind the stack by the offset computed by
> +        EH_RETURN_STACKADJ_RTX.  We have already reset the CFA
> +        to be SP; letting the CFA move during this adjustment
> +        is just as correct as retaining the CFA from the body
> +        of the function.  Therefore, do nothing special.  */
> +      emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
>       }
>
>     emit_use (gen_rtx_REG (DImode, LR_REGNUM));
> --
> 1.8.3.1
>
>
>




More information about the Gcc-patches mailing list