This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH][AArch64] Improve stack adjustment


On 04/08/16 16:56, Richard Earnshaw (lists) wrote:
> On 04/08/16 12:06, Wilco Dijkstra wrote:
>> Improve stack adjustment by reusing a temporary move immediate 
>> from the epilog if the register is still valid in the epilog.  This generates
>> smaller code for leaf functions:
>>
>>         mov     x16, 40000
>>         sub     sp, sp, x16
>>         ldr     w0, [sp, w0, sxtw 2]
>>         add     sp, sp, x16
>>         ret
>>
>> Passes GCC regression tests.
>>
>> ChangeLog:
>> 2016-08-04  Wilco Dijkstra  <wdijkstr@arm.com>
>>
>> gcc/
>> 	* config/aarch64/aarch64.c (aarch64_add_constant):
>> 	Add extra argument to allow emitting the move immediate.
>> 	Use add/sub with positive immediate.
>> 	(aarch64_expand_epilogue): Decide when to leave out move.
>>
>> testsuite/
>> 	* gcc.target/aarch64/test_frame_17.c: New test.
>> --
>>
> 
> I see you've added a default argument for your new parameter.  I think
> doing that is fine, but I have two comments about how we might use that
> in this case.
> 
> Firstly, if this parameter is suitable for having a default value, then
> I think the preceding one should also be treated in the same way.
> 
> Secondly, I think (due to these parameters being BOOL with no useful
> context to make it clear which is which) that having wrapper functions
> (inlined, of course) that describe the intended behaviour more clearly
> would be useful.  So, defining
> 
> static inline void
> aarch64_add_frame_constant (mode, regnum, scratchreg, delta)
> {
>    aarch64_add_frame_constant (mode, regnum, scratchreg, delta, true);

The inner function should be aarch64_add_constant of course!

R.

> }
> 
> would make it clearer at the call point than having a lot of true and
> false parameters scattered round the code.
> 
> Alternatively we might remove all the default parameters and require
> wrappers like the above to make it more explicit which API is intended -
> this might make more sense if not all combinations make sense.
> 
> R.
> 
>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
>> index ce2cc5ae3e1291f4ef4a8408461678c9397b06bd..5b59e4dd157351f301fc563a724cefe8a9be132c 100644
>> --- a/gcc/config/aarch64/aarch64.c
>> +++ b/gcc/config/aarch64/aarch64.c
>> @@ -1941,15 +1941,21 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
>>  }
>>  
>>  /* Add DELTA to REGNUM in mode MODE.  SCRATCHREG can be used to held
>> -   intermediate value if necessary.
>> +   intermediate value if necessary.  FRAME_RELATED_P should be true if
>> +   the RTX_FRAME_RELATED flag should be set and CFA adjustments added
>> +   to the generated instructions.  If SCRATCHREG is known to hold
>> +   abs (delta), EMIT_MOVE_IMM can be set to false to avoid emitting the
>> +   immediate again.
>>  
>> -   This function is sometimes used to adjust the stack pointer, so we must
>> -   ensure that it can never cause transient stack deallocation by writing an
>> -   invalid value into REGNUM.  */
>> +   Since this function may be used to adjust the stack pointer, we must
>> +   ensure that it cannot cause transient stack deallocation (for example
>> +   by first incrementing SP and then decrementing when adjusting by a
>> +   large immediate).  */
>>  
>>  static void
>>  aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
>> -		      HOST_WIDE_INT delta, bool frame_related_p)
>> +		      HOST_WIDE_INT delta, bool frame_related_p,
>> +		      bool emit_move_imm = true)
>>  {
>>    HOST_WIDE_INT mdelta = abs_hwi (delta);
>>    rtx this_rtx = gen_rtx_REG (mode, regnum);
>> @@ -1967,11 +1973,11 @@ aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
>>        return;
>>      }
>>  
>> -  /* We need two add/sub instructions, each one performing part of the
>> -     calculation.  Don't do this if the addend can be loaded into register with
>> -     a single instruction, in that case we prefer a move to a scratch register
>> -     following by an addition.  */
>> -  if (mdelta < 0x1000000 && !aarch64_move_imm (delta, mode))
>> +  /* We need two add/sub instructions, each one perform part of the
>> +     addition/subtraction, but don't this if the addend can be loaded into
>> +     register by single instruction, in that case we prefer a move to scratch
>> +     register following by addition.  */
>> +  if (mdelta < 0x1000000 && !aarch64_move_imm (mdelta, mode))
>>      {
>>        HOST_WIDE_INT low_off = mdelta & 0xfff;
>>  
>> @@ -1985,8 +1991,10 @@ aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
>>  
>>    /* Otherwise use generic function to handle all other situations.  */
>>    rtx scratch_rtx = gen_rtx_REG (mode, scratchreg);
>> -  aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (delta), true, mode);
>> -  insn = emit_insn (gen_add2_insn (this_rtx, scratch_rtx));
>> +  if (emit_move_imm)
>> +    aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (mdelta), true, mode);
>> +  insn = emit_insn (delta < 0 ? gen_sub2_insn (this_rtx, scratch_rtx)
>> +			      : gen_add2_insn (this_rtx, scratch_rtx));
>>    if (frame_related_p)
>>      {
>>        RTX_FRAME_RELATED_P (insn) = frame_related_p;
>> @@ -3288,7 +3296,8 @@ aarch64_expand_epilogue (bool for_sibcall)
>>        RTX_FRAME_RELATED_P (insn) = callee_adjust == 0;
>>      }
>>    else
>> -    aarch64_add_constant (Pmode, SP_REGNUM, IP1_REGNUM, final_adjust, true);
>> +    aarch64_add_constant (Pmode, SP_REGNUM, IP1_REGNUM, final_adjust, true,
>> +			  df_regs_ever_live_p (IP1_REGNUM));
>>  
>>    aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
>>  				callee_adjust != 0, &cfi_ops);
>> @@ -3311,7 +3320,8 @@ aarch64_expand_epilogue (bool for_sibcall)
>>        cfi_ops = NULL;
>>      }
>>  
>> -  aarch64_add_constant (Pmode, SP_REGNUM, IP0_REGNUM, initial_adjust, true);
>> +  aarch64_add_constant (Pmode, SP_REGNUM, IP0_REGNUM, initial_adjust, true,
>> +			df_regs_ever_live_p (IP0_REGNUM));
>>  
>>    if (cfi_ops)
>>      {
>> diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_17.c b/gcc/testsuite/gcc.target/aarch64/test_frame_17.c
>> new file mode 100644
>> index 0000000000000000000000000000000000000000..c214431999b60cce8a75204876a8c73ec6304128
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/test_frame_17.c
>> @@ -0,0 +1,21 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 --save-temps" } */
>> +
>> +/* Test reuse of stack adjustment temporaries.  */
>> +
>> +void foo ();
>> +
>> +int reuse_mov (int i)
>> +{
>> +  int arr[1025];
>> +  return arr[i];
>> +}
>> +
>> +int no_reuse_mov (int i)
>> +{
>> +  int arr[1025];
>> +  foo ();
>> +  return arr[i];
>> +}
>> +
>> +/* { dg-final { scan-assembler-times "mov\tx16, \[0-9\]+" 3 } } */
>>
> 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]