This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [AArch64][2/3] Optimize aarch64_add_constant to generate better addition sequences


On 20/07/16 14:02, Jiong Wang wrote:
> This patch optimizes immediate addition sequences generated by
> aarch64_add_constant.
> 
> The current addition sequences generated are:
> 
>   * If the immediate fits into the unsigned 12bit range, generate a single add/sub.
> 
>   * Otherwise, if it fits into the unsigned 24bit range, generate two
> add/sub instructions.
> 
>   * Otherwise invoke the general constant build function.
> 
> 
> This hasn't considered the situation where the immediate can't fit into
> the unsigned 12bit range, but can fit into a single mov instruction, in
> which case we generate one move and one addition.  The move won't touch
> the destination register, thus the sequence is better than two additions
> which both touch the destination register.
> 
> 
> This patch thus optimizes the addition sequences into:
> 
>   * If the immediate fits into the unsigned 12bit range, generate a single add/sub.
>  
>   * Otherwise, if it fits into the unsigned 24bit range, generate two add/sub.
>     But don't do this if it fits into a single move instruction, in which case
>     move the immediate to a scratch register first, then generate one
>     addition to add the scratch register to the destination register.
> 
>   * Otherwise invoke the general constant build function.
> 
> 
> OK for trunk?
> 
> gcc/
> 2016-07-20  Jiong Wang  <jiong.wang@arm.com>
> 
>             * config/aarch64/aarch64.c (aarch64_add_constant): Optimize
>             instruction sequences.
> 
> 

OK with the updates to the comments as mentioned below.

> build-const-2.patch
> 
> 
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index aeea3b3ebc514663043ac8d7cd13361f06f78502..41844a101247c939ecb31f8a8c17cf79759255aa 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -1865,6 +1865,47 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
>    aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
>  }
>  
> +/* Add DELTA onto REGNUM in MODE, using SCRATCHREG to held intermediate value if
> +   it is necessary.  */

Add DELTA to REGNUM in mode MODE.  SCRATCHREG can be used to hold an
intermediate value if necessary.


> +
> +static void
> +aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
> +		      HOST_WIDE_INT delta)
> +{
> +  HOST_WIDE_INT mdelta = abs_hwi (delta);
> +  rtx this_rtx = gen_rtx_REG (mode, regnum);
> +
> +  /* Do nothing if mdelta is zero.  */
> +  if (!mdelta)
> +    return;
> +
> +  /* We only need single instruction if the offset fit into add/sub.  */
> +  if (aarch64_uimm12_shift (mdelta))
> +    {
> +      emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta)));
> +      return;
> +    }
> +
> +  /* We need two add/sub instructions, each one perform part of the
> +     addition/subtraction, but don't this if the addend can be loaded into
> +     register by single instruction, in that case we prefer a move to scratch
> +     register following by addition.  */

We need two add/sub instructions, each one performing part of the
calculation.  Don't do this if the addend can be loaded into a
register with a single instruction; in that case we prefer a move to a
scratch register followed by an addition.



> +  if (mdelta < 0x1000000 && !aarch64_move_imm (delta, mode))
> +    {
> +      HOST_WIDE_INT low_off = mdelta & 0xfff;
> +
> +      low_off = delta < 0 ? -low_off : low_off;
> +      emit_insn (gen_add2_insn (this_rtx, GEN_INT (low_off)));
> +      emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta - low_off)));
> +      return;
> +    }
> +
> +  /* Otherwise use generic function to handle all other situations.  */
> +  rtx scratch_rtx = gen_rtx_REG (mode, scratchreg);
> +  aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (delta), true, mode);
> +  emit_insn (gen_add2_insn (this_rtx, scratch_rtx));
> +}
> +
>  static bool
>  aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
>  				 tree exp ATTRIBUTE_UNUSED)
> @@ -3337,44 +3378,6 @@ aarch64_final_eh_return_addr (void)
>  				       - 2 * UNITS_PER_WORD));
>  }
>  
> -static void
> -aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
> -		      HOST_WIDE_INT delta)
> -{
> -  HOST_WIDE_INT mdelta = delta;
> -  rtx this_rtx = gen_rtx_REG (mode, regnum);
> -  rtx scratch_rtx = gen_rtx_REG (mode, scratchreg);
> -
> -  if (mdelta < 0)
> -    mdelta = -mdelta;
> -
> -  if (mdelta >= 4096 * 4096)
> -    {
> -      aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (delta), true, mode);
> -      emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
> -    }
> -  else if (mdelta > 0)
> -    {
> -      if (mdelta >= 4096)
> -	{
> -	  emit_insn (gen_rtx_SET (scratch_rtx, GEN_INT (mdelta / 4096)));
> -	  rtx shift = gen_rtx_ASHIFT (mode, scratch_rtx, GEN_INT (12));
> -	  if (delta < 0)
> -	    emit_insn (gen_rtx_SET (this_rtx,
> -				    gen_rtx_MINUS (mode, this_rtx, shift)));
> -	  else
> -	    emit_insn (gen_rtx_SET (this_rtx,
> -				    gen_rtx_PLUS (mode, this_rtx, shift)));
> -	}
> -      if (mdelta % 4096 != 0)
> -	{
> -	  scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
> -	  emit_insn (gen_rtx_SET (this_rtx,
> -				  gen_rtx_PLUS (mode, this_rtx, scratch_rtx)));
> -	}
> -    }
> -}
> -
>  /* Output code to add DELTA to the first argument, and then jump
>     to FUNCTION.  Used for C++ multiple inheritance.  */
>  static void
> 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]