This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH][AArch64][2/5] Improve immediate generation


On Wed, Sep 02, 2015 at 01:35:19PM +0100, Wilco Dijkstra wrote:
> aarch64_internal_mov_immediate uses loops iterating over all legal bitmask
> immediates to find 2-instruction immediate combinations. One loop is
> quadratic and, despite being extremely expensive, very rarely finds a matching
> immediate (43 matches in all of SPEC2006, but none are emitted in final code),
> so it can be removed without any effect on code quality. The other loop can
> be replaced by a constant-time search: rather than iterating over all legal
> bitmask values, reconstruct a potential bitmask and query the fast
> aarch64_bitmask_imm.
> 
> No change in generated code, passes GCC regression tests/bootstrap.

Well, presumably those 43 cases in SPEC2006 changed...

The code looks OK, and I haven't seen any objections from Marcus or
Richard on the overall direction of the patch set.

OK for trunk.

Thanks,
James

> 
> ChangeLog:
> 2015-09-02  Wilco Dijkstra  <wdijkstr@arm.com>
> 
> 	* gcc/config/aarch64/aarch64.c (aarch64_internal_mov_immediate):
> 	Replace slow immediate matching loops with a faster algorithm.
> 
> ---
>  gcc/config/aarch64/aarch64.c | 96 +++++++++++---------------------------------
>  1 file changed, 23 insertions(+), 73 deletions(-)
> 
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index c0280e6..d6f7cb0 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -1376,7 +1376,7 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
>    unsigned HOST_WIDE_INT mask;
>    int i;
>    bool first;
> -  unsigned HOST_WIDE_INT val;
> +  unsigned HOST_WIDE_INT val, val2;
>    bool subtargets;
>    rtx subtarget;
>    int one_match, zero_match, first_not_ffff_match;
> @@ -1503,85 +1503,35 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
>  	}
>      }
>  
> -  /* See if we can do it by arithmetically combining two
> -     immediates.  */
> -  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
> +  if (zero_match != 2 && one_match != 2)
>      {
> -      int j;
> -      mask = 0xffff;
> +      /* Try emitting a bitmask immediate with a movk replacing 16 bits.
> +	 For a 64-bit bitmask try whether changing 16 bits to all ones or
> +	 zeroes creates a valid bitmask.  To check any repeated bitmask,
> +	 try using 16 bits from the other 32-bit half of val.  */
>  
> -      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
> -	  || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
> +      for (i = 0; i < 64; i += 16, mask <<= 16)
>  	{
> -	  if (generate)
> -	    {
> -	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
> -	      emit_insn (gen_rtx_SET (subtarget,
> -				      GEN_INT (aarch64_bitmasks[i])));
> -	      emit_insn (gen_adddi3 (dest, subtarget,
> -				     GEN_INT (val - aarch64_bitmasks[i])));
> -	    }
> -	  num_insns += 2;
> -	  return num_insns;
> +	  val2 = val & ~mask;
> +	  if (val2 != val && aarch64_bitmask_imm (val2, mode))
> +	    break;
> +	  val2 = val | mask;
> +	  if (val2 != val && aarch64_bitmask_imm (val2, mode))
> +	    break;
> +	  val2 = val2 & ~mask;
> +	  val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
> +	  if (val2 != val && aarch64_bitmask_imm (val2, mode))
> +	    break;
>  	}
> -
> -      for (j = 0; j < 64; j += 16, mask <<= 16)
> +      if (i != 64)
>  	{
> -	  if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
> +	  if (generate)
>  	    {
> -	      if (generate)
> -		{
> -		  emit_insn (gen_rtx_SET (dest,
> -					  GEN_INT (aarch64_bitmasks[i])));
> -		  emit_insn (gen_insv_immdi (dest, GEN_INT (j),
> -					     GEN_INT ((val >> j) & 0xffff)));
> -		}
> -	      num_insns += 2;
> -	      return num_insns;
> +	      emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
> +	      emit_insn (gen_insv_immdi (dest, GEN_INT (i),
> +			 GEN_INT ((val >> i) & 0xffff)));
>  	    }
> -	}
> -    }
> -
> -  /* See if we can do it by logically combining two immediates.  */
> -  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
> -    {
> -      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
> -	{
> -	  int j;
> -
> -	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
> -	    if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
> -	      {
> -		if (generate)
> -		  {
> -		    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
> -		    emit_insn (gen_rtx_SET (subtarget,
> -					    GEN_INT (aarch64_bitmasks[i])));
> -		    emit_insn (gen_iordi3 (dest, subtarget,
> -					   GEN_INT (aarch64_bitmasks[j])));
> -		  }
> -		num_insns += 2;
> -		return num_insns;
> -	      }
> -	}
> -      else if ((val & aarch64_bitmasks[i]) == val)
> -	{
> -	  int j;
> -
> -	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
> -	    if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
> -	      {
> -		if (generate)
> -		  {
> -		    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
> -		    emit_insn (gen_rtx_SET (subtarget,
> -					    GEN_INT (aarch64_bitmasks[j])));
> -		    emit_insn (gen_anddi3 (dest, subtarget,
> -					   GEN_INT (aarch64_bitmasks[i])));
> -		  }
> -		num_insns += 2;
> -		return num_insns;
> -	      }
> +	  return 2;
>  	}
>      }
>  
> -- 
> 1.8.3
> 
> 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]