This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [AArch64, PATCH] Improve Neon store of zero


On 10/08/17 14:12, Jackson Woodruff wrote:
> Hi all,
> 
> This patch changes patterns in aarch64-simd.md to replace
> 
>     movi    v0.4s, 0
>     str    q0, [x0, 16]
> 
> with:
> 
>     stp xzr, xzr, [x0, 16]
> 
> when we are storing zeros to vectors like this:
> 
>     void f(uint32x4_t *p) {
>       uint32x4_t x = { 0, 0, 0, 0};
>       p[1] = x;
>     }
> 
> Bootstrapped and regtested on aarch64 with no regressions.
> OK for trunk?
> 
> Jackson
> 
> gcc/
> 
> 2017-08-09  Jackson Woodruff  <jackson.woodruff@arm.com>
> 
>     * aarch64-simd.md (mov<mode>): No longer force zero
>     immediate into register.
>     (*aarch64_simd_mov<mode>): Add new case for stp
>     using zero immediate.
> 
> 
> gcc/testsuite
> 
> 2017-08-09  Jackson Woodruff  <jackson.woodruff@arm.com>
> 
>     * gcc.target/aarch64/simd/neon_str_zero.c: New.
> 
> 
> patchfile
> 
> 
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 74de9b8c89dd5e4e3d87504594c969de0e0128ce..0149a742d34ae4fd5b3fd705b03c845f94aa1d59 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -23,7 +23,10 @@
>  	(match_operand:VALL_F16 1 "general_operand" ""))]
>    "TARGET_SIMD"
>    "
> -    if (GET_CODE (operands[0]) == MEM)
> +    if (GET_CODE (operands[0]) == MEM
> +	    && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
> +		 && aarch64_legitimate_address_p (<MODE>mode, operands[0],
> +						  PARALLEL, 1)))
>        operands[1] = force_reg (<MODE>mode, operands[1]);
>    "
>  )
> @@ -94,63 +97,70 @@
>  
>  (define_insn "*aarch64_simd_mov<mode>"
>    [(set (match_operand:VD 0 "nonimmediate_operand"
> -		"=w, m,  w, ?r, ?w, ?r, w")
> +		"=w, m,  m,  w, ?r, ?w, ?r, w")
>  	(match_operand:VD 1 "general_operand"
> -		"m,  w,  w,  w,  r,  r, Dn"))]
> +		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
>    "TARGET_SIMD
> -   && (register_operand (operands[0], <MODE>mode)
> -       || register_operand (operands[1], <MODE>mode))"
> +   && ((register_operand (operands[0], <MODE>mode)
> +       || register_operand (operands[1], <MODE>mode))
> +      || (memory_operand (operands[0], <MODE>mode)
> +	  && immediate_operand (operands[1], <MODE>mode)))"

Allowing any immediate here seems too lax: the insn condition accepts
every immediate value, not just zero, which could then cause reload
operations to be inserted (and that in turn might make the register
pressure calculations incorrect).
Wouldn't it be better to use something like aarch64_simd_reg_or_zero?
Similarly below.

R.

>  {
>     switch (which_alternative)
>       {
>       case 0: return "ldr\\t%d0, %1";
> -     case 1: return "str\\t%d1, %0";
> -     case 2: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
> -     case 3: return "umov\t%0, %1.d[0]";
> -     case 4: return "fmov\t%d0, %1";
> -     case 5: return "mov\t%0, %1";
> -     case 6:
> +     case 1: return "str\\txzr, %0";
> +     case 2: return "str\\t%d1, %0";
> +     case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
> +     case 4: return "umov\t%0, %1.d[0]";
> +     case 5: return "fmov\t%d0, %1";
> +     case 6: return "mov\t%0, %1";
> +     case 7:
>  	return aarch64_output_simd_mov_immediate (operands[1],
>  						  <MODE>mode, 64);
>       default: gcc_unreachable ();
>       }
>  }
> -  [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
> +  [(set_attr "type" "neon_load1_1reg<q>, neon_stp, neon_store1_1reg<q>,\
>  		     neon_logic<q>, neon_to_gp<q>, f_mcr,\
>  		     mov_reg, neon_move<q>")]
>  )
>  
>  (define_insn "*aarch64_simd_mov<mode>"
>    [(set (match_operand:VQ 0 "nonimmediate_operand"
> -		"=w, m,  w, ?r, ?w, ?r, w")
> +		"=w, Ump,  m,  w, ?r, ?w, ?r, w")
>  	(match_operand:VQ 1 "general_operand"
> -		"m,  w,  w,  w,  r,  r, Dn"))]
> +		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
>    "TARGET_SIMD
> -   && (register_operand (operands[0], <MODE>mode)
> -       || register_operand (operands[1], <MODE>mode))"
> +   && ((register_operand (operands[0], <MODE>mode)
> +	|| register_operand (operands[1], <MODE>mode))
> +       || (memory_operand (operands[0], <MODE>mode)
> +	   && immediate_operand (operands[1], <MODE>mode)))"
>  {
>    switch (which_alternative)
>      {
>      case 0:
>  	return "ldr\\t%q0, %1";
>      case 1:
> -	return "str\\t%q1, %0";
> +	return "stp\\txzr, xzr, %0";
>      case 2:
> -	return "mov\t%0.<Vbtype>, %1.<Vbtype>";
> +	return "str\\t%q1, %0";
>      case 3:
> +	return "mov\t%0.<Vbtype>, %1.<Vbtype>";
>      case 4:
>      case 5:
> -	return "#";
>      case 6:
> +	return "#";
> +    case 7:
>  	return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
>      default:
>  	gcc_unreachable ();
>      }
>  }
>    [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
> -                     neon_logic<q>, multiple, multiple, multiple,\
> -                     neon_move<q>")
> -   (set_attr "length" "4,4,4,8,8,8,4")]
> +		     neon_stp, neon_logic<q>, multiple, multiple,\
> +		     multiple, neon_move<q>")
> +   (set_attr "length" "4,4,4,4,8,8,8,4")]
>  )
>  
>  ;; When storing lane zero we can use the normal STR and its more permissive
> diff --git a/gcc/testsuite/gcc.target/aarch64/simd/neon_str_zero.c b/gcc/testsuite/gcc.target/aarch64/simd/neon_str_zero.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..07198de109432b530745cc540790303ae0245efb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/simd/neon_str_zero.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1" } */
> +
> +#include <arm_neon.h>
> +
> +void
> +f (uint32x4_t *p)
> +{
> +  uint32x4_t x = { 0, 0, 0, 0};
> +  p[1] = x;
> +
> +  /* { dg-final { scan-assembler "stp\txzr, xzr," } } */
> +}
> +
> +void
> +g (float32x2_t *p)
> +{
> +  float32x2_t x = {0.0, 0.0};
> +  p[0] = x;
> +
> +  /* { dg-final { scan-assembler "str\txzr, " } } */
> +}
> 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]