[committed] aarch64: Tweak movti and movtf patterns

Thu Oct 1 08:27:41 GMT 2020

On Wed, 30 Sep 2020 at 12:53, Richard Sandiford via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> movti lacked an way of zeroing an FPR, meaning that we'd do:
>
>         mov     x0, 0
>         mov     x1, 0
>         fmov    d0, x0
>         fmov    v0.d[1], x1
>
> instead of just:
>
>         movi    v0.2d, #0
>
> movtf had the opposite problem for GPRs: we'd generate:
>
>         movi    v0.2d, #0
>         fmov    x0, d0
>         fmov    x1, v0.d[1]
>
> instead of just:
>
>         mov     x0, 0
>         mov     x1, 0
>
> Also, there was an unnecessary earlyclobber on the GPR<-GPR movtf
> alternative (but not the movti one).  The splitter handles overlap
> correctly.
>
> The TF splitter used aarch64_reg_or_imm, but the _imm part only
> accepts integer constants, not floating-point ones.  The patch
> changes it to nonmemory_operand instead.
>
> Tested on aarch64-linux-gnu, pushed.
>
> Richard
>
>
> gcc/
>         * config/aarch64/aarch64.c (aarch64_split_128bit_move_p): Add a
>         function comment.  Tighten check for FP moves.
>         * config/aarch64/aarch64.md (*movti_aarch64): Add a w<-Z alternative.
>         (*movtf_aarch64): Handle r<-Y like r<-r.  Remove unnecessary
>         earlyclobber.  Change splitter predicate from aarch64_reg_or_imm
>         to nonmemory_operand.
>
> gcc/testsuite/
>         * gcc.target/aarch64/movtf_1.c: New test.
>         * gcc.target/aarch64/movti_1.c: Likewise.

Sorry to bother you, the new tests fail with -mabi=ilp32 :-(
    gcc.target/aarch64/movtf_1.c check-function-bodies load_q
    gcc.target/aarch64/movtf_1.c check-function-bodies load_x
    gcc.target/aarch64/movtf_1.c check-function-bodies store_q
    gcc.target/aarch64/movtf_1.c check-function-bodies store_x
    gcc.target/aarch64/movti_1.c check-function-bodies load_q
    gcc.target/aarch64/movti_1.c check-function-bodies load_x
    gcc.target/aarch64/movti_1.c check-function-bodies store_q
    gcc.target/aarch64/movti_1.c check-function-bodies store_x

I don't think that's high priority though.

Christophe

> ---
>  gcc/config/aarch64/aarch64.c               |  9 ++-
>  gcc/config/aarch64/aarch64.md              | 17 +++--
>  gcc/testsuite/gcc.target/aarch64/movtf_1.c | 87 ++++++++++++++++++++++
>  gcc/testsuite/gcc.target/aarch64/movti_1.c | 87 ++++++++++++++++++++++
>  4 files changed, 190 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/movtf_1.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/movti_1.c
>
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 491fc582dab..9e88438b3c3 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -3422,11 +3422,16 @@ aarch64_split_128bit_move (rtx dst, rtx src)
>      }
>  }
>
> +/* Return true if we should split a move from 128-bit value SRC
> +   to 128-bit register DEST.  */
> +
>  bool
>  aarch64_split_128bit_move_p (rtx dst, rtx src)
>  {
> -  return (! REG_P (src)
> -         || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
> +  if (FP_REGNUM_P (REGNO (dst)))
> +    return REG_P (src) && !FP_REGNUM_P (REGNO (src));
> +  /* All moves to GPRs need to be split.  */
> +  return true;
>  }
>
>  /* Split a complex SIMD combine.  */
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 19ec9e33f9f..78fe7c43a00 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1361,13 +1361,14 @@ (define_expand "movti"
>
>  (define_insn "*movti_aarch64"
>    [(set (match_operand:TI 0
> -        "nonimmediate_operand"  "=   r,w, r,w,r,m,m,w,m")
> +        "nonimmediate_operand"  "=   r,w,w, r,w,r,m,m,w,m")
>         (match_operand:TI 1
> -        "aarch64_movti_operand" " rUti,r, w,w,m,r,Z,m,w"))]
> +        "aarch64_movti_operand" " rUti,Z,r, w,w,m,r,Z,m,w"))]
>    "(register_operand (operands[0], TImode)
>      || aarch64_reg_or_zero (operands[1], TImode))"
>    "@
>     #
> +   movi\\t%0.2d, #0
>     #
>     #
>     mov\\t%0.16b, %1.16b
> @@ -1376,11 +1377,11 @@ (define_insn "*movti_aarch64"
>     stp\\txzr, xzr, %0
>     ldr\\t%q0, %1
>     str\\t%q1, %0"
> -  [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
> +  [(set_attr "type" "multiple,neon_move,f_mcr,f_mrc,neon_logic_q, \
>                              load_16,store_16,store_16,\
>                               load_16,store_16")
> -   (set_attr "length" "8,8,8,4,4,4,4,4,4")
> -   (set_attr "arch" "*,*,*,simd,*,*,*,fp,fp")]
> +   (set_attr "length" "8,4,8,8,4,4,4,4,4,4")
> +   (set_attr "arch" "*,simd,*,*,simd,*,*,*,fp,fp")]
>  )
>
>  ;; Split a TImode register-register or register-immediate move into
> @@ -1511,9 +1512,9 @@ (define_split
>
>  (define_insn "*movtf_aarch64"
>    [(set (match_operand:TF 0
> -        "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r,m ,m")
> +        "nonimmediate_operand" "=w,?r ,w ,?r,w,?w,w,m,?r,m ,m")
>         (match_operand:TF 1
> -        "general_operand"      " w,?r, ?r,w ,Y,Y ,m,w,m ,?r,Y"))]
> +        "general_operand"      " w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y"))]
>    "TARGET_FLOAT && (register_operand (operands[0], TFmode)
>      || aarch64_reg_or_fp_zero (operands[1], TFmode))"
>    "@
> @@ -1536,7 +1537,7 @@ (define_insn "*movtf_aarch64"
>
>  (define_split
>     [(set (match_operand:TF 0 "register_operand" "")
> -        (match_operand:TF 1 "aarch64_reg_or_imm" ""))]
> +        (match_operand:TF 1 "nonmemory_operand" ""))]
>    "reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])"
>    [(const_int 0)]
>    {
> diff --git a/gcc/testsuite/gcc.target/aarch64/movtf_1.c b/gcc/testsuite/gcc.target/aarch64/movtf_1.c
> new file mode 100644
> index 00000000000..570de931389
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/movtf_1.c
> @@ -0,0 +1,87 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +/*
> +** zero_q:
> +**     movi    v0.2d, #0
> +**     ret
> +*/
> +void
> +zero_q ()
> +{
> +  register _Float128 q0 asm ("q0");
> +  q0 = 0;
> +  asm volatile ("" :: "w" (q0));
> +}
> +
> +/*
> +** load_q:
> +**     ldr     q0, \[x0\]
> +**     ret
> +*/
> +void
> +load_q (_Float128 *ptr)
> +{
> +  register _Float128 q0 asm ("q0");
> +  q0 = *ptr;
> +  asm volatile ("" :: "w" (q0));
> +}
> +
> +/*
> +** store_q:
> +**     str     q0, \[x0\]
> +**     ret
> +*/
> +void
> +store_q (_Float128 *ptr)
> +{
> +  register _Float128 q0 asm ("q0");
> +  asm volatile ("" : "=w" (q0));
> +  *ptr = q0;
> +}
> +
> +/*
> +** zero_x:
> +** (
> +**     mov     x0, #?0
> +**     mov     x1, #?0
> +** |
> +**     mov     x1, #?0
> +**     mov     x0, #?0
> +** )
> +**     ret
> +*/
> +void
> +zero_x ()
> +{
> +  register _Float128 x0 asm ("x0");
> +  x0 = 0;
> +  asm volatile ("" :: "r" (x0));
> +}
> +
> +/*
> +** load_x:
> +**     ldp     x2, x3, \[x0\]
> +**     ret
> +*/
> +void
> +load_x (_Float128 *ptr)
> +{
> +  register _Float128 x2 asm ("x2");
> +  x2 = *ptr;
> +  asm volatile ("" :: "r" (x2));
> +}
> +
> +/*
> +** store_x:
> +**     stp     x2, x3, \[x0\]
> +**     ret
> +*/
> +void
> +store_x (_Float128 *ptr)
> +{
> +  register _Float128 x2 asm ("x2");
> +  asm volatile ("" : "=r" (x2));
> +  *ptr = x2;
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/movti_1.c b/gcc/testsuite/gcc.target/aarch64/movti_1.c
> new file mode 100644
> index 00000000000..160e1acd281
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/movti_1.c
> @@ -0,0 +1,87 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +/*
> +** zero_q:
> +**     movi    v0.2d, #0
> +**     ret
> +*/
> +void
> +zero_q ()
> +{
> +  register __int128_t q0 asm ("q0");
> +  q0 = 0;
> +  asm volatile ("" :: "w" (q0));
> +}
> +
> +/*
> +** load_q:
> +**     ldr     q0, \[x0\]
> +**     ret
> +*/
> +void
> +load_q (__int128_t *ptr)
> +{
> +  register __int128_t q0 asm ("q0");
> +  q0 = *ptr;
> +  asm volatile ("" :: "w" (q0));
> +}
> +
> +/*
> +** store_q:
> +**     str     q0, \[x0\]
> +**     ret
> +*/
> +void
> +store_q (__int128_t *ptr)
> +{
> +  register __int128_t q0 asm ("q0");
> +  asm volatile ("" : "=w" (q0));
> +  *ptr = q0;
> +}
> +
> +/*
> +** zero_x:
> +** (
> +**     mov     x0, #?0
> +**     mov     x1, #?0
> +** |
> +**     mov     x1, #?0
> +**     mov     x0, #?0
> +** )
> +**     ret
> +*/
> +void
> +zero_x ()
> +{
> +  register __int128_t x0 asm ("x0");
> +  x0 = 0;
> +  asm volatile ("" :: "r" (x0));
> +}
> +
> +/*
> +** load_x:
> +**     ldp     x2, x3, \[x0\]
> +**     ret
> +*/
> +void
> +load_x (__int128_t *ptr)
> +{
> +  register __int128_t x2 asm ("x2");
> +  x2 = *ptr;
> +  asm volatile ("" :: "r" (x2));
> +}
> +
> +/*
> +** store_x:
> +**     stp     x2, x3, \[x0\]
> +**     ret
> +*/
> +void
> +store_x (__int128_t *ptr)
> +{
> +  register __int128_t x2 asm ("x2");
> +  asm volatile ("" : "=r" (x2));
> +  *ptr = x2;
> +}