[committed] aarch64: Tweak movti and movtf patterns
Christophe Lyon
christophe.lyon@linaro.org
Thu Oct 1 08:27:41 GMT 2020
On Wed, 30 Sep 2020 at 12:53, Richard Sandiford via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> movti lacked an way of zeroing an FPR, meaning that we'd do:
>
> mov x0, 0
> mov x1, 0
> fmov d0, x0
> fmov v0.d[1], x1
>
> instead of just:
>
> movi v0.2d, #0
>
> movtf had the opposite problem for GPRs: we'd generate:
>
> movi v0.2d, #0
> fmov x0, d0
> fmov x1, v0.d[1]
>
> instead of just:
>
> mov x0, 0
> mov x1, 0
>
> Also, there was an unnecessary earlyclobber on the GPR<-GPR movtf
> alternative (but not the movti one). The splitter handles overlap
> correctly.
>
> The TF splitter used aarch64_reg_or_imm, but the _imm part only
> accepts integer constants, not floating-point ones. The patch
> changes it to nonmemory_operand instead.
>
> Tested on aarch64-linux-gnu, pushed.
>
> Richard
>
>
> gcc/
> * config/aarch64/aarch64.c (aarch64_split_128bit_move_p): Add a
> function comment. Tighten check for FP moves.
> * config/aarch64/aarch64.md (*movti_aarch64): Add a w<-Z alternative.
> (*movtf_aarch64): Handle r<-Y like r<-r. Remove unnecessary
> earlyclobber. Change splitter predicate from aarch64_reg_or_imm
> to nonmemory_operand.
>
> gcc/testsuite/
> * gcc.target/aarch64/movtf_1.c: New test.
> * gcc.target/aarch64/movti_1.c: Likewise.
Sorry to bother you, the new tests fail with -mabi=ilp32 :-(
gcc.target/aarch64/movtf_1.c check-function-bodies load_q
gcc.target/aarch64/movtf_1.c check-function-bodies load_x
gcc.target/aarch64/movtf_1.c check-function-bodies store_q
gcc.target/aarch64/movtf_1.c check-function-bodies store_x
gcc.target/aarch64/movti_1.c check-function-bodies load_q
gcc.target/aarch64/movti_1.c check-function-bodies load_x
gcc.target/aarch64/movti_1.c check-function-bodies store_q
gcc.target/aarch64/movti_1.c check-function-bodies store_x
I don't think that's high priority though.
Christophe
> ---
> gcc/config/aarch64/aarch64.c | 9 ++-
> gcc/config/aarch64/aarch64.md | 17 +++--
> gcc/testsuite/gcc.target/aarch64/movtf_1.c | 87 ++++++++++++++++++++++
> gcc/testsuite/gcc.target/aarch64/movti_1.c | 87 ++++++++++++++++++++++
> 4 files changed, 190 insertions(+), 10 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/movtf_1.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/movti_1.c
>
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 491fc582dab..9e88438b3c3 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -3422,11 +3422,16 @@ aarch64_split_128bit_move (rtx dst, rtx src)
> }
> }
>
> +/* Return true if we should split a move from 128-bit value SRC
> + to 128-bit register DEST. */
> +
> bool
> aarch64_split_128bit_move_p (rtx dst, rtx src)
> {
> - return (! REG_P (src)
> - || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
> + if (FP_REGNUM_P (REGNO (dst)))
> + return REG_P (src) && !FP_REGNUM_P (REGNO (src));
> + /* All moves to GPRs need to be split. */
> + return true;
> }
>
> /* Split a complex SIMD combine. */
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 19ec9e33f9f..78fe7c43a00 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1361,13 +1361,14 @@ (define_expand "movti"
>
> (define_insn "*movti_aarch64"
> [(set (match_operand:TI 0
> - "nonimmediate_operand" "= r,w, r,w,r,m,m,w,m")
> + "nonimmediate_operand" "= r,w,w, r,w,r,m,m,w,m")
> (match_operand:TI 1
> - "aarch64_movti_operand" " rUti,r, w,w,m,r,Z,m,w"))]
> + "aarch64_movti_operand" " rUti,Z,r, w,w,m,r,Z,m,w"))]
> "(register_operand (operands[0], TImode)
> || aarch64_reg_or_zero (operands[1], TImode))"
> "@
> #
> + movi\\t%0.2d, #0
> #
> #
> mov\\t%0.16b, %1.16b
> @@ -1376,11 +1377,11 @@ (define_insn "*movti_aarch64"
> stp\\txzr, xzr, %0
> ldr\\t%q0, %1
> str\\t%q1, %0"
> - [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
> + [(set_attr "type" "multiple,neon_move,f_mcr,f_mrc,neon_logic_q, \
> load_16,store_16,store_16,\
> load_16,store_16")
> - (set_attr "length" "8,8,8,4,4,4,4,4,4")
> - (set_attr "arch" "*,*,*,simd,*,*,*,fp,fp")]
> + (set_attr "length" "8,4,8,8,4,4,4,4,4,4")
> + (set_attr "arch" "*,simd,*,*,simd,*,*,*,fp,fp")]
> )
>
> ;; Split a TImode register-register or register-immediate move into
> @@ -1511,9 +1512,9 @@ (define_split
>
> (define_insn "*movtf_aarch64"
> [(set (match_operand:TF 0
> - "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r,m ,m")
> + "nonimmediate_operand" "=w,?r ,w ,?r,w,?w,w,m,?r,m ,m")
> (match_operand:TF 1
> - "general_operand" " w,?r, ?r,w ,Y,Y ,m,w,m ,?r,Y"))]
> + "general_operand" " w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y"))]
> "TARGET_FLOAT && (register_operand (operands[0], TFmode)
> || aarch64_reg_or_fp_zero (operands[1], TFmode))"
> "@
> @@ -1536,7 +1537,7 @@ (define_insn "*movtf_aarch64"
>
> (define_split
> [(set (match_operand:TF 0 "register_operand" "")
> - (match_operand:TF 1 "aarch64_reg_or_imm" ""))]
> + (match_operand:TF 1 "nonmemory_operand" ""))]
> "reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])"
> [(const_int 0)]
> {
> diff --git a/gcc/testsuite/gcc.target/aarch64/movtf_1.c b/gcc/testsuite/gcc.target/aarch64/movtf_1.c
> new file mode 100644
> index 00000000000..570de931389
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/movtf_1.c
> @@ -0,0 +1,87 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +/*
> +** zero_q:
> +** movi v0.2d, #0
> +** ret
> +*/
> +void
> +zero_q ()
> +{
> + register _Float128 q0 asm ("q0");
> + q0 = 0;
> + asm volatile ("" :: "w" (q0));
> +}
> +
> +/*
> +** load_q:
> +** ldr q0, \[x0\]
> +** ret
> +*/
> +void
> +load_q (_Float128 *ptr)
> +{
> + register _Float128 q0 asm ("q0");
> + q0 = *ptr;
> + asm volatile ("" :: "w" (q0));
> +}
> +
> +/*
> +** store_q:
> +** str q0, \[x0\]
> +** ret
> +*/
> +void
> +store_q (_Float128 *ptr)
> +{
> + register _Float128 q0 asm ("q0");
> + asm volatile ("" : "=w" (q0));
> + *ptr = q0;
> +}
> +
> +/*
> +** zero_x:
> +** (
> +** mov x0, #?0
> +** mov x1, #?0
> +** |
> +** mov x1, #?0
> +** mov x0, #?0
> +** )
> +** ret
> +*/
> +void
> +zero_x ()
> +{
> + register _Float128 x0 asm ("x0");
> + x0 = 0;
> + asm volatile ("" :: "r" (x0));
> +}
> +
> +/*
> +** load_x:
> +** ldp x2, x3, \[x0\]
> +** ret
> +*/
> +void
> +load_x (_Float128 *ptr)
> +{
> + register _Float128 x2 asm ("x2");
> + x2 = *ptr;
> + asm volatile ("" :: "r" (x2));
> +}
> +
> +/*
> +** store_x:
> +** stp x2, x3, \[x0\]
> +** ret
> +*/
> +void
> +store_x (_Float128 *ptr)
> +{
> + register _Float128 x2 asm ("x2");
> + asm volatile ("" : "=r" (x2));
> + *ptr = x2;
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/movti_1.c b/gcc/testsuite/gcc.target/aarch64/movti_1.c
> new file mode 100644
> index 00000000000..160e1acd281
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/movti_1.c
> @@ -0,0 +1,87 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +/*
> +** zero_q:
> +** movi v0.2d, #0
> +** ret
> +*/
> +void
> +zero_q ()
> +{
> + register __int128_t q0 asm ("q0");
> + q0 = 0;
> + asm volatile ("" :: "w" (q0));
> +}
> +
> +/*
> +** load_q:
> +** ldr q0, \[x0\]
> +** ret
> +*/
> +void
> +load_q (__int128_t *ptr)
> +{
> + register __int128_t q0 asm ("q0");
> + q0 = *ptr;
> + asm volatile ("" :: "w" (q0));
> +}
> +
> +/*
> +** store_q:
> +** str q0, \[x0\]
> +** ret
> +*/
> +void
> +store_q (__int128_t *ptr)
> +{
> + register __int128_t q0 asm ("q0");
> + asm volatile ("" : "=w" (q0));
> + *ptr = q0;
> +}
> +
> +/*
> +** zero_x:
> +** (
> +** mov x0, #?0
> +** mov x1, #?0
> +** |
> +** mov x1, #?0
> +** mov x0, #?0
> +** )
> +** ret
> +*/
> +void
> +zero_x ()
> +{
> + register __int128_t x0 asm ("x0");
> + x0 = 0;
> + asm volatile ("" :: "r" (x0));
> +}
> +
> +/*
> +** load_x:
> +** ldp x2, x3, \[x0\]
> +** ret
> +*/
> +void
> +load_x (__int128_t *ptr)
> +{
> + register __int128_t x2 asm ("x2");
> + x2 = *ptr;
> + asm volatile ("" :: "r" (x2));
> +}
> +
> +/*
> +** store_x:
> +** stp x2, x3, \[x0\]
> +** ret
> +*/
> +void
> +store_x (__int128_t *ptr)
> +{
> + register __int128_t x2 asm ("x2");
> + asm volatile ("" : "=r" (x2));
> + *ptr = x2;
> +}
More information about the Gcc-patches
mailing list