gcc.gnu.org Git - gcc.git/commitdiff
aarch64: Add =r,m and =m,r alternatives to 64-bit vector move patterns
author Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Thu, 1 Jun 2023 08:37:06 +0000 (09:37 +0100)
committer Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Thu, 1 Jun 2023 08:37:06 +0000 (09:37 +0100)
We can use the X registers to load and store 64-bit vector modes; we just need to add the
corresponding alternatives to the mov patterns. This straightforward patch does that, for the
pair variants too. For the testcase added in this patch we now generate the optimal assembly
without any superfluous GP<->SIMD moves.

Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.

gcc/ChangeLog:

* config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>):
Add =r,m and =m,r alternatives.
(load_pair<DREG:mode><DREG2:mode>): Likewise.
(vec_store_pair<DREG:mode><DREG2:mode>): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/xreg-vec-modes_1.c: New test.

gcc/config/aarch64/aarch64-simd.md
gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c [new file with mode: 0644]

index 4904a50658bdf148938a6a1ccb50f690fbd89194..9307a573cefdd16b0b54af10c72b948877ab74d7 100644 (file)
 
 (define_insn "*aarch64_simd_mov<VDMOV:mode>"
   [(set (match_operand:VDMOV 0 "nonimmediate_operand"
-               "=w, m,  m,  w, ?r, ?w, ?r,  w,  w")
+               "=w, r, m,  m, m,  w, ?r, ?w, ?r,  w,  w")
        (match_operand:VDMOV 1 "general_operand"
-               "m,  Dz, w,  w,  w,  r,  r, Dn, Dz"))]
+               "m,  m, Dz, w, r,  w,  w,  r,  r, Dn, Dz"))]
   "TARGET_FLOAT
    && (register_operand (operands[0], <MODE>mode)
        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
   "@
    ldr\t%d0, %1
+   ldr\t%x0, %1
    str\txzr, %0
    str\t%d1, %0
+   str\t%x1, %0
    * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\";
    * return TARGET_SIMD ? \"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\";
    fmov\t%d0, %1
    mov\t%0, %1
    * return aarch64_output_simd_mov_immediate (operands[1], 64);
    fmov\t%d0, xzr"
-  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
-                    neon_logic<q>, neon_to_gp<q>, f_mcr,\
+  [(set_attr "type" "neon_load1_1reg<q>, load_8, store_8, neon_store1_1reg<q>,\
+                    store_8, neon_logic<q>, neon_to_gp<q>, f_mcr,\
                     mov_reg, neon_move<q>, f_mcr")
-   (set_attr "arch" "*,*,*,*,*,*,*,simd,*")]
+   (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*")]
 )
 
 (define_insn "*aarch64_simd_mov<VQMOV:mode>"
 )
 
 (define_insn "load_pair<DREG:mode><DREG2:mode>"
-  [(set (match_operand:DREG 0 "register_operand" "=w")
-       (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
-   (set (match_operand:DREG2 2 "register_operand" "=w")
-       (match_operand:DREG2 3 "memory_operand" "m"))]
+  [(set (match_operand:DREG 0 "register_operand" "=w,r")
+       (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump,Ump"))
+   (set (match_operand:DREG2 2 "register_operand" "=w,r")
+       (match_operand:DREG2 3 "memory_operand" "m,m"))]
   "TARGET_FLOAT
    && rtx_equal_p (XEXP (operands[3], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[1], 0),
                                  GET_MODE_SIZE (<DREG:MODE>mode)))"
-  "ldp\\t%d0, %d2, %z1"
-  [(set_attr "type" "neon_ldp")]
+  "@
+   ldp\t%d0, %d2, %z1
+   ldp\t%x0, %x2, %z1"
+  [(set_attr "type" "neon_ldp,load_16")]
 )
 
 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
-  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
-       (match_operand:DREG 1 "register_operand" "w"))
-   (set (match_operand:DREG2 2 "memory_operand" "=m")
-       (match_operand:DREG2 3 "register_operand" "w"))]
+  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump,Ump")
+       (match_operand:DREG 1 "register_operand" "w,r"))
+   (set (match_operand:DREG2 2 "memory_operand" "=m,m")
+       (match_operand:DREG2 3 "register_operand" "w,r"))]
   "TARGET_FLOAT
    && rtx_equal_p (XEXP (operands[2], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[0], 0),
                                  GET_MODE_SIZE (<DREG:MODE>mode)))"
-  "stp\\t%d1, %d3, %z0"
-  [(set_attr "type" "neon_stp")]
+  "@
+   stp\t%d1, %d3, %z0
+   stp\t%x1, %x3, %z0"
+  [(set_attr "type" "neon_stp,store_16")]
 )
 
 (define_insn "aarch64_simd_stp<mode>"
diff --git a/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c b/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c
new file mode 100644 (file)
index 0000000..fc4dcb1
--- /dev/null
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+typedef unsigned int v2si  __attribute__((vector_size (8)));
+
+#define force_gp(V1)   asm volatile (""                                \
+           : "=r"(V1)                                           \
+           : "r"(V1)                                            \
+           : /* No clobbers */);
+
+/*
+** foo:
+**     ldr     (x[0-9]+), \[x1\]
+**     str     \1, \[x0\]
+**     ret
+*/
+
+void
+foo (v2si *a, v2si *b)
+{
+  v2si tmp = *b;
+  force_gp (tmp);
+  *a = tmp;
+}
+
+/*
+** foo2:
+**     ldp     (x[0-9]+), (x[0-9]+), \[x0\]
+**     stp     \1, \2, \[x1\]
+**     ret
+*/
+void
+foo2 (v2si *a, v2si *b)
+{
+  v2si t1 = *a;
+  v2si t2 = a[1];
+  force_gp (t1);
+  force_gp (t2);
+  *b = t1;
+  b[1] = t2;
+}
This page took 0.078456 seconds and 5 git commands to generate.