This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[Patch AArch64] Improve SIMD concatenation with zeroes
- From: James Greenhalgh <james dot greenhalgh at arm dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: marcus dot shawcroft at arm dot com, richard dot earnshaw at arm dot com
- Date: Fri, 2 Oct 2015 09:12:36 +0100
- Subject: [Patch AArch64] Improve SIMD concatenation with zeroes
- Authentication-results: sourceware.org; auth=none
Hi,
On AArch64, SIMD instructions which only touch the bottom 64 bits of a
vector register write zeroes to the upper 64 bits. In other words, we have
a cheap way to implement a "zero extend" of a SIMD operation, and can
generate efficient code for:
[(set (match_operand 0)
(vec_concat:128-bit mode
(other vector operations in a 64-bit mode)
(match_operand 2 [zeroes])))]
And for the big-endian equivalent of this.
This small patch catches two important cases of this, namely loading a
64-bit vector and moving a 64-bit vector from general-purpose registers to
vector registers.
Bootstrapped on aarch64-none-linux-gnu with no issues, and aarch64.exp run
for aarch64_be-none-elf.
Ok for trunk?
Thanks,
James
---
gcc/
2015-10-01 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/aarch64-simd.md (*aarch64_combinez<mode>): Add
alternatives for reads from memory and moves from general-purpose
registers.
(*aarch64_combinez_be<mode>): Likewise.
gcc/testsuite/
2015-10-01 James Greenhalgh <james.greenhalgh@arm.com>
* gcc.target/aarch64/vect_combine_zeroes_1.c: New.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 541faf9..6a2ab61 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2530,23 +2530,33 @@
;; dest vector.
(define_insn "*aarch64_combinez<mode>"
- [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
+ [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
(vec_concat:<VDBL>
- (match_operand:VD_BHSI 1 "register_operand" "w")
- (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz")))]
+ (match_operand:VD_BHSI 1 "general_operand" "w,r,m")
+ (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
- "mov\\t%0.8b, %1.8b"
- [(set_attr "type" "neon_move<q>")]
+ "@
+ mov\\t%0.8b, %1.8b
+ fmov\\t%d0, %1
+ ldr\\t%d0, %1"
+ [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
+ (set_attr "simd" "yes,*,yes")
+ (set_attr "fp" "*,yes,*")]
)
(define_insn "*aarch64_combinez_be<mode>"
- [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
+ [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
(vec_concat:<VDBL>
- (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz")
- (match_operand:VD_BHSI 1 "register_operand" "w")))]
+ (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")
+ (match_operand:VD_BHSI 1 "general_operand" "w,r,m")))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
- "mov\\t%0.8b, %1.8b"
- [(set_attr "type" "neon_move<q>")]
+ "@
+ mov\\t%0.8b, %1.8b
+ fmov\\t%d0, %1
+ ldr\\t%d0, %1"
+ [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
+ (set_attr "simd" "yes,*,yes")
+ (set_attr "fp" "*,yes,*")]
)
(define_expand "aarch64_combine<mode>"
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_combine_zeroes_1.c b/gcc/testsuite/gcc.target/aarch64/vect_combine_zeroes_1.c
new file mode 100644
index 0000000..6257fa9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect_combine_zeroes_1.c
@@ -0,0 +1,24 @@
+/* { dg-options "-O2 --save-temps" } */
+
+#include "arm_neon.h"
+
+int32x4_t
+foo (int32x2_t *x)
+{
+ int32x2_t i = *x;
+ int32x2_t zeroes = vcreate_s32 (0l);
+ int32x4_t ret = vcombine_s32 (i, zeroes);
+ return ret;
+}
+
+int32x4_t
+bar (int64_t x)
+{
+ int32x2_t i = vcreate_s32 (x);
+ int32x2_t zeroes = vcreate_s32 (0l);
+ int32x4_t ret = vcombine_s32 (i, zeroes);
+ return ret;
+}
+
+/* { dg-final { scan-assembler-not "mov\tv\[0-9\]+.8b, v\[0-9\]+.8b" } } */
+