This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH][GCC][Aarch64] Add vectorize pattern for copysign.
- From: Tamar Christina <Tamar dot Christina at arm dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>, James Greenhalgh <James dot Greenhalgh at arm dot com>, Marcus Shawcroft <Marcus dot Shawcroft at arm dot com>, Richard Earnshaw <Richard dot Earnshaw at arm dot com>
- Cc: nd <nd at arm dot com>
- Date: Tue, 17 Jan 2017 14:50:19 +0000
- Subject: [PATCH][GCC][Aarch64] Add vectorize pattern for copysign.
- Authentication-results: sourceware.org; auth=none
- Authentication-results: spf=none (sender IP is ) smtp.mailfrom=Tamar dot Christina at arm dot com;
- Nodisclaimer: True
- Spamdiagnosticmetadata: NSPM
- Spamdiagnosticoutput: 1:99
Hi All,
This patch vectorizes the copysign builtin for AArch64
similar to how it is done for Arm.
AArch64 now generates:
...
.L4:
ldr q1, [x6, x3]
add w4, w4, 1
ldr q0, [x5, x3]
cmp w4, w7
bif v1.16b, v2.16b, v3.16b
fmul v0.2d, v0.2d, v1.2d
str q0, [x5, x3]
for the input:
x * copysign(1.0, y)
On 481.wrf in Spec2006 on AArch64 this gives us a speedup of 9.1%.
Regtested on aarch64-none-linux-gnu and no regressions.
Ok for trunk?
gcc/
2017-01-17 Tamar Christina <tamar.christina@arm.com>
* config/aarch64/aarch64-builtins.c
(aarch64_builtin_vectorized_function): Added CASE_CFN_COPYSIGN.
* config/aarch64/aarch64.c (aarch64_simd_gen_const_vector_dup):
Changed int to HOST_WIDE_INT.
* config/aarch64/aarch64-protos.h
(aarch64_simd_gen_const_vector_dup): Likewise.
* config/aarch64/aarch64-simd-builtins.def: Added copysign BINOP.
* config/aarch64/aarch64-simd.md: Added copysign<mode>3.
gcc/testsuite/
2017-01-17 Tamar Christina <tamar.christina@arm.com>
* gcc.target/arm/vect-copysignf.c: Move to...
* gcc.dg/vect/vect-copysignf.c: ... Here.
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 69fb756f0fbdc016f35ce1d08f2aaf092a034704..faba7a1a38b6e494e9589637d51c639e3126969d 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1447,6 +1447,16 @@ aarch64_builtin_vectorized_function (unsigned int fn, tree type_out,
return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2di];
else
return NULL_TREE;
+ CASE_CFN_COPYSIGN:
+ if (AARCH64_CHECK_BUILTIN_MODE (2, S))
+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_BINOP_copysignv2sf];
+ else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_BINOP_copysignv4sf];
+ else if (AARCH64_CHECK_BUILTIN_MODE (2, D))
+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_BINOP_copysignv2df];
+ else
+ return NULL_TREE;
+
default:
return NULL_TREE;
}
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 29a3bd71151aa4fb7c6728f0fb52e2f3f233f41d..e75ba29f93e9e749791803ca3fa8d716ca261064 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -362,7 +362,7 @@ rtx aarch64_final_eh_return_addr (void);
rtx aarch64_mask_from_zextract_ops (rtx, rtx);
const char *aarch64_output_move_struct (rtx *operands);
rtx aarch64_return_addr (int, rtx);
-rtx aarch64_simd_gen_const_vector_dup (machine_mode, int);
+rtx aarch64_simd_gen_const_vector_dup (machine_mode, HOST_WIDE_INT);
bool aarch64_simd_mem_operand_p (rtx);
rtx aarch64_simd_vect_par_cnst_half (machine_mode, bool);
rtx aarch64_tls_get_addr (void);
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index d713d5d8b88837ec6f2dc51188fb252f8d5bc8bd..a67b7589e8badfbd0f13168557ef87e052eedcb1 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -151,6 +151,9 @@
BUILTIN_VQN (TERNOP, raddhn2, 0)
BUILTIN_VQN (TERNOP, rsubhn2, 0)
+ /* Implemented by copysign<mode>3. */
+ BUILTIN_VHSDF (BINOP, copysign, 3)
+
BUILTIN_VSQN_HSDI (UNOP, sqmovun, 0)
/* Implemented by aarch64_<sur>qmovn<mode>. */
BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index a12e2268ef9b023112f8d05db0a86957fee83273..627ada98b3e4d4b02685d5b5ff71ae74d8e3356a 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -338,6 +338,24 @@
}
)
+(define_expand "copysign<mode>3"
+ [(match_operand:VHSDF 0 "register_operand")
+ (match_operand:VHSDF 1 "register_operand")
+ (match_operand:VHSDF 2 "register_operand")]
+ "TARGET_FLOAT && TARGET_SIMD"
+{
+ rtx v_bitmask = gen_reg_rtx (<V_cmp_result>mode);
+ int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
+
+ emit_move_insn (v_bitmask,
+ aarch64_simd_gen_const_vector_dup (<V_cmp_result>mode,
+ HOST_WIDE_INT_M1 << bits));
+ emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
+ operands[2], operands[1]));
+ DONE;
+}
+)
+
(define_insn "*aarch64_mul3_elt<mode>"
[(set (match_operand:VMUL 0 "register_operand" "=w")
(mult:VMUL
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 0cf7d12186af3e05ba8742af5a03425f61f51754..1a69605db5d2a4a0efb8c9f97a019de9dded40eb 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -11244,14 +11244,16 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
/* Return a const_int vector of VAL. */
rtx
-aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
+aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
{
int nunits = GET_MODE_NUNITS (mode);
rtvec v = rtvec_alloc (nunits);
int i;
+ rtx cache = GEN_INT (val);
+
for (i=0; i < nunits; i++)
- RTVEC_ELT (v, i) = GEN_INT (val);
+ RTVEC_ELT (v, i) = cache;
return gen_rtx_CONST_VECTOR (mode, v);
}
diff --git a/gcc/testsuite/gcc.target/arm/vect-copysignf.c b/gcc/testsuite/gcc.dg/vect/vect-copysignf.c
similarity index 91%
rename from gcc/testsuite/gcc.target/arm/vect-copysignf.c
rename to gcc/testsuite/gcc.dg/vect/vect-copysignf.c
index 425f1b78af7b07be6929f9e5bc1118ca901bc9ce..dc961d0223399c6e7ee8209d22ca77f6d22dbd70 100644
--- a/gcc/testsuite/gcc.target/arm/vect-copysignf.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-copysignf.c
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-require-effective-target arm_neon_hw { target { arm*-*-* } } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */
/* { dg-add-options "arm_neon" } */