This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH][GCC][Aarch64] Add vectorize pattern for copysign.


Hi James,

I have corrected the testsuite changes; the updated patch and changelog are attached.

Ok for trunk?

Tamar

Hi All,

This patch vectorizes the copysign builtin for AArch64,
similar to how it is done for Arm.

AArch64 now generates:

...
.L4:
        ldr     q1, [x6, x3]
        add     w4, w4, 1
        ldr     q0, [x5, x3]
        cmp     w4, w7
        bif     v1.16b, v2.16b, v3.16b
        fmul    v0.2d, v0.2d, v1.2d
        str     q0, [x5, x3]

for the input:

     x * copysign(1.0, y)

On 481.wrf in SPEC CPU2006 on AArch64 this gives us a speedup of 9.1%.
Regtested on aarch64-none-linux-gnu and arm-none-eabi with no regressions.

Ok for trunk?

gcc/
2017-01-19  Tamar Christina  <tamar.christina@arm.com>

        * config/aarch64/aarch64.c (aarch64_simd_gen_const_vector_dup):
        Change int to HOST_WIDE_INT.
        * config/aarch64/aarch64-protos.h
        (aarch64_simd_gen_const_vector_dup): Likewise.
        * config/aarch64/aarch64-simd.md: Add copysign<mode>3.

gcc/testsuite/
2017-01-19  Tamar Christina  <tamar.christina@arm.com>

        * lib/target-supports.exp
        (check_effective_target_vect_call_copysignf): Enable for AArch64.
________________________________________
From: James Greenhalgh <james.greenhalgh@arm.com>
Sent: Thursday, January 19, 2017 4:58:09 PM
To: Tamar Christina
Cc: GCC Patches; Marcus Shawcroft; Richard Earnshaw; nd
Subject: Re: [PATCH][GCC][Aarch64] Add vectorize pattern for copysign.

On Thu, Jan 19, 2017 at 03:55:36PM +0000, Tamar Christina wrote:
>
> It seems I can drop even more:

The AArch64 parts of this look fine to me, and based on benchmarking of the
patch they are low risk for high reward (and other targets have had a
vectorized copysign for a while without issue).

However, the testsuite change looks wrong.

> diff --git a/gcc/testsuite/gcc.target/arm/vect-copysignf.c b/gcc/testsuite/gcc.dg/vect/vect-copysignf.c
> similarity index 91%
> rename from gcc/testsuite/gcc.target/arm/vect-copysignf.c
> rename to gcc/testsuite/gcc.dg/vect/vect-copysignf.c
> index 425f1b78af7b07be6929f9e5bc1118ca901bc9ce..dc961d0223399c6e7ee8209d22ca77f6d22dbd70 100644
> --- a/gcc/testsuite/gcc.target/arm/vect-copysignf.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-copysignf.c
> @@ -1,5 +1,5 @@
>  /* { dg-do run } */
> -/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-require-effective-target arm_neon_hw { target { arm*-*-* } } } */
>  /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */
>  /* { dg-add-options "arm_neon" } */

It is a bit redundant as add_options_for_arm_neon will check you are an ARM
target before doing anything, but this dg-add-options could be guarded by
{ target { arm*-*-* } } for clarity. Though, in the gcc.dg/vect/ directory
I'd be surprised if you needed this at all.

I see we have a check_effective_target test for
{ target { vect_call_copysignf } } . It seems that would be most
appropriate for this test - then you can also drop the effective-target
arm_neon_hw test.

You might want to look at gcc.dg/vect/fast-math-bb-slp-call-1.c - It seems
that gives you most of what you are looking for from this test.

That will mean updating check_effective_target_vect_call_copysignf in
testsuite/lib/target-supports.exp .

Thanks,
James

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 29a3bd71151aa4fb7c6728f0fb52e2f3f233f41d..e75ba29f93e9e749791803ca3fa8d716ca261064 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -362,7 +362,7 @@ rtx aarch64_final_eh_return_addr (void);
 rtx aarch64_mask_from_zextract_ops (rtx, rtx);
 const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr (int, rtx);
-rtx aarch64_simd_gen_const_vector_dup (machine_mode, int);
+rtx aarch64_simd_gen_const_vector_dup (machine_mode, HOST_WIDE_INT);
 bool aarch64_simd_mem_operand_p (rtx);
 rtx aarch64_simd_vect_par_cnst_half (machine_mode, bool);
 rtx aarch64_tls_get_addr (void);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index a12e2268ef9b023112f8d05db0a86957fee83273..b61f79a09462b8cecca7dd2cc4ac0eb4be2dbc79 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -338,6 +338,24 @@
   }
 )
 
+(define_expand "copysign<mode>3"
+  [(match_operand:VHSDF 0 "register_operand")
+   (match_operand:VHSDF 1 "register_operand")
+   (match_operand:VHSDF 2 "register_operand")]
+  "TARGET_FLOAT && TARGET_SIMD"
+{
+  rtx v_bitmask = gen_reg_rtx (<V_cmp_result>mode);
+  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
+
+  emit_move_insn (v_bitmask,
+		  aarch64_simd_gen_const_vector_dup (<V_cmp_result>mode,
+						     HOST_WIDE_INT_M1U << bits));
+  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
+					 operands[2], operands[1]));
+  DONE;
+}
+)
+
 (define_insn "*aarch64_mul3_elt<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
     (mult:VMUL
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 0cf7d12186af3e05ba8742af5a03425f61f51754..1a69605db5d2a4a0efb8c9f97a019de9dded40eb 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -11244,14 +11244,16 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
 
 /* Return a const_int vector of VAL.  */
 rtx
-aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
+aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
 {
   int nunits = GET_MODE_NUNITS (mode);
   rtvec v = rtvec_alloc (nunits);
   int i;
 
+  rtx cache = GEN_INT (val);
+
   for (i=0; i < nunits; i++)
-    RTVEC_ELT (v, i) = GEN_INT (val);
+    RTVEC_ELT (v, i) = cache;
 
   return gen_rtx_CONST_VECTOR (mode, v);
 }
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index b88d13c13f277e8cdb88b5dc8545ffa01408a0fa..12dbf475e31933cff781c2f9e9c1cfbe2ce108bb 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -6158,7 +6158,8 @@ proc check_effective_target_vect_call_copysignf { } {
     } else {
 	set et_vect_call_copysignf_saved($et_index) 0
 	if { [istarget i?86-*-*] || [istarget x86_64-*-*]
-	     || [istarget powerpc*-*-*] } {
+	     || [istarget powerpc*-*-*]
+	     || [istarget aarch64*-*-*] } {
 	   set et_vect_call_copysignf_saved($et_index) 1
 	}
     }

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]