This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

[COMMITTED][AArch64] Fix simd intrinsics bug on float vminnm/vmaxnm

From: Jiong Wang <jiong dot wang at foss dot arm dot com>
To: James Greenhalgh <james dot greenhalgh at arm dot com>
Cc: GCC Patches <gcc-patches at gcc dot gnu dot org>
Date: Fri, 8 Jul 2016 16:47:41 +0100
Subject: [COMMITTED][AArch64] Fix simd intrinsics bug on float vminnm/vmaxnm
Authentication-results: sourceware.org; auth=none
References: <f8a47ab3-ee52-4964-79b8-199e0562152e@foss.arm.com> <20160706152911.GA28331@arm.com> <577D276D.70600@foss.arm.com> <CAKdteOakjLrKiqy3TTDFgZQFgQHtLxbHCV1zdhYkKR+F=p3mKg@mail.gmail.com> <aaccf73a-6c31-81f9-8aa5-7cb04be6a243@foss.arm.com> <20160707093457.GA8512@arm.com>

On 07/07/16 10:34, James Greenhalgh wrote:


To make backporting easier, could you please write a very simple
standalone test that exposes this bug, and submit this patch with just
that simple test? I've already OKed the functional part of this patch, and
I'm happy to pre-approve a simple testcase.

With that committed to trunk, this needs to go to all active release
branches please.


Committed the attached patch to trunk as r238166. The fmax/fmin patterns were
introduced by [1], which is available since GCC 6, so the patch was backported
to the GCC 6 branch as r238167.

--
[1] https://gcc.gnu.org/ml/gcc-patches/2015-11/msg02654.html

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 3e4740c..f1ad325 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -244,13 +244,17 @@
   /* Implemented by <maxmin><mode>3.
      smax variants map to fmaxnm,
      smax_nan variants map to fmax.  */
-  BUILTIN_VDQIF (BINOP, smax, 3)
-  BUILTIN_VDQIF (BINOP, smin, 3)
+  BUILTIN_VDQ_BHSI (BINOP, smax, 3)
+  BUILTIN_VDQ_BHSI (BINOP, smin, 3)
   BUILTIN_VDQ_BHSI (BINOP, umax, 3)
   BUILTIN_VDQ_BHSI (BINOP, umin, 3)
   BUILTIN_VDQF (BINOP, smax_nan, 3)
   BUILTIN_VDQF (BINOP, smin_nan, 3)
 
+  /* Implemented by <fmaxmin><mode>3.  */
+  BUILTIN_VDQF (BINOP, fmax, 3)
+  BUILTIN_VDQF (BINOP, fmin, 3)
+
   /* Implemented by aarch64_<maxmin_uns>p<mode>.  */
   BUILTIN_VDQ_BHSI (BINOP, smaxp, 0)
   BUILTIN_VDQ_BHSI (BINOP, sminp, 0)
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index ed24b59..b0ab1d3 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -17588,19 +17588,19 @@ vpminnms_f32 (float32x2_t a)
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
 {
-  return __builtin_aarch64_smaxv2sf (__a, __b);
+  return __builtin_aarch64_fmaxv2sf (__a, __b);
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return __builtin_aarch64_smaxv4sf (__a, __b);
+  return __builtin_aarch64_fmaxv4sf (__a, __b);
 }
 
 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
 vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
 {
-  return __builtin_aarch64_smaxv2df (__a, __b);
+  return __builtin_aarch64_fmaxv2df (__a, __b);
 }
 
 /* vmaxv  */
@@ -17818,19 +17818,19 @@ vminq_u32 (uint32x4_t __a, uint32x4_t __b)
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vminnm_f32 (float32x2_t __a, float32x2_t __b)
 {
-  return __builtin_aarch64_sminv2sf (__a, __b);
+  return __builtin_aarch64_fminv2sf (__a, __b);
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vminnmq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return __builtin_aarch64_sminv4sf (__a, __b);
+  return __builtin_aarch64_fminv4sf (__a, __b);
 }
 
 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
 vminnmq_f64 (float64x2_t __a, float64x2_t __b)
 {
-  return __builtin_aarch64_sminv2df (__a, __b);
+  return __builtin_aarch64_fminv2df (__a, __b);
 }
 
 /* vminv  */
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c
new file mode 100644
index 0000000..8333f03
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c
@@ -0,0 +1,82 @@
+/* Test the `v[min|max]nm{q}_f*' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include "arm_neon.h"
+
+extern void abort ();
+
+#define CHECK(T, N, R, E) \
+  {\
+    int i = 0;\
+    for (; i < N; i++)\
+      if (* (T *) &R[i] != * (T *) &E[i])\
+	abort ();\
+  }
+
+int
+main (int argc, char **argv)
+{
+  float32x2_t f32x2_input1 = vdup_n_f32 (-1.0);
+  float32x2_t f32x2_input2 = vdup_n_f32 (0.0);
+  float32x2_t f32x2_exp_minnm  = vdup_n_f32 (-1.0);
+  float32x2_t f32x2_exp_maxnm  = vdup_n_f32 (0.0);
+  float32x2_t f32x2_ret_minnm  = vminnm_f32 (f32x2_input1, f32x2_input2);
+  float32x2_t f32x2_ret_maxnm  = vmaxnm_f32 (f32x2_input1, f32x2_input2);
+
+  CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm);
+  CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm);
+
+  f32x2_input1 = vdup_n_f32 (__builtin_nanf (""));
+  f32x2_input2 = vdup_n_f32 (1.0);
+  f32x2_exp_minnm  = vdup_n_f32 (1.0);
+  f32x2_exp_maxnm  = vdup_n_f32 (1.0);
+  f32x2_ret_minnm  = vminnm_f32 (f32x2_input1, f32x2_input2);
+  f32x2_ret_maxnm  = vmaxnm_f32 (f32x2_input1, f32x2_input2);
+
+  CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm);
+  CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm);
+
+  float32x4_t f32x4_input1 = vdupq_n_f32 (-1024.0);
+  float32x4_t f32x4_input2 = vdupq_n_f32 (77.0);
+  float32x4_t f32x4_exp_minnm  = vdupq_n_f32 (-1024.0);
+  float32x4_t f32x4_exp_maxnm  = vdupq_n_f32 (77.0);
+  float32x4_t f32x4_ret_minnm  = vminnmq_f32 (f32x4_input1, f32x4_input2);
+  float32x4_t f32x4_ret_maxnm  = vmaxnmq_f32 (f32x4_input1, f32x4_input2);
+
+  CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm);
+  CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm);
+
+  f32x4_input1 = vdupq_n_f32 (-__builtin_nanf (""));
+  f32x4_input2 = vdupq_n_f32 (-1.0);
+  f32x4_exp_minnm  = vdupq_n_f32 (-1.0);
+  f32x4_exp_maxnm  = vdupq_n_f32 (-1.0);
+  f32x4_ret_minnm  = vminnmq_f32 (f32x4_input1, f32x4_input2);
+  f32x4_ret_maxnm  = vmaxnmq_f32 (f32x4_input1, f32x4_input2);
+
+  CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm);
+  CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm);
+
+  float64x2_t f64x2_input1 = vdupq_n_f64 (1.23);
+  float64x2_t f64x2_input2 = vdupq_n_f64 (4.56);
+  float64x2_t f64x2_exp_minnm  = vdupq_n_f64 (1.23);
+  float64x2_t f64x2_exp_maxnm  = vdupq_n_f64 (4.56);
+  float64x2_t f64x2_ret_minnm  = vminnmq_f64 (f64x2_input1, f64x2_input2);
+  float64x2_t f64x2_ret_maxnm  = vmaxnmq_f64 (f64x2_input1, f64x2_input2);
+
+  CHECK (uint64_t, 2, f64x2_ret_minnm, f64x2_exp_minnm);
+  CHECK (uint64_t, 2, f64x2_ret_maxnm, f64x2_exp_maxnm);
+
+  f64x2_input1 = vdupq_n_f64 (-__builtin_nan (""));
+  f64x2_input2 = vdupq_n_f64 (1.0);
+  f64x2_exp_minnm  = vdupq_n_f64 (1.0);
+  f64x2_exp_maxnm  = vdupq_n_f64 (1.0);
+  f64x2_ret_minnm  = vminnmq_f64 (f64x2_input1, f64x2_input2);
+  f64x2_ret_maxnm  = vmaxnmq_f64 (f64x2_input1, f64x2_input2);
+
+  CHECK (uint64_t, 2, f64x2_ret_minnm, f64x2_exp_minnm);
+  CHECK (uint64_t, 2, f64x2_ret_maxnm, f64x2_exp_maxnm);
+
+  return 0;
+}

Follow-Ups:
- Re: [5.0 Backport][AArch64] Fix simd intrinsics bug on float vminnm/vmaxnm
  - From: Jiong Wang

References:
- [AArch64] Fix simd intrinsics bug on float vminnm/vmaxnm
  - From: Jiong Wang
- Re: [AArch64] Fix simd intrinsics bug on float vminnm/vmaxnm
  - From: James Greenhalgh
- Re: [AArch64] Fix simd intrinsics bug on float vminnm/vmaxnm
  - From: Kyrill Tkachov
- Re: [AArch64] Fix simd intrinsics bug on float vminnm/vmaxnm
  - From: Christophe Lyon
- Re: [AArch64] Fix simd intrinsics bug on float vminnm/vmaxnm
  - From: Jiong Wang
- Re: [AArch64] Fix simd intrinsics bug on float vminnm/vmaxnm
  - From: James Greenhalgh

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]