This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.



Re: [AArch64] Rewrite vabs<q>_s<8,16,32,64> AdvSIMD intrinsics to fold to tree.


On Fri, Jul 19, 2013 at 08:12:54PM +0100, Marek Polacek wrote:
> On Fri, Jul 19, 2013 at 08:09:05PM +0100, James Greenhalgh wrote:
> > +  /* Forecfully avoid optimization.  */				\
> 
> A typo.
 
Good spot, thanks for taking a look at this.

Hopefully the attached patch spells "forcefully" correctly in both places and
is otherwise typo-free.
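
For reference, a rough sketch (not part of the patch) of what the rewrite
means for users of arm_neon.h, taking vabs_s32 as an example:

  #include <arm_neon.h>

  int32x2_t
  example (int32x2_t x)
  {
    /* Previously this expanded to an opaque inline-asm "abs" instruction.
       With this patch it calls __builtin_aarch64_absv2si, which
       aarch64_fold_builtin folds to a plain ABS_EXPR, so the generic
       tree-level optimizers can now see and work on the operation.  */
    return vabs_s32 (x);
  }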

OK?

Thanks,
James

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index f49f06b..6816b9c 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1325,7 +1325,7 @@ aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args,
 
   switch (fcode)
     {
-      BUILTIN_VDQF (UNOP, abs, 2)
+      BUILTIN_VALLDI (UNOP, abs, 2)
 	return fold_build1 (ABS_EXPR, type, args[0]);
 	break;
       BUILTIN_VALLDI (BINOP, cmge, 0)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index af2dd6e..55dead6 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -347,7 +347,7 @@
   BUILTIN_VDQF (UNOP, frecpe, 0)
   BUILTIN_VDQF (BINOP, frecps, 0)
 
-  BUILTIN_VDQF (UNOP, abs, 2)
+  BUILTIN_VALLDI (UNOP, abs, 2)
 
   VAR1 (UNOP, vec_unpacks_hi_, 10, v4sf)
   VAR1 (BINOP, float_truncate_hi_, 0, v4sf)
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 122fd7d..99cf123 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -4468,83 +4468,6 @@ vabds_f32 (float32_t a, float32_t b)
   return result;
 }
 
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vabs_s8 (int8x8_t a)
-{
-  int8x8_t result;
-  __asm__ ("abs %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vabs_s16 (int16x4_t a)
-{
-  int16x4_t result;
-  __asm__ ("abs %0.4h,%1.4h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vabs_s32 (int32x2_t a)
-{
-  int32x2_t result;
-  __asm__ ("abs %0.2s,%1.2s"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vabsq_s8 (int8x16_t a)
-{
-  int8x16_t result;
-  __asm__ ("abs %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vabsq_s16 (int16x8_t a)
-{
-  int16x8_t result;
-  __asm__ ("abs %0.8h,%1.8h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vabsq_s32 (int32x4_t a)
-{
-  int32x4_t result;
-  __asm__ ("abs %0.4s,%1.4s"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vabsq_s64 (int64x2_t a)
-{
-  int64x2_t result;
-  __asm__ ("abs %0.2d,%1.2d"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
 vaddlv_s8 (int8x8_t a)
 {
@@ -17395,6 +17318,30 @@ vabs_f32 (float32x2_t __a)
   return __builtin_aarch64_absv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vabs_f64 (float64x1_t __a)
+{
+  return __builtin_fabs (__a);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vabs_s8 (int8x8_t __a)
+{
+  return __builtin_aarch64_absv8qi (__a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vabs_s16 (int16x4_t __a)
+{
+  return __builtin_aarch64_absv4hi (__a);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vabs_s32 (int32x2_t __a)
+{
+  return __builtin_aarch64_absv2si (__a);
+}
+
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vabs_s64 (int64x1_t __a)
 {
@@ -17413,6 +17360,30 @@ vabsq_f64 (float64x2_t __a)
   return __builtin_aarch64_absv2df (__a);
 }
 
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vabsq_s8 (int8x16_t __a)
+{
+  return __builtin_aarch64_absv16qi (__a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vabsq_s16 (int16x8_t __a)
+{
+  return __builtin_aarch64_absv8hi (__a);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vabsq_s32 (int32x4_t __a)
+{
+  return __builtin_aarch64_absv4si (__a);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vabsq_s64 (int64x2_t __a)
+{
+  return __builtin_aarch64_absv2di (__a);
+}
+
 /* vadd */
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
diff --git a/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_1.c b/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_1.c
new file mode 100644
index 0000000..b34738c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_1.c
@@ -0,0 +1,101 @@
+/* { dg-do run } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define ETYPE(size) int##size##_t
+#define VTYPE(size, lanes) int##size##x##lanes##_t
+
+#define TEST_VABS(q, size, lanes)				\
+static void							\
+test_vabs##q##_##size (ETYPE (size) * res,			\
+			const ETYPE (size) *in1)		\
+{								\
+  VTYPE (size, lanes) a = vld1##q##_s##size (res);		\
+  VTYPE (size, lanes) b = vld1##q##_s##size (in1);		\
+  a = vabs##q##_s##size (b);					\
+  vst1##q##_s##size (res, a);					\
+}
+
+#define BUILD_VARS(width, n_lanes, n_half_lanes)		\
+TEST_VABS (, width, n_half_lanes)				\
+TEST_VABS (q, width, n_lanes)					\
+
+BUILD_VARS (64, 2, 1)
+BUILD_VARS (32, 4, 2)
+BUILD_VARS (16, 8, 4)
+BUILD_VARS (8, 16, 8)
+
+#define POOL1  {-10}
+#define POOL2  {2, -10}
+#define POOL4  {0, -10, 2, -3}
+#define POOL8  {0, -10, 2, -3, 4, -50, 6, -70}
+#define POOL16 {0, -10, 2, -3, 4, -50, 6, -70,			\
+		-5, 10, -2, 3, -4, 50, -6, 70}
+
+#define EXPECTED1  {10}
+#define EXPECTED2  {2, 10}
+#define EXPECTED4  {0, 10, 2, 3}
+#define EXPECTED8  {0, 10, 2, 3, 4, 50, 6, 70}
+#define EXPECTED16 {0, 10, 2, 3, 4, 50, 6, 70,			\
+		    5, 10, 2, 3, 4, 50, 6, 70}
+
+#define BUILD_TEST(size, lanes_64, lanes_128)			\
+static void							\
+test_##size (void)						\
+{								\
+  int i;							\
+  ETYPE (size) pool1[lanes_64] = POOL##lanes_64;		\
+  ETYPE (size) res1[lanes_64] = {0};				\
+  ETYPE (size) expected1[lanes_64] = EXPECTED##lanes_64;	\
+  ETYPE (size) pool2[lanes_128] = POOL##lanes_128;		\
+  ETYPE (size) res2[lanes_128] = {0};				\
+  ETYPE (size) expected2[lanes_128] = EXPECTED##lanes_128;	\
+								\
+  /* Forcefully avoid optimization.  */				\
+  asm volatile ("" : : : "memory");				\
+  test_vabs_##size (res1, pool1);				\
+  for (i = 0; i < lanes_64; i++)				\
+    if (res1[i] != expected1[i])				\
+      abort ();							\
+								\
+  /* Forcefully avoid optimization.  */				\
+  asm volatile ("" : : : "memory");				\
+  test_vabsq_##size (res2, pool2);				\
+  for (i = 0; i < lanes_128; i++)				\
+    if (res2[i] != expected2[i])				\
+      abort ();							\
+}
+
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.8b, v\[0-9\]+\.8b" 1 } } */
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+BUILD_TEST (8 , 8, 16)
+
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.4h, v\[0-9\]+\.4h" 1 } } */
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h" 1 } } */
+BUILD_TEST (16, 4, 8)
+
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" 1 } } */
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */
+BUILD_TEST (32, 2, 4)
+
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
+BUILD_TEST (64, 1, 2)
+
+#undef BUILD_TEST
+
+#define BUILD_TEST(size) test_##size ()
+
+int
+main (int argc, char **argv)
+{
+  BUILD_TEST (8);
+  BUILD_TEST (16);
+  BUILD_TEST (32);
+  BUILD_TEST (64);
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */



