Fix folding of vector mask EQ/NE expressions

Richard Sandiford richard.sandiford@linaro.org
Mon Nov 6 15:27:00 GMT 2017


fold_binary_loc assumed that if the type of the result wasn't a vector,
the operands wouldn't be either.  This isn't necessarily true for
EQ_EXPR and NE_EXPR of vector masks, which can return a single scalar
for the mask as a whole.

Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64-linux-gnu.
OK to install?

Richard


2017-11-06  Richard Sandiford  <richard.sandiford@linaro.org>
	    Alan Hayward  <alan.hayward@arm.com>
	    David Sherwood  <david.sherwood@arm.com>

gcc/
	* fold-const.c (fold_binary_loc): Check the argument types
	rather than the result type when testing for a vector operation.

gcc/testsuite/
	* gcc.target/aarch64/sve_vec_bool_cmp_1.c: New test.
	* gcc.target/aarch64/sve_vec_bool_cmp_1_run.c: Likweise.

Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c	2017-11-06 15:26:35.441288554 +0000
+++ gcc/fold-const.c	2017-11-06 15:26:35.595423552 +0000
@@ -9392,7 +9392,7 @@ fold_binary_loc (location_t loc,
 
   if ((code == BIT_AND_EXPR || code == BIT_IOR_EXPR
        || code == EQ_EXPR || code == NE_EXPR)
-      && TREE_CODE (type) != VECTOR_TYPE
+      && TREE_CODE (TREE_TYPE (arg0)) != VECTOR_TYPE
       && ((truth_value_p (TREE_CODE (arg0))
 	   && (truth_value_p (TREE_CODE (arg1))
 	       || (TREE_CODE (arg1) == BIT_AND_EXPR
Index: gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c
===================================================================
--- /dev/null	2017-11-03 22:04:05.605699023 +0000
+++ gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c	2017-11-06 15:26:35.595423552 +0000
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#define VEC_BOOL(NAME, OP, VARTYPE, INDUCTYPE)				\
+void __attribute__ ((noinline, noclone))				\
+vec_bool_##NAME##_##VARTYPE##_##INDUCTYPE (VARTYPE *dst, VARTYPE *src,	\
+					   INDUCTYPE start,		\
+					   INDUCTYPE n,			\
+					   INDUCTYPE mask)		\
+{									\
+  for (INDUCTYPE i = 0; i < n; i++)					\
+    {									\
+      bool lhs = i >= start;						\
+      bool rhs = (i & mask) != 0x3D;					\
+      if (lhs OP rhs)							\
+        dst[i] = src[i];						\
+    }									\
+}
+
+#define TEST_OP(T, NAME, OP)			\
+  T (NAME, OP, uint8_t, uint8_t)		\
+  T (NAME, OP, uint16_t, uint16_t)		\
+  T (NAME, OP, uint32_t, uint32_t)		\
+  T (NAME, OP, uint64_t, uint64_t)		\
+  T (NAME, OP, float, uint32_t)			\
+  T (NAME, OP, double, uint64_t)
+
+#define TEST_ALL(T)				\
+  TEST_OP (T, cmpeq, ==)			\
+  TEST_OP (T, cmpne, !=)
+
+TEST_ALL (VEC_BOOL)
+
+/* Both cmpne and cmpeq loops will contain an exclusive predicate or.  */
+/* { dg-final { scan-assembler-times {\teors?\tp[0-9]*\.b, p[0-7]/z, p[0-9]*\.b, p[0-9]*\.b\n} 12 } } */
+/* cmpeq will also contain a predicate not operation.  */
+/* { dg-final { scan-assembler-times {\tnot\tp[0-9]*\.b, p[0-7]/z, p[0-9]*\.b\n} 6 } } */
Index: gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c
===================================================================
--- /dev/null	2017-11-03 22:04:05.605699023 +0000
+++ gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c	2017-11-06 15:26:35.595423552 +0000
@@ -0,0 +1,37 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O3 -fno-inline -march=armv8-a+sve" } */
+
+#include "sve_vec_bool_cmp_1.c"
+
+#define N 103
+
+#define TEST_VEC_BOOL(NAME, OP, VARTYPE, INDUCTYPE)		\
+{								\
+  INDUCTYPE i;							\
+  VARTYPE src[N];						\
+  VARTYPE dst[N];						\
+  for (i = 0; i < N; i++)					\
+    {								\
+      src[i] = i;						\
+      dst[i] = i * 2;						\
+      asm volatile ("" ::: "memory");				\
+    }								\
+  vec_bool_##NAME##_##VARTYPE##_##INDUCTYPE (dst, src, 13,	\
+					     97, 0xFF);		\
+  for (i = 0; i < 13; i++)					\
+    if (dst[i] != (VARTYPE) (0 OP 1 ? i : i * 2))		\
+      __builtin_abort ();					\
+  for (i = 13; i < 97; i++)					\
+    if (dst[i] != (VARTYPE) (1 OP (i != 0x3D) ? i : i * 2))	\
+      __builtin_abort ();					\
+  for (i = 97; i < N; i++)					\
+    if (dst[i] != (i * 2))					\
+      __builtin_abort ();					\
+}
+
+int __attribute__ ((optimize (1)))
+main ()
+{
+  TEST_ALL (TEST_VEC_BOOL)
+  return 0;
+}



More information about the Gcc-patches mailing list