This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
RE: [AArch64][SVE2] Support for EOR3 and variants of BSL
- From: Yuliang Wang <Yuliang dot Wang at arm dot com>
- To: "gcc-patches at gcc dot gnu dot org" <gcc-patches at gcc dot gnu dot org>
- Cc: nd <nd at arm dot com>, Richard Sandiford <Richard dot Sandiford at arm dot com>
- Date: Thu, 17 Oct 2019 10:47:58 +0000
- Subject: RE: [AArch64][SVE2] Support for EOR3 and variants of BSL
- Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=arm.com; dmarc=pass action=none header.from=arm.com; dkim=pass header.d=arm.com; arc=none
- Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=ET6kFHiO7ObQRDw5qSJZicCMFx+wpzkTpJ9pzi+ILtA=; b=Vp0X0RBuQ7EzKHcFUssCgSesjwUqOKEoVWwwRpVfZQALJmY6DMmYZTMGFQqNJp0vJnK4CQlpISZe3eW40mLqmigtvBbTKVRhRKHzrEedyeblR9XfDxL9cGorrM9XnHyjOisTjTvuPkWYwS9mLan6ka+Gtf98xs3gn7PNxY+nsQRYpX32R1KzY2yhZWppF/3vtcMypYf1oKsZ8eyan0RsLfIWMiqLmCLaw8PdTQzvEYCHUm1UVQ7+fWOo7LXjijMTxOxjph8++dXo8Y73u1ZW47xo71NPpDYIGP7a3JmYJf+YhqeBcdO+IacItsaT5KFQddH+FUAwcKWUz/vZnhrW1w==
- Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=MtNugRvLCVjwI9PVlVKTmN0gmMRyhTb95ZTWM6Kv75GruR3q+21rTFipebf++fuhCCwYV4KG5JwgN6hCUnt3NLLbteBTjcN8Uh7VQaTht3oVKVdlH2syiA3Kc7k6cQJzQfy1vMHbNDKZF0fuzXN2m4QTwcW6DapKd4MyAYWB7nntdb6axDrUhm/4LCK3pvcZnGESRlyek9KkHUNX97U4zcgCRLXW7+AaxVoLb3rSEp7iy9AkRbJ9gGZyBvjgBDFaVWUBodxNKrZQoVDvrv2KWmFsYHHnoVmTRgfHBAofCxYyoUUI34qc4dD+TSTSm0RDqHRgxnRyT/L9sE5/Ss57pQ==
- Original-authentication-results: spf=none (sender IP is ) smtp.mailfrom=Yuliang dot Wang at arm dot com;
- References: <AM0PR08MB3716F7B91261E2508A23B82B9B920@AM0PR08MB3716.eurprd08.prod.outlook.com> <mpto8ygsc9g.fsf@arm.com> <AM0PR08MB3716A6FA60C4D97BCEDB2CAD9B6D0@AM0PR08MB3716.eurprd08.prod.outlook.com> <mpty2xjoedu.fsf@arm.com>
Thanks very much, updated.
Regards,
Yuliang
gcc/ChangeLog:
2019-10-17 Yuliang Wang <yuliang.wang@arm.com>
* config/aarch64/aarch64-sve2.md (aarch64_sve2_eor3<mode>)
(aarch64_sve2_nor<mode>, aarch64_sve2_nand<mode>)
(aarch64_sve2_bsl<mode>, aarch64_sve2_nbsl<mode>)
(aarch64_sve2_bsl1n<mode>, aarch64_sve2_bsl2n<mode>):
New combine patterns.
* config/aarch64/iterators.md (BSL_DUP): New int iterator for the above.
(bsl_1st, bsl_2nd, bsl_dup, bsl_mov): Attributes for the above.
gcc/testsuite/ChangeLog:
2019-10-17 Yuliang Wang <yuliang.wang@arm.com>
* gcc.target/aarch64/sve2/eor3_1.c: New test.
* gcc.target/aarch64/sve2/nlogic_1.c: As above.
* gcc.target/aarch64/sve2/nlogic_2.c: As above.
* gcc.target/aarch64/sve2/bitsel_1.c: As above.
* gcc.target/aarch64/sve2/bitsel_2.c: As above.
* gcc.target/aarch64/sve2/bitsel_3.c: As above.
* gcc.target/aarch64/sve2/bitsel_4.c: As above.
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index b018f5b0bc9b51edf831e2571f0f5a9af2210829..1158a76c49adc329d72a9eb9dbe6bf6f380f92c6 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -142,3 +142,188 @@
}
)
+;; Unpredicated 3-way exclusive OR.
+(define_insn "*aarch64_sve2_eor3<mode>"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?&w")
+ (xor:SVE_I
+ (xor:SVE_I
+ (match_operand:SVE_I 1 "register_operand" "0, w, w, w")
+ (match_operand:SVE_I 2 "register_operand" "w, 0, w, w"))
+ (match_operand:SVE_I 3 "register_operand" "w, w, 0, w")))]
+ "TARGET_SVE2"
+ "@
+ eor3\t%0.d, %0.d, %2.d, %3.d
+ eor3\t%0.d, %0.d, %1.d, %3.d
+ eor3\t%0.d, %0.d, %1.d, %2.d
+ movprfx\t%0, %1\;eor3\t%0.d, %0.d, %2.d, %3.d"
+ [(set_attr "movprfx" "*,*,*,yes")]
+)
+
+;; Use NBSL for vector NOR.
+(define_insn_and_rewrite "*aarch64_sve2_nor<mode>"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_I
+ [(match_operand 3)
+ (and:SVE_I
+ (not:SVE_I
+ (match_operand:SVE_I 1 "register_operand" "%0, w"))
+ (not:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "w, w")))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE2"
+ "@
+ nbsl\t%0.d, %0.d, %2.d, %0.d
+ movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %0.d"
+ "&& !CONSTANT_P (operands[3])"
+ {
+ operands[3] = CONSTM1_RTX (<VPRED>mode);
+ }
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Use NBSL for vector NAND.
+(define_insn_and_rewrite "*aarch64_sve2_nand<mode>"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_I
+ [(match_operand 3)
+ (ior:SVE_I
+ (not:SVE_I
+ (match_operand:SVE_I 1 "register_operand" "%0, w"))
+ (not:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "w, w")))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE2"
+ "@
+ nbsl\t%0.d, %0.d, %2.d, %2.d
+ movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %2.d"
+ "&& !CONSTANT_P (operands[3])"
+ {
+ operands[3] = CONSTM1_RTX (<VPRED>mode);
+ }
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Unpredicated bitwise select.
+;; (op3 ? bsl_mov : bsl_dup) == (((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
+(define_insn "*aarch64_sve2_bsl<mode>"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+ (xor:SVE_I
+ (and:SVE_I
+ (xor:SVE_I
+ (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
+ (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
+ (match_operand:SVE_I 3 "register_operand" "w, w"))
+ (match_dup BSL_DUP)))]
+ "TARGET_SVE2"
+ "@
+ bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
+ movprfx\t%0, %<bsl_mov>\;bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Unpredicated bitwise inverted select.
+;; (~(op3 ? bsl_mov : bsl_dup)) == (~(((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup))
+(define_insn_and_rewrite "*aarch64_sve2_nbsl<mode>"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_I
+ [(match_operand 4)
+ (not:SVE_I
+ (xor:SVE_I
+ (and:SVE_I
+ (xor:SVE_I
+ (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
+ (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
+ (match_operand:SVE_I 3 "register_operand" "w, w"))
+ (match_dup BSL_DUP)))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE2"
+ "@
+ nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
+ movprfx\t%0, %<bsl_mov>\;nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
+ "&& !CONSTANT_P (operands[4])"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Unpredicated bitwise select with inverted first operand.
+;; (op3 ? ~bsl_mov : bsl_dup) == ((~(bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
+(define_insn_and_rewrite "*aarch64_sve2_bsl1n<mode>"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+ (xor:SVE_I
+ (and:SVE_I
+ (unspec:SVE_I
+ [(match_operand 4)
+ (not:SVE_I
+ (xor:SVE_I
+ (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
+ (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w")))]
+ UNSPEC_PRED_X)
+ (match_operand:SVE_I 3 "register_operand" "w, w"))
+ (match_dup BSL_DUP)))]
+ "TARGET_SVE2"
+ "@
+ bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d
+ movprfx\t%0, %<bsl_mov>\;bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
+ "&& !CONSTANT_P (operands[4])"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Unpredicated bitwise select with inverted second operand.
+;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup))
+(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+ (ior:SVE_I
+ (and:SVE_I
+ (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
+ (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
+ (unspec:SVE_I
+ [(match_operand 4)
+ (and:SVE_I
+ (not:SVE_I
+ (match_operand:SVE_I 3 "register_operand" "w, w"))
+ (not:SVE_I
+ (match_dup BSL_DUP)))]
+ UNSPEC_PRED_X)))]
+ "TARGET_SVE2"
+ "@
+ bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
+ movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
+ "&& !CONSTANT_P (operands[4])"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Unpredicated bitwise select with inverted second operand, alternative form.
+;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~bsl_dup & ~op3))
+(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+ (ior:SVE_I
+ (and:SVE_I
+ (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
+ (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
+ (unspec:SVE_I
+ [(match_operand 4)
+ (and:SVE_I
+ (not:SVE_I
+ (match_dup BSL_DUP))
+ (not:SVE_I
+ (match_operand:SVE_I 3 "register_operand" "w, w")))]
+ UNSPEC_PRED_X)))]
+ "TARGET_SVE2"
+ "@
+ bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
+ movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
+ "&& !CONSTANT_P (operands[4])"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+ [(set_attr "movprfx" "*,yes")]
+)
+
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 1e321af710bfe80606eedee7e0d191f36c70355b..f879fadb007a23749a523edbe7fe247dee33fa94 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1611,6 +1611,8 @@
(define_int_iterator SHRNT [UNSPEC_SHRNT UNSPEC_RSHRNT])
+(define_int_iterator BSL_DUP [1 2])
+
(define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT])
(define_int_iterator ADDSUBHN [UNSPEC_ADDHN UNSPEC_RADDHN
@@ -1976,6 +1978,18 @@
(UNSPEC_RADDHN2 "add")
(UNSPEC_RSUBHN2 "sub")])
+;; BSL variants: first commutative operand.
+(define_int_attr bsl_1st [(1 "w") (2 "0")])
+
+;; BSL variants: second commutative operand.
+(define_int_attr bsl_2nd [(1 "0") (2 "w")])
+
+;; BSL variants: duplicated input operand.
+(define_int_attr bsl_dup [(1 "1") (2 "2")])
+
+;; BSL variants: operand which requires preserving via movprfx.
+(define_int_attr bsl_mov [(1 "2") (2 "1")])
+
(define_int_attr offsetlr [(UNSPEC_SSLI "") (UNSPEC_USLI "")
(UNSPEC_SSRI "offset_")
(UNSPEC_USRI "offset_")])
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..5c58ff54231d88a4ebf0a91fe4fac97079c8d992
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_1.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef OP
+#define OP(x,y,z) (((x) & (z)) | ((y) & ~(z)))
+#endif
+
+#define TYPE(N) int##N##_t
+
+#define TEMPLATE(SIZE) \
+void __attribute__ ((noinline, noclone)) \
+f_##SIZE##_##OP \
+ (TYPE(SIZE) *restrict a, TYPE(SIZE) *restrict b, \
+ TYPE(SIZE) *restrict c, TYPE(SIZE) *restrict d, int n) \
+{ \
+ for (int i = 0; i < n; i++) \
+ a[i] = OP (b[i], c[i], d[i]); \
+}
+
+TEMPLATE (8);
+TEMPLATE (16);
+TEMPLATE (32);
+TEMPLATE (64);
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */
+
+/* { dg-final { scan-assembler-times {\tbsl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_2.c b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..ac0d27213e84bb5c7f3d236f3cac59c71ac674ed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_2.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
+
+#define OP(x,y,z) (~(((x) & (z)) | ((y) & ~(z))))
+
+#include "bitsel_1.c"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\tnot\tz[0-9]+\.[bhsd]} } } */
+
+/* { dg-final { scan-assembler-times {\tnbsl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_3.c b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..93995bb8bade89cd821ed85153d13e96bd4422a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_3.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
+
+#define OP(x,y,z) ((~(x) & (z)) | ((y) & ~(z)))
+
+#include "bitsel_1.c"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\tbic\tz[0-9]+\.[bhsd]} } } */
+
+/* { dg-final { scan-assembler-times {\tbsl1n\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_4.c b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..7ccec619b4d1e8de366c0b0c53879a89a00c2c49
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_4.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
+
+#define OP(x,y,z) (((x) & (z)) | (~(y) & ~(z)))
+
+#include "bitsel_1.c"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\tnot\tz[0-9]+\.[bhsd]} } } */
+
+/* { dg-final { scan-assembler-times {\tbsl2n\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/eor3_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/eor3_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..551802a0c9f007273ddc68cc4ce77defe700d76e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/eor3_1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
+
+#define OP(x,y,z) ((x) ^ (y) ^ (z))
+
+#include "bitsel_1.c"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.[bhsd]} } } */
+
+/* { dg-final { scan-assembler-times {\teor3\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..ef0e266bd93bb3d3b5af204438ad8ef35faa5675
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_1.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef OP
+#define OP(x,y) (~((x) | (y)))
+#endif
+
+#define TYPE(N) int##N##_t
+
+#define TEMPLATE(SIZE) \
+void __attribute__ ((noinline, noclone)) \
+f_##SIZE##_##OP \
+ (TYPE(SIZE) *restrict a, TYPE(SIZE) *restrict b, \
+ TYPE(SIZE) *restrict c, int n) \
+{ \
+ for (int i = 0; i < n; i++) \
+ a[i] = OP (b[i], c[i]); \
+}
+
+TEMPLATE (8);
+TEMPLATE (16);
+TEMPLATE (32);
+TEMPLATE (64);
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\tnot\tz[0-9]+\.[bhsd]} } } */
+
+/* { dg-final { scan-assembler-times {\tnbsl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_2.c b/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..da8c86161625ff51814c0d8d4e5d51035ad1b1f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_2.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
+
+#define OP(x,y) (~((x) & (y)))
+
+#include "nlogic_1.c"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\tnot\tz[0-9]+\.[bhsd]} } } */
+
+/* { dg-final { scan-assembler-times {\tnbsl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
+
-----Original Message-----
From: Richard Sandiford <richard.sandiford@arm.com>
Sent: 17 October 2019 11:28
To: Yuliang Wang <Yuliang.Wang@arm.com>
Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>
Subject: Re: [AArch64][SVE2] Support for EOR3 and variants of BSL
Yuliang Wang <Yuliang.Wang@arm.com> writes:
> Hi Richard,
>
> Thanks for the suggestions, updated.
>
> Regards,
> Yuliang
>
>
> gcc/ChangeLog:
>
> 2019-10-17 Yuliang Wang <yuliang.wang@arm.com>
>
> * config/aarch64/aarch64-sve2.md (aarch64_sve2_eor3<mode>)
> (aarch64_sve2_nor<mode>, aarch64_sve2_nand<mode>)
> (aarch64_sve2_bsl<mode>, aarch64_sve2_nbsl<mode>)
> (aarch64_sve2_bsl1n<mode>, aarch64_sve2_bsl2n<mode>):
> New combine patterns.
> * config/aarch64/iterators.md (BSL_DUP): New int iterator for the above.
> (bsl_1st, bsl_2nd, bsl_dup, bsl_mov): Attributes for the above.
> * config/aarch64/aarch64.h (AARCH64_ISA_SVE2_SHA3): New ISA flag macro.
> (TARGET_SVE2_SHA3): New CPU target.
>
> gcc/testsuite/ChangeLog:
>
> 2019-10-17 Yuliang Wang <yuliang.wang@arm.com>
>
> * gcc.target/aarch64/sve2/eor3_1.c: New test.
> * gcc.target/aarch64/sve2/eor3_2.c: As above.
> * gcc.target/aarch64/sve2/nlogic_1.c: As above.
> * gcc.target/aarch64/sve2/nlogic_2.c: As above.
> * gcc.target/aarch64/sve2/bitsel_1.c: As above.
> * gcc.target/aarch64/sve2/bitsel_2.c: As above.
> * gcc.target/aarch64/sve2/bitsel_3.c: As above.
> * gcc.target/aarch64/sve2/bitsel_4.c: As above.
>
>
> diff --git a/gcc/config/aarch64/aarch64-sve2.md
> b/gcc/config/aarch64/aarch64-sve2.md
> index
> b018f5b0bc9b51edf831e2571f0f5a9af2210829..08d5214a3debb9e9a0796da0af30
> 09ed3ff55774 100644
> --- a/gcc/config/aarch64/aarch64-sve2.md
> +++ b/gcc/config/aarch64/aarch64-sve2.md
> @@ -142,3 +142,189 @@
> }
> )
>
> +;; Unpredicated 3-way exclusive OR.
> +(define_insn "*aarch64_sve2_eor3<mode>"
> + [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?&w")
> + (xor:SVE_I
> + (xor:SVE_I
> + (match_operand:SVE_I 1 "register_operand" "0, w, w, w")
> + (match_operand:SVE_I 2 "register_operand" "w, 0, w, w"))
> + (match_operand:SVE_I 3 "register_operand" "w, w, 0, w")))]
> + "TARGET_SVE2_SHA3"
EOR3 is part of base SVE2, it doesn't require the SHA3 extension.
> +;; Unpredicated bitwise select.
> +;; N.B. non-canonical equivalent form due to expand pass.
Think it would be better to drop this line (and similarly for the patterns below). The form isn't non-canonical -- there just isn't a defined canonical form here. :-) It is the expected form as things stand.
> +;; (op3 ? bsl_mov : bsl_dup) == (((bsl_mov ^ bsl_dup) & op3) ^
> +bsl_dup) (define_insn "*aarch64_sve2_bsl<mode>"
> + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
> + (xor:SVE_I
> + (and:SVE_I
> + (xor:SVE_I
> + (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
> + (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
> + (match_operand:SVE_I 3 "register_operand" "w, w"))
> + (match_dup BSL_DUP)))]
> + "TARGET_SVE2"
> + "@
> + bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
> + movprfx\t%0, %<bsl_mov>\;bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
> + [(set_attr "movprfx" "*,yes")]
> +)
> +
> +;; Unpredicated bitwise inverted select.
> +;; N.B. non-canonical equivalent form.
> +;; (~(op3 ? bsl_mov : bsl_dup)) == (~(((bsl_mov ^ bsl_dup) & op3) ^
> +bsl_dup)) (define_insn_and_rewrite "*aarch64_sve2_nbsl<mode>"
> + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
> + (unspec:SVE_I
> + [(match_operand 4)
> + (not:SVE_I
> + (xor:SVE_I
> + (and:SVE_I
> + (xor:SVE_I
> + (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
> + (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
> + (match_operand:SVE_I 3 "register_operand" "w, w"))
> + (match_dup BSL_DUP)))]
> + UNSPEC_PRED_X))]
> + "TARGET_SVE2"
> + "@
> + nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
> + movprfx\t%0, %<bsl_mov>\;nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
> + "&& !CONSTANT_P (operands[4])"
> + {
> + operands[4] = CONSTM1_RTX (<VPRED>mode);
> + }
> + [(set_attr "movprfx" "*,yes")]
> +)
> +
> +;; Unpredicated bitwise select with inverted first operand.
> +;; N.B. non-canonical equivalent form.
> +;; (op3 ? ~bsl_mov : bsl_dup) == (((~bsl_mov ^ bsl_dup) & op3) ^
> +bsl_dup)
That's true, but I think:
;; (op3 ? ~bsl_mov : bsl_dup) == ((~(bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
is clearer, to match the rtl.
> +(define_insn_and_rewrite "*aarch64_sve2_bsl1n<mode>"
> + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
> + (xor:SVE_I
> + (and:SVE_I
> + (unspec:SVE_I
> + [(match_operand 4)
> + (not:SVE_I
> + (xor:SVE_I
> + (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
> + (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w")))]
> + UNSPEC_PRED_X)
> + (match_operand:SVE_I 3 "register_operand" "w, w"))
> + (match_dup BSL_DUP)))]
> + "TARGET_SVE2"
> + "@
> + bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d
> + movprfx\t%0, %<bsl_mov>\;bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
> + "&& !CONSTANT_P (operands[4])"
> + {
> + operands[4] = CONSTM1_RTX (<VPRED>mode);
> + }
> + [(set_attr "movprfx" "*,yes")]
> +)
> +
> +;; Unpredicated bitwise select with inverted second operand.
> +(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
Would be good to have a comment here too:
;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup))
> + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
> + (ior:SVE_I
> + (and:SVE_I
> + (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
> + (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
> + (unspec:SVE_I
> + [(match_operand 4)
> + (and:SVE_I
> + (not:SVE_I
> + (match_operand:SVE_I 3 "register_operand" "w, w"))
> + (not:SVE_I
> + (match_dup BSL_DUP)))]
> + UNSPEC_PRED_X)))]
> + "TARGET_SVE2"
> + "@
> + bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
> + movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
> + "&& !CONSTANT_P (operands[4])"
> + {
> + operands[4] = CONSTM1_RTX (<VPRED>mode);
> + }
> + [(set_attr "movprfx" "*,yes")]
> +)
> +
> +;; Unpredicated bitwise select with inverted second operand, alternative form.
;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~bsl_dup & ~op3))
> +(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
> + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
> + (ior:SVE_I
> + (and:SVE_I
> + (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
> + (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
> + (unspec:SVE_I
> + [(match_operand 4)
> + (and:SVE_I
> + (not:SVE_I
> + (match_dup BSL_DUP))
> + (not:SVE_I
> + (match_operand:SVE_I 3 "register_operand" "w, w")))]
> + UNSPEC_PRED_X)))]
> + "TARGET_SVE2"
> + "@
> + bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
> + movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
> + "&& !CONSTANT_P (operands[4])"
> + {
> + operands[4] = CONSTM1_RTX (<VPRED>mode);
> + }
> + [(set_attr "movprfx" "*,yes")]
> +)
> +
> diff --git a/gcc/config/aarch64/aarch64.h
> b/gcc/config/aarch64/aarch64.h index
> abd14a2f92c06828adfc6d2e2e81b63a6163d3a3..cad401ceb2419b6a0a64f2396c8e
> 7d5b9105fb22 100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -236,6 +236,7 @@ extern unsigned aarch64_architecture_version;
> #define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16)
> #define AARCH64_ISA_SVE (aarch64_isa_flags & AARCH64_FL_SVE)
> #define AARCH64_ISA_SVE2 (aarch64_isa_flags & AARCH64_FL_SVE2)
> +#define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3)
> #define AARCH64_ISA_V8_3 (aarch64_isa_flags & AARCH64_FL_V8_3)
> #define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD)
> #define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES)
> @@ -285,6 +286,9 @@ extern unsigned aarch64_architecture_version;
> /* SVE2 instructions, enabled through +sve2. */ #define TARGET_SVE2
> (AARCH64_ISA_SVE2)
>
> +/* SVE2 SHA3 instructions, enabled through +sve2-sha3. */ #define
> +TARGET_SVE2_SHA3 (TARGET_SVE2 && AARCH64_ISA_SVE2_SHA3)
> +
> /* ARMv8.3-A features. */
> #define TARGET_ARMV8_3 (AARCH64_ISA_V8_3)
With the above change, these macros aren't needed.
Thanks,
Richard