[SVE ACLE] Various fixes and cleanups


I've applied the following three patches to aarch64/sve-acle-branch.
The first just fixes some bugs I noticed while testing the current branch.
The other two try to tidy up the instruction generation code so that we
aren't passing so many values around, and so that it's easier to separate
"number of operands" from "how to get an icode".

Thanks,
Richard

[SVE ACLE] Some fixes

- Fix the SEL assembly syntax (it doesn't take a predication suffix)
- Fix the operand numbering in mul<SVE_F:mode>3
- Avoid using general_operand for things that don't accept memory
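
For reference, the first fix changes the printed SEL syntax from

	sel	z0.h, p0/m, z1.h, z0.h

to

	sel	z0.h, p0, z1.h, z0.h

since SEL's governing predicate is written bare, with no /m or /z
qualifier.  (The registers are concrete stand-ins for the captured
register in the tests below.)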


diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 2176be8cf9a..65eddc261d8 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1589,7 +1589,7 @@
 	  UNSPEC_SEL))]
   "TARGET_SVE"
   "@
-   sel\t%0.<Vetype>, %3/m, %1.<Vetype>, %2.<Vetype>
+   sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>
    mov\t%0.<Vetype>, %3/m, #%1
    movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %1.<Vetype>
    mov\t%0.<Vetype>, %3/z, #%1
@@ -1601,12 +1601,12 @@
   [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w, ?&w")
 	(unspec:SVE_F
 	  [(match_operand:<VPRED> 3 "register_operand" "Upa, Upl, Upl, Upl")
-	   (match_operand:SVE_F 1 "general_operand" "w, Dn, w, Dn")
+	   (match_operand:SVE_F 1 "aarch64_nonmemory_operand" "w, Dn, w, Dn")
 	   (match_operand:SVE_F 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, Dz")]
 	  UNSPEC_SEL))]
   "TARGET_SVE"
   "@
-   sel\t%0.<Vetype>, %3/m, %1.<Vetype>, %2.<Vetype>
+   sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>
    * return aarch64_output_sve_mov_immediate (operands[1], 3, true);
    movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
    * return aarch64_output_sve_mov_immediate (operands[1], 3, false);"
@@ -2553,8 +2553,8 @@
 	(unspec:SVE_F
 	  [(match_dup 3)
 	   (const_int SVE_ALLOW_NEW_FAULTS)
-	   (match_operand:SVE_F 2 "register_operand")
-	   (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand")]
+	   (match_operand:SVE_F 1 "register_operand")
+	   (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand")]
 	  UNSPEC_COND_MUL))]
   "TARGET_SVE"
   {
diff --git a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_s16.c b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_s16.c
index 350af45b4ee..6a5af81ed3a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_s16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_s16.c
@@ -339,7 +339,7 @@ TEST_UNIFORM_Z (dup_127_s16_m, svint16_t,
 /*
 ** dup_128_s16_m:
 **	mov	(z[0-9]+\.h), #128
-**	sel	z0\.h, p0/m, \1, z0\.h
+**	sel	z0\.h, p0, \1, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_128_s16_m, svint16_t,
@@ -359,7 +359,7 @@ TEST_UNIFORM_Z (dup_253_s16_m, svint16_t,
 /*
 ** dup_254_s16_m:
 **	mov	(z[0-9]+\.h), #254
-**	sel	z0\.h, p0/m, \1, z0\.h
+**	sel	z0\.h, p0, \1, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_254_s16_m, svint16_t,
@@ -369,7 +369,7 @@ TEST_UNIFORM_Z (dup_254_s16_m, svint16_t,
 /*
 ** dup_255_s16_m:
 **	mov	(z[0-9]+\.h), #255
-**	sel	z0\.h, p0/m, \1, z0\.h
+**	sel	z0\.h, p0, \1, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_255_s16_m, svint16_t,
@@ -388,7 +388,7 @@ TEST_UNIFORM_Z (dup_256_s16_m, svint16_t,
 /*
 ** dup_257_s16_m:
 **	mov	(z[0-9]+)\.b, #1
-**	sel	z0\.h, p0/m, \1\.h, z0\.h
+**	sel	z0\.h, p0, \1\.h, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_257_s16_m, svint16_t,
@@ -426,7 +426,7 @@ TEST_UNIFORM_Z (dup_7ffd_s16_m, svint16_t,
 /*
 ** dup_7ffe_s16_m:
 **	mov	(z[0-9]+\.h), #32766
-**	sel	z0\.h, p0/m, \1, z0\.h
+**	sel	z0\.h, p0, \1, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_7ffe_s16_m, svint16_t,
@@ -436,7 +436,7 @@ TEST_UNIFORM_Z (dup_7ffe_s16_m, svint16_t,
 /*
 ** dup_7fff_s16_m:
 **	mov	(z[0-9]+\.h), #32767
-**	sel	z0\.h, p0/m, \1, z0\.h
+**	sel	z0\.h, p0, \1, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_7fff_s16_m, svint16_t,
@@ -464,7 +464,7 @@ TEST_UNIFORM_Z (dup_m128_s16_m, svint16_t,
 /*
 ** dup_m129_s16_m:
 **	mov	(z[0-9]+\.h), #-129
-**	sel	z0\.h, p0/m, \1, z0\.h
+**	sel	z0\.h, p0, \1, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_m129_s16_m, svint16_t,
@@ -484,7 +484,7 @@ TEST_UNIFORM_Z (dup_m254_s16_m, svint16_t,
 /*
 ** dup_m255_s16_m:
 **	mov	(z[0-9]+\.h), #-255
-**	sel	z0\.h, p0/m, \1, z0\.h
+**	sel	z0\.h, p0, \1, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_m255_s16_m, svint16_t,
@@ -503,7 +503,7 @@ TEST_UNIFORM_Z (dup_m256_s16_m, svint16_t,
 /*
 ** dup_m257_s16_m:
 **	mov	(z[0-9]+\.h), #-257
-**	sel	z0\.h, p0/m, \1, z0\.h
+**	sel	z0\.h, p0, \1, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_m257_s16_m, svint16_t,
@@ -513,7 +513,7 @@ TEST_UNIFORM_Z (dup_m257_s16_m, svint16_t,
 /*
 ** dup_m258_s16_m:
 **	mov	(z[0-9]+)\.b, #-2
-**	sel	z0\.h, p0/m, \1\.h, z0\.h
+**	sel	z0\.h, p0, \1\.h, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_m258_s16_m, svint16_t,
@@ -546,7 +546,7 @@ TEST_UNIFORM_Z (dup_m7f00_s16_m, svint16_t,
 /*
 ** dup_m7f01_s16_m:
 **	mov	(z[0-9]+\.h), #-32513
-**	sel	z0\.h, p0/m, \1, z0\.h
+**	sel	z0\.h, p0, \1, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_m7f01_s16_m, svint16_t,
@@ -566,7 +566,7 @@ TEST_UNIFORM_Z (dup_m7ffe_s16_m, svint16_t,
 /*
 ** dup_m7fff_s16_m:
 **	mov	(z[0-9]+\.h), #-32767
-**	sel	z0\.h, p0/m, \1, z0\.h
+**	sel	z0\.h, p0, \1, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_m7fff_s16_m, svint16_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_s32.c b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_s32.c
index 99c720a3cd9..90c5a3a04e1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_s32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_s32.c
@@ -341,7 +341,7 @@ TEST_UNIFORM_Z (dup_127_s32_m, svint32_t,
 /*
 ** dup_128_s32_m:
 **	mov	(z[0-9]+\.s), #128
-**	sel	z0\.s, p0/m, \1, z0\.s
+**	sel	z0\.s, p0, \1, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_128_s32_m, svint32_t,
@@ -361,7 +361,7 @@ TEST_UNIFORM_Z (dup_253_s32_m, svint32_t,
 /*
 ** dup_254_s32_m:
 **	mov	(z[0-9]+\.s), #254
-**	sel	z0\.s, p0/m, \1, z0\.s
+**	sel	z0\.s, p0, \1, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_254_s32_m, svint32_t,
@@ -371,7 +371,7 @@ TEST_UNIFORM_Z (dup_254_s32_m, svint32_t,
 /*
 ** dup_255_s32_m:
 **	mov	(z[0-9]+\.s), #255
-**	sel	z0\.s, p0/m, \1, z0\.s
+**	sel	z0\.s, p0, \1, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_255_s32_m, svint32_t,
@@ -423,7 +423,7 @@ TEST_UNIFORM_Z (dup_7ffd_s32_m, svint32_t,
 /*
 ** dup_7ffe_s32_m:
 **	mov	(z[0-9]+\.s), #32766
-**	sel	z0\.s, p0/m, \1, z0\.s
+**	sel	z0\.s, p0, \1, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_7ffe_s32_m, svint32_t,
@@ -433,7 +433,7 @@ TEST_UNIFORM_Z (dup_7ffe_s32_m, svint32_t,
 /*
 ** dup_7fff_s32_m:
 **	mov	(z[0-9]+\.s), #32767
-**	sel	z0\.s, p0/m, \1, z0\.s
+**	sel	z0\.s, p0, \1, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_7fff_s32_m, svint32_t,
@@ -461,7 +461,7 @@ TEST_UNIFORM_Z (dup_m128_s32_m, svint32_t,
 /*
 ** dup_m129_s32_m:
 **	mov	(z[0-9]+\.s), #-129
-**	sel	z0\.s, p0/m, \1, z0\.s
+**	sel	z0\.s, p0, \1, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_m129_s32_m, svint32_t,
@@ -481,7 +481,7 @@ TEST_UNIFORM_Z (dup_m254_s32_m, svint32_t,
 /*
 ** dup_m255_s32_m:
 **	mov	(z[0-9]+\.s), #-255
-**	sel	z0\.s, p0/m, \1, z0\.s
+**	sel	z0\.s, p0, \1, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_m255_s32_m, svint32_t,
@@ -500,7 +500,7 @@ TEST_UNIFORM_Z (dup_m256_s32_m, svint32_t,
 /*
 ** dup_m257_s32_m:
 **	mov	(z[0-9]+\.s), #-257
-**	sel	z0\.s, p0/m, \1, z0\.s
+**	sel	z0\.s, p0, \1, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_m257_s32_m, svint32_t,
@@ -538,7 +538,7 @@ TEST_UNIFORM_Z (dup_m7f00_s32_m, svint32_t,
 /*
 ** dup_m7f01_s32_m:
 **	mov	(z[0-9]+\.s), #-32513
-**	sel	z0\.s, p0/m, \1, z0\.s
+**	sel	z0\.s, p0, \1, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_m7f01_s32_m, svint32_t,
@@ -558,7 +558,7 @@ TEST_UNIFORM_Z (dup_m7ffe_s32_m, svint32_t,
 /*
 ** dup_m7fff_s32_m:
 **	mov	(z[0-9]+\.s), #-32767
-**	sel	z0\.s, p0/m, \1, z0\.s
+**	sel	z0\.s, p0, \1, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_m7fff_s32_m, svint32_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_s64.c b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_s64.c
index 39003b407a8..2c694e9d87e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_s64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_s64.c
@@ -341,7 +341,7 @@ TEST_UNIFORM_Z (dup_127_s64_m, svint64_t,
 /*
 ** dup_128_s64_m:
 **	mov	(z[0-9]+\.d), #128
-**	sel	z0\.d, p0/m, \1, z0\.d
+**	sel	z0\.d, p0, \1, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_128_s64_m, svint64_t,
@@ -361,7 +361,7 @@ TEST_UNIFORM_Z (dup_253_s64_m, svint64_t,
 /*
 ** dup_254_s64_m:
 **	mov	(z[0-9]+\.d), #254
-**	sel	z0\.d, p0/m, \1, z0\.d
+**	sel	z0\.d, p0, \1, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_254_s64_m, svint64_t,
@@ -371,7 +371,7 @@ TEST_UNIFORM_Z (dup_254_s64_m, svint64_t,
 /*
 ** dup_255_s64_m:
 **	mov	(z[0-9]+\.d), #255
-**	sel	z0\.d, p0/m, \1, z0\.d
+**	sel	z0\.d, p0, \1, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_255_s64_m, svint64_t,
@@ -423,7 +423,7 @@ TEST_UNIFORM_Z (dup_7ffd_s64_m, svint64_t,
 /*
 ** dup_7ffe_s64_m:
 **	mov	(z[0-9]+\.d), #32766
-**	sel	z0\.d, p0/m, \1, z0\.d
+**	sel	z0\.d, p0, \1, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_7ffe_s64_m, svint64_t,
@@ -433,7 +433,7 @@ TEST_UNIFORM_Z (dup_7ffe_s64_m, svint64_t,
 /*
 ** dup_7fff_s64_m:
 **	mov	(z[0-9]+\.d), #32767
-**	sel	z0\.d, p0/m, \1, z0\.d
+**	sel	z0\.d, p0, \1, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_7fff_s64_m, svint64_t,
@@ -461,7 +461,7 @@ TEST_UNIFORM_Z (dup_m128_s64_m, svint64_t,
 /*
 ** dup_m129_s64_m:
 **	mov	(z[0-9]+\.d), #-129
-**	sel	z0\.d, p0/m, \1, z0\.d
+**	sel	z0\.d, p0, \1, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_m129_s64_m, svint64_t,
@@ -481,7 +481,7 @@ TEST_UNIFORM_Z (dup_m254_s64_m, svint64_t,
 /*
 ** dup_m255_s64_m:
 **	mov	(z[0-9]+\.d), #-255
-**	sel	z0\.d, p0/m, \1, z0\.d
+**	sel	z0\.d, p0, \1, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_m255_s64_m, svint64_t,
@@ -500,7 +500,7 @@ TEST_UNIFORM_Z (dup_m256_s64_m, svint64_t,
 /*
 ** dup_m257_s64_m:
 **	mov	(z[0-9]+\.d), #-257
-**	sel	z0\.d, p0/m, \1, z0\.d
+**	sel	z0\.d, p0, \1, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_m257_s64_m, svint64_t,
@@ -538,7 +538,7 @@ TEST_UNIFORM_Z (dup_m7f00_s64_m, svint64_t,
 /*
 ** dup_m7f01_s64_m:
 **	mov	(z[0-9]+\.d), #-32513
-**	sel	z0\.d, p0/m, \1, z0\.d
+**	sel	z0\.d, p0, \1, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_m7f01_s64_m, svint64_t,
@@ -558,7 +558,7 @@ TEST_UNIFORM_Z (dup_m7ffe_s64_m, svint64_t,
 /*
 ** dup_m7fff_s64_m:
 **	mov	(z[0-9]+\.d), #-32767
-**	sel	z0\.d, p0/m, \1, z0\.d
+**	sel	z0\.d, p0, \1, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_m7fff_s64_m, svint64_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_u16.c b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_u16.c
index 4d2c8b9f299..9871ac2fdbf 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_u16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_u16.c
@@ -339,7 +339,7 @@ TEST_UNIFORM_Z (dup_127_u16_m, svuint16_t,
 /*
 ** dup_128_u16_m:
 **	mov	(z[0-9]+\.h), #128
-**	sel	z0\.h, p0/m, \1, z0\.h
+**	sel	z0\.h, p0, \1, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_128_u16_m, svuint16_t,
@@ -359,7 +359,7 @@ TEST_UNIFORM_Z (dup_253_u16_m, svuint16_t,
 /*
 ** dup_254_u16_m:
 **	mov	(z[0-9]+\.h), #254
-**	sel	z0\.h, p0/m, \1, z0\.h
+**	sel	z0\.h, p0, \1, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_254_u16_m, svuint16_t,
@@ -369,7 +369,7 @@ TEST_UNIFORM_Z (dup_254_u16_m, svuint16_t,
 /*
 ** dup_255_u16_m:
 **	mov	(z[0-9]+\.h), #255
-**	sel	z0\.h, p0/m, \1, z0\.h
+**	sel	z0\.h, p0, \1, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_255_u16_m, svuint16_t,
@@ -388,7 +388,7 @@ TEST_UNIFORM_Z (dup_256_u16_m, svuint16_t,
 /*
 ** dup_257_u16_m:
 **	mov	(z[0-9]+)\.b, #1
-**	sel	z0\.h, p0/m, \1\.h, z0\.h
+**	sel	z0\.h, p0, \1\.h, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_257_u16_m, svuint16_t,
@@ -426,7 +426,7 @@ TEST_UNIFORM_Z (dup_7ffd_u16_m, svuint16_t,
 /*
 ** dup_7ffe_u16_m:
 **	mov	(z[0-9]+\.h), #32766
-**	sel	z0\.h, p0/m, \1, z0\.h
+**	sel	z0\.h, p0, \1, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_7ffe_u16_m, svuint16_t,
@@ -436,7 +436,7 @@ TEST_UNIFORM_Z (dup_7ffe_u16_m, svuint16_t,
 /*
 ** dup_7fff_u16_m:
 **	mov	(z[0-9]+\.h), #32767
-**	sel	z0\.h, p0/m, \1, z0\.h
+**	sel	z0\.h, p0, \1, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_7fff_u16_m, svuint16_t,
@@ -464,7 +464,7 @@ TEST_UNIFORM_Z (dup_m128_u16_m, svuint16_t,
 /*
 ** dup_m129_u16_m:
 **	mov	(z[0-9]+\.h), #-129
-**	sel	z0\.h, p0/m, \1, z0\.h
+**	sel	z0\.h, p0, \1, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_m129_u16_m, svuint16_t,
@@ -484,7 +484,7 @@ TEST_UNIFORM_Z (dup_m254_u16_m, svuint16_t,
 /*
 ** dup_m255_u16_m:
 **	mov	(z[0-9]+\.h), #-255
-**	sel	z0\.h, p0/m, \1, z0\.h
+**	sel	z0\.h, p0, \1, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_m255_u16_m, svuint16_t,
@@ -503,7 +503,7 @@ TEST_UNIFORM_Z (dup_m256_u16_m, svuint16_t,
 /*
 ** dup_m257_u16_m:
 **	mov	(z[0-9]+\.h), #-257
-**	sel	z0\.h, p0/m, \1, z0\.h
+**	sel	z0\.h, p0, \1, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_m257_u16_m, svuint16_t,
@@ -513,7 +513,7 @@ TEST_UNIFORM_Z (dup_m257_u16_m, svuint16_t,
 /*
 ** dup_m258_u16_m:
 **	mov	(z[0-9]+)\.b, #-2
-**	sel	z0\.h, p0/m, \1\.h, z0\.h
+**	sel	z0\.h, p0, \1\.h, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_m258_u16_m, svuint16_t,
@@ -546,7 +546,7 @@ TEST_UNIFORM_Z (dup_m7f00_u16_m, svuint16_t,
 /*
 ** dup_m7f01_u16_m:
 **	mov	(z[0-9]+\.h), #-32513
-**	sel	z0\.h, p0/m, \1, z0\.h
+**	sel	z0\.h, p0, \1, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_m7f01_u16_m, svuint16_t,
@@ -566,7 +566,7 @@ TEST_UNIFORM_Z (dup_m7ffe_u16_m, svuint16_t,
 /*
 ** dup_m7fff_u16_m:
 **	mov	(z[0-9]+\.h), #-32767
-**	sel	z0\.h, p0/m, \1, z0\.h
+**	sel	z0\.h, p0, \1, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_m7fff_u16_m, svuint16_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_u32.c b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_u32.c
index ee193c61f8b..1fd5be9ccbd 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_u32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_u32.c
@@ -341,7 +341,7 @@ TEST_UNIFORM_Z (dup_127_u32_m, svuint32_t,
 /*
 ** dup_128_u32_m:
 **	mov	(z[0-9]+\.s), #128
-**	sel	z0\.s, p0/m, \1, z0\.s
+**	sel	z0\.s, p0, \1, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_128_u32_m, svuint32_t,
@@ -361,7 +361,7 @@ TEST_UNIFORM_Z (dup_253_u32_m, svuint32_t,
 /*
 ** dup_254_u32_m:
 **	mov	(z[0-9]+\.s), #254
-**	sel	z0\.s, p0/m, \1, z0\.s
+**	sel	z0\.s, p0, \1, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_254_u32_m, svuint32_t,
@@ -371,7 +371,7 @@ TEST_UNIFORM_Z (dup_254_u32_m, svuint32_t,
 /*
 ** dup_255_u32_m:
 **	mov	(z[0-9]+\.s), #255
-**	sel	z0\.s, p0/m, \1, z0\.s
+**	sel	z0\.s, p0, \1, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_255_u32_m, svuint32_t,
@@ -423,7 +423,7 @@ TEST_UNIFORM_Z (dup_7ffd_u32_m, svuint32_t,
 /*
 ** dup_7ffe_u32_m:
 **	mov	(z[0-9]+\.s), #32766
-**	sel	z0\.s, p0/m, \1, z0\.s
+**	sel	z0\.s, p0, \1, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_7ffe_u32_m, svuint32_t,
@@ -433,7 +433,7 @@ TEST_UNIFORM_Z (dup_7ffe_u32_m, svuint32_t,
 /*
 ** dup_7fff_u32_m:
 **	mov	(z[0-9]+\.s), #32767
-**	sel	z0\.s, p0/m, \1, z0\.s
+**	sel	z0\.s, p0, \1, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_7fff_u32_m, svuint32_t,
@@ -461,7 +461,7 @@ TEST_UNIFORM_Z (dup_m128_u32_m, svuint32_t,
 /*
 ** dup_m129_u32_m:
 **	mov	(z[0-9]+\.s), #-129
-**	sel	z0\.s, p0/m, \1, z0\.s
+**	sel	z0\.s, p0, \1, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_m129_u32_m, svuint32_t,
@@ -481,7 +481,7 @@ TEST_UNIFORM_Z (dup_m254_u32_m, svuint32_t,
 /*
 ** dup_m255_u32_m:
 **	mov	(z[0-9]+\.s), #-255
-**	sel	z0\.s, p0/m, \1, z0\.s
+**	sel	z0\.s, p0, \1, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_m255_u32_m, svuint32_t,
@@ -500,7 +500,7 @@ TEST_UNIFORM_Z (dup_m256_u32_m, svuint32_t,
 /*
 ** dup_m257_u32_m:
 **	mov	(z[0-9]+\.s), #-257
-**	sel	z0\.s, p0/m, \1, z0\.s
+**	sel	z0\.s, p0, \1, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_m257_u32_m, svuint32_t,
@@ -538,7 +538,7 @@ TEST_UNIFORM_Z (dup_m7f00_u32_m, svuint32_t,
 /*
 ** dup_m7f01_u32_m:
 **	mov	(z[0-9]+\.s), #-32513
-**	sel	z0\.s, p0/m, \1, z0\.s
+**	sel	z0\.s, p0, \1, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_m7f01_u32_m, svuint32_t,
@@ -558,7 +558,7 @@ TEST_UNIFORM_Z (dup_m7ffe_u32_m, svuint32_t,
 /*
 ** dup_m7fff_u32_m:
 **	mov	(z[0-9]+\.s), #-32767
-**	sel	z0\.s, p0/m, \1, z0\.s
+**	sel	z0\.s, p0, \1, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_m7fff_u32_m, svuint32_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_u64.c b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_u64.c
index 876f75951b1..afac1b0ea91 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_u64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_u64.c
@@ -341,7 +341,7 @@ TEST_UNIFORM_Z (dup_127_u64_m, svuint64_t,
 /*
 ** dup_128_u64_m:
 **	mov	(z[0-9]+\.d), #128
-**	sel	z0\.d, p0/m, \1, z0\.d
+**	sel	z0\.d, p0, \1, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_128_u64_m, svuint64_t,
@@ -361,7 +361,7 @@ TEST_UNIFORM_Z (dup_253_u64_m, svuint64_t,
 /*
 ** dup_254_u64_m:
 **	mov	(z[0-9]+\.d), #254
-**	sel	z0\.d, p0/m, \1, z0\.d
+**	sel	z0\.d, p0, \1, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_254_u64_m, svuint64_t,
@@ -371,7 +371,7 @@ TEST_UNIFORM_Z (dup_254_u64_m, svuint64_t,
 /*
 ** dup_255_u64_m:
 **	mov	(z[0-9]+\.d), #255
-**	sel	z0\.d, p0/m, \1, z0\.d
+**	sel	z0\.d, p0, \1, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_255_u64_m, svuint64_t,
@@ -423,7 +423,7 @@ TEST_UNIFORM_Z (dup_7ffd_u64_m, svuint64_t,
 /*
 ** dup_7ffe_u64_m:
 **	mov	(z[0-9]+\.d), #32766
-**	sel	z0\.d, p0/m, \1, z0\.d
+**	sel	z0\.d, p0, \1, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_7ffe_u64_m, svuint64_t,
@@ -433,7 +433,7 @@ TEST_UNIFORM_Z (dup_7ffe_u64_m, svuint64_t,
 /*
 ** dup_7fff_u64_m:
 **	mov	(z[0-9]+\.d), #32767
-**	sel	z0\.d, p0/m, \1, z0\.d
+**	sel	z0\.d, p0, \1, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_7fff_u64_m, svuint64_t,
@@ -461,7 +461,7 @@ TEST_UNIFORM_Z (dup_m128_u64_m, svuint64_t,
 /*
 ** dup_m129_u64_m:
 **	mov	(z[0-9]+\.d), #-129
-**	sel	z0\.d, p0/m, \1, z0\.d
+**	sel	z0\.d, p0, \1, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_m129_u64_m, svuint64_t,
@@ -481,7 +481,7 @@ TEST_UNIFORM_Z (dup_m254_u64_m, svuint64_t,
 /*
 ** dup_m255_u64_m:
 **	mov	(z[0-9]+\.d), #-255
-**	sel	z0\.d, p0/m, \1, z0\.d
+**	sel	z0\.d, p0, \1, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_m255_u64_m, svuint64_t,
@@ -500,7 +500,7 @@ TEST_UNIFORM_Z (dup_m256_u64_m, svuint64_t,
 /*
 ** dup_m257_u64_m:
 **	mov	(z[0-9]+\.d), #-257
-**	sel	z0\.d, p0/m, \1, z0\.d
+**	sel	z0\.d, p0, \1, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_m257_u64_m, svuint64_t,
@@ -538,7 +538,7 @@ TEST_UNIFORM_Z (dup_m7f00_u64_m, svuint64_t,
 /*
 ** dup_m7f01_u64_m:
 **	mov	(z[0-9]+\.d), #-32513
-**	sel	z0\.d, p0/m, \1, z0\.d
+**	sel	z0\.d, p0, \1, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_m7f01_u64_m, svuint64_t,
@@ -558,7 +558,7 @@ TEST_UNIFORM_Z (dup_m7ffe_u64_m, svuint64_t,
 /*
 ** dup_m7fff_u64_m:
 **	mov	(z[0-9]+\.d), #-32767
-**	sel	z0\.d, p0/m, \1, z0\.d
+**	sel	z0\.d, p0, \1, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_m7fff_u64_m, svuint64_t,
[SVE ACLE] Tidy handling of expand routines (1)

This patch makes the lower-level expand routines calculate the
number of operands themselves, rather than passing it all the
way down from the top-level expand functions.
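
In outline, the rule is that the instruction description itself gives
the operand count (a minimal sketch; the full routines below also
handle scalar operands and _x predication):

  /* Unpredicated: every operand except the output is an input.  */
  unsigned int nops = insn_data[icode].n_operands - 1;

  /* Predicated: also discount the governing predicate and the fallback
     value for inactive lanes.  A conditional binary operation has the
     operands (output, predicate, input, input, fallback), so nops == 2.  */
  unsigned int nops = insn_data[icode].n_operands - 3;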


diff --git a/gcc/config/aarch64/aarch64-sve-builtins.c b/gcc/config/aarch64/aarch64-sve-builtins.c
index 1f239b54e1d..97e18dd7c1a 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.c
+++ b/gcc/config/aarch64/aarch64-sve-builtins.c
@@ -419,15 +419,12 @@ private:
 
   rtx expand_signed_pred_op (rtx_code, rtx_code, int);
   rtx expand_signed_pred_op (int, int, int);
-  rtx expand_via_unpred_direct_optab (optab op, unsigned int nops,
-				      unsigned int = 0);
-  rtx expand_via_unpred_insn (insn_code icode, unsigned int nops,
-			      unsigned int = 0);
-  rtx expand_via_pred_direct_optab (optab, unsigned int, unsigned int);
-  rtx expand_via_pred_insn (insn_code, unsigned int, unsigned int,
-			    unsigned int, bool);
+  rtx expand_via_unpred_direct_optab (optab, unsigned int = 0);
+  rtx expand_via_unpred_insn (insn_code, unsigned int = 0);
+  rtx expand_via_pred_direct_optab (optab, unsigned int);
+  rtx expand_via_pred_insn (insn_code, unsigned int, unsigned int, bool);
   rtx expand_via_signed_unpred_insn (rtx_code, rtx_code);
-  rtx expand_via_pred_x_insn (insn_code, unsigned int);
+  rtx expand_via_pred_x_insn (insn_code);
   rtx expand_pred_shift_right_imm (insn_code);
 
   void require_immediate_range (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT);
@@ -1666,16 +1663,16 @@ function_expander::expand_add (unsigned int merge_argno)
   if (m_fi.pred == PRED_x)
     {
       if (type_suffixes[m_fi.types[0]].integer_p)
-	return expand_via_unpred_direct_optab (add_optab, 2);
+	return expand_via_unpred_direct_optab (add_optab);
       else
 	{
 	  /* Try to take advantage of unpredicated FP addition, rather than
 	     simply treating _x as _m.  */
 	  insn_code icode = code_for_aarch64_pred_add (get_mode (0));
-	  return expand_via_pred_x_insn (icode, 2);
+	  return expand_via_pred_x_insn (icode);
 	}
     }
-  return expand_via_pred_direct_optab (cond_add_optab, 2, merge_argno);
+  return expand_via_pred_direct_optab (cond_add_optab, merge_argno);
 }
 
 /* Expand a call to svasrd.  */
@@ -1691,7 +1688,7 @@ function_expander::expand_dup ()
 {
   if (m_fi.pred == PRED_none
       || m_fi.pred == PRED_x)
-    return expand_via_unpred_direct_optab (vec_duplicate_optab, 1, 1);
+    return expand_via_unpred_direct_optab (vec_duplicate_optab, 1);
   else
     {
       insn_code icode;
@@ -1699,12 +1696,12 @@ function_expander::expand_dup ()
       if (valid_for_const_vector_p (GET_MODE_INNER (mode), m_args.last ()))
 	{
 	  icode = code_for_vcond_mask (get_mode (0), get_mode (0));
-	  return expand_via_pred_insn (icode, 1, 0, 1, true);
+	  return expand_via_pred_insn (icode, 0, 1, true);
 	}
       else
 	{
 	  icode = code_for_aarch64_sel_dup (get_mode (0));
-	  return expand_via_pred_insn (icode, 1, 1, 1, true);
+	  return expand_via_pred_insn (icode, 1, 1, true);
 	}
     }
 }
@@ -1713,7 +1710,7 @@ function_expander::expand_dup ()
 rtx
 function_expander::expand_index ()
 {
-  return expand_via_unpred_direct_optab (vec_series_optab, 2, 2);
+  return expand_via_unpred_direct_optab (vec_series_optab, 2);
 }
 
 /* Expand a call to svmax.  */
@@ -1737,10 +1734,10 @@ function_expander::expand_mul ()
   if (m_fi.pred == PRED_x)
     {
       insn_code icode = code_for_aarch64_pred_mul (get_mode (0));
-      return expand_via_pred_x_insn (icode, 2);
+      return expand_via_pred_x_insn (icode);
     }
   else
-    return expand_via_pred_direct_optab (cond_smul_optab, 2, 1);
+    return expand_via_pred_direct_optab (cond_smul_optab, 1);
 }
 
 /* Expand a call to sqadd.  */
@@ -1790,38 +1787,38 @@ function_expander::expand_sub (bool reversed_p)
   if (m_fi.pred == PRED_x)
     {
       if (type_suffixes[m_fi.types[0]].integer_p)
-	return expand_via_unpred_direct_optab (sub_optab, 2);
+	return expand_via_unpred_direct_optab (sub_optab);
       else
 	{
 	  /* Try to take advantage of unpredicated FP addition, rather than
 	     simply treating _x as _m.  */
 	  insn_code icode = code_for_aarch64_pred_sub (mode);
-	  return expand_via_pred_x_insn (icode, 2);
+	  return expand_via_pred_x_insn (icode);
 	}
     }
-  return expand_via_pred_direct_optab (cond_sub_optab, 2, merge_argno);
+  return expand_via_pred_direct_optab (cond_sub_optab, merge_argno);
 }
 
 /* Implement the call using optab OP, which is an unpredicated direct
-   (i.e. single-mode) optab.  The optab takes NOPS input operands.
-   The last NSCALAR inputs are scalar, and map to scalar operands
-   in the underlying instruction.  */
+   (i.e. single-mode) optab.  The last NSCALAR inputs are scalar, and
+   map to scalar operands in the underlying instruction.  */
 rtx
-function_expander::expand_via_unpred_direct_optab (optab op, unsigned int nops,
+function_expander::expand_via_unpred_direct_optab (optab op,
 						   unsigned int nscalar)
 {
   machine_mode mode = get_mode (0);
   insn_code icode = direct_optab_handler (op, mode);
-  return expand_via_unpred_insn (icode, nops, nscalar);
+  return expand_via_unpred_insn (icode, nscalar);
 }
 
-/* Implement the call using instruction ICODE.  The instruction takes
-   NOPS input operands.  The last NSCALAR inputs are scalar, and map
-   to scalar operands in the underlying instruction.  */
+/* Implement the call using instruction ICODE.  The last NSCALAR inputs
+   are scalar, and map to scalar operands in the underlying instruction.  */
 rtx
-function_expander::expand_via_unpred_insn (insn_code icode, unsigned int nops,
+function_expander::expand_via_unpred_insn (insn_code icode,
 					   unsigned int nscalar)
 {
+  /* Discount the output operand.  */
+  unsigned int nops = insn_data[icode].n_operands - 1;
   /* Drop the predicate argument in the case of _x predication.  */
   unsigned int bias = (m_fi.pred == PRED_x ? 1 : 0);
   machine_mode mode = get_mode (0);
@@ -1838,32 +1835,30 @@ function_expander::expand_via_unpred_insn (insn_code icode, unsigned int nops,
 }
 
 /* Implement the call using optab OP, which is a predicated direct
-   (i.e. single-mode) optab.  The operation performed by OP takes NOPS
-   input operands (not counting the predicate and the fallback value).
-   The last NSCALAR inputs are scalar, and map to scalar operands
-   in the underlying instruction.  Merging forms use argument MERGE_ARGNO
+   (i.e. single-mode) optab.  Merging forms of OP use argument MERGE_ARGNO
    as the fallback value.  */
 rtx
-function_expander::expand_via_pred_direct_optab (optab op, unsigned int nops,
+function_expander::expand_via_pred_direct_optab (optab op,
 						 unsigned int merge_argno)
 {
   machine_mode mode = get_mode (0);
   insn_code icode = direct_optab_handler (op, mode);
-  return expand_via_pred_insn (icode, nops, 0, merge_argno, false);
+  return expand_via_pred_insn (icode, 0, merge_argno, false);
 }
 
 /* Implement the call using instruction ICODE.  The instruction takes
-   NOPS input operand (not counting the predicate and the fallback value).
    The last NSCALAR inputs are scalar, and map to scalar operands
    in the underlying instruction.  Merging forms use argument MERGE_ARGNO
    as the fallback value.  If PRED_LAST_P is true, predicated register is
    at the end.  */
 rtx
-function_expander::expand_via_pred_insn (insn_code icode, unsigned int nops,
+function_expander::expand_via_pred_insn (insn_code icode,
 					 unsigned int nscalar,
 					 unsigned int merge_argno,
 					 bool pred_last_p)
 {
+  /* Discount the output, predicate, and fallback value.  */
+  unsigned int nops = insn_data[icode].n_operands - 3;
   machine_mode mode = get_mode (0);
   machine_mode pred_mode = get_pred_mode (0);
 
@@ -1911,11 +1906,11 @@ function_expander::expand_via_pred_insn (insn_code icode, unsigned int nops,
 }
 
 /* Implement the call using instruction ICODE, which is a predicated
-   operation that returns arbitrary values for inactive lanes.  NOPS is
-   the number of inputs operands, not counting the governing predicate.  */
+   operation that returns arbitrary values for inactive lanes.  */
 rtx
-function_expander::expand_via_pred_x_insn (insn_code icode, unsigned int nops)
+function_expander::expand_via_pred_x_insn (insn_code icode)
 {
+  unsigned int nops = m_args.length () - 1;
   machine_mode mode = get_mode (0);
   machine_mode pred_mode = get_pred_mode (0);
 
@@ -1965,7 +1960,7 @@ function_expander::expand_signed_pred_op (rtx_code code_for_sint,
 	}
       else
 	icode = code_for_aarch64_pred (unspec_cond, get_mode (0));
-      return expand_via_pred_x_insn (icode, 2);
+      return expand_via_pred_x_insn (icode);
     }
   else
     {
@@ -1978,7 +1973,7 @@ function_expander::expand_signed_pred_op (rtx_code code_for_sint,
 	}
       else
 	icode = code_for_cond (unspec_cond, get_mode (0));
-      return expand_via_pred_insn (icode, 2, 0, 1, false);
+      return expand_via_pred_insn (icode, 0, 1, false);
     }
 }
 
@@ -2006,7 +2001,7 @@ function_expander::expand_signed_pred_op (int unspec_for_sint,
 	}
       else
 	icode = code_for_aarch64_pred (unspec_for_fp, get_mode (0));
-      return expand_via_pred_x_insn (icode, 2);
+      return expand_via_pred_x_insn (icode);
     }
   else
     {
@@ -2019,7 +2014,7 @@ function_expander::expand_signed_pred_op (int unspec_for_sint,
 	}
       else
 	icode = code_for_cond (unspec_for_fp, get_mode (0));
-      return expand_via_pred_insn (icode, 2, 0, 1, false);
+      return expand_via_pred_insn (icode, 0, 1, false);
     }
 }
 
@@ -2036,7 +2031,7 @@ function_expander::expand_via_signed_unpred_insn (rtx_code code_for_sint,
     icode = code_for_aarch64 (code_for_uint, code_for_uint, get_mode (0));
   else
     icode = code_for_aarch64 (code_for_sint, code_for_sint, get_mode (0));
-  return expand_via_unpred_insn (icode, 2);
+  return expand_via_unpred_insn (icode);
 }
 
 /* Expand a call to a SHAPE_shift_right_imm function using predicated
@@ -2046,7 +2041,7 @@ rtx
 function_expander::expand_pred_shift_right_imm (insn_code icode)
 {
   require_immediate_range (2, 1, GET_MODE_UNIT_BITSIZE (get_mode (0)));
-  return expand_via_pred_insn (icode, 2, 0, 1, false);
+  return expand_via_pred_insn (icode, 0, 1, false);
 }
 
 /* Require that argument ARGNO is a constant integer in the range
[SVE ACLE] Tidy handling of expand routines (2)

This patch adds a utility function for selecting the value to use
for inactive lanes of a predicated operation.  That lets us
separate the SEL-like functions from the cond_*_optab-like
functions, which in turn reduces the clutter in the callers.
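
In outline, the rule that the new helper implements is (a standalone
restatement with the rtx plumbing stripped out; the declarations here
are simplified stand-ins for the real ones):

  enum predication { PRED_z, PRED_m, PRED_x, PRED_none };
  const unsigned int DEFAULT_MERGE_ARGNO = ~0U;

  /* For OUTPUT = COND ? FN (INPUTS) : FALLBACK, return the index of
     the argument that provides FALLBACK, or -1 to use zero.  NOPS is
     the number of operands in INPUTS.  */
  int
  fallback_argno (predication pred, unsigned int nops,
		  unsigned int merge_argno)
  {
    /* _z functions merge with zero.  */
    if (pred == PRED_z)
      return -1;
    /* By default, unary _m functions take an explicit fallback as
       their first argument; other merging forms reuse the first
       data input.  */
    if (merge_argno == DEFAULT_MERGE_ARGNO)
      merge_argno = (nops == 1 && pred == PRED_m) ? 0 : 1;
    return (int) merge_argno;
  }

(The real get_fallback_value returns the rtx directly and also advances
the caller's argument index when the fallback is argument 0.)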


diff --git a/gcc/config/aarch64/aarch64-sve-builtins.c b/gcc/config/aarch64/aarch64-sve-builtins.c
index 97e18dd7c1a..6b4018c0e45 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.c
+++ b/gcc/config/aarch64/aarch64-sve-builtins.c
@@ -45,6 +45,9 @@
 
 namespace aarch64_sve {
 
+/* Used to represent the default merge argument index for _m functions.  */
+const unsigned int DEFAULT_MERGE_ARGNO = ~0U;
+
 /* Enumerates the SVE predicate and (data) vector types, together called
    "vector types" for brevity.  */
 enum vector_type {
@@ -421,8 +424,9 @@ private:
   rtx expand_signed_pred_op (int, int, int);
   rtx expand_via_unpred_direct_optab (optab, unsigned int = 0);
   rtx expand_via_unpred_insn (insn_code, unsigned int = 0);
-  rtx expand_via_pred_direct_optab (optab, unsigned int);
-  rtx expand_via_pred_insn (insn_code, unsigned int, unsigned int, bool);
+  rtx expand_via_pred_direct_optab (optab, unsigned int = DEFAULT_MERGE_ARGNO);
+  rtx expand_via_sel_insn (insn_code, unsigned int);
+  rtx expand_via_pred_insn (insn_code, unsigned int = DEFAULT_MERGE_ARGNO);
   rtx expand_via_signed_unpred_insn (rtx_code, rtx_code);
   rtx expand_via_pred_x_insn (insn_code);
   rtx expand_pred_shift_right_imm (insn_code);
@@ -433,6 +437,8 @@ private:
 
   machine_mode get_mode (unsigned int);
   machine_mode get_pred_mode (unsigned int);
+  rtx get_fallback_value (machine_mode, unsigned int,
+			  unsigned int, unsigned int &);
 
   void add_output_operand (machine_mode);
   void add_input_operand (rtx, machine_mode);
@@ -1696,12 +1702,12 @@ function_expander::expand_dup ()
       if (valid_for_const_vector_p (GET_MODE_INNER (mode), m_args.last ()))
 	{
 	  icode = code_for_vcond_mask (get_mode (0), get_mode (0));
-	  return expand_via_pred_insn (icode, 0, 1, true);
+	  return expand_via_sel_insn (icode, 0);
 	}
       else
 	{
 	  icode = code_for_aarch64_sel_dup (get_mode (0));
-	  return expand_via_pred_insn (icode, 1, 1, true);
+	  return expand_via_sel_insn (icode, 1);
 	}
     }
 }
@@ -1737,7 +1743,7 @@ function_expander::expand_mul ()
       return expand_via_pred_x_insn (icode);
     }
   else
-    return expand_via_pred_direct_optab (cond_smul_optab, 1);
+    return expand_via_pred_direct_optab (cond_smul_optab);
 }
 
 /* Expand a call to sqadd.  */
@@ -1843,65 +1849,65 @@ function_expander::expand_via_pred_direct_optab (optab op,
 {
   machine_mode mode = get_mode (0);
   insn_code icode = direct_optab_handler (op, mode);
-  return expand_via_pred_insn (icode, 0, merge_argno, false);
+  return expand_via_pred_insn (icode, merge_argno);
 }
 
-/* Implement the call using instruction ICODE.  The instruction takes
+/* Implement the call using instruction ICODE, which is a select-like
+   operation with the following operands:
+
+   0: output
+   1: true value
+   2: false value
+   3: predicate
+
    The last NSCALAR inputs are scalar, and map to scalar operands
-   in the underlying instruction.  Merging forms use argument MERGE_ARGNO
-   as the fallback value.  If PRED_LAST_P is true, predicated register is
-   at the end.  */
+   in the underlying instruction.  */
+rtx
+function_expander::expand_via_sel_insn (insn_code icode,
+					unsigned int nscalar)
+{
+  machine_mode mode = get_mode (0);
+  machine_mode pred_mode = get_pred_mode (0);
+
+  unsigned int opno = 0;
+  rtx false_arg = get_fallback_value (mode, 1, 0, opno);
+  rtx pred_arg = m_args[opno++];
+  rtx true_arg = m_args[opno++];
+
+  add_output_operand (mode);
+  if (nscalar)
+    add_input_operand (true_arg, GET_MODE_INNER (mode));
+  else
+    add_input_operand (true_arg, mode);
+  add_input_operand (false_arg, mode);
+  add_input_operand (pred_arg, pred_mode);
+  return generate_insn (icode);
+}
+
+/* Implement the call using instruction ICODE, which does the equivalent of:
+
+     OUTPUT = COND ? FN (INPUTS) : FALLBACK;
+
+   The operands are in the order above: OUTPUT, COND, INPUTS and FALLBACK.
+   Merging forms use argument MERGE_ARGNO as the fallback value.  */
 rtx
 function_expander::expand_via_pred_insn (insn_code icode,
-					 unsigned int nscalar,
-					 unsigned int merge_argno,
-					 bool pred_last_p)
+					 unsigned int merge_argno)
 {
   /* Discount the output, predicate, and fallback value.  */
   unsigned int nops = insn_data[icode].n_operands - 3;
   machine_mode mode = get_mode (0);
   machine_mode pred_mode = get_pred_mode (0);
 
+  unsigned int opno = 0;
+  rtx fallback_arg = get_fallback_value (mode, nops, merge_argno, opno);
+  rtx pred_arg = m_args[opno++];
+
   add_output_operand (mode);
-  if (nops == 1 && m_fi.pred == PRED_m)
-    {
-      /* For unary ops, the fallback value is provided by a separate
-	 argument that is passed before the governing predicate.  */
-      /* If the predicate should go first.  */
-      if (!pred_last_p)
-	add_input_operand (m_args[1], pred_mode);
-      /* If the only input is vector or scalar.  */
-      if (nscalar)
-	add_input_operand (m_args[2], GET_MODE_INNER (mode));
-      else
-	add_input_operand (m_args[2], mode);
-      add_input_operand (m_args[0], mode);
-      /* If the predicate should go last.  */
-      if (pred_last_p)
-	add_input_operand (m_args[1], pred_mode);
-    }
-  else
-    {
-      unsigned int i = 0;
-      /* If the predicate should go first.  */
-      if (!pred_last_p)
-	add_input_operand (m_args[0], pred_mode);
-      /* First vector inputs.  */
-      for (; i < nops - nscalar; ++i)
-	add_input_operand (m_args[i + 1], mode);
-      /* Rest are scalar.  */
-      for (; i < nops; ++i)
-	add_input_operand (m_args[i + 1], GET_MODE_INNER (mode));
-      if (m_fi.pred == PRED_z)
-	/* Use zero as the fallback value.  */
-	add_input_operand (CONST0_RTX (mode), mode);
-      else
-	/* Use the first data input as the fallback value.  */
-	add_input_operand (copy_rtx (m_ops[merge_argno + 1].value), mode);
-      /* If the predicate should go last.  */
-      if (pred_last_p)
-	add_input_operand (m_args[0], pred_mode);
-    }
+  add_input_operand (pred_arg, pred_mode);
+  for (unsigned int i = 0; i < nops; ++i)
+    add_input_operand (m_args[opno + i], mode);
+  add_input_operand (fallback_arg, mode);
   return generate_insn (icode);
 }
 
@@ -1973,7 +1979,7 @@ function_expander::expand_signed_pred_op (rtx_code code_for_sint,
 	}
       else
 	icode = code_for_cond (unspec_cond, get_mode (0));
-      return expand_via_pred_insn (icode, 0, 1, false);
+      return expand_via_pred_insn (icode);
     }
 }
 
@@ -2014,7 +2020,7 @@ function_expander::expand_signed_pred_op (int unspec_for_sint,
 	}
       else
 	icode = code_for_cond (unspec_for_fp, get_mode (0));
-      return expand_via_pred_insn (icode, 0, 1, false);
+      return expand_via_pred_insn (icode);
     }
 }
 
@@ -2041,7 +2047,7 @@ rtx
 function_expander::expand_pred_shift_right_imm (insn_code icode)
 {
   require_immediate_range (2, 1, GET_MODE_UNIT_BITSIZE (get_mode (0)));
-  return expand_via_pred_insn (icode, 0, 1, false);
+  return expand_via_pred_insn (icode);
 }
 
 /* Require that argument ARGNO is a constant integer in the range
@@ -2097,6 +2103,35 @@ function_expander::get_pred_mode (unsigned int i)
   return aarch64_sve_pred_mode (elem_bytes).require ();
 }
 
+/* For a function that does the equivalent of:
+
+     OUTPUT = COND ? FN (INPUTS) : FALLBACK;
+
+   return the value of FALLBACK.
+
+   MODE is the mode of the value.  NOPS is the number of operands
+   in INPUTS.  MERGE_ARGNO is the member of m_args to use for _m functions,
+   or DEFAULT_MERGE_ARGNO if we should apply the usual rules.
+
+   OPNO is the caller's index into m_args.  If the returned value is
+   argument 0 (as for unary _m operations), increment OPNO past the
+   returned argument.  */
+rtx
+function_expander::get_fallback_value (machine_mode mode, unsigned int nops,
+				       unsigned int merge_argno,
+				       unsigned int &opno)
+{
+  if (m_fi.pred == PRED_z)
+    return CONST0_RTX (mode);
+
+  if (merge_argno == DEFAULT_MERGE_ARGNO)
+    merge_argno = nops == 1 && m_fi.pred == PRED_m ? 0 : 1;
+
+  if (merge_argno == 0)
+    return m_args[opno++];
+  return m_args[merge_argno];
+}
+
 /* Add an output operand of mode MODE to the instruction, binding it
    to the preferred target rtx if possible.  */
 void
