This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
[committed][AArch64] Add more SVE FMLA and FMAD /z alternatives
- From: Richard Sandiford <richard dot sandiford at arm dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Kugan Vivekanandarajah <kugan dot vivekanandarajah at linaro dot org>
- Date: Thu, 15 Aug 2019 09:40:35 +0100
- Subject: [committed][AArch64] Add more SVE FMLA and FMAD /z alternatives
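This patch makes the floating-point conditional FMA patterns provide the
same /z alternatives as the integer patterns added by a previous patch.
We can handle cases in which an individual input is allocated to the same
register as the output, so we don't need to force all registers to be
different.  In those cases the output register is first zeroed in its
inactive lanes with a MOVPRFX /z from itself; the operation then uses
FMLA/FMLS when the tied input is the addend, or FMAD/FMSB when the tied
input is one of the multiplicands.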
Tested on aarch64-linux-gnu (with and without SVE) and aarch64_be-elf.
Applied as r274516.
Richard
2019-08-15  Richard Sandiford  <richard.sandiford@arm.com>
	    Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>

gcc/
	* config/aarch64/aarch64-sve.md
	(*cond_<SVE_COND_FP_TERNARY:optab><SVE_F:mode>_any): Add /z
	alternatives in which one of the inputs is in the same register
	as the output.

gcc/testsuite/
	* gcc.target/aarch64/sve/cond_mla_5.c: Allow FMAD as well as FMLA
	and FMSB as well as FMLS.
Index: gcc/config/aarch64/aarch64-sve.md
===================================================================
--- gcc/config/aarch64/aarch64-sve.md 2019-08-15 09:37:10.528856480 +0100
+++ gcc/config/aarch64/aarch64-sve.md 2019-08-15 09:38:53.656095524 +0100
@@ -3844,17 +3844,17 @@ (define_insn_and_rewrite "*cond_<optab><
;; Predicated floating-point ternary operations, merging with an
;; independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any"
- [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, ?&w")
+ [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
(unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
(unspec:SVE_F
[(match_operand 6)
(match_operand:SI 7 "aarch64_sve_gp_strictness")
- (match_operand:SVE_F 2 "register_operand" "w, w, w")
- (match_operand:SVE_F 3 "register_operand" "w, w, w")
- (match_operand:SVE_F 4 "register_operand" "w, w, w")]
+ (match_operand:SVE_F 2 "register_operand" "w, w, 0, w, w, w")
+ (match_operand:SVE_F 3 "register_operand" "w, w, w, 0, w, w")
+ (match_operand:SVE_F 4 "register_operand" "w, 0, w, w, w, w")]
SVE_COND_FP_TERNARY)
- (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
+ (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
UNSPEC_SEL))]
"TARGET_SVE
&& !rtx_equal_p (operands[2], operands[5])
@@ -3863,6 +3863,9 @@ (define_insn_and_rewrite "*cond_<optab><
&& aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
"@
movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
#"
"&& 1"
Index: gcc/testsuite/gcc.target/aarch64/sve/cond_mla_5.c
===================================================================
--- gcc/testsuite/gcc.target/aarch64/sve/cond_mla_5.c 2019-08-15 09:22:03.047558159 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/cond_mla_5.c 2019-08-15 09:38:53.656095524 +0100
@@ -39,13 +39,13 @@ TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
-/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
-/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
-/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\t(?:fmla|fmad)\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\t(?:fmla|fmad)\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\t(?:fmla|fmad)\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
-/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
-/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
-/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\t(?:fmls|fmsb)\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\t(?:fmls|fmsb)\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\t(?:fmls|fmsb)\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/z,} 2 } } */
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z,} 4 } } */