[PATCH, aarch64 4/4] aarch64: Add movprfx patterns for zero and unmatched select
Richard Henderson
rth@twiddle.net
Mon Jul 2 03:41:00 GMT 2018
* config/aarch64/aarch64-protos.h, config/aarch64/aarch64.c
(aarch64_sve_prepare_conditional_op): Remove.
* config/aarch64/aarch64-sve.md (cond_<SVE_INT_BINARY><SVE_I>):
Allow aarch64_simd_reg_or_zero as select operand; remove
the aarch64_sve_prepare_conditional_op call.
(cond_<SVE_INT_BINARY_SD><SVE_SDI>): Likewise.
(cond_<SVE_COND_FP_BINARY><SVE_F>): Likewise.
(*cond_<SVE_INT_BINARY><SVE_I>_z): New pattern.
(*cond_<SVE_INT_BINARY_SD><SVE_SDI>_z): New pattern.
(*cond_<SVE_COND_FP_BINARY><SVE_F>_z): New pattern.
(*cond_<SVE_INT_BINARY><SVE_I>_any): New pattern.
(*cond_<SVE_INT_BINARY_SD><SVE_SDI>_any): New pattern.
(*cond_<SVE_COND_FP_BINARY><SVE_F>_any): New pattern,
and splitters to match all of the *_any patterns.
* config/aarch64/predicates.md (aarch64_sve_any_binary_operator): New.
---
gcc/config/aarch64/aarch64-protos.h | 1 -
gcc/config/aarch64/aarch64.c | 54 ----------
gcc/config/aarch64/aarch64-sve.md | 154 ++++++++++++++++++++++++----
gcc/config/aarch64/predicates.md | 3 +
4 files changed, 136 insertions(+), 76 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 87c6ae20278..514ddc457ca 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -513,7 +513,6 @@ bool aarch64_gen_adjusted_ldpstp (rtx *, bool, scalar_mode, RTX_CODE);
void aarch64_expand_sve_vec_cmp_int (rtx, rtx_code, rtx, rtx);
bool aarch64_expand_sve_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
void aarch64_expand_sve_vcond (machine_mode, machine_mode, rtx *);
-void aarch64_sve_prepare_conditional_op (rtx *, unsigned int, bool);
#endif /* RTX_CODE */
void aarch64_init_builtins (void);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 3af7e98e166..d75d45f4b8b 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -16058,60 +16058,6 @@ aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode,
emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL));
}
-/* Prepare a cond_<optab><mode> operation that has the operands
- given by OPERANDS, where:
-
- - operand 0 is the destination
- - operand 1 is a predicate
- - operands 2 to NOPS - 2 are the operands to an operation that is
- performed for active lanes
- - operand NOPS - 1 specifies the values to use for inactive lanes.
-
- COMMUTATIVE_P is true if operands 2 and 3 are commutative. In that case,
- no pattern is provided for a tie between operands 3 and NOPS - 1. */
-
-void
-aarch64_sve_prepare_conditional_op (rtx *operands, unsigned int nops,
- bool commutative_p)
-{
- /* We can do the operation directly if the "else" value matches one
- of the other inputs. */
- for (unsigned int i = 2; i < nops - 1; ++i)
- if (rtx_equal_p (operands[i], operands[nops - 1]))
- {
- if (i == 3 && commutative_p)
- std::swap (operands[2], operands[3]);
- return;
- }
-
- /* If the "else" value is different from the other operands, we have
- the choice of doing a SEL on the output or a SEL on an input.
- Neither choice is better in all cases, but one advantage of
- selecting the input is that it can avoid a move when the output
- needs to be distinct from the inputs. E.g. if operand N maps to
- register N, selecting the output would give:
-
- MOVPRFX Z0.S, Z2.S
- ADD Z0.S, P1/M, Z0.S, Z3.S
- SEL Z0.S, P1, Z0.S, Z4.S
-
- whereas selecting the input avoids the MOVPRFX:
-
- SEL Z0.S, P1, Z2.S, Z4.S
- ADD Z0.S, P1/M, Z0.S, Z3.S.
-
- ??? Matching the other input can produce
-
- MOVPRFX Z4.S, P1/M, Z2.S
- ADD Z4.S, P1/M, Z4.S, Z3.S
- */
- machine_mode mode = GET_MODE (operands[0]);
- rtx temp = gen_reg_rtx (mode);
- rtvec vec = gen_rtvec (3, operands[1], operands[2], operands[nops - 1]);
- emit_set_insn (temp, gen_rtx_UNSPEC (mode, vec, UNSPEC_SEL));
- operands[2] = operands[nops - 1] = temp;
-}
-
/* Implement TARGET_MODES_TIEABLE_P. In principle we should always return
true. However due to issues with register allocation it is preferable
to avoid tieing integer scalar and FP scalar modes. Executing integer
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index db16affc093..b16d0455159 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1817,13 +1817,10 @@
(SVE_INT_BINARY:SVE_I
(match_operand:SVE_I 2 "register_operand")
(match_operand:SVE_I 3 "register_operand"))
- (match_operand:SVE_I 4 "register_operand")]
+ (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE"
-{
- bool commutative_p = (GET_RTX_CLASS (<CODE>) == RTX_COMM_ARITH);
- aarch64_sve_prepare_conditional_op (operands, 5, commutative_p);
-})
+)
(define_expand "cond_<optab><mode>"
[(set (match_operand:SVE_SDI 0 "register_operand")
@@ -1832,19 +1829,12 @@
(SVE_INT_BINARY_SD:SVE_SDI
(match_operand:SVE_SDI 2 "register_operand")
(match_operand:SVE_SDI 3 "register_operand"))
- (match_operand:SVE_SDI 4 "register_operand")]
+ (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE"
-{
- bool commutative_p = (GET_RTX_CLASS (<CODE>) == RTX_COMM_ARITH);
- aarch64_sve_prepare_conditional_op (operands, 5, commutative_p);
-})
-
-;; Predicated integer operations.
-;; All other things being equal, prefer the patterns for which the
-;; destination matches the select input, as that gives us the most
-;; freedom to swap the other operands.
+)
+;; Predicated integer operations with select matching the output operand.
(define_insn "*cond_<optab><mode>_0"
[(set (match_operand:SVE_I 0 "register_operand" "+w, w, ?&w")
(unspec:SVE_I
@@ -1945,6 +1935,87 @@
[(set_attr "movprfx" "*,yes")]
)
+;; Predicated integer operations with select matching zero.
+(define_insn "*cond_<optab><mode>_z"
+ [(set (match_operand:SVE_I 0 "register_operand" "=&w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (SVE_INT_BINARY:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "w")
+ (match_operand:SVE_I 3 "register_operand" "w"))
+ (match_operand:SVE_I 4 "aarch64_simd_imm_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "yes")]
+)
+
+(define_insn "*cond_<optab><mode>_z"
+ [(set (match_operand:SVE_SDI 0 "register_operand" "=&w")
+ (unspec:SVE_SDI
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (SVE_INT_BINARY_SD:SVE_SDI
+ (match_operand:SVE_SDI 2 "register_operand" "w")
+ (match_operand:SVE_SDI 3 "register_operand" "w"))
+ (match_operand:SVE_SDI 4 "aarch64_simd_imm_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "yes")]
+)
+
+;; Synthetic predications with select unmatched.
+(define_insn "*cond_<optab><mode>_any"
+ [(set (match_operand:SVE_I 0 "register_operand" "=&w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (SVE_INT_BINARY:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "w")
+ (match_operand:SVE_I 3 "register_operand" "w"))
+ (match_operand:SVE_I 4 "register_operand" "w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "#"
+)
+
+(define_insn "*cond_<optab><mode>_any"
+ [(set (match_operand:SVE_SDI 0 "register_operand" "=&w")
+ (unspec:SVE_SDI
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (SVE_INT_BINARY_SD:SVE_SDI
+ (match_operand:SVE_SDI 2 "register_operand" "w")
+ (match_operand:SVE_SDI 3 "register_operand" "w"))
+ (match_operand:SVE_SDI 4 "register_operand" "w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "#" ; No single-instruction form: this insn is only ever emitted via a split.
+)
+
+(define_split
+ [(set (match_operand:SVE_I 0 "register_operand")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operator:SVE_I 5 "aarch64_sve_any_binary_operator"
+ [(match_operand:SVE_I 2 "register_operand")
+ (match_operand:SVE_I 3 "register_operand")])
+ (match_operand:SVE_I 4 "register_operand")]
+ UNSPEC_SEL))]
+ "TARGET_SVE && reload_completed
+ && !(rtx_equal_p (operands[0], operands[4])
+ || rtx_equal_p (operands[2], operands[4])
+ || rtx_equal_p (operands[3], operands[4]))"
+ ; Not matchable by any one insn or movprfx insn. We need a separate select.
+ [(set (match_dup 0)
+ (unspec:SVE_I [(match_dup 1) (match_dup 2) (match_dup 4)]
+ UNSPEC_SEL))
+ (set (match_dup 0)
+ (unspec:SVE_I
+ [(match_dup 1)
+ (match_op_dup 5 [(match_dup 0) (match_dup 3)])
+ (match_dup 0)]
+ UNSPEC_SEL))]
+)
+
;; Set operand 0 to the last active element in operand 3, or to tied
;; operand 1 if no elements are active.
(define_insn "fold_extract_last_<mode>"
@@ -2731,12 +2802,10 @@
[(match_operand:SVE_F 2 "register_operand")
(match_operand:SVE_F 3 "register_operand")]
SVE_COND_FP_BINARY)
- (match_operand:SVE_F 4 "register_operand")]
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE"
-{
- aarch64_sve_prepare_conditional_op (operands, 5, <commutative>);
-})
+)
;; Predicated floating-point operations with select matching output.
(define_insn "*cond_<optab><mode>_0"
@@ -2744,8 +2813,7 @@
(unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
(unspec:SVE_F
- [(match_dup 1)
- (match_operand:SVE_F 2 "register_operand" "0, w, w")
+ [(match_operand:SVE_F 2 "register_operand" "0, w, w")
(match_operand:SVE_F 3 "register_operand" "w, 0, w")]
SVE_COND_FP_BINARY)
(match_dup 0)]
@@ -2794,6 +2862,50 @@
[(set_attr "movprfx" "*,yes")]
)
+;; Predicated floating-point operations with select matching zero.
+(define_insn "*cond_<optab><mode>_z"
+ [(set (match_operand:SVE_F 0 "register_operand" "=&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand" "w")
+ (match_operand:SVE_F 3 "register_operand" "w")]
+ SVE_COND_FP_BINARY)
+ (match_operand:SVE_F 4 "aarch64_simd_imm_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "yes")]
+)
+
+;; Synthetic predication of floating-point operations with select unmatched.
+(define_insn_and_split "*cond_<optab><mode>_any"
+ [(set (match_operand:SVE_F 0 "register_operand" "=&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand" "w")
+ (match_operand:SVE_F 3 "register_operand" "w")]
+ SVE_COND_FP_BINARY)
+ (match_operand:SVE_F 4 "register_operand" "w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "#"
+ "&& reload_completed
+ && !(rtx_equal_p (operands[0], operands[4])
+ || rtx_equal_p (operands[2], operands[4])
+ || rtx_equal_p (operands[3], operands[4]))"
+ ; Not matchable by any one insn or movprfx insn. We need a separate select.
+ [(set (match_dup 0)
+ (unspec:SVE_F [(match_dup 1) (match_dup 2) (match_dup 4)] UNSPEC_SEL))
+ (set (match_dup 0)
+ (unspec:SVE_F
+ [(match_dup 1)
+ (unspec:SVE_F [(match_dup 0) (match_dup 3)] SVE_COND_FP_BINARY)
+ (match_dup 0)]
+ UNSPEC_SEL))]
+)
+
;; Shift an SVE vector left and insert a scalar into element 0.
(define_insn "vec_shl_insert_<mode>"
[(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 7aec76d681f..4acbc218a8d 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -625,3 +625,6 @@
;; A special predicate that doesn't match a particular mode.
(define_special_predicate "aarch64_any_register_operand"
(match_code "reg"))
+
+(define_predicate "aarch64_sve_any_binary_operator"
+ (match_code "plus,minus,mult,div,udiv,smax,umax,smin,umin,and,ior,xor"))
--
2.17.1
More information about the Gcc-patches
mailing list