[PATCH, aarch64 4/4] aarch64: Add movprfx patterns for zero and unmatched select

Mon Jul 2 03:41:00 GMT 2018

* config/aarch64/aarch64-protos.h, config/aarch64/aarch64.c
	(aarch64_sve_prepare_conditional_op): Remove.
	* config/aarch64/aarch64-sve.md (cond_<SVE_INT_BINARY><SVE_I>):
	Allow aarch64_simd_reg_or_zero as select operand; remove
	the aarch64_sve_prepare_conditional_op call.
	(cond_<SVE_INT_BINARY_SD><SVE_SDI>): Likewise.
	(cond_<SVE_COND_FP_BINARY><SVE_F>): Likewise.
	(*cond_<SVE_INT_BINARY><SVE_I>_z): New pattern.
	(*cond_<SVE_INT_BINARY_SD><SVE_SDI>_z): New pattern.
	(*cond_<SVE_COND_FP_BINARY><SVE_F>_z): New pattern.
	(*cond_<SVE_INT_BINARY><SVE_I>_any): New pattern.
	(*cond_<SVE_INT_BINARY_SD><SVE_SDI>_any): New pattern.
	(*cond_<SVE_COND_FP_BINARY><SVE_F>_any): New pattern
	and a splitter to match all of the integer *_any patterns.
	* config/aarch64/predicates.md (aarch64_sve_any_binary_operator): New.
---
 gcc/config/aarch64/aarch64-protos.h |   1 -
 gcc/config/aarch64/aarch64.c        |  54 ----------
 gcc/config/aarch64/aarch64-sve.md   | 154 ++++++++++++++++++++++++----
 gcc/config/aarch64/predicates.md    |   3 +
 4 files changed, 136 insertions(+), 76 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 87c6ae20278..514ddc457ca 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -513,7 +513,6 @@ bool aarch64_gen_adjusted_ldpstp (rtx *, bool, scalar_mode, RTX_CODE);
 void aarch64_expand_sve_vec_cmp_int (rtx, rtx_code, rtx, rtx);
 bool aarch64_expand_sve_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
 void aarch64_expand_sve_vcond (machine_mode, machine_mode, rtx *);
-void aarch64_sve_prepare_conditional_op (rtx *, unsigned int, bool);
 #endif /* RTX_CODE */
 
 void aarch64_init_builtins (void);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 3af7e98e166..d75d45f4b8b 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -16058,60 +16058,6 @@ aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode,
   emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL));
 }
 
-/* Prepare a cond_<optab><mode> operation that has the operands
-   given by OPERANDS, where:
-
-   - operand 0 is the destination
-   - operand 1 is a predicate
-   - operands 2 to NOPS - 2 are the operands to an operation that is
-     performed for active lanes
-   - operand NOPS - 1 specifies the values to use for inactive lanes.
-
-   COMMUTATIVE_P is true if operands 2 and 3 are commutative.  In that case,
-   no pattern is provided for a tie between operands 3 and NOPS - 1.  */
-
-void
-aarch64_sve_prepare_conditional_op (rtx *operands, unsigned int nops,
-				    bool commutative_p)
-{
-  /* We can do the operation directly if the "else" value matches one
-     of the other inputs.  */
-  for (unsigned int i = 2; i < nops - 1; ++i)
-    if (rtx_equal_p (operands[i], operands[nops - 1]))
-      {
-	if (i == 3 && commutative_p)
-	  std::swap (operands[2], operands[3]);
-	return;
-      }
-
-  /* If the "else" value is different from the other operands, we have
-     the choice of doing a SEL on the output or a SEL on an input.
-     Neither choice is better in all cases, but one advantage of
-     selecting the input is that it can avoid a move when the output
-     needs to be distinct from the inputs.  E.g. if operand N maps to
-     register N, selecting the output would give:
-
-	MOVPRFX Z0.S, Z2.S
-	ADD Z0.S, P1/M, Z0.S, Z3.S
-	SEL Z0.S, P1, Z0.S, Z4.S
-
-     whereas selecting the input avoids the MOVPRFX:
-
-	SEL Z0.S, P1, Z2.S, Z4.S
-	ADD Z0.S, P1/M, Z0.S, Z3.S.
-
-     ??? Matching the other input can produce
-
-	MOVPRFX Z4.S, P1/M, Z2.S
-	ADD Z4.S, P1/M, Z4.S, Z3.S
-   */
-  machine_mode mode = GET_MODE (operands[0]);
-  rtx temp = gen_reg_rtx (mode);
-  rtvec vec = gen_rtvec (3, operands[1], operands[2], operands[nops - 1]);
-  emit_set_insn (temp, gen_rtx_UNSPEC (mode, vec, UNSPEC_SEL));
-  operands[2] = operands[nops - 1] = temp;
-}
-
 /* Implement TARGET_MODES_TIEABLE_P.  In principle we should always return
    true.  However due to issues with register allocation it is preferable
    to avoid tieing integer scalar and FP scalar modes.  Executing integer
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index db16affc093..b16d0455159 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1817,13 +1817,10 @@
 	   (SVE_INT_BINARY:SVE_I
 	     (match_operand:SVE_I 2 "register_operand")
 	     (match_operand:SVE_I 3 "register_operand"))
-	   (match_operand:SVE_I 4 "register_operand")]
+	   (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
 	  UNSPEC_SEL))]
   "TARGET_SVE"
-{
-  bool commutative_p = (GET_RTX_CLASS (<CODE>) == RTX_COMM_ARITH);
-  aarch64_sve_prepare_conditional_op (operands, 5, commutative_p);
-})
+)
 
 (define_expand "cond_<optab><mode>"
   [(set (match_operand:SVE_SDI 0 "register_operand")
@@ -1832,19 +1829,12 @@
 	   (SVE_INT_BINARY_SD:SVE_SDI
 	     (match_operand:SVE_SDI 2 "register_operand")
 	     (match_operand:SVE_SDI 3 "register_operand"))
-	   (match_operand:SVE_SDI 4 "register_operand")]
+	   (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
 	  UNSPEC_SEL))]
   "TARGET_SVE"
-{
-  bool commutative_p = (GET_RTX_CLASS (<CODE>) == RTX_COMM_ARITH);
-  aarch64_sve_prepare_conditional_op (operands, 5, commutative_p);
-})
-
-;; Predicated integer operations.
-;; All other things being equal, prefer the patterns for which the
-;; destination matches the select input, as that gives us the most
-;; freedom to swap the other operands.
+)
 
+;; Predicated integer operations with select matching the output operand.
 (define_insn "*cond_<optab><mode>_0"
   [(set (match_operand:SVE_I 0 "register_operand" "+w, w, ?&w")
 	(unspec:SVE_I
@@ -1945,6 +1935,87 @@
   [(set_attr "movprfx" "*,yes")]
 )
 
+;; Predicated integer operations with select matching zero (inactive lanes are zeroed).
+(define_insn "*cond_<optab><mode>_z"
+  [(set (match_operand:SVE_I 0 "register_operand" "=&w")	; Earlyclobber: the MOVPRFX writes %0 before %3 is read.
+	(unspec:SVE_I
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	   (SVE_INT_BINARY:SVE_I
+	     (match_operand:SVE_I 2 "register_operand" "w")
+	     (match_operand:SVE_I 3 "register_operand" "w"))
+	   (match_operand:SVE_I 4 "aarch64_simd_imm_zero")]	; The "else" value: constant zero.
+	  UNSPEC_SEL))]
+  "TARGET_SVE"
+  "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  [(set_attr "movprfx" "yes")]	; Output is a zeroing MOVPRFX of %2 followed by the merging operation.
+)
+
+(define_insn "*cond_<optab><mode>_z"	; Zero-select pattern for the SVE_INT_BINARY_SD operations on SVE_SDI modes.
+  [(set (match_operand:SVE_SDI 0 "register_operand" "=&w")	; Earlyclobber: the MOVPRFX writes %0 before %3 is read.
+	(unspec:SVE_SDI
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	   (SVE_INT_BINARY_SD:SVE_SDI
+	     (match_operand:SVE_SDI 2 "register_operand" "w")
+	     (match_operand:SVE_SDI 3 "register_operand" "w"))
+	   (match_operand:SVE_SDI 4 "aarch64_simd_imm_zero")]	; The "else" value: constant zero.
+	  UNSPEC_SEL))]
+  "TARGET_SVE"
+  "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  [(set_attr "movprfx" "yes")]	; Output is a zeroing MOVPRFX of %2 followed by the merging operation.
+)
+
+;; Synthetic predication of integer operations with select unmatched.
+(define_insn "*cond_<optab><mode>_any"
+  [(set (match_operand:SVE_I 0 "register_operand" "=&w")
+	(unspec:SVE_I
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	   (SVE_INT_BINARY:SVE_I
+	     (match_operand:SVE_I 2 "register_operand" "w")
+	     (match_operand:SVE_I 3 "register_operand" "w"))
+	   (match_operand:SVE_I 4 "register_operand"   "w")]	; The "else" value: an arbitrary register.
+	  UNSPEC_SEL))]
+  "TARGET_SVE"
+  "#"	; No direct assembly; expected to be split by the post-reload define_split for these patterns.
+)
+
+(define_insn "*cond_<optab><mode>_any"	; Unmatched-select pattern for the SVE_INT_BINARY_SD operations on SVE_SDI modes.
+  [(set (match_operand:SVE_SDI 0 "register_operand" "=&w")
+	(unspec:SVE_SDI
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	   (SVE_INT_BINARY_SD:SVE_SDI
+	     (match_operand:SVE_SDI 2 "register_operand" "w")
+	     (match_operand:SVE_SDI 3 "register_operand" "w"))
+	   (match_operand:SVE_SDI 4 "register_operand"   "w")]	; The "else" value: an arbitrary register.
+	  UNSPEC_SEL))]
+  "TARGET_SVE"
+  "#"	; No direct assembly; expected to be split by the post-reload define_split for these patterns.
+)
+
+(define_split	; Post-reload split of an integer conditional binary operation whose select operand matches no other operand.
+  [(set (match_operand:SVE_I 0 "register_operand")
+	(unspec:SVE_I
+	  [(match_operand:<VPRED> 1 "register_operand")
+	   (match_operator:SVE_I 5 "aarch64_sve_any_binary_operator"	; Operand 5 captures the operation so it can be re-emitted below.
+	     [(match_operand:SVE_I 2 "register_operand")
+	      (match_operand:SVE_I 3 "register_operand")])
+	   (match_operand:SVE_I 4 "register_operand")]
+	  UNSPEC_SEL))]
+  "TARGET_SVE && reload_completed
+   && !(rtx_equal_p (operands[0], operands[4])
+        || rtx_equal_p (operands[2], operands[4])
+        || rtx_equal_p (operands[3], operands[4]))"
+  ; Not matchable by any one insn or movprfx insn.  We need a separate select.
+  [(set (match_dup 0)	; Step 1: op0 = SEL (op1, op2, op4).
+	(unspec:SVE_I [(match_dup 1) (match_dup 2) (match_dup 4)]
+                      UNSPEC_SEL))
+   (set (match_dup 0)	; Step 2: the operation on op0 and op3, with op0 itself as the select value.
+	(unspec:SVE_I
+	  [(match_dup 1)
+	   (match_op_dup 5 [(match_dup 0) (match_dup 3)])
+           (match_dup 0)]
+	  UNSPEC_SEL))]
+)
+
 ;; Set operand 0 to the last active element in operand 3, or to tied
 ;; operand 1 if no elements are active.
 (define_insn "fold_extract_last_<mode>"
@@ -2731,12 +2802,10 @@
 	     [(match_operand:SVE_F 2 "register_operand")
 	      (match_operand:SVE_F 3 "register_operand")]
 	     SVE_COND_FP_BINARY)
-	   (match_operand:SVE_F 4 "register_operand")]
+	   (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
 	  UNSPEC_SEL))]
   "TARGET_SVE"
-{
-  aarch64_sve_prepare_conditional_op (operands, 5, <commutative>);
-})
+)
 
 ;; Predicated floating-point operations with select matching output.
 (define_insn "*cond_<optab><mode>_0"
@@ -2744,8 +2813,7 @@
 	(unspec:SVE_F
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
 	   (unspec:SVE_F
-	     [(match_dup 1)
-	      (match_operand:SVE_F 2 "register_operand" "0, w, w")
+	     [(match_operand:SVE_F 2 "register_operand" "0, w, w")
 	      (match_operand:SVE_F 3 "register_operand" "w, 0, w")]
 	     SVE_COND_FP_BINARY)
 	   (match_dup 0)]
@@ -2794,6 +2862,50 @@
   [(set_attr "movprfx" "*,yes")]
 )
 
+;; Predicated floating-point operations with select matching zero (inactive lanes are zeroed).
+(define_insn "*cond_<optab><mode>_z"
+  [(set (match_operand:SVE_F 0 "register_operand" "=&w")	; Earlyclobber: the MOVPRFX writes %0 before %3 is read.
+	(unspec:SVE_F
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	   (unspec:SVE_F
+	     [(match_operand:SVE_F 2 "register_operand" "w")
+	      (match_operand:SVE_F 3 "register_operand" "w")]
+	     SVE_COND_FP_BINARY)
+	   (match_operand:SVE_F 4 "aarch64_simd_imm_zero")]	; The "else" value: constant zero.
+	  UNSPEC_SEL))]
+  "TARGET_SVE"
+  "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  [(set_attr "movprfx" "yes")]	; Output is a zeroing MOVPRFX of %2 followed by the merging operation.
+)
+
+;; Synthetic predication of floating-point operations with select unmatched.
+(define_insn_and_split "*cond_<optab><mode>_any"
+  [(set (match_operand:SVE_F 0 "register_operand" "=&w")
+	(unspec:SVE_F
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	   (unspec:SVE_F
+	     [(match_operand:SVE_F 2 "register_operand" "w")
+	      (match_operand:SVE_F 3 "register_operand" "w")]
+	     SVE_COND_FP_BINARY)
+	   (match_operand:SVE_F 4 "register_operand" "w")]	; The "else" value: an arbitrary register.
+	  UNSPEC_SEL))]
+  "TARGET_SVE"
+  "#"	; No direct assembly; expected to be split under the condition that follows.
+  "&& reload_completed
+   && !(rtx_equal_p (operands[0], operands[4])
+        || rtx_equal_p (operands[2], operands[4])
+        || rtx_equal_p (operands[3], operands[4]))"
+  ; Not matchable by any one insn or movprfx insn.  We need a separate select.
+  [(set (match_dup 0)	; Step 1: op0 = SEL (op1, op2, op4).
+	(unspec:SVE_F [(match_dup 1) (match_dup 2) (match_dup 4)] UNSPEC_SEL))
+   (set (match_dup 0)	; Step 2: the operation on op0 and op3, with op0 itself as the select value.
+	(unspec:SVE_F
+	  [(match_dup 1)
+	   (unspec:SVE_F [(match_dup 0) (match_dup 3)] SVE_COND_FP_BINARY)
+           (match_dup 0)]
+	  UNSPEC_SEL))]
+)
+
 ;; Shift an SVE vector left and insert a scalar into element 0.
 (define_insn "vec_shl_insert_<mode>"
   [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 7aec76d681f..4acbc218a8d 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -625,3 +625,6 @@
 ;; A special predicate that doesn't match a particular mode.
 (define_special_predicate "aarch64_any_register_operand"
   (match_code "reg"))
+
+(define_predicate "aarch64_sve_any_binary_operator"	; Binary rtx codes accepted by the match_operator in the SVE cond_* define_split.
+  (match_code "plus,minus,mult,div,udiv,smax,umax,smin,umin,and,ior,xor"))
-- 
2.17.1