This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[AArch64][SVE] Remove unnecessary PTRUEs from integer arithmetic


When using the unpredicated immediate forms of MUL, LSL, LSR and ASR,
the RTL patterns still carried the predicate operand that we created
for the other (predicated) forms.  This patch splits the patterns after
reload in order to get rid of the predicate, as we already do for WHILE.

Tested on aarch64-linux-gnu and applied.

Richard


2018-12-07  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* config/aarch64/aarch64-sve.md (*mul<mode>3, *v<optab><mode>3):
	Split the patterns after reload if we don't need the predicate
	operand.
	(*post_ra_mul<mode>3, *post_ra_v<optab><mode>3): New patterns.

gcc/testsuite/
	* gcc.target/aarch64/sve/pred_elim_2.c: New test.

Index: gcc/config/aarch64/aarch64-sve.md
===================================================================
--- gcc/config/aarch64/aarch64-sve.md	2018-12-07 15:01:42.602176516 +0000
+++ gcc/config/aarch64/aarch64-sve.md	2018-12-07 15:02:00.230028176 +0000
@@ -936,7 +936,7 @@ (define_expand "mul<mode>3"
 ;; predicate for the first alternative, but using Upa or X isn't likely
 ;; to gain much and would make the instruction seem less uniform to the
 ;; register allocator.
-(define_insn "*mul<mode>3"
+(define_insn_and_split "*mul<mode>3"
   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
 	(unspec:SVE_I
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
@@ -946,12 +946,30 @@ (define_insn "*mul<mode>3"
 	  UNSPEC_MERGE_PTRUE))]
   "TARGET_SVE"
   "@
-   mul\t%0.<Vetype>, %0.<Vetype>, #%3
+   #
    mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
    movprfx\t%0, %2\;mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  ; Split the unpredicated form after reload, so that we don't have
+  ; the unnecessary PTRUE.
+  "&& reload_completed
+   && !register_operand (operands[3], <MODE>mode)"
+  [(set (match_dup 0) (mult:SVE_I (match_dup 2) (match_dup 3)))]
+  ""
   [(set_attr "movprfx" "*,*,yes")]
 )
 
+;; Unpredicated multiplications by a constant (post-RA only).
+;; These are generated by splitting a predicated instruction whose
+;; predicate is unused.
+(define_insn "*post_ra_mul<mode>3"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w")
+	(mult:SVE_I
+	  (match_operand:SVE_I 1 "register_operand" "0")
+	  (match_operand:SVE_I 2 "aarch64_sve_mul_immediate")))]
+  "TARGET_SVE && reload_completed"
+  "mul\t%0.<Vetype>, %0.<Vetype>, #%2"
+)
+
 (define_insn "*madd<mode>"
   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
 	(plus:SVE_I
@@ -1232,7 +1250,7 @@ (define_expand "v<optab><mode>3"
 ;; actually need the predicate for the first alternative, but using Upa
 ;; or X isn't likely to gain much and would make the instruction seem
 ;; less uniform to the register allocator.
-(define_insn "*v<optab><mode>3"
+(define_insn_and_split "*v<optab><mode>3"
   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
 	(unspec:SVE_I
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
@@ -1242,12 +1260,28 @@ (define_insn "*v<optab><mode>3"
 	  UNSPEC_MERGE_PTRUE))]
   "TARGET_SVE"
   "@
-   <shift>\t%0.<Vetype>, %2.<Vetype>, #%3
+   #
    <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
    movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  "&& reload_completed
+   && !register_operand (operands[3], <MODE>mode)"
+  [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
+  ""
   [(set_attr "movprfx" "*,*,yes")]
 )
 
+;; Unpredicated shift operations by a constant (post-RA only).
+;; These are generated by splitting a predicated instruction whose
+;; predicate is unused.
+(define_insn "*post_ra_v<optab><mode>3"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w")
+	(ASHIFT:SVE_I
+	  (match_operand:SVE_I 1 "register_operand" "w")
+	  (match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))]
+  "TARGET_SVE && reload_completed"
+  "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
+)
+
 ;; LSL, LSR and ASR by a scalar, which expands into one of the vector
 ;; shifts above.
 (define_expand "<ASHIFT:optab><mode>3"
Index: gcc/testsuite/gcc.target/aarch64/sve/pred_elim_2.c
===================================================================
--- /dev/null	2018-11-29 13:15:04.463550658 +0000
+++ gcc/testsuite/gcc.target/aarch64/sve/pred_elim_2.c	2018-12-07 15:02:00.230028176 +0000
@@ -0,0 +1,31 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define TEST_OP(NAME, TYPE, OP)					\
+  void								\
+  NAME##_##TYPE (TYPE *restrict a, TYPE *restrict b, int n)	\
+  {								\
+    for (int i = 0; i < n; ++i)					\
+      a[i] = b[i] OP;						\
+  }
+
+#define TEST_TYPE(TYPE) \
+  TEST_OP (shl, TYPE, << 6) \
+  TEST_OP (shr, TYPE, >> 6) \
+  TEST_OP (mult, TYPE, * 0x2b)
+
+TEST_TYPE (int8_t)
+TEST_TYPE (int16_t)
+TEST_TYPE (int32_t)
+TEST_TYPE (int64_t)
+TEST_TYPE (uint8_t)
+TEST_TYPE (uint16_t)
+TEST_TYPE (uint32_t)
+TEST_TYPE (uint64_t)
+
+/* { dg-final { scan-assembler-times {\tlsl\t} 8 } } */
+/* { dg-final { scan-assembler-times {\tlsr\t} 4 } } */
+/* { dg-final { scan-assembler-times {\tasr\t} 4 } } */
+/* { dg-final { scan-assembler-times {\tmul\t} 8 } } */
+/* { dg-final { scan-assembler-not {\tptrue\t} } } */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]