
Re: [PATCH][AArch64]Add vec_shr pattern for 64-bit vectors using ush{l,r}; enable tests.



...Patch attached...

Alan Lawrence wrote:
Following the recent vectorizer changes implementing reductions via shifts, AArch64 will now vectorize reduction loops such as this

unsigned char in[8] = {1, 3, 5, 7, 9, 11, 13, 15};

int
main (unsigned char argc, char **argv)
{
   unsigned char prod = 1;

   /* Prevent constant propagation of the entire loop below.  */
   asm volatile ("" : : : "memory");

   for (unsigned char i = 0; i < 8; i++)
     prod *= in[i];

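   /* 1*3*5*7*9*11*13*15 == 2027025, which truncates to 17 in an
      unsigned char (2027025 mod 256 == 17).  */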
   if (prod != 17)
     __builtin_printf ("Failed %d\n", prod);

   return 0;
}

using 'ext' instructions generated via aarch64_expand_vec_perm_const:

main:
         adrp    x0, .LANCHOR0
         movi    v2.2s, 0    <=== note: zero vector, kept live for the 'ext's below
         ldr     d1, [x0, #:lo12:.LANCHOR0]
         ext     v0.8b, v1.8b, v2.8b, #4
         mul     v1.8b, v1.8b, v0.8b
         ext     v0.8b, v1.8b, v2.8b, #2
         mul     v0.8b, v1.8b, v0.8b
         ext     v2.8b, v0.8b, v2.8b, #1
         mul     v0.8b, v0.8b, v2.8b
         umov    w1, v0.b[0]
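
(Here 'ext' concatenates its two source registers and extracts eight bytes starting at the immediate offset, so with a zero second operand it acts as a whole-vector shift right; the movi exists only to keep that zero live across the sequence.)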

The 'ext' approach works for both 64-bit and 128-bit vectors, but for 64-bit vectors we can do slightly better using ushr; this patch improves the above to:

main:
         adrp    x0, .LANCHOR0
         ldr     d0, [x0, #:lo12:.LANCHOR0]
         ushr    d1, d0, 32
         mul     v0.8b, v0.8b, v1.8b
         ushr    d1, d0, 16
         mul     v0.8b, v0.8b, v1.8b
         ushr    d1, d0, 8
         mul     v0.8b, v0.8b, v1.8b
         umov    w1, v0.b[0]
	...
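
For reference, here is a minimal C sketch of the same shift-and-multiply reduction using ACLE NEON intrinsics (my illustration only, not something the patch adds; it assumes little-endian lane numbering):

#include <arm_neon.h>

unsigned char
reduce_mul_u8x8 (uint8x8_t v)
{
  /* Shift the whole 64-bit vector right by 32, 16, then 8 bits,
     multiplying lanes pairwise after each shift; lane 0 ends up
     holding the product of all eight bytes.  */
  v = vmul_u8 (v, vreinterpret_u8_u64 (vshr_n_u64 (vreinterpret_u64_u8 (v), 32)));
  v = vmul_u8 (v, vreinterpret_u8_u64 (vshr_n_u64 (vreinterpret_u64_u8 (v), 16)));
  v = vmul_u8 (v, vreinterpret_u8_u64 (vshr_n_u64 (vreinterpret_u64_u8 (v), 8)));
  return vget_lane_u8 (v, 0);
}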

Tested with bootstrap + check-gcc on aarch64-none-linux-gnu.
Cross-testing of check-gcc on aarch64_be-none-elf in progress.

Ok if no regressions on big-endian?

Cheers,
--Alan

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md (vec_shr<mode>): New.

gcc/testsuite/ChangeLog:

	* lib/target-supports.exp
	(check_effective_target_whole_vector_shift): Add aarch64{,_be}.
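
(whole_vector_shift is the effective-target keyword the reduction-via-shift tests check for, so adding aarch64*-*-* here is what enables those tests on this port.)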



diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index ef196e4b6fb39c0d2fd9ebfee76abab8369b1e92..397cb5186dd4ff000307f3b14bb4964d84c79469 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -779,6 +779,21 @@
   }
 )
 
+;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
+(define_insn "vec_shr_<mode>"
+  [(set (match_operand:VD 0 "register_operand" "=w")
+        (lshiftrt:VD (match_operand:VD 1 "register_operand" "w")
+		     (match_operand:SI 2 "immediate_operand" "i")))]
+  "TARGET_SIMD"
+  {
+    if (BYTES_BIG_ENDIAN)
+      return "ushl %d0, %d1, %2";
+    else
+      return "ushr %d0, %d1, %2";
+  }
+  [(set_attr "type" "neon_shift_imm")]
+)
+
 (define_insn "aarch64_simd_vec_setv2di"
   [(set (match_operand:V2DI 0 "register_operand" "=w,w")
         (vec_merge:V2DI
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 3361c2f9e8d98c5d1cc194617db6281127db2277..464c910777a53867110b462f121c02525d8dd140 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3335,6 +3335,7 @@ proc check_effective_target_vect_shift { } {
 proc check_effective_target_whole_vector_shift { } {
     if { [istarget i?86-*-*] || [istarget x86_64-*-*]
 	 || [istarget ia64-*-*]
+	 || [istarget aarch64*-*-*]
 	 || ([check_effective_target_arm32]
 	     && [check_effective_target_arm_little_endian])
 	 || ([istarget mips*-*-*]
