This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Improve V?TImode shifts (PR target/82370)


Hi!

The following patch tweaks the TImode vector shifts similarly
to the earlier vector shift patch, so that for shifts by immediate
we can accept a memory input.  Additionally, it removes the vec_shl_*
expander, because the middle-end has dropped that a few years ago,
and merges the left and right shift patterns using code iterators.
Appart from the code/names that can be handled by mode attributes,
the only difference was that one of the insns had
(set_attr "atom_unit" "sishuf")
and the other didn't.  I hope that is just an error, I'd really expect
both vpslldq and vpsrldq to use the same atom unit, isn't that the case?
CCing H.J. for that.  If it is intentional difference, I can of course
adjust the patch and undo the merging of the 4 define_insns into 2.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2017-10-04  Jakub Jelinek  <jakub@redhat.com>

	PR target/82370
	* config/i386/sse.md (VIMAX_AVX2): Remove V4TImode.
	(VIMAX_AVX2_AVX512BW, VIMAX_AVX512VL): New mode iterators.
	(vec_shl_<mode>): Remove unused expander.
	(avx512bw_<shift_insn><mode>3): New define_insn.
	(<sse2_avx2>_ashl<mode>3, <sse2_avx2>_lshr<mode>3): Replaced by ...
	(<sse2_avx2>_<shift_insn><mode>3): ... this.  New define_insn.

	* gcc.target/i386/pr82370.c: New test.

--- gcc/config/i386/sse.md.jj	2017-10-04 12:18:19.000000000 +0200
+++ gcc/config/i386/sse.md	2017-10-04 15:34:00.541860351 +0200
@@ -371,10 +371,17 @@ (define_mode_iterator V16FI
   [V16SF V16SI])
 
 ;; ??? We should probably use TImode instead.
-(define_mode_iterator VIMAX_AVX2
+(define_mode_iterator VIMAX_AVX2_AVX512BW
   [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
 
-;; ??? This should probably be dropped in favor of VIMAX_AVX2.
+;; Suppose TARGET_AVX512BW as baseline
+(define_mode_iterator VIMAX_AVX512VL
+  [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
+
+(define_mode_iterator VIMAX_AVX2
+  [(V2TI "TARGET_AVX2") V1TI])
+
+;; ??? This should probably be dropped in favor of VIMAX_AVX2_AVX512BW.
 (define_mode_iterator SSESCALARMODE
   [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
 
@@ -10792,9 +10799,9 @@ (define_insn "<shift_insn><mode>3<mask_n
    (set_attr "mode" "<sseinsnmode>")])
 
 
-(define_expand "vec_shl_<mode>"
+(define_expand "vec_shr_<mode>"
   [(set (match_dup 3)
-	(ashift:V1TI
+	(lshiftrt:V1TI
 	 (match_operand:VI_128 1 "register_operand")
 	 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
    (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
@@ -10805,48 +10812,24 @@ (define_expand "vec_shl_<mode>"
   operands[4] = gen_lowpart (<MODE>mode, operands[3]);
 })
 
-(define_insn "<sse2_avx2>_ashl<mode>3"
-  [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
-	(ashift:VIMAX_AVX2
-	 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
-	 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
-  "TARGET_SSE2"
+(define_insn "avx512bw_<shift_insn><mode>3"
+  [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v")
+	(any_lshift:VIMAX_AVX512VL
+	 (match_operand:VIMAX_AVX512VL 1 "nonimmediate_operand" "vm")
+	 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+  "TARGET_AVX512BW"
 {
   operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
-
-  switch (which_alternative)
-    {
-    case 0:
-      return "pslldq\t{%2, %0|%0, %2}";
-    case 1:
-      return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
-    default:
-      gcc_unreachable ();
-    }
+  return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
 }
-  [(set_attr "isa" "noavx,avx")
-   (set_attr "type" "sseishft")
+  [(set_attr "type" "sseishft")
    (set_attr "length_immediate" "1")
-   (set_attr "prefix_data16" "1,*")
-   (set_attr "prefix" "orig,vex")
+   (set_attr "prefix" "maybe_evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_expand "vec_shr_<mode>"
-  [(set (match_dup 3)
-	(lshiftrt:V1TI
-	 (match_operand:VI_128 1 "register_operand")
-	 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
-   (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
-  "TARGET_SSE2"
-{
-  operands[1] = gen_lowpart (V1TImode, operands[1]);
-  operands[3] = gen_reg_rtx (V1TImode);
-  operands[4] = gen_lowpart (<MODE>mode, operands[3]);
-})
-
-(define_insn "<sse2_avx2>_lshr<mode>3"
+(define_insn "<sse2_avx2>_<shift_insn><mode>3"
   [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
-	(lshiftrt:VIMAX_AVX2
+	(any_lshift:VIMAX_AVX2
 	 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
 	 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
   "TARGET_SSE2"
@@ -10856,9 +10839,9 @@ (define_insn "<sse2_avx2>_lshr<mode>3"
   switch (which_alternative)
     {
     case 0:
-      return "psrldq\t{%2, %0|%0, %2}";
+      return "p<vshift>dq\t{%2, %0|%0, %2}";
     case 1:
-      return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
+      return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
     default:
       gcc_unreachable ();
     }
--- gcc/testsuite/gcc.target/i386/pr82370.c.jj	2017-10-04 16:01:16.350247297 +0200
+++ gcc/testsuite/gcc.target/i386/pr82370.c	2017-10-04 16:03:06.704922288 +0200
@@ -0,0 +1,18 @@
+/* PR target/82370 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl -mavx512bw -masm=att" } */
+/* { dg-final { scan-assembler-times "vpslldq\[ \t]\+\\\$5, \\(%\[a-z0-9,]*\\), %xmm\[0-9]\+" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrldq\[ \t]\+\\\$5, \\(%\[a-z0-9,]*\\), %xmm\[0-9]\+" 1 } } */
+/* { dg-final { scan-assembler-times "vpslldq\[ \t]\+\\\$5, \\(%\[a-z0-9,]*\\), %ymm\[0-9]\+" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrldq\[ \t]\+\\\$5, \\(%\[a-z0-9,]*\\), %ymm\[0-9]\+" 1 } } */
+/* { dg-final { scan-assembler-times "vpslldq\[ \t]\+\\\$5, \\(%\[a-z0-9,]*\\), %zmm\[0-9]\+" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrldq\[ \t]\+\\\$5, \\(%\[a-z0-9,]*\\), %zmm\[0-9]\+" 1 } } */
+
+#include <x86intrin.h>
+
+__m512i f1 (__m512i *x) { return _mm512_bslli_epi128 (*x, 5); }
+__m512i f2 (__m512i *x) { return _mm512_bsrli_epi128 (*x, 5); }
+__m256i f3 (__m256i *x) { return _mm256_bslli_epi128 (*x, 5); }
+__m256i f4 (__m256i *x) { return _mm256_bsrli_epi128 (*x, 5); }
+__m128i f5 (__m128i *x) { return _mm_bslli_si128 (*x, 5); }
+__m128i f6 (__m128i *x) { return _mm_bsrli_si128 (*x, 5); }

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]