This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix MMX/SSE/AVX* shifts by non-immediate scalar (PR target/80286)


Hi!

This patch deals just with correctness of vector shifts by scalar
non-immediate.  The manuals say the shift count is bits [0:63] of
the corresponding source operand (XMM reg or memory in some cases),
and if the count is bigger than number of bits - 1 in the vector element,
it is treated as number of bits shift count.
We are modelling it as SImode shift count though, the upper 32 bits
may be random in some cases which causes wrong-code.
Fixed by using DImode that matches what the insns do.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Any thoughts on what to do to generate reasonable code when the shift count
comes from memory (e.g. as int variable) or is in the low bits of some XMM
regioster?
First of all, perhaps we could have some combiner (or peephole) pattern that would
transform sign-extend from e.g. SI to DI on the shift count into zero-extend
if there are no other uses of the extension result - if the shift count is
negative in SImode (or even QImode), then it is already large number and the
upper 32 bits or more don't really change anything on that.
Then perhaps we could emit pmovzxdq for SSE4.1+ instead of going through
GPRs and back, or for SSE2 pxor on a scratch reg and punpck* to get it zero
extended.  Not sure if we want to add =v / vm alternative to
zero_extendsidi2*, it already has some x but with ?s that prevent the RA
from using it.  So thoughts on that?

2017-04-03  Jakub Jelinek  <jakub@redhat.com>

	PR target/80286
	* config/i386/i386.c (ix86_expand_args_builtin): If op has scalar
	int mode, convert_modes it to mode as unsigned, otherwise use
	lowpart_subreg to mode rather than SImode.
	* config/i386/sse.md (<mask_codefor>ashr<mode>3<mask_name>,
	ashr<mode>3, ashr<mode>3<mask_name>, <shift_insn><mode>3<mask_name>):
	Use DImode instead of SImode for the shift count operand.
	* config/i386/mmx.md (mmx_ashr<mode>3, mmx_<shift_insn><mode>3):
	Likewise.
testsuite/
	* gcc.target/i386/avx-pr80286.c: New test.
	* gcc.dg/pr80286.c: New test.

--- gcc/config/i386/i386.c.jj	2017-04-03 10:40:22.000000000 +0200
+++ gcc/config/i386/i386.c	2017-04-03 18:31:39.482367634 +0200
@@ -35582,10 +35582,17 @@ ix86_expand_args_builtin (const struct b
 	{
 	  /* SIMD shift insns take either an 8-bit immediate or
 	     register as count.  But builtin functions take int as
-	     count.  If count doesn't match, we put it in register.  */
+	     count.  If count doesn't match, we put it in register.
+	     The instructions are using 64-bit count, if op is just
+	     32-bit, zero-extend it, as negative shift counts
+	     are undefined behavior and zero-extension is more
+	     efficient.  */
 	  if (!match)
 	    {
-	      op = lowpart_subreg (SImode, op, GET_MODE (op));
+	      if (SCALAR_INT_MODE_P (GET_MODE (op)))
+		op = convert_modes (mode, GET_MODE (op), op, 1);
+	      else
+		op = lowpart_subreg (mode, op, GET_MODE (op));
 	      if (!insn_p->operand[i + 1].predicate (op, mode))
 		op = copy_to_reg (op);
 	    }
--- gcc/config/i386/sse.md.jj	2017-04-03 13:43:50.179572564 +0200
+++ gcc/config/i386/sse.md	2017-04-03 18:01:19.713852914 +0200
@@ -10620,7 +10620,7 @@ (define_insn "<mask_codefor>ashr<mode>3<
   [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
 	(ashiftrt:VI24_AVX512BW_1
 	  (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
-	  (match_operand:SI 2 "nonmemory_operand" "v,N")))]
+	  (match_operand:DI 2 "nonmemory_operand" "v,N")))]
   "TARGET_AVX512VL"
   "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
   [(set_attr "type" "sseishft")
@@ -10634,7 +10634,7 @@ (define_insn "ashr<mode>3"
   [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
 	(ashiftrt:VI24_AVX2
 	  (match_operand:VI24_AVX2 1 "register_operand" "0,x")
-	  (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
+	  (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
   "TARGET_SSE2"
   "@
    psra<ssemodesuffix>\t{%2, %0|%0, %2}
@@ -10667,7 +10667,7 @@ (define_insn "ashr<mode>3<mask_name>"
   [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
 	(ashiftrt:VI248_AVX512BW_AVX512VL
 	  (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
-	  (match_operand:SI 2 "nonmemory_operand" "v,N")))]
+	  (match_operand:DI 2 "nonmemory_operand" "v,N")))]
   "TARGET_AVX512F"
   "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
   [(set_attr "type" "sseishft")
@@ -10681,7 +10681,7 @@ (define_insn "<shift_insn><mode>3<mask_n
   [(set (match_operand:VI2_AVX2_AVX512BW 0 "register_operand" "=x,v")
 	(any_lshift:VI2_AVX2_AVX512BW
 	  (match_operand:VI2_AVX2_AVX512BW 1 "register_operand" "0,v")
-	  (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
+	  (match_operand:DI 2 "nonmemory_operand" "xN,vN")))]
   "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
   "@
    p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
@@ -10700,7 +10700,7 @@ (define_insn "<shift_insn><mode>3<mask_n
   [(set (match_operand:VI48_AVX2 0 "register_operand" "=x,x,v")
 	(any_lshift:VI48_AVX2
 	  (match_operand:VI48_AVX2 1 "register_operand" "0,x,v")
-	  (match_operand:SI 2 "nonmemory_operand" "xN,xN,vN")))]
+	  (match_operand:DI 2 "nonmemory_operand" "xN,xN,vN")))]
   "TARGET_SSE2 && <mask_mode512bit_condition>"
   "@
    p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
@@ -10720,7 +10720,7 @@ (define_insn "<shift_insn><mode>3<mask_n
   [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
 	(any_lshift:VI48_512
 	  (match_operand:VI48_512 1 "nonimmediate_operand" "v,m")
-	  (match_operand:SI 2 "nonmemory_operand" "vN,N")))]
+	  (match_operand:DI 2 "nonmemory_operand" "vN,N")))]
   "TARGET_AVX512F && <mask_mode512bit_condition>"
   "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
   [(set_attr "isa" "avx512f")
--- gcc/config/i386/mmx.md.jj	2017-04-03 13:43:50.119573339 +0200
+++ gcc/config/i386/mmx.md	2017-04-03 18:01:19.708852979 +0200
@@ -930,7 +930,7 @@ (define_insn "mmx_ashr<mode>3"
   [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
         (ashiftrt:MMXMODE24
 	  (match_operand:MMXMODE24 1 "register_operand" "0")
-	  (match_operand:SI 2 "nonmemory_operand" "yN")))]
+	  (match_operand:DI 2 "nonmemory_operand" "yN")))]
   "TARGET_MMX"
   "psra<mmxvecsize>\t{%2, %0|%0, %2}"
   [(set_attr "type" "mmxshft")
@@ -944,7 +944,7 @@ (define_insn "mmx_<shift_insn><mode>3"
   [(set (match_operand:MMXMODE248 0 "register_operand" "=y")
         (any_lshift:MMXMODE248
 	  (match_operand:MMXMODE248 1 "register_operand" "0")
-	  (match_operand:SI 2 "nonmemory_operand" "yN")))]
+	  (match_operand:DI 2 "nonmemory_operand" "yN")))]
   "TARGET_MMX"
   "p<vshift><mmxvecsize>\t{%2, %0|%0, %2}"
   [(set_attr "type" "mmxshft")
--- gcc/testsuite/gcc.target/i386/avx-pr80286.c.jj	2017-04-03 18:44:07.552698281 +0200
+++ gcc/testsuite/gcc.target/i386/avx-pr80286.c	2017-04-03 18:43:51.000000000 +0200
@@ -0,0 +1,26 @@
+/* PR target/80286 */
+/* { dg-do run { target avx } } */
+/* { dg-options "-O2 -mavx" } */
+
+#include "avx-check.h"
+#include <immintrin.h>
+
+__m256i m;
+
+__attribute__((noinline, noclone)) __m128i
+foo (__m128i x)
+{
+  int s = _mm_cvtsi128_si32 (_mm256_castsi256_si128 (m));
+  return _mm_srli_epi16 (x, s);
+}
+
+static void
+avx_test (void)
+{
+  __m128i a = (__m128i) (__v8hi) { 1 << 7, 2 << 8, 3 << 9, 4 << 10, 5 << 11, 6 << 12, 7 << 13, 8 << 12 };
+  m = (__m256i) (__v8si) { 7, 8, 9, 10, 11, 12, 13, 14 };
+  __m128i c = foo (a);
+  __m128i b = (__m128i) (__v8hi) { 1, 2 << 1, 3 << 2, 4 << 3, 5 << 4, 6 << 5, 7 << 6, 8 << 5 };
+  if (__builtin_memcmp (&c, &b, sizeof (__m128i)))
+    __builtin_abort ();
+}
--- gcc/testsuite/gcc.dg/pr80286.c.jj	2017-04-03 18:45:27.574663948 +0200
+++ gcc/testsuite/gcc.dg/pr80286.c	2017-04-03 18:45:18.386782707 +0200
@@ -0,0 +1,23 @@
+/* PR target/80286 */
+/* { dg-do run } */
+/* { dg-options "-O2 -Wno-psabi" } */
+
+typedef int V __attribute__((vector_size (4 * sizeof (int))));
+
+__attribute__((noinline, noclone)) V
+foo (V x, V y)
+{
+  return x << y[0];
+}
+
+int
+main ()
+{
+  V x = { 1, 2, 3, 4 };
+  V y = { 5, 6, 7, 8 };
+  V z = foo (x, y);
+  V e = { 1 << 5, 2 << 5, 3 << 5, 4 << 5 };
+  if (__builtin_memcmp (&z, &e, sizeof (V)))
+    __builtin_abort ();
+  return 0;
+}

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]