This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix ICEs with -mxop __builtin_ia32_vpermil2p[sd]{,256} and __builtin_ia32_vprot[bwdq]i intrinsics (PR target/49411)


Hi!

All of these _mm{,256}_permute2_p[sd] and _mm_roti_epi{8,16,32,64}
intrinsics ICE if the last argument is a constant integer that is not in
the expected range.

I could only find MSFT documentation for these intrinsics.  For
*permute2* it says that the last argument must be 0, 1, 2 or 3;
for *roti* it says that the last argument is an integer rotation count,
preferably constant, and that if the count is negative, it performs a
right rotation instead of a left rotation.
This patch adjusts the builtins to match that.  If we instead want to
e.g. always mandate that the _mm_roti_epi* last argument is a constant
integer, or a constant integer in the range -N+1 .. N-1 where N is the
number after _mm_roti_epi, or in the range 0 .. N-1, it can be easily
adjusted.

Regtested on x86_64-linux {-m32,-m64}, unfortunately on a SandyBridge
box, so I couldn't verify if xop-rotate[12]-int.c actually succeeds
on xop capable HW.

2011-06-15  Jakub Jelinek  <jakub@redhat.com>

	PR target/49411
	* config/i386/i386.c (ix86_expand_multi_arg_builtin): If
	last_arg_constant and last argument doesn't match its predicate,
	for xop_vpermil2<mode>3 error out and for xop_rotl<mode>3
	if it is CONST_INT, mask it, otherwise expand using rotl<mode>3.

	* gcc.target/i386/xop-vpermil2px-1.c: New test.
	* gcc.target/i386/xop-vpermil2px-2.c: New test.
	* gcc.target/i386/xop-rotate1-int.c: New test.
	* gcc.target/i386/xop-rotate2-int.c: New test.

--- gcc/config/i386/i386.c.jj	2011-06-09 16:56:56.000000000 +0200
+++ gcc/config/i386/i386.c	2011-06-15 11:17:12.000000000 +0200
@@ -26149,16 +26149,66 @@ ix86_expand_multi_arg_builtin (enum insn
       int adjust = (comparison_p) ? 1 : 0;
       enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
 
-      if (last_arg_constant && i == nargs-1)
+      if (last_arg_constant && i == nargs - 1)
 	{
-	  if (!CONST_INT_P (op))
+	  if (!insn_data[icode].operand[i + 1].predicate (op, mode))
 	    {
-	      error ("last argument must be an immediate");
-	      return gen_reg_rtx (tmode);
+	      enum insn_code new_icode = icode;
+	      switch (icode)
+		{
+		case CODE_FOR_xop_vpermil2v2df3:
+		case CODE_FOR_xop_vpermil2v4sf3:
+		case CODE_FOR_xop_vpermil2v4df3:
+		case CODE_FOR_xop_vpermil2v8sf3:
+		  if (!CONST_INT_P (op))
+		    {
+		      error ("last argument must be an immediate");
+		      return gen_reg_rtx (tmode);
+		    }
+		  error ("last argument must be in the range 0 .. 3");
+		  return gen_reg_rtx (tmode);
+		case CODE_FOR_xop_rotlv2di3:
+		  new_icode = CODE_FOR_rotlv2di3;
+		  goto xop_rotl;
+		case CODE_FOR_xop_rotlv4si3:
+		  new_icode = CODE_FOR_rotlv4si3;
+		  goto xop_rotl;
+		case CODE_FOR_xop_rotlv8hi3:
+		  new_icode = CODE_FOR_rotlv8hi3;
+		  goto xop_rotl;
+		case CODE_FOR_xop_rotlv16qi3:
+		  new_icode = CODE_FOR_rotlv16qi3;
+		xop_rotl:
+		  if (CONST_INT_P (op))
+		    {
+		      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
+		      op = GEN_INT (INTVAL (op) & mask);
+		      gcc_checking_assert
+			(insn_data[icode].operand[i + 1].predicate (op, mode));
+		    }
+		  else
+		    {
+		      gcc_checking_assert
+			(nargs == 2
+			 && insn_data[new_icode].operand[0].mode == tmode
+			 && insn_data[new_icode].operand[1].mode == tmode
+			 && insn_data[new_icode].operand[2].mode == mode
+			 && insn_data[new_icode].operand[0].predicate
+			    == insn_data[icode].operand[0].predicate
+			 && insn_data[new_icode].operand[1].predicate
+			    == insn_data[icode].operand[1].predicate);
+		      icode = new_icode;
+		      goto non_constant;
+		    }
+		  break;
+		default:
+		  gcc_unreachable ();
+		}
 	    }
 	}
       else
 	{
+	non_constant:
 	  if (VECTOR_MODE_P (mode))
 	    op = safe_vector_operand (op, mode);
 
--- gcc/testsuite/gcc.target/i386/xop-vpermil2px-1.c.jj	2011-06-15 10:18:29.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/xop-vpermil2px-1.c	2011-06-15 10:41:13.000000000 +0200
@@ -0,0 +1,25 @@
+/* PR target/49411 */
+/* { dg-do compile } */
+/* { dg-options "-O0 -mxop" } */
+
+#include <x86intrin.h>
+
+__m128d a1, a2, a3;
+__m256d b1, b2, b3;
+__m128 c1, c2, c3;
+__m256 d1, d2, d3;
+__m128i s;
+__m256i t;
+
+void
+foo (int i)
+{
+  a1 = _mm_permute2_pd (a2, a3, s, 3);
+  b1 = _mm256_permute2_pd (b2, b3, t, 3);
+  c1 = _mm_permute2_ps (c2, c3, s, 3);
+  d1 = _mm256_permute2_ps (d2, d3, t, 3);
+  a1 = _mm_permute2_pd (a2, a3, s, 17);		/* { dg-error "last argument must be in the range 0 .. 3" } */
+  b1 = _mm256_permute2_pd (b2, b3, t, 17);	/* { dg-error "last argument must be in the range 0 .. 3" } */
+  c1 = _mm_permute2_ps (c2, c3, s, 17);		/* { dg-error "last argument must be in the range 0 .. 3" } */
+  d1 = _mm256_permute2_ps (d2, d3, t, 17);	/* { dg-error "last argument must be in the range 0 .. 3" } */
+}
--- gcc/testsuite/gcc.target/i386/xop-vpermil2px-2.c.jj	2011-06-15 10:39:36.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/xop-vpermil2px-2.c	2011-06-15 10:39:44.000000000 +0200
@@ -0,0 +1,21 @@
+/* PR target/49411 */
+/* { dg-do compile } */
+/* { dg-options "-O0 -mxop" } */
+
+#include <x86intrin.h>
+
+__m128d a1, a2, a3;
+__m256d b1, b2, b3;
+__m128 c1, c2, c3;
+__m256 d1, d2, d3;
+__m128i s;
+__m256i t;
+
+void
+foo (int i)
+{
+  a1 = _mm_permute2_pd (a2, a3, s, i);		/* { dg-error "last argument must be an immediate" } */
+  b1 = _mm256_permute2_pd (b2, b3, t, i);	/* { dg-error "last argument must be an immediate" } */
+  c1 = _mm_permute2_ps (c2, c3, s, i);		/* { dg-error "last argument must be an immediate" } */
+  d1 = _mm256_permute2_ps (d2, d3, t, i);	/* { dg-error "last argument must be an immediate" } */
+}
--- gcc/testsuite/gcc.target/i386/xop-rotate1-int.c.jj	2011-06-15 10:47:29.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/xop-rotate1-int.c	2011-06-15 11:25:25.000000000 +0200
@@ -0,0 +1,63 @@
+/* PR target/49411 */
+/* { dg-do run } */
+/* { dg-require-effective-target xop } */
+/* { dg-options "-O2 -mxop" } */
+
+#include "xop-check.h"
+
+#include <x86intrin.h>
+
+extern void abort (void);
+
+union
+{
+  __m128i v;
+  unsigned char c[16];
+  unsigned short s[8];
+  unsigned int i[4];
+  unsigned long long l[2];
+} a, b, c, d;
+
+#define TEST1(F, N, S, SS) \
+do {							\
+  for (i = 0; i < sizeof (a.F) / sizeof (a.F[0]); i++)	\
+    a.F[i] = i * 17;					\
+  s = _mm_set1_epi##SS (N);				\
+  b.v = _mm_roti_epi##S (a.v, N);			\
+  c.v = _mm_rot_epi##S (a.v, s);			\
+  for (i = 0; i < sizeof (a.F) / sizeof (a.F[0]); i++)	\
+    {							\
+      int mask = __CHAR_BIT__ * sizeof (a.F[i]) - 1;	\
+      d.F[i] = a.F[i] << (N & mask);			\
+      if (N & mask)					\
+	d.F[i] |= a.F[i] >> (mask + 1 - (N & mask));	\
+      if (b.F[i] != c.F[i] || b.F[i] != d.F[i])		\
+	abort ();					\
+    }							\
+} while (0)
+#define TEST(N) \
+  TEST1 (c, N, 8, 8);					\
+  TEST1 (s, N, 16, 16);					\
+  TEST1 (i, N, 32, 32);					\
+  TEST1 (l, N, 64, 64x)
+
+volatile int n;
+
+static void
+xop_test (void)
+{
+  unsigned int i;
+  __m128i s;
+
+#ifndef NON_CONST
+  TEST (5);
+  TEST (-5);
+  TEST (0);
+  TEST (31);
+#else
+  n = 5; TEST (n);
+  n = -5; TEST (n);
+  n = 0; TEST (n);
+  n = 31; TEST (n);
+#endif
+}
--- gcc/testsuite/gcc.target/i386/xop-rotate2-int.c.jj	2011-06-15 11:25:42.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/xop-rotate2-int.c	2011-06-15 11:26:03.000000000 +0200
@@ -0,0 +1,7 @@
+/* PR target/49411 */
+/* { dg-do run } */
+/* { dg-require-effective-target xop } */
+/* { dg-options "-O2 -mxop" } */
+
+#define NON_CONST 1
+#include "xop-rotate1-int.c"

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]