[PATCH] Ensure vcond* expansion doesn't fail on x86 (PR target/50310)

Jakub Jelinek jakub@redhat.com
Wed Sep 7 19:25:00 GMT 2011


Hi!

The attached testcase ICEs, because the vectorizer assumes that if vcond*
is available, it supports all comparisons, not just a subset of them.
With -mavx vcmpd etc. already support all the needed comparisons (and
several more - we wouldn't even need to swap the arguments), for SSE
the only missing ones (LTGT and UNEQ) can be handled as ORDERED & NE
resp. UNORDERED | EQ.

Bootstrapped/regtested on x86_64-linux and i686-linux (on non-AVX host),
plus regtested on x86_64-linux on AVX box.  Ok for trunk and 4.6?

2011-09-07  Jakub Jelinek  <jakub@redhat.com>

	PR target/50310
	* config/i386/i386.c (ix86_prepare_sse_fp_compare_args): For
	TARGET_AVX return code for LTGT and UNEQ.
	(ix86_expand_fp_vcond): Handle LTGT and UNEQ.

	* gcc.c-torture/execute/ieee/pr50310.c: New test.
	* gcc.dg/pr50310-2.c: New test.

--- gcc/config/i386/i386.c.jj	2011-09-02 16:29:38.000000000 +0200
+++ gcc/config/i386/i386.c	2011-09-07 13:34:17.000000000 +0200
@@ -18308,6 +18308,10 @@ ix86_prepare_sse_fp_compare_args (rtx de
     {
     case LTGT:
     case UNEQ:
+      /* With AVX these are supported directly.  */
+      if (TARGET_AVX)
+	break;
+
       /* We have no LTGT as an operator.  We could implement it with
 	 NE & ORDERED, but this requires an extra temporary.  It's
 	 not clear that it's worth it.  */
@@ -18559,7 +18563,32 @@ ix86_expand_fp_vcond (rtx operands[])
   code = ix86_prepare_sse_fp_compare_args (operands[0], code,
 					   &operands[4], &operands[5]);
   if (code == UNKNOWN)
-    return false;
+    {
+      rtx temp;
+      switch (GET_CODE (operands[3]))
+	{
+	case LTGT:
+	  temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
+				      operands[5], operands[0], operands[0]);
+	  cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
+				     operands[5], operands[1], operands[2]);
+	  code = AND;
+	  break;
+	case UNEQ:
+	  temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
+				      operands[5], operands[0], operands[0]);
+	  cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
+				     operands[5], operands[1], operands[2]);
+	  code = IOR;
+	  break;
+	default:
+	  gcc_unreachable ();
+	}
+      cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
+				 OPTAB_DIRECT);
+      ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
+      return true;
+    }
 
   if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
 				 operands[5], operands[1], operands[2]))
--- gcc/testsuite/gcc.c-torture/execute/ieee/pr50310.c.jj	2011-09-07 14:16:12.000000000 +0200
+++ gcc/testsuite/gcc.c-torture/execute/ieee/pr50310.c	2011-09-07 14:40:57.000000000 +0200
@@ -0,0 +1,73 @@
+/* PR target/50310 */
+
+extern void abort (void);
+double s1[4], s2[4], s3[64];
+
+void
+foo (void)
+{
+  int i;
+  for (i = 0; i < 4; i++)
+    s3[0 * 4 + i] = __builtin_isgreater (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[1 * 4 + i] = (!__builtin_isgreater (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[2 * 4 + i] = __builtin_isgreaterequal (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[3 * 4 + i] = (!__builtin_isgreaterequal (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[4 * 4 + i] = __builtin_isless (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[5 * 4 + i] = (!__builtin_isless (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[6 * 4 + i] = __builtin_islessequal (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[7 * 4 + i] = (!__builtin_islessequal (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[8 * 4 + i] = __builtin_islessgreater (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[9 * 4 + i] = (!__builtin_islessgreater (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[10 * 4 + i] = __builtin_isunordered (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[11 * 4 + i] = (!__builtin_isunordered (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[12 * 4 + i] = s1[i] > s2[i] ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[13 * 4 + i] = s1[i] <= s2[i] ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[14 * 4 + i] = s1[i] < s2[i] ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[15 * 4 + i] = s1[i] >= s2[i] ? -1.0 : 0.0;
+}
+
+int
+main ()
+{
+  int i;
+  s1[0] = 5.0;
+  s1[1] = 6.0;
+  s1[2] = 5.0;
+  s1[3] = __builtin_nan ("");
+  s2[0] = 6.0;
+  s2[1] = 5.0;
+  s2[2] = 5.0;
+  s2[3] = 5.0;
+  asm volatile ("" : : : "memory");
+  foo ();
+  asm volatile ("" : : : "memory");
+  for (i = 0; i < 16 * 4; i++)
+    if (i >= 12 * 4 && (i & 3) == 3)
+      {
+	if (s3[i] != 0.0) abort ();
+      }
+    else
+      {
+        static int masks[] = { 2, 2|4, 1, 1|4, 1|2, 8, 2, 1 };
+        if (s3[i]
+	    != (((1 << (i & 3)) & ((i & 4) ? ~masks[i / 8] : masks[i / 8]))
+		? -1.0 : 0.0))
+	  abort ();
+      }
+  return 0;
+}
--- gcc/testsuite/gcc.dg/pr50310-2.c.jj	2011-09-07 12:53:43.000000000 +0200
+++ gcc/testsuite/gcc.dg/pr50310-2.c	2011-09-07 12:53:16.000000000 +0200
@@ -0,0 +1,47 @@
+/* PR target/50310 */
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+/* { dg-options "-O3 -mavx" { target avx_runtime } } */
+
+double s1[4], s2[4], s3[64];
+
+int
+main (void)
+{
+  int i;
+  asm volatile ("" : : : "memory");
+  for (i = 0; i < 4; i++)
+    s3[0 * 4 + i] = __builtin_isgreater (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[1 * 4 + i] = (!__builtin_isgreater (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[2 * 4 + i] = __builtin_isgreaterequal (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[3 * 4 + i] = (!__builtin_isgreaterequal (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[4 * 4 + i] = __builtin_isless (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[5 * 4 + i] = (!__builtin_isless (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[6 * 4 + i] = __builtin_islessequal (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[7 * 4 + i] = (!__builtin_islessequal (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[8 * 4 + i] = __builtin_islessgreater (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[9 * 4 + i] = (!__builtin_islessgreater (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[10 * 4 + i] = __builtin_isunordered (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[11 * 4 + i] = (!__builtin_isunordered (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[12 * 4 + i] = s1[i] > s2[i] ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[13 * 4 + i] = s1[i] >= s2[i] ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[14 * 4 + i] = s1[i] < s2[i] ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[15 * 4 + i] = s1[i] <= s2[i] ? -1.0 : 0.0;
+  asm volatile ("" : : : "memory");
+  return 0;
+}

	Jakub



More information about the Gcc-patches mailing list