[AArch64] Fix unordered comparisons to floating-point vcond.


Hi,

When I added support for floating-point vcond on AArch64,
I didn't handle the unordered comparison codes that GCC
requires the expander to support.  In some circumstances
this could lead to an ICE.
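
For reference, here is a reduced, hypothetical example (not taken from
the patch) of the kind of loop that can reach the vcond expander with
an unordered comparison code.  __builtin_isunordered folds to an
UNORDERED comparison, so if the loop vectorizes the backend has to
expand a vcond with that code; the exact RTL depends on the flags and
the tree-level passes, so this is illustrative only:

void
foo (double *__restrict out, double *__restrict a,
     double *__restrict b, double *__restrict c, int n)
{
  int i;
  for (i = 0; i < n; i++)
    /* The condition folds to an UNORDERED comparison; vectorizing
       this conditional needs the UNORDERED case of vcond.  */
    out[i] = __builtin_isunordered (a[i], b[i]) ? b[i] : c[i];
}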

This patch fixes that oversight, adding support for
the UNLT, UNLE, UNEQ, UNGE, UNGT, ORDERED and UNORDERED
cases to the AArch64 backend.
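
To make the invert-and-swap trick concrete, below is a small scalar
sketch (my own illustration, not part of the patch) of why emitting the
inverse FCM comparison and then swapping the operands to BSL yields the
unordered forms.  FCM-style comparisons return false for unordered
lanes, so !(b > a) is true exactly when a >= b or when either input is
a NaN, i.e. a UNGE b:

#include <math.h>
#include <assert.h>

/* FCMGT-style semantics: false if either operand is a NaN.  */
static int
fcm_gt (double x, double y)
{
  return !isunordered (x, y) && x > y;
}

/* a UNGE b: true if a >= b or if the operands are unordered.  */
static int
unge (double a, double b)
{
  return isunordered (a, b) || a >= b;
}

int
main (void)
{
  double vals[] = { 1.0, 2.0, NAN };
  int i, j;
  for (i = 0; i < 3; i++)
    for (j = 0; j < 3; j++)
      /* a UNGE b  <=>  !(b FCMGT a): emit the comparison with swapped
	 operands, then negate it by swapping the operands to BSL.  */
      assert (unge (vals[i], vals[j]) == !fcm_gt (vals[j], vals[i]));
  return 0;
}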

I've tested this patch with no regressions on aarch64-none-elf.

Is this OK to commit?

Regards,
James Greenhalgh

---

gcc/

2013-01-18  James Greenhalgh  <james.greenhalgh@arm.com>

	* config/aarch64/aarch64-simd.md
	(aarch64_vcond_internal<mode>): Handle unordered cases.
	* config/aarch64/iterators.md (v_cmp_result): New.

gcc/testsuite/

2013-01-18  James Greenhalgh  <james.greenhalgh@arm.com>

	* gcc.target/aarch64/vect-fcm-gt-f.c: Change expected output.
	* gcc.target/aarch64/vect-fcm-gt-d.c: Likewise.
	* gcc.target/aarch64/vect-fcm-ge-f.c: Likewise.
	* gcc.target/aarch64/vect-fcm-ge-d.c: Likewise.
	* gcc.target/aarch64/vect-fcm-eq-f.c: Likewise.
	* gcc.target/aarch64/vect-fcm-eq-d.c: Likewise.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 50297a9..19cc87d 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1586,37 +1586,128 @@
   "TARGET_SIMD"
 {
   int inverse = 0;
+  int swap_bsl_operands = 0;
   rtx mask = gen_reg_rtx (<V_cmp_result>mode);
+  rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
 
-  if (!REG_P (operands[5])
-      && (operands[5] != CONST0_RTX (<MODE>mode)))
-    operands[5] = force_reg (<MODE>mode, operands[5]);
+  rtx (*base_comparison) (rtx, rtx, rtx);
+  rtx (*complimentary_comparison) (rtx, rtx, rtx);
+
+  switch (GET_CODE (operands[3]))
+    {
+    case GE:
+    case LE:
+    case EQ:
+      if (!REG_P (operands[5])
+	  && (operands[5] != CONST0_RTX (<MODE>mode)))
+	operands[5] = force_reg (<MODE>mode, operands[5]);
+      break;
+    default:
+      if (!REG_P (operands[5]))
+	operands[5] = force_reg (<MODE>mode, operands[5]);
+    }
 
   switch (GET_CODE (operands[3]))
     {
     case LT:
+    case UNLT:
       inverse = 1;
       /* Fall through.  */
     case GE:
-      emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
+    case UNGE:
+    case ORDERED:
+    case UNORDERED:
+      base_comparison = gen_aarch64_cmge<mode>;
+      complimentary_comparison = gen_aarch64_cmgt<mode>;
       break;
     case LE:
+    case UNLE:
       inverse = 1;
       /* Fall through.  */
     case GT:
-      emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
+    case UNGT:
+      base_comparison = gen_aarch64_cmgt<mode>;
+      complimentary_comparison = gen_aarch64_cmge<mode>;
       break;
+    case EQ:
     case NE:
-      inverse = 1;
-      /* Fall through.  */
+    case UNEQ:
+      base_comparison = gen_aarch64_cmeq<mode>;
+      complimentary_comparison = gen_aarch64_cmeq<mode>;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  switch (GET_CODE (operands[3]))
+    {
+    case LT:
+    case LE:
+    case GT:
+    case GE:
     case EQ:
-      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
+      /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
+	 As a LT b <=> b GT a and a LE b <=> b GE a, our transformations are:
+	 a GE b -> a GE b
+	 a GT b -> a GT b
+	 a LE b -> b GE a
+	 a LT b -> b GT a
+	 a EQ b -> a EQ b  */
+
+      if (!inverse)
+	emit_insn (base_comparison (mask, operands[4], operands[5]));
+      else
+	emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
+      break;
+    case UNLT:
+    case UNLE:
+    case UNGT:
+    case UNGE:
+    case NE:
+      /* FCM returns false for lanes which are unordered, so if we use
+	 the inverse of the comparison we actually want to emit and then
+	 swap the operands to BSL, we end up with the correct result.
+	 Note that a NE NaN and NaN NE b are true for all a and b.
+
+	 Our transformations are:
+	 a UNGE b -> !(b GT a)
+	 a UNGT b -> !(b GE a)
+	 a UNLE b -> !(a GT b)
+	 a UNLT b -> !(a GE b)
+	 a   NE b -> !(a EQ b)  */
+
+      if (inverse)
+	emit_insn (base_comparison (mask, operands[4], operands[5]));
+      else
+	emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
+
+      swap_bsl_operands = 1;
+      break;
+    case UNEQ:
+      /* We check (a > b || b > a).  Combining these comparisons gives
+	 us true iff (a != b && a ORDERED b); swapping the operands to BSL
+	 then gives us (a == b || a UNORDERED b) as intended.  */
+
+      emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
+      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[5], operands[4]));
+      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
+      swap_bsl_operands = 1;
+      break;
+    case UNORDERED:
+      /* Operands are ORDERED iff (a > b || b >= a).
+	 Swapping the operands to BSL will give the UNORDERED case.  */
+      swap_bsl_operands = 1;
+      /* Fall through.  */
+    case ORDERED:
+      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[4], operands[5]));
+      emit_insn (gen_aarch64_cmge<mode> (mask, operands[5], operands[4]));
+      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
       break;
     default:
       gcc_unreachable ();
     }
 
-  if (inverse)
+  if (swap_bsl_operands)
     emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, operands[2],
 				    operands[1]));
   else
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 3a57494..b66418e 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -430,6 +430,14 @@
 				(V2SF "V2SI") (V4SF  "V4SI")
 				(V2DF "V2DI")])
 
+;; Lower case mode of results of comparison operations.
+(define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi")
+				(V4HI "v4hi") (V8HI  "v8hi")
+				(V2SI "v2si") (V4SI  "v4si")
+				(DI   "di")   (V2DI  "v2di")
+				(V2SF "v2si") (V4SF  "v4si")
+				(V2DF "v2di")])
+
 ;; Vm for lane instructions is restricted to FP_LO_REGS.
 (define_mode_attr vwx [(V4HI "x") (V8HI "x") (HI "x")
 		       (V2SI "w") (V4SI "w") (SI "w")])
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c
index a177d28..b6fb5ae 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c
@@ -8,7 +8,7 @@
 #include "vect-fcm.x"
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
-/* { dg-final { scan-assembler-times "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 2 } } */
-/* { dg-final { scan-assembler-times "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" 1 } } */
+/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
+/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 /* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c
index 01f3880..283d34f 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c
@@ -8,7 +8,7 @@
 #include "vect-fcm.x"
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
-/* { dg-final { scan-assembler-times "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" 2 } } */
-/* { dg-final { scan-assembler-times "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" 1 } } */
+/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
+/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 /* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c
index 6027593..868e1f8 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c
@@ -8,7 +8,7 @@
 #include "vect-fcm.x"
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
-/* { dg-final { scan-assembler-times "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 2 } } */
-/* { dg-final { scan-assembler-times "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" 1 } } */
+/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
+/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 /* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c
index 0337d70..e3258f3 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c
@@ -8,7 +8,7 @@
 #include "vect-fcm.x"
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
-/* { dg-final { scan-assembler-times "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" 2 } } */
-/* { dg-final { scan-assembler-times "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" 1 } } */
+/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
+/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 /* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c
index b812a39..ed8b452 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c
@@ -8,7 +8,7 @@
 #include "vect-fcm.x"
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
-/* { dg-final { scan-assembler-times "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 2 } } */
-/* { dg-final { scan-assembler-times "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" 1 } } */
+/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
+/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 /* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c
index 5e012a4..e90a875 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c
@@ -8,7 +8,7 @@
 #include "vect-fcm.x"
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
-/* { dg-final { scan-assembler-times "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" 2 } } */
-/* { dg-final { scan-assembler-times "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" 1 } } */
+/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
+/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 /* { dg-final { cleanup-saved-temps } } */
