This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[AArch64] Fix vcond where comparison and result have different types.
- From: James Greenhalgh <james dot greenhalgh at arm dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: marcus dot shawcroft at arm dot com
- Date: Tue, 14 May 2013 14:43:14 +0100
- Subject: [AArch64] Fix vcond where comparison and result have different types.
- References: <CAFqB+PxnZVJJxM0j7YF29=3eiNPTq4zPzo2utWs-MHC=ZVtB9A at mail dot gmail dot com>
Hi,
For a statement like:
INT = FLOAT > FLOAT ? INT : INT.
the vcond implementation in AArch64 is broken: we try to force the INT
values into registers using the FLOAT vector mode, which causes an ICE.
This patch fixes this.
Regression suite run for aarch64-none-elf with no regressions,
and more cases added to the testsuite to ensure this is caught
in future.
Thanks,
James Greenhalgh
---
gcc/
* config/aarch64/aarch64-simd.md
(aarch64_vcond_internal<mode>): Rename to...
(aarch64_vcond_internal<mode><mode>): ...This, for integer modes.
(aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>): ...This, for
float modes. Clarify all iterator modes.
(vcond<mode><mode>): Use new name for vcond expanders.
(vcond<v_cmp_result><mode>): Likewise.
(vcondu<mode><mode>): Likewise.
* config/aarch64/iterators.md (VDQF_COND): New.
gcc/testsuite/
* gcc.target/aarch64/vect-fcm.x: Add cases testing
FLOAT cmp FLOAT ? INT : INT.
* gcc.target/aarch64/vect-fcm-eq-d.c: Define ITYPE.
* gcc.target/aarch64/vect-fcm-eq-f.c: Likewise.
* gcc.target/aarch64/vect-fcm-ge-d.c: Likewise.
* gcc.target/aarch64/vect-fcm-ge-f.c: Likewise.
* gcc.target/aarch64/vect-fcm-gt-d.c: Likewise.
* gcc.target/aarch64/vect-fcm-gt-f.c: Likewise.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 5626b55..6bc7dd7 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1725,7 +1725,7 @@
DONE;
})
-(define_expand "aarch64_vcond_internal<mode>"
+(define_expand "aarch64_vcond_internal<mode><mode>"
[(set (match_operand:VDQ 0 "register_operand")
(if_then_else:VDQ
(match_operator 3 "comparison_operator"
@@ -1820,14 +1820,14 @@
DONE;
})
-(define_expand "aarch64_vcond_internal<mode>"
- [(set (match_operand:VDQF 0 "register_operand")
+(define_expand "aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>"
+ [(set (match_operand:VDQF_COND 0 "register_operand")
(if_then_else:VDQF
(match_operator 3 "comparison_operator"
[(match_operand:VDQF 4 "register_operand")
(match_operand:VDQF 5 "nonmemory_operand")])
- (match_operand:VDQF 1 "nonmemory_operand")
- (match_operand:VDQF 2 "nonmemory_operand")))]
+ (match_operand:VDQF_COND 1 "nonmemory_operand")
+ (match_operand:VDQF_COND 2 "nonmemory_operand")))]
"TARGET_SIMD"
{
int inverse = 0;
@@ -1835,8 +1835,8 @@
int swap_bsl_operands = 0;
rtx op1 = operands[1];
rtx op2 = operands[2];
- rtx mask = gen_reg_rtx (<V_cmp_result>mode);
- rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
+ rtx mask = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
+ rtx tmp = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
rtx (*base_comparison) (rtx, rtx, rtx);
rtx (*complimentary_comparison) (rtx, rtx, rtx);
@@ -1856,7 +1856,7 @@
/* Fall through. */
default:
if (!REG_P (operands[5]))
- operands[5] = force_reg (<MODE>mode, operands[5]);
+ operands[5] = force_reg (<VDQF:MODE>mode, operands[5]);
}
switch (GET_CODE (operands[3]))
@@ -1869,8 +1869,8 @@
case UNGE:
case ORDERED:
case UNORDERED:
- base_comparison = gen_aarch64_cmge<mode>;
- complimentary_comparison = gen_aarch64_cmgt<mode>;
+ base_comparison = gen_aarch64_cmge<VDQF:mode>;
+ complimentary_comparison = gen_aarch64_cmgt<VDQF:mode>;
break;
case LE:
case UNLE:
@@ -1878,14 +1878,14 @@
/* Fall through. */
case GT:
case UNGT:
- base_comparison = gen_aarch64_cmgt<mode>;
- complimentary_comparison = gen_aarch64_cmge<mode>;
+ base_comparison = gen_aarch64_cmgt<VDQF:mode>;
+ complimentary_comparison = gen_aarch64_cmge<VDQF:mode>;
break;
case EQ:
case NE:
case UNEQ:
- base_comparison = gen_aarch64_cmeq<mode>;
- complimentary_comparison = gen_aarch64_cmeq<mode>;
+ base_comparison = gen_aarch64_cmeq<VDQF:mode>;
+ complimentary_comparison = gen_aarch64_cmeq<VDQF:mode>;
break;
default:
gcc_unreachable ();
@@ -1913,10 +1913,10 @@
switch (GET_CODE (operands[3]))
{
case LT:
- base_comparison = gen_aarch64_cmlt<mode>;
+ base_comparison = gen_aarch64_cmlt<VDQF:mode>;
break;
case LE:
- base_comparison = gen_aarch64_cmle<mode>;
+ base_comparison = gen_aarch64_cmle<VDQF:mode>;
break;
default:
/* Do nothing, other zero form cases already have the correct
@@ -1959,9 +1959,9 @@
true iff !(a != b && a ORDERED b), swapping the operands to BSL
will then give us (a == b || a UNORDERED b) as intended. */
- emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
- emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[5], operands[4]));
- emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
+ emit_insn (gen_aarch64_cmgt<VDQF:mode> (mask, operands[4], operands[5]));
+ emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[5], operands[4]));
+ emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
swap_bsl_operands = 1;
break;
case UNORDERED:
@@ -1970,9 +1970,9 @@
swap_bsl_operands = 1;
/* Fall through. */
case ORDERED:
- emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[4], operands[5]));
- emit_insn (gen_aarch64_cmge<mode> (mask, operands[5], operands[4]));
- emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
+ emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[4], operands[5]));
+ emit_insn (gen_aarch64_cmge<VDQF:mode> (mask, operands[5], operands[4]));
+ emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
break;
default:
gcc_unreachable ();
@@ -1987,16 +1987,16 @@
/* If we have (a = (b CMP c) ? -1 : 0);
Then we can simply move the generated mask. */
- if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
- && op2 == CONST0_RTX (<V_cmp_result>mode))
+ if (op1 == CONSTM1_RTX (<VDQF_COND:V_cmp_result>mode)
+ && op2 == CONST0_RTX (<VDQF_COND:V_cmp_result>mode))
emit_move_insn (operands[0], mask);
else
{
if (!REG_P (op1))
- op1 = force_reg (<MODE>mode, op1);
+ op1 = force_reg (<VDQF_COND:MODE>mode, op1);
if (!REG_P (op2))
- op2 = force_reg (<MODE>mode, op2);
- emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
+ op2 = force_reg (<VDQF_COND:MODE>mode, op2);
+ emit_insn (gen_aarch64_simd_bsl<VDQF_COND:mode> (operands[0], mask,
op1, op2));
}
@@ -2013,7 +2013,7 @@
(match_operand:VALL 2 "nonmemory_operand")))]
"TARGET_SIMD"
{
- emit_insn (gen_aarch64_vcond_internal<mode> (operands[0], operands[1],
+ emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
operands[2], operands[3],
operands[4], operands[5]));
DONE;
@@ -2029,7 +2029,7 @@
(match_operand:<V_cmp_result> 2 "nonmemory_operand")))]
"TARGET_SIMD"
{
- emit_insn (gen_aarch64_vcond_internal<v_cmp_result> (
+ emit_insn (gen_aarch64_vcond_internal<v_cmp_result><mode> (
operands[0], operands[1],
operands[2], operands[3],
operands[4], operands[5]));
@@ -2046,7 +2046,7 @@
(match_operand:VDQ 2 "nonmemory_operand")))]
"TARGET_SIMD"
{
- emit_insn (gen_aarch64_vcond_internal<mode> (operands[0], operands[1],
+ emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
operands[2], operands[3],
operands[4], operands[5]));
DONE;
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 5945d23..860d4d9 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -83,6 +83,9 @@
;; Vector Float modes.
(define_mode_iterator VDQF [V2SF V4SF V2DF])
+;; Modes suitable to use as the return type of a vcond expression.
+(define_mode_iterator VDQF_COND [V2SF V2SI V4SF V4SI V2DF V2DI])
+
;; All Float modes.
(define_mode_iterator VALLF [V2SF V4SF V2DF SF DF])
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c
index 19ecd63..6c2e2c8 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c
@@ -2,12 +2,13 @@
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
#define FTYPE double
+#define ITYPE long
#define OP ==
#define INV_OP !=
#include "vect-fcm.x"
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c
index 30be5ad..5a2109c 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c
@@ -2,12 +2,13 @@
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
#define FTYPE float
+#define ITYPE int
#define OP ==
#define INV_OP !=
#include "vect-fcm.x"
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c
index b922833..8fad799 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c
@@ -2,12 +2,13 @@
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
#define FTYPE double
+#define ITYPE long
#define OP >=
#define INV_OP <
#include "vect-fcm.x"
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
/* { dg-final { scan-assembler "fcmlt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c
index 04d3533..7aab9e6 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c
@@ -2,12 +2,13 @@
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
#define FTYPE float
+#define ITYPE int
#define OP >=
#define INV_OP <
#include "vect-fcm.x"
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
/* { dg-final { scan-assembler "fcmlt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c
index 421a04a..d26acaa 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c
@@ -2,12 +2,13 @@
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
#define FTYPE double
+#define ITYPE long
#define OP >
#define INV_OP <=
#include "vect-fcm.x"
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
/* { dg-final { scan-assembler "fcmle\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c
index cdeab14..2797fd1 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c
@@ -2,12 +2,13 @@
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
#define FTYPE float
+#define ITYPE int
#define OP >
#define INV_OP <=
#include "vect-fcm.x"
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
/* { dg-final { scan-assembler "fcmle\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm.x b/gcc/testsuite/gcc.target/aarch64/vect-fcm.x
index 803861b..614f0de 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm.x
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm.x
@@ -13,6 +13,8 @@ FTYPE input2[N] =
2.0, -4.0, 8.0, -16.0,
-2.125, 4.25, -8.5, 17.0};
+/* Float comparisons, float results. */
+
void
foo (FTYPE *in1, FTYPE *in2, FTYPE *output)
{
@@ -49,11 +51,52 @@ foobarbar (FTYPE *in1, FTYPE *in2, FTYPE *output)
output[i] = (in1[i] INV_OP 0.0) ? 4.0 : 2.0;
}
+/* Float comparisons, int results. */
+
+void
+foo_int (FTYPE *in1, FTYPE *in2, ITYPE *output)
+{
+ int i = 0;
+ /* Vectorizable. */
+ for (i = 0; i < N; i++)
+ output[i] = (in1[i] OP in2[i]) ? 2 : 4;
+}
+
+void
+bar_int (FTYPE *in1, FTYPE *in2, ITYPE *output)
+{
+ int i = 0;
+ /* Vectorizable. */
+ for (i = 0; i < N; i++)
+ output[i] = (in1[i] INV_OP in2[i]) ? 4 : 2;
+}
+
+void
+foobar_int (FTYPE *in1, FTYPE *in2, ITYPE *output)
+{
+ int i = 0;
+ /* Vectorizable. */
+ for (i = 0; i < N; i++)
+ output[i] = (in1[i] OP 0.0) ? 4 : 2;
+}
+
+void
+foobarbar_int (FTYPE *in1, FTYPE *in2, ITYPE *output)
+{
+ int i = 0;
+ /* Vectorizable. */
+ for (i = 0; i < N; i++)
+ output[i] = (in1[i] INV_OP 0.0) ? 4 : 2;
+}
+
int
main (int argc, char **argv)
{
FTYPE out1[N];
FTYPE out2[N];
+ ITYPE outi1[N];
+ ITYPE outi2[N];
+
int i = 0;
foo (input1, input2, out1);
bar (input1, input2, out2);
@@ -65,6 +108,17 @@ main (int argc, char **argv)
for (i = 0; i < N; i++)
if (out1[i] == out2[i])
abort ();
+
+ foo_int (input1, input2, outi1);
+ bar_int (input1, input2, outi2);
+ for (i = 0; i < N; i++)
+ if (outi1[i] != outi2[i])
+ abort ();
+ foobar_int (input1, input2, outi1);
+ foobarbar_int (input1, input2, outi2);
+ for (i = 0; i < N; i++)
+ if (outi1[i] == outi2[i])
+ abort ();
return 0;
}