[PATCH 5/5][Arm] New pattern for CSEL, CSET and CSETM instructions
Omar Tahir
Omar.Tahir2@arm.com
Wed Sep 16 10:45:59 GMT 2020
Hi Kyrill,
It's been a while, but I believe you had the following comment about implementing CSEL:
> (define_insn_and_split "*thumb2_movsicc_insn"
> [(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r,r,r,r,r,r,r,r,r")
> (if_then_else:SI
> @@ -449,17 +473,14 @@
> it\\t%d3\;mvn%d3\\t%0, #%B1
> #
> #
> - #
> - #
> - #
> + ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2
> + ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
> + ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2
> #"
> ; alt 6: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2
> ; alt 7: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2
> - ; alt 8: ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2
> - ; alt 9: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
> - ; alt 10: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2
> ; alt 11: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2
> - "&& reload_completed"
> + "&& reload_completed && !TARGET_COND_ARITH"
>
> Hmm... I think the approach makes sense, but I'd rather we left the alternatives as '#' and refine the condition so that in the TARGET_COND_ARITH case we split in precisely the cases where the TARGET_COND_ARITH can't handle the operands.
> I appreciate that would complicate this condition somewhat, but it would have the benefit of expressing the RTL structure to allow for further optimisation.
I've made the changes you suggested. Please let me know if it's good to commit.
Thanks,
Omar
--
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 950e46edfeeee1b851b8968cbcf071564416dbf6..b8dd6af50a842c924996d528e95ce9873dcb913a 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -9760,7 +9760,7 @@
[(match_operand:SI 2 "s_register_operand" "r,r")
(match_operand:SI 3 "arm_add_operand" "rI,L")]))
(clobber (reg:CC CC_REGNUM))]
- "TARGET_32BIT"
+ "TARGET_32BIT && !TARGET_COND_ARITH"
"#"
"&& reload_completed"
[(set (reg:CC CC_REGNUM) (compare:CC (match_dup 2) (match_dup 3)))
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 011badc9957655a0fba67946c1db6fa6334b2bbb..57db29f92f4caee4c9384a9740e79dba2217144a 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -36,7 +36,7 @@
;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe
;; in Thumb-2 state: Ha, Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py, Pz, Rd, Rf, Rb, Ra,
;; Rg, Ri
-;; in all states: Pf, Pg
+;; in all states: Pf, Pg, UM, U1
;; The following memory constraints have been used:
;; in ARM/Thumb-2 state: Uh, Ut, Uv, Uy, Un, Um, Us, Up, Uf, Ux, Ul
@@ -479,6 +479,16 @@
(and (match_code "mem")
(match_test "TARGET_32BIT && neon_vector_mem_operand (op, 1, true)")))
+(define_constraint "UM"
+ "@internal
+ A constraint that matches the immediate constant -1."
+ (match_test "op == constm1_rtx"))
+
+(define_constraint "U1"
+ "@internal
+ A constraint that matches the immediate constant +1."
+ (match_test "op == const1_rtx"))
+
(define_memory_constraint "Ux"
"@internal
In ARM/Thumb-2 state a valid address and load into CORE regs or only to
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 2144520829cc4a28cd7ac1ef528ecd54f0af13c1..5d75341c9efe82dcda27daa74d2b22c52065dd02 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -454,6 +454,13 @@
&& arm_general_register_operand (op, GET_MODE (op))")
(match_test "satisfies_constraint_Pg (op)")))
+(define_predicate "arm_reg_or_m1_or_1_or_zero"
+ (and (match_code "reg,subreg,const_int")
+ (ior (match_operand 0 "arm_general_register_operand")
+ (match_test "op == constm1_rtx")
+ (match_test "op == const1_rtx")
+ (match_test "op == const0_rtx"))))
+
;; True for MULT, to identify which variant of shift_operator is in use.
(define_special_predicate "mult_operator"
(match_code "mult"))
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 69460f3665b0bc7f47c307aa4ae789bab6a94f92..db0b4c53754747a915d51a6df417fa97b60828da 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -432,6 +432,30 @@
(set_attr "type" "multiple")]
)
+(define_insn "*cmovsi_insn"
+ [(set (match_operand:SI 0 "arm_general_register_operand" "=r,r,r,r,r,r,r,r,r")
+ (if_then_else:SI
+ (match_operator 1 "arm_comparison_operator"
+ [(match_operand 2 "cc_register" "") (const_int 0)])
+ (match_operand:SI 3 "arm_reg_or_m1_or_1_or_zero" "r, r,UM, r,U1,U1,Pz,UM,Pz")
+ (match_operand:SI 4 "arm_reg_or_m1_or_1_or_zero" "r,UM, r,U1, r,Pz,U1,Pz,UM")))]
+ "TARGET_THUMB2 && TARGET_COND_ARITH
+ && (!((operands[3] == const1_rtx && operands[4] == constm1_rtx)
+ || (operands[3] == constm1_rtx && operands[4] == const1_rtx)))"
+ "@
+ csel\\t%0, %3, %4, %d1
+ csinv\\t%0, %3, zr, %d1
+ csinv\\t%0, %4, zr, %D1
+ csinc\\t%0, %3, zr, %d1
+ csinc\\t%0, %4, zr, %D1
+ cset\\t%0, %d1
+ cset\\t%0, %D1
+ csetm\\t%0, %d1
+ csetm\\t%0, %D1"
+ [(set_attr "type" "csel")
+ (set_attr "predicable" "no")]
+)
+
(define_insn_and_split "*thumb2_movsicc_insn"
[(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r,r,r,r,r,r,r,r,r")
(if_then_else:SI
@@ -459,7 +483,9 @@
; alt 9: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
; alt 10: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2
; alt 11: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2
- "&& reload_completed"
+ ; Conditional arithmetic (csel etc.) can handle all alternatives except 8-10
+ "&& reload_completed && (!TARGET_COND_ARITH ||
+ (which_alternative >= 8 && which_alternative <= 10))"
[(const_int 0)]
{
enum rtx_code rev_code;
diff --git a/gcc/testsuite/gcc.target/arm/csel.c b/gcc/testsuite/gcc.target/arm/csel.c
new file mode 100644
index 0000000000000000000000000000000000000000..79a4c161eb52b2986c2c2990d2dda3d8c3628782
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/csel.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8_1m_main_ok } */
+/* { dg-options "-O2 -march=armv8.1-m.main" } */
+
+int
+test_csel32_condasn(int w0, int w1, int w2, int w3)
+{
+ int w4;
+
+ /* { dg-final { scan-assembler "csel\tr\[0-9\]*.*eq" } } */
+ w4 = (w0 == w1) ? w2 : w3;
+ return w4;
+}
diff --git a/gcc/testsuite/gcc.target/arm/cset.c b/gcc/testsuite/gcc.target/arm/cset.c
new file mode 100644
index 0000000000000000000000000000000000000000..e63b7b5041ece7905306876c2c6f9f2f95964951
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cset.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8_1m_main_ok } */
+/* { dg-options "-O2 -march=armv8.1-m.main" } */
+
+int
+test_cset32_condasn1(int w0, int w1)
+{
+ int w2;
+
+ /* { dg-final { scan-assembler "cset\tr\[0-9\]*.*eq" } } */
+ w2 = (w0 == w1) ? 1 : 0;
+ return w2;
+}
+
+int
+test_cset32_condasn2(int w0, int w1)
+{
+ int w2;
+
+ /* { dg-final { scan-assembler "cset\tr\[0-9\]*.*ne" } } */
+ w2 = (w0 == w1) ? 0 : 1;
+ return w2;
+}
diff --git a/gcc/testsuite/gcc.target/arm/csetm.c b/gcc/testsuite/gcc.target/arm/csetm.c
new file mode 100644
index 0000000000000000000000000000000000000000..c04520c2f6514850b299208a477893ee40a02aca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/csetm.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8_1m_main_ok } */
+/* { dg-options "-O2 -march=armv8.1-m.main" } */
+
+int
+test_csetm32_condasn1(int w0, int w1)
+{
+ int w2;
+
+ /* { dg-final { scan-assembler "csetm\tr\[0-9\]*.*eq" } } */
+ w2 = (w0 == w1) ? -1 : 0;
+ return w2;
+}
+
+int
+test_csetm32_condasn2(int w0, int w1)
+{
+ int w2;
+
+ /* { dg-final { scan-assembler "csetm\tr\[0-9\]*.*ne" } } */
+ w2 = (w0 == w1) ? 0 : -1;
+ return w2;
+}
diff --git a/gcc/testsuite/gcc.target/arm/csinc-2.c b/gcc/testsuite/gcc.target/arm/csinc-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..45e3815eb0c4f2d252e7f0326728dbd0f7debd86
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/csinc-2.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8_1m_main_ok } */
+/* { dg-options "-O2 -march=armv8.1-m.main" } */
+
+int
+test_csinc32_condasn1(int w0, int w1, int w2)
+{
+ int w3;
+
+ /* { dg-final { scan-assembler "csinc\tr\[0-9\]*.*zr.*eq" } } */
+ w3 = (w0 == w1) ? w2 : 1;
+ return w3;
+}
+
+int
+test_csinc32_condasn2(int w0, int w1, int w2)
+{
+ int w3;
+
+ /* { dg-final { scan-assembler "csinc\tr\[0-9\]*.*zr.*ne" } } */
+ w3 = (w0 == w1) ? 1 : w2;
+ return w3;
+}
diff --git a/gcc/testsuite/gcc.target/arm/csinv-2.c b/gcc/testsuite/gcc.target/arm/csinv-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..d55de1b5a3342128cfcb25f48361541d0ae38c06
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/csinv-2.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8_1m_main_ok } */
+/* { dg-options "-O2 -march=armv8.1-m.main" } */
+
+int
+test_csinv32_condasn1(int w0, int w1, int w2)
+{
+ int w3;
+
+ /* { dg-final { scan-assembler "csinv\tr\[0-9\]*.*zr.*eq" } } */
+ w3 = (w0 == w1) ? w2 : -1;
+ return w3;
+}
+
+int
+test_csinv32_condasn2(int w0, int w1, int w2)
+{
+ int w3;
+
+ /* { dg-final { scan-assembler "csinv\tr\[0-9\]*.*zr.*ne" } } */
+ w3 = (w0 == w1) ? -1 : w2;
+ return w3;
+}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: rb13325.patch
Type: application/octet-stream
Size: 7931 bytes
Desc: rb13325.patch
URL: <https://gcc.gnu.org/pipermail/gcc-patches/attachments/20200916/349fb8db/attachment-0001.obj>
More information about the Gcc-patches mailing list