This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH][AArch64] Improve csinc/csneg/csinv opportunities on immediates
- From: Kyrill Tkachov <kyrylo dot tkachov at arm dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: James Greenhalgh <james dot greenhalgh at arm dot com>, Marcus Shawcroft <marcus dot shawcroft at arm dot com>, Richard Earnshaw <Richard dot Earnshaw at arm dot com>
- Date: Fri, 10 Jul 2015 09:34:32 +0100
- Subject: [PATCH][AArch64] Improve csinc/csneg/csinv opportunities on immediates
- Authentication-results: sourceware.org; auth=none
Hi all,
Currently when evaluating expressions like (a ? 24 : 25) we will move 24 and 25 into
registers and perform a csel on them. This misses the opportunity to instead move just 24
into a register and then perform a csinc, saving us an instruction and a register use.
Similarly for csneg and csinv.
This patch implements that idea by allowing such pairs of immediates in *cmov<mode>_insn
and adding an early splitter that performs the necessary transformation.
The testcase included in the patch demonstrates the kind of opportunities that are now picked up.
With this patch I see about 9.6% more csinc instructions being generated for SPEC2006
and the generated code looks objectively better (i.e. fewer mov-immediates and slightly
lower register pressure).
Bootstrapped and tested on aarch64.
Ok for trunk?
Thanks,
Kyrill
2015-07-10 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/aarch64/aarch64.md (*cmov<mode>_insn): Move stricter
check for operands 3 and 4 to pattern predicate. Allow immediates
that can be expressed as csinc/csneg/csinv. New define_split.
(*csinv3<mode>_insn): Rename to...
(csinv3<mode>_insn): ... This.
* config/aarch64/aarch64.h (AARCH64_IMMS_OK_FOR_CSNEG): New macro.
(AARCH64_IMMS_OK_FOR_CSINC): Likewise.
(AARCH64_IMMS_OK_FOR_CSINV): Likewise.
* config/aarch64/aarch64.c (aarch64_imms_ok_for_cond_op_1):
New function.
(aarch64_imms_ok_for_cond_op): Likewise.
* config/aarch64/aarch64-protos.h (aarch64_imms_ok_for_cond_op_1):
Declare prototype.
(aarch64_imms_ok_for_cond_op): Likewise.
2015-07-10 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* gcc.target/aarch64/cond-op-imm_1.c: New test.
commit eed5af149229609215327b62b7b3b4018adc6f3f
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Wed Jul 8 10:22:20 2015 +0100
[AArch64] Improve csinc/csneg/csinv opportunities on immediates
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 4fe437f..6e3781e 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -254,6 +254,8 @@ bool aarch64_float_const_zero_rtx_p (rtx);
bool aarch64_function_arg_regno_p (unsigned);
bool aarch64_gen_movmemqi (rtx *);
bool aarch64_gimple_fold_builtin (gimple_stmt_iterator *);
+bool aarch64_imms_ok_for_cond_op_1 (rtx, rtx);
+bool aarch64_imms_ok_for_cond_op (rtx, rtx, machine_mode);
bool aarch64_is_extend_from_extract (machine_mode, rtx, rtx);
bool aarch64_is_long_call_p (rtx);
bool aarch64_label_mentioned_p (rtx);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 0d81921..8babefb 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3268,6 +3268,36 @@ aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
return aarch64_bitmask_imm (val, mode);
}
+/* Helper for aarch64_imms_ok_for_cond_op. */
+
+bool
+aarch64_imms_ok_for_cond_op_1 (rtx a, rtx b)
+{
+ return AARCH64_IMMS_OK_FOR_CSNEG (a, b)
+ || AARCH64_IMMS_OK_FOR_CSINC (a, b)
+ || AARCH64_IMMS_OK_FOR_CSINV (a, b);
+}
+
+/* Return true if A and B are CONST_INT rtxes that can appear in
+ the two arms of an IF_THEN_ELSE used for a CSINC, CSNEG or CSINV
+ operation in mode MODE. This is used in the splitter below
+ *cmov<mode>_insn in aarch64.md. */
+
+bool
+aarch64_imms_ok_for_cond_op (rtx a, rtx b, machine_mode mode)
+{
+ if (!CONST_INT_P (a) || !CONST_INT_P (b))
+ return false;
+
+ /* No need to do smart splitting with constant 0. We can just do
+ normal csinc, csneg, csinv on {w,x}zr. */
+ if (a == const0_rtx || b == const0_rtx)
+ return false;
+
+ return aarch64_imms_ok_for_cond_op_1 (a, b)
+ || aarch64_imms_ok_for_cond_op_1 (b, a);
+}
+
static bool
aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 7c31376..e7aecd1 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -678,6 +678,15 @@ do { \
/* Maximum bytes moved by a single instruction (load/store pair). */
#define MOVE_MAX (UNITS_PER_WORD * 2)
+/* Check if CONST_INTs A and B can be used as two arguments to CSNEG. */
+#define AARCH64_IMMS_OK_FOR_CSNEG(A, B) (INTVAL (A) == -INTVAL (B))
+
+/* Check if CONST_INTs A and B can be used as two arguments to CSINC. */
+#define AARCH64_IMMS_OK_FOR_CSINC(A, B) (INTVAL (A) == (INTVAL (B) + 1))
+
+/* Check if CONST_INTs A and B can be used as two arguments to CSINV. */
+#define AARCH64_IMMS_OK_FOR_CSINV(A, B) (INTVAL (A) == ~INTVAL (B))
+
/* The base cost overhead of a memcpy call, for MOVE_RATIO and friends. */
#define AARCH64_CALL_RATIO 8
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 1e343fa..358f89b 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -2848,10 +2848,14 @@ (define_insn "*cmov<mode>_insn"
(if_then_else:ALLI
(match_operator 1 "aarch64_comparison_operator"
[(match_operand 2 "cc_register" "") (const_int 0)])
- (match_operand:ALLI 3 "aarch64_reg_zero_or_m1_or_1" "rZ,rZ,UsM,rZ,Ui1,UsM,Ui1")
- (match_operand:ALLI 4 "aarch64_reg_zero_or_m1_or_1" "rZ,UsM,rZ,Ui1,rZ,UsM,Ui1")))]
- "!((operands[3] == const1_rtx && operands[4] == constm1_rtx)
- || (operands[3] == constm1_rtx && operands[4] == const1_rtx))"
+ (match_operand:ALLI 3 "aarch64_reg_or_imm" "rZ,rZ,UsM,rZ,Ui1,UsM,Ui1")
+ (match_operand:ALLI 4 "aarch64_reg_or_imm" "rZ,UsM,rZ,Ui1,rZ,UsM,Ui1")))]
+ "(aarch64_reg_zero_or_m1_or_1 (operands[3], <MODE>mode)
+ && aarch64_reg_zero_or_m1_or_1 (operands[4], <MODE>mode)
+ && !((operands[3] == const1_rtx && operands[4] == constm1_rtx)
+ || (operands[3] == constm1_rtx && operands[4] == const1_rtx)))
+ || (!reload_completed && (<MODE>mode == SImode || <MODE>mode == DImode)
+ && aarch64_imms_ok_for_cond_op (operands[3], operands[4], <MODE>mode))"
;; Final two alternatives should be unreachable, but included for completeness
"@
csel\\t%<w>0, %<w>3, %<w>4, %m1
@@ -2864,6 +2868,55 @@ (define_insn "*cmov<mode>_insn"
[(set_attr "type" "csel")]
)
+;; This interacts with the *cmov<mode>_insn pattern above and is used to
+;; optimize cases like:
+;; mov xa, #imm1
+;; mov xb, #imm2
+;; csel xc, xa, xb, <cond>
+;; into:
+;; mov xa, #imm1
+;; cs{inc,inv,neg} xc, xa, xa, <cond>
+;; when imm1 and imm2 have that relationship.
+(define_split
+ [(set (match_operand:GPI 0 "register_operand" "")
+ (if_then_else:GPI
+ (match_operator 1 "aarch64_comparison_operator"
+ [(match_operand 2 "cc_register" "") (const_int 0)])
+ (match_operand:GPI 3 "const_int_operand" "")
+ (match_operand:GPI 4 "const_int_operand" "")))]
+ "!reload_completed
+ && aarch64_imms_ok_for_cond_op (operands[3], operands[4], <MODE>mode)"
+ [(const_int 0)]
+ {
+
+ bool swap_p = !aarch64_imms_ok_for_cond_op_1 (operands[3],
+ operands[4]);
+
+ if (swap_p)
+ std::swap (operands[3], operands[4]);
+
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ emit_move_insn (tmp, operands[4]);
+
+ enum rtx_code code = GET_CODE (operands[1]);
+ if (swap_p)
+ code = REVERSE_CONDITION (code, GET_MODE (operands[2]));
+
+ rtx comp = gen_rtx_fmt_ee (code, VOIDmode, operands[2], const0_rtx);
+
+ if (AARCH64_IMMS_OK_FOR_CSINC (operands[3], operands[4]))
+ emit_insn (gen_csinc3<mode>_insn (operands[0], comp, tmp, tmp));
+ else if (AARCH64_IMMS_OK_FOR_CSNEG (operands[3], operands[4]))
+ emit_insn (gen_csneg3<mode>_insn (operands[0], comp, tmp, tmp));
+ else if (AARCH64_IMMS_OK_FOR_CSINV (operands[3], operands[4]))
+ emit_insn (gen_csinv3<mode>_insn (operands[0], comp, tmp, tmp));
+ else
+ gcc_unreachable ();
+
+ DONE;
+ }
+)
+
;; zero_extend version of above
(define_insn "*cmovsi_insn_uxtw"
[(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r,r,r")
@@ -2998,7 +3051,7 @@ (define_insn "csinc3<mode>_insn"
[(set_attr "type" "csel")]
)
-(define_insn "*csinv3<mode>_insn"
+(define_insn "csinv3<mode>_insn"
[(set (match_operand:GPI 0 "register_operand" "=r")
(if_then_else:GPI
(match_operand 1 "aarch64_comparison_operation" "")
diff --git a/gcc/testsuite/gcc.target/aarch64/cond-op-imm_1.c b/gcc/testsuite/gcc.target/aarch64/cond-op-imm_1.c
new file mode 100644
index 0000000..1cd1494
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/cond-op-imm_1.c
@@ -0,0 +1,138 @@
+/* { dg-do run } */
+/* { dg-options "-save-temps -O2 -fno-inline" } */
+
+extern void abort (void);
+
+#define N 30
+#define M 25089992
+
+int
+fooincsi (int a)
+{
+ return a ? N : (N + 1);
+}
+
+/* { dg-final { scan-assembler "csinc\tw\[0-9\]*.*" } } */
+
+int
+foonegsi (int a)
+{
+ return a ? N : -N;
+}
+
+/* { dg-final { scan-assembler "csneg\tw\[0-9\]*.*" } } */
+
+
+int
+fooinvsi (int a)
+{
+ return a ? N : ~N;
+}
+
+/* { dg-final { scan-assembler "csinv\tw\[0-9\]*.*" } } */
+
+long
+fooincdi (long a)
+{
+ return a ? N : (N + 1);
+}
+
+long
+largefooinc (long a)
+{
+ return a ? M : (M + 1);
+}
+
+/* { dg-final { scan-assembler "csinc\tx\[0-9\]*.*" } } */
+
+long
+foonegdi (long a)
+{
+ return a ? N : -N;
+}
+
+long
+largefooneg (long a)
+{
+ return a ? M : -M;
+}
+
+/* { dg-final { scan-assembler "csneg\tx\[0-9\]*.*" } } */
+
+long
+fooinvdi (long a)
+{
+ return a ? N : ~N;
+}
+
+long
+largefooinv (long a)
+{
+ return a ? M : ~M;
+}
+
+/* { dg-final { scan-assembler "csinv\tx\[0-9\]*.*" } } */
+
+
+int
+main (void)
+{
+ if (fooincsi (1) != N)
+ abort ();
+
+ if (fooincsi (0) != (N + 1))
+ abort ();
+
+ if (foonegsi (1) != N)
+ abort ();
+
+ if (foonegsi (0) != -N)
+ abort ();
+
+ if (fooinvsi (1) != N)
+ abort ();
+
+ if (fooinvsi (0) != ~N)
+ abort ();
+
+ if (fooincdi (1) != N)
+ abort ();
+
+ if (fooincdi (0) != (N + 1))
+ abort ();
+
+ if (foonegdi (1) != N)
+ abort ();
+
+ if (foonegdi (0) != -N)
+ abort ();
+
+ if (fooinvdi (1) != N)
+ abort ();
+
+ if (fooinvdi (0) != ~N)
+ abort ();
+
+ if (largefooinc (0) != (M + 1))
+ abort ();
+
+ if (largefooinv (0) != ~M)
+ abort ();
+
+ if (largefooneg (0) != -M)
+ abort ();
+
+ if (largefooinc (1) != M)
+ abort ();
+
+ if (largefooinv (1) != M)
+ abort ();
+
+ if (largefooneg (1) != M)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-not "csel\tx\[0-9\]*.*" } } */
+/* { dg-final { scan-assembler-not "csel\tw\[0-9\]*.*" } } */