This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH][AArch64] Improve csinc/csneg/csinv opportunities on immediates


Hi all,

Currently when evaluating expressions like (a ? 24 : 25) we will move 24 and 25 into
registers and perform a csel on them.  This misses the opportunity to instead move just 24
into a register and then perform a csinc, saving us an instruction and a register use.
Similarly for csneg and csinv.

This patch implements that idea by allowing such pairs of immediates in *cmov<mode>_insn
and adding an early splitter that performs the necessary transformation.

The testcase included in the patch demonstrates the kind of opportunities that are now picked up.

With this patch I see about 9.6% more csinc instructions being generated for SPEC2006
and the generated code looks objectively better (i.e. fewer mov-immediates and slightly
lower register pressure).

Bootstrapped and tested on aarch64.

Ok for trunk?

Thanks,
Kyrill

2015-07-10  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

    * config/aarch64/aarch64.md (*cmov<mode>_insn): Move stricter
    check for operands 3 and 4 to pattern predicate.  Allow immediates
    that can be expressed as csinc/csneg/csinv.  New define_split.
    (*csinv3<mode>_insn): Rename to...
    (csinv3<mode>_insn): ... This.
    * config/aarch64/aarch64.h (AARCH64_IMMS_OK_FOR_CSNEG): New macro.
    (AARCH64_IMMS_OK_FOR_CSINC): Likewise.
    (AARCH64_IMMS_OK_FOR_CSINV): Likewise.
    * config/aarch64/aarch64.c (aarch64_imms_ok_for_cond_op_1):
    New function.
    (aarch64_imms_ok_for_cond_op): Likewise.
    * config/aarch64/aarch64-protos.h (aarch64_imms_ok_for_cond_op_1):
    Declare prototype.
    (aarch64_imms_ok_for_cond_op): Likewise.

2015-07-10  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

    * gcc.target/aarch64/cond-op-imm_1.c: New test.
commit eed5af149229609215327b62b7b3b4018adc6f3f
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Wed Jul 8 10:22:20 2015 +0100

    [AArch64] Improve csinc/csneg/csinv opportunities on immediates

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 4fe437f..6e3781e 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -254,6 +254,8 @@ bool aarch64_float_const_zero_rtx_p (rtx);
 bool aarch64_function_arg_regno_p (unsigned);
 bool aarch64_gen_movmemqi (rtx *);
 bool aarch64_gimple_fold_builtin (gimple_stmt_iterator *);
+bool aarch64_imms_ok_for_cond_op_1 (rtx, rtx);
+bool aarch64_imms_ok_for_cond_op (rtx, rtx, machine_mode);
 bool aarch64_is_extend_from_extract (machine_mode, rtx, rtx);
 bool aarch64_is_long_call_p (rtx);
 bool aarch64_label_mentioned_p (rtx);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 0d81921..8babefb 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3268,6 +3268,36 @@ aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
   return aarch64_bitmask_imm (val, mode);
 }
 
+/* Helper for aarch64_imms_ok_for_cond_op.  */
+
+bool
+aarch64_imms_ok_for_cond_op_1 (rtx a, rtx b)
+{
+  return AARCH64_IMMS_OK_FOR_CSNEG (a, b)
+	 || AARCH64_IMMS_OK_FOR_CSINC (a, b)
+	 || AARCH64_IMMS_OK_FOR_CSINV (a, b);
+}
+
+/* Return true if A and B are CONST_INT rtxes that can appear in
+   the two arms of an IF_THEN_ELSE used for a CSINC, CSNEG or CSINV
+   operation in mode MODE.  This is used in the splitter below
+   *cmov<mode>_insn in aarch64.md.  */
+
+bool
+aarch64_imms_ok_for_cond_op (rtx a, rtx b, machine_mode mode)
+{
+  if (!CONST_INT_P (a) || !CONST_INT_P (b))
+    return false;
+
+  /* No need to do smart splitting with constant 0.  We can just do
+     normal csinc, csneg, csinv on {w,x}zr.  */
+  if (a == const0_rtx || b == const0_rtx)
+    return false;
+
+  return aarch64_imms_ok_for_cond_op_1 (a, b)
+	 || aarch64_imms_ok_for_cond_op_1 (b, a);
+}
+
 static bool
 aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
 {
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 7c31376..e7aecd1 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -678,6 +678,15 @@ do {									     \
 /* Maximum bytes moved by a single instruction (load/store pair).  */
 #define MOVE_MAX (UNITS_PER_WORD * 2)
 
+/* Check if CONST_INTs A and B can be used as two arguments to CSNEG.  */
+#define AARCH64_IMMS_OK_FOR_CSNEG(A, B) (INTVAL (A) == -INTVAL (B))
+
+/* Check if CONST_INTs A and B can be used as two arguments to CSINC.  */
+#define AARCH64_IMMS_OK_FOR_CSINC(A, B) (INTVAL (A) == (INTVAL (B) + 1))
+
+/* Check if CONST_INTs A and B can be used as two arguments to CSINV.  */
+#define AARCH64_IMMS_OK_FOR_CSINV(A, B) (INTVAL (A) == ~INTVAL (B))
+
 /* The base cost overhead of a memcpy call, for MOVE_RATIO and friends.  */
 #define AARCH64_CALL_RATIO 8
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 1e343fa..358f89b 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -2848,10 +2848,14 @@ (define_insn "*cmov<mode>_insn"
 	(if_then_else:ALLI
 	 (match_operator 1 "aarch64_comparison_operator"
 	  [(match_operand 2 "cc_register" "") (const_int 0)])
-	 (match_operand:ALLI 3 "aarch64_reg_zero_or_m1_or_1" "rZ,rZ,UsM,rZ,Ui1,UsM,Ui1")
-	 (match_operand:ALLI 4 "aarch64_reg_zero_or_m1_or_1" "rZ,UsM,rZ,Ui1,rZ,UsM,Ui1")))]
-  "!((operands[3] == const1_rtx && operands[4] == constm1_rtx)
-     || (operands[3] == constm1_rtx && operands[4] == const1_rtx))"
+	 (match_operand:ALLI 3 "aarch64_reg_or_imm" "rZ,rZ,UsM,rZ,Ui1,UsM,Ui1")
+	 (match_operand:ALLI 4 "aarch64_reg_or_imm" "rZ,UsM,rZ,Ui1,rZ,UsM,Ui1")))]
+  "(aarch64_reg_zero_or_m1_or_1 (operands[3], <MODE>mode)
+    && aarch64_reg_zero_or_m1_or_1 (operands[4], <MODE>mode)
+    && !((operands[3] == const1_rtx && operands[4] == constm1_rtx)
+	  || (operands[3] == constm1_rtx && operands[4] == const1_rtx)))
+   || (!reload_completed && (<MODE>mode == SImode || <MODE>mode == DImode)
+	&& aarch64_imms_ok_for_cond_op (operands[3], operands[4], <MODE>mode))"
   ;; Final two alternatives should be unreachable, but included for completeness
   "@
    csel\\t%<w>0, %<w>3, %<w>4, %m1
@@ -2864,6 +2868,55 @@ (define_insn "*cmov<mode>_insn"
   [(set_attr "type" "csel")]
 )
 
+;; This interacts with the *cmov<mode>_insn pattern above and is used to
+;; optimize cases like:
+;; mov xa, #imm1
+;; mov xb, #imm2
+;; csel xc, xa, xb, <cond>
+;; into:
+;; mov xa, #imm1
+;; cs{inc,inv,neg} xc, xa, xa, <cond>
+;; when imm1 and imm2 have that relationship.
+(define_split
+  [(set (match_operand:GPI 0 "register_operand" "")
+	(if_then_else:GPI
+	 (match_operator 1 "aarch64_comparison_operator"
+	  [(match_operand 2 "cc_register" "") (const_int 0)])
+	 (match_operand:GPI 3 "const_int_operand" "")
+	 (match_operand:GPI 4 "const_int_operand" "")))]
+  "!reload_completed
+   && aarch64_imms_ok_for_cond_op (operands[3], operands[4], <MODE>mode)"
+  [(const_int 0)]
+  {
+
+    bool swap_p = !aarch64_imms_ok_for_cond_op_1 (operands[3],
+						   operands[4]);
+
+    if (swap_p)
+      std::swap (operands[3], operands[4]);
+
+    rtx tmp = gen_reg_rtx (<MODE>mode);
+    emit_move_insn (tmp, operands[4]);
+
+    enum rtx_code code = GET_CODE (operands[1]);
+    if (swap_p)
+      code = REVERSE_CONDITION (code, GET_MODE (operands[2]));
+
+    rtx comp = gen_rtx_fmt_ee (code, VOIDmode, operands[2], const0_rtx);
+
+    if (AARCH64_IMMS_OK_FOR_CSINC (operands[3], operands[4]))
+      emit_insn (gen_csinc3<mode>_insn (operands[0], comp, tmp, tmp));
+    else if (AARCH64_IMMS_OK_FOR_CSNEG (operands[3], operands[4]))
+      emit_insn (gen_csneg3<mode>_insn (operands[0], comp, tmp, tmp));
+    else if (AARCH64_IMMS_OK_FOR_CSINV (operands[3], operands[4]))
+      emit_insn (gen_csinv3<mode>_insn (operands[0], comp, tmp, tmp));
+    else
+      gcc_unreachable ();
+
+    DONE;
+  }
+)
+
 ;; zero_extend version of above
 (define_insn "*cmovsi_insn_uxtw"
   [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r,r,r")
@@ -2998,7 +3051,7 @@ (define_insn "csinc3<mode>_insn"
   [(set_attr "type" "csel")]
 )
 
-(define_insn "*csinv3<mode>_insn"
+(define_insn "csinv3<mode>_insn"
   [(set (match_operand:GPI 0 "register_operand" "=r")
         (if_then_else:GPI
 	  (match_operand 1 "aarch64_comparison_operation" "")
diff --git a/gcc/testsuite/gcc.target/aarch64/cond-op-imm_1.c b/gcc/testsuite/gcc.target/aarch64/cond-op-imm_1.c
new file mode 100644
index 0000000..1cd1494
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/cond-op-imm_1.c
@@ -0,0 +1,138 @@
+/* { dg-do run } */
+/* { dg-options "-save-temps -O2 -fno-inline" } */
+
+extern void abort (void);
+
+#define N 30
+#define M 25089992
+
+int
+fooincsi (int a)
+{
+  return a ? N : (N + 1);
+}
+
+/* { dg-final { scan-assembler "csinc\tw\[0-9\]*.*" } } */
+
+int
+foonegsi (int a)
+{
+  return a ? N : -N;
+}
+
+/* { dg-final { scan-assembler "csneg\tw\[0-9\]*.*" } } */
+
+
+int
+fooinvsi (int a)
+{
+  return a ? N : ~N;
+}
+
+/* { dg-final { scan-assembler "csinv\tw\[0-9\]*.*" } } */
+
+long
+fooincdi (long a)
+{
+  return a ? N : (N + 1);
+}
+
+long
+largefooinc (long a)
+{
+  return a ? M : (M + 1);
+}
+
+/* { dg-final { scan-assembler "csinc\tx\[0-9\]*.*" } } */
+
+long
+foonegdi (long a)
+{
+  return a ? N : -N;
+}
+
+long
+largefooneg (long a)
+{
+  return a ? M : -M;
+}
+
+/* { dg-final { scan-assembler "csneg\tx\[0-9\]*.*" } } */
+
+long
+fooinvdi (long a)
+{
+  return a ? N : ~N;
+}
+
+long
+largefooinv (long a)
+{
+  return a ? M : ~M;
+}
+
+/* { dg-final { scan-assembler "csinv\tx\[0-9\]*.*" } } */
+
+
+int
+main (void)
+{
+  if (fooincsi (1) != N)
+    abort ();
+
+  if (fooincsi (0) != (N + 1))
+    abort ();
+
+  if (foonegsi (1) != N)
+    abort ();
+
+  if (foonegsi (0) != -N)
+    abort ();
+
+  if (fooinvsi (1) != N)
+    abort ();
+
+  if (fooinvsi (0) != ~N)
+    abort ();
+
+  if (fooincdi (1) != N)
+    abort ();
+
+  if (fooincdi (0) != (N + 1))
+    abort ();
+
+  if (foonegdi (1) != N)
+    abort ();
+
+  if (foonegdi (0) != -N)
+    abort ();
+
+  if (fooinvdi (1) != N)
+    abort ();
+
+  if (fooinvdi (0) != ~N)
+    abort ();
+
+  if (largefooinc (0) != (M + 1))
+    abort ();
+
+  if (largefooinv (0) != ~M)
+    abort ();
+
+  if (largefooneg (0) != -M)
+    abort ();
+
+  if (largefooinc (1) != M)
+    abort ();
+
+  if (largefooinv (1) != M)
+    abort ();
+
+  if (largefooneg (1) != M)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-not "csel\tx\[0-9\]*.*" } } */
+/* { dg-final { scan-assembler-not "csel\tw\[0-9\]*.*" } } */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]