This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH][ARM][3/3] Expand mod by power of 2
- From: Kyrill Tkachov <kyrylo dot tkachov at arm dot com>
- To: Ramana Radhakrishnan <ramana dot radhakrishnan at foss dot arm dot com>, GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: Richard Earnshaw <Richard dot Earnshaw at arm dot com>, Marcus Shawcroft <Marcus dot Shawcroft at arm dot com>, James Greenhalgh <James dot Greenhalgh at arm dot com>
- Date: Tue, 08 Sep 2015 09:33:41 +0100
- Subject: Re: [PATCH][ARM][3/3] Expand mod by power of 2
- Authentication-results: sourceware.org; auth=none
- References: <55B219AE dot 6010102 at arm dot com> <55ED5864 dot 9080607 at foss dot arm dot com>
On 07/09/15 10:27, Ramana Radhakrishnan wrote:
On 24/07/15 11:55, Kyrill Tkachov wrote:
commit d562629e36ba013b8f77956a74139330d191bc30
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Fri Jul 17 16:30:01 2015 +0100
[ARM][3/3] Expand mod by power of 2
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index e1bc727..6ade07c 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -9556,6 +9556,22 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
case MOD:
case UMOD:
+ /* MOD by a power of 2 can be expanded as:
+ rsbs r1, r0, #0
+ and r0, r0, #(n - 1)
+ and r1, r1, #(n - 1)
+ rsbpl r0, r1, #0. */
+ if (code == MOD
+ && CONST_INT_P (XEXP (x, 1))
+ && exact_log2 (INTVAL (XEXP (x, 1))) > 0
+ && mode == SImode)
+ {
+ *cost += COSTS_N_INSNS (3)
+ + 2 * extra_cost->alu.logical
+ + extra_cost->alu.arith;
+ return true;
+ }
+
*cost = LIBCALL_COST (2);
return false; /* All arguments must be in registers. */
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index f341109..8301648 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -1229,7 +1229,7 @@ (define_peephole2
""
)
-(define_insn "*subsi3_compare0"
+(define_insn "subsi3_compare0"
[(set (reg:CC_NOOV CC_REGNUM)
(compare:CC_NOOV
(minus:SI (match_operand:SI 1 "arm_rhs_operand" "r,r,I")
@@ -2158,7 +2158,7 @@ (define_expand "andsi3"
)
; ??? Check split length for Thumb-2
-(define_insn_and_split "*arm_andsi3_insn"
+(define_insn_and_split "arm_andsi3_insn"
[(set (match_operand:SI 0 "s_register_operand" "=r,l,r,r,r")
(and:SI (match_operand:SI 1 "s_register_operand" "%r,0,r,r,r")
(match_operand:SI 2 "reg_or_int_operand" "I,l,K,r,?n")))]
@@ -11105,6 +11105,78 @@ (define_expand "thumb_legacy_rev"
""
)
This shouldn't be necessary - you are just adding another interface to produce an and insn.
I see what you mean. Ok, I'll drop this.
+;; ARM-specific expansion of signed mod by power of 2
+;; using conditional negate.
+;; For r0 % n where n is a power of 2 produce:
+;; rsbs r1, r0, #0
+;; and r0, r0, #(n - 1)
+;; and r1, r1, #(n - 1)
+;; rsbpl r0, r1, #0
+
+(define_expand "modsi3"
+ [(match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")]
+ "TARGET_32BIT"
+ {
+ HOST_WIDE_INT val = INTVAL (operands[2]);
+
+ if (val <= 0
+ || exact_log2 (INTVAL (operands[2])) <= 0
+ || !const_ok_for_arm (INTVAL (operands[2]) - 1))
+ FAIL;
+
+ rtx mask = GEN_INT (val - 1);
+
+ /* In the special case of x0 % 2 we can do the even shorter:
+ cmp r0, #0
+ and r0, r0, #1
+ rsblt r0, r0, #0. */
+
+ if (val == 2)
+ {
+ rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
+ rtx cond = gen_rtx_LT (SImode, cc_reg, const0_rtx);
+
+ emit_insn (gen_rtx_SET (cc_reg,
+ gen_rtx_COMPARE (CCmode, operands[1], const0_rtx)));
+
+ rtx masked = gen_reg_rtx (SImode);
+ emit_insn (gen_arm_andsi3_insn (masked, operands[1], mask));
Use emit_insn (gen_andsi3 (masked, operands[1], mask) instead and likewise below.
Ok, done that. A side effect of this is that since the andsi3 expander handles
any reg_or_int we can catch more masks this way. Also, due to the way the andsi3 expander
is written, for the mask 255 it will generate a zero_extend instead of an and.
This may or may not be optimal on some cores but perhaps we should look at the andsi3 expander
to make it more robust as a separate task.
+ emit_move_insn (operands[0],
+ gen_rtx_IF_THEN_ELSE (SImode, cond,
+ gen_rtx_NEG (SImode,
+ masked),
+ masked));
+ DONE;
+ }
+
+ rtx neg_op = gen_reg_rtx (SImode);
+ rtx_insn *insn = emit_insn (gen_subsi3_compare0 (neg_op, const0_rtx,
+ operands[1]));
+
+ /* Extract the condition register and mode. */
+ rtx cmp = XVECEXP (PATTERN (insn), 0, 0);
+ rtx cc_reg = SET_DEST (cmp);
+ rtx cond = gen_rtx_GE (SImode, cc_reg, const0_rtx);
+
+ emit_insn (gen_arm_andsi3_insn (operands[0], operands[1], mask));
+
+ rtx masked_neg = gen_reg_rtx (SImode);
+ emit_insn (gen_arm_andsi3_insn (masked_neg, neg_op, mask));
+
+ /* We want a conditional negate here, but emitting COND_EXEC rtxes
+ during expand does not always work. Do an IF_THEN_ELSE instead. */
+ emit_move_insn (operands[0],
+ gen_rtx_IF_THEN_ELSE (SImode, cond,
+ gen_rtx_NEG (SImode, masked_neg),
+ operands[0]));
+
+
+ DONE;
+ }
+)
+
(define_expand "bswapsi2"
[(set (match_operand:SI 0 "s_register_operand" "=r")
(bswap:SI (match_operand:SI 1 "s_register_operand" "r")))]
diff --git a/gcc/testsuite/gcc.target/aarch64/mod_2.c b/gcc/testsuite/gcc.target/aarch64/mod_2.c
new file mode 100644
index 0000000..2645c18
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mod_2.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+
+#include "mod_2.x"
+
+/* { dg-final { scan-assembler "csneg\t\[wx\]\[0-9\]*" } } */
+/* { dg-final { scan-assembler-times "and\t\[wx\]\[0-9\]*" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/mod_2.x b/gcc/testsuite/gcc.target/aarch64/mod_2.x
new file mode 100644
index 0000000..2b079a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mod_2.x
@@ -0,0 +1,5 @@
+int
+f (int x)
+{
+ return x % 2;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/mod_256.c b/gcc/testsuite/gcc.target/aarch64/mod_256.c
new file mode 100644
index 0000000..567332c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mod_256.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+
+#include "mod_256.x"
+
+/* { dg-final { scan-assembler "csneg\t\[wx\]\[0-9\]*" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/mod_256.x b/gcc/testsuite/gcc.target/aarch64/mod_256.x
new file mode 100644
index 0000000..c1de42c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mod_256.x
@@ -0,0 +1,5 @@
+int
+f (int x)
+{
+ return x % 256;
+}
diff --git a/gcc/testsuite/gcc.target/arm/mod_2.c b/gcc/testsuite/gcc.target/arm/mod_2.c
new file mode 100644
index 0000000..93017a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mod_2.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+
+#include "../aarch64/mod_2.x"
+
+/* { dg-final { scan-assembler "rsblt\tr\[0-9\]*" } } */
+/* { dg-final { scan-assembler-times "and\tr\[0-9\].*1" 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/mod_256.c b/gcc/testsuite/gcc.target/arm/mod_256.c
new file mode 100644
index 0000000..92ab05a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mod_256.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+
+#include "../aarch64/mod_256.x"
+
+/* { dg-final { scan-assembler "rsbpl\tr\[0-9\]*" } } */
+/* { dg-final { scan-assembler "and\tr\[0-9\].*255" } } */
OK with those changes if no regressions.
Thanks,
I'm attaching the updated patch.
I'll commit it when the aarch64 one is ok'd as the tests are partly shared with this patch.
Kyrill
2015-09-08 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/arm/arm.md (*subsi3_compare0): Rename to...
(subsi3_compare0): ... This.
(modsi3): New define_expand.
* config/arm/arm.c (arm_new_rtx_costs, MOD case): Handle case
when operand is power of 2.
2015-09-08 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* gcc.target/aarch64/mod_2.x: New file.
* gcc.target/aarch64/mod_256.x: Likewise.
* gcc.target/arm/mod_2.c: New test.
* gcc.target/arm/mod_256.c: Likewise.
* gcc.target/aarch64/mod_2.c: Likewise.
* gcc.target/aarch64/mod_256.c: Likewise.
Ramana
commit 9a205ad46fe96e35b0dcbdbfcd89e2a2933a7cbd
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Fri Jul 17 16:30:01 2015 +0100
[ARM][3/3] Expand mod by power of 2
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index fa4e083..a81114d 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -9580,6 +9580,24 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
return false; /* All arguments must be in registers. */
case MOD:
+ /* MOD by a power of 2 can be expanded as:
+ rsbs r1, r0, #0
+ and r0, r0, #(n - 1)
+ and r1, r1, #(n - 1)
+ rsbpl r0, r1, #0. */
+ if (CONST_INT_P (XEXP (x, 1))
+ && exact_log2 (INTVAL (XEXP (x, 1))) > 0
+ && mode == SImode)
+ {
+ *cost += COSTS_N_INSNS (3);
+
+ if (speed_p)
+ *cost += 2 * extra_cost->alu.logical
+ + extra_cost->alu.arith;
+ return true;
+ }
+
+ /* Fall-through. */
case UMOD:
*cost = LIBCALL_COST (2);
return false; /* All arguments must be in registers. */
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index b6c2047..775ca25 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -1229,7 +1229,7 @@ (define_peephole2
""
)
-(define_insn "*subsi3_compare0"
+(define_insn "subsi3_compare0"
[(set (reg:CC_NOOV CC_REGNUM)
(compare:CC_NOOV
(minus:SI (match_operand:SI 1 "arm_rhs_operand" "r,r,I")
@@ -11142,6 +11142,75 @@ (define_expand "thumb_legacy_rev"
""
)
+;; ARM-specific expansion of signed mod by power of 2
+;; using conditional negate.
+;; For r0 % n where n is a power of 2 produce:
+;; rsbs r1, r0, #0
+;; and r0, r0, #(n - 1)
+;; and r1, r1, #(n - 1)
+;; rsbpl r0, r1, #0
+
+(define_expand "modsi3"
+ [(match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")]
+ "TARGET_32BIT"
+ {
+ HOST_WIDE_INT val = INTVAL (operands[2]);
+
+ if (val <= 0
+ || exact_log2 (val) <= 0)
+ FAIL;
+
+ rtx mask = GEN_INT (val - 1);
+
+ /* In the special case of x0 % 2 we can do the even shorter:
+ cmp r0, #0
+ and r0, r0, #1
+ rsblt r0, r0, #0. */
+
+ if (val == 2)
+ {
+ rtx cc_reg = arm_gen_compare_reg (LT,
+ operands[1], const0_rtx, NULL_RTX);
+ rtx cond = gen_rtx_LT (SImode, cc_reg, const0_rtx);
+ rtx masked = gen_reg_rtx (SImode);
+
+ emit_insn (gen_andsi3 (masked, operands[1], mask));
+ emit_move_insn (operands[0],
+ gen_rtx_IF_THEN_ELSE (SImode, cond,
+ gen_rtx_NEG (SImode,
+ masked),
+ masked));
+ DONE;
+ }
+
+ rtx neg_op = gen_reg_rtx (SImode);
+ rtx_insn *insn = emit_insn (gen_subsi3_compare0 (neg_op, const0_rtx,
+ operands[1]));
+
+ /* Extract the condition register and mode. */
+ rtx cmp = XVECEXP (PATTERN (insn), 0, 0);
+ rtx cc_reg = SET_DEST (cmp);
+ rtx cond = gen_rtx_GE (SImode, cc_reg, const0_rtx);
+
+ emit_insn (gen_andsi3 (operands[0], operands[1], mask));
+
+ rtx masked_neg = gen_reg_rtx (SImode);
+ emit_insn (gen_andsi3 (masked_neg, neg_op, mask));
+
+ /* We want a conditional negate here, but emitting COND_EXEC rtxes
+ during expand does not always work. Do an IF_THEN_ELSE instead. */
+ emit_move_insn (operands[0],
+ gen_rtx_IF_THEN_ELSE (SImode, cond,
+ gen_rtx_NEG (SImode, masked_neg),
+ operands[0]));
+
+
+ DONE;
+ }
+)
+
(define_expand "bswapsi2"
[(set (match_operand:SI 0 "s_register_operand" "=r")
(bswap:SI (match_operand:SI 1 "s_register_operand" "r")))]
diff --git a/gcc/testsuite/gcc.target/aarch64/mod_2.c b/gcc/testsuite/gcc.target/aarch64/mod_2.c
new file mode 100644
index 0000000..2645c18
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mod_2.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+
+#include "mod_2.x"
+
+/* { dg-final { scan-assembler "csneg\t\[wx\]\[0-9\]*" } } */
+/* { dg-final { scan-assembler-times "and\t\[wx\]\[0-9\]*" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/mod_2.x b/gcc/testsuite/gcc.target/aarch64/mod_2.x
new file mode 100644
index 0000000..2b079a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mod_2.x
@@ -0,0 +1,5 @@
+int
+f (int x)
+{
+ return x % 2;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/mod_256.c b/gcc/testsuite/gcc.target/aarch64/mod_256.c
new file mode 100644
index 0000000..567332c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mod_256.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+
+#include "mod_256.x"
+
+/* { dg-final { scan-assembler "csneg\t\[wx\]\[0-9\]*" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/mod_256.x b/gcc/testsuite/gcc.target/aarch64/mod_256.x
new file mode 100644
index 0000000..c1de42c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mod_256.x
@@ -0,0 +1,5 @@
+int
+f (int x)
+{
+ return x % 256;
+}
diff --git a/gcc/testsuite/gcc.target/arm/mod_2.c b/gcc/testsuite/gcc.target/arm/mod_2.c
new file mode 100644
index 0000000..93017a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mod_2.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+
+#include "../aarch64/mod_2.x"
+
+/* { dg-final { scan-assembler "rsblt\tr\[0-9\]*" } } */
+/* { dg-final { scan-assembler-times "and\tr\[0-9\].*1" 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/mod_256.c b/gcc/testsuite/gcc.target/arm/mod_256.c
new file mode 100644
index 0000000..ccb7f3c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mod_256.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+
+#include "../aarch64/mod_256.x"
+
+/* { dg-final { scan-assembler "rsbpl\tr\[0-9\]*" } } */
+