This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[GCC][PATCH v2][Aarch64] Exploiting BFXIL when OR-ing two AND-operations with appropriate bitmasks
- From: Sam Tebbs <sam dot tebbs at arm dot com>
- To: "gcc-patches at gcc dot gnu dot org" <gcc-patches at gcc dot gnu dot org>
- Cc: James Greenhalgh <james dot greenhalgh at arm dot com>, Marcus Shawcroft <marcus dot shawcroft at arm dot com>, Richard Earnshaw <richard dot earnshaw at arm dot com>, nd <nd at arm dot com>
- Date: Wed, 1 Aug 2018 16:07:23 +0100
- Subject: [GCC][PATCH v2][Aarch64] Exploiting BFXIL when OR-ing two AND-operations with appropriate bitmasks
Hi all,
This patch adds an optimisation that exploits the AArch64 BFXIL
instruction when or-ing the result of two bitwise and operations
with non-overlapping bitmasks
(e.g. (a & 0xFFFF0000) | (b & 0x0000FFFF)).
Example:
unsigned long long combine(unsigned long long a, unsigned long
long b) {
return (a & 0xffffffff00000000ll) | (b & 0x00000000ffffffffll);
}
void read(unsigned long long a, unsigned long long b, unsigned
long long *c) {
*c = combine(a, b);
}
When compiled with -O2, read would result in:
read:
and x5, x1, #0xffffffff
and x4, x0, #0xffffffff00000000
orr x4, x4, x5
str x4, [x2]
ret
But with this patch results in:
read:
mov x4, x0
bfxil x4, x1, 0, 32
str x4, [x2]
ret
Bootstrapped and regtested on aarch64-none-linux-gnu and
aarch64-none-elf with no regressions.
gcc/
2018-08-01 Sam Tebbs<sam.tebbs@arm.com>
PR target/85628
* config/aarch64/aarch64.md (*aarch64_bfxil):
Define.
* config/aarch64/constraints.md (Ulc): Define
* config/aarch64/aarch64-protos.h
(aarch64_is_left_consecutive): Define.
* config/aarch64/aarch64.c (aarch64_is_left_consecutive):
New function.
gcc/testsuite
2018-08-01 Sam Tebbs<sam.tebbs@arm.com>
PR target/85628
* gcc.target/aarch64/combine_bfxil.c: New file.
* gcc.target/aarch64/combine_bfxil_2.c: New file.
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index af5db9c595385f7586692258f750b6aceb3ed9c8..01d9e1bd634572fcfa60208ba4dc541805af5ccd 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -574,4 +574,6 @@ rtl_opt_pass *make_pass_fma_steering (gcc::context *ctxt);
poly_uint64 aarch64_regmode_natural_size (machine_mode);
+bool aarch64_is_left_consecutive (HOST_WIDE_INT);
+
#endif /* GCC_AARCH64_PROTOS_H */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index fa01475aa9ee579b6a3b2526295b622157120660..3cfa51b15af3e241672f1383cf881c12a44494a5 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1454,6 +1454,14 @@ aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned,
return SImode;
}
+/* Implement IS_LEFT_CONSECUTIVE. Check if I's bits are consecutive
+ ones from the MSB. */
+bool
+aarch64_is_left_consecutive (HOST_WIDE_INT i)
+{
+ return (i | (i - 1)) == HOST_WIDE_INT_M1;
+}
+
/* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
that strcpy from constants will be faster. */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index e9c16f9697b766a5c56b6269a83b7276654c5668..ff2db4af38e16630daeada79afc604c4696abf82 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5305,6 +5305,31 @@
[(set_attr "type" "rev")]
)
+(define_insn "*aarch64_bfxil<mode>"
+ [(set (match_operand:GPI 0 "register_operand" "=r,r")
+ (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "r,0")
+ (match_operand:GPI 3 "const_int_operand" "n, Ulc"))
+ (and:GPI (match_operand:GPI 2 "register_operand" "0,r")
+ (match_operand:GPI 4 "const_int_operand" "Ulc, n"))))]
+ "(INTVAL (operands[3]) == ~INTVAL (operands[4]))
+ && (aarch64_is_left_consecutive (INTVAL (operands[3]))
+ || aarch64_is_left_consecutive (INTVAL (operands[4])))"
+ {
+ switch (which_alternative)
+ {
+ case 0:
+ operands[3] = GEN_INT (ctz_hwi (~INTVAL (operands[3])));
+ return "bfxil\\t%<w>0, %<w>1, 0, %3";
+ case 1:
+ operands[3] = GEN_INT (ctz_hwi (~INTVAL (operands[4])));
+ return "bfxil\\t%<w>0, %<w>2, 0, %3";
+ default:
+ gcc_unreachable ();
+ }
+ }
+ [(set_attr "type" "bfm")]
+)
+
;; There are no canonicalisation rules for the position of the lshiftrt, ashift
;; operations within an IOR/AND RTX, therefore we have two patterns matching
;; each valid permutation.
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 72cacdabdac52dcb40b480f7a5bfbf4997c742d8..5bae0b70bbd11013a9fb27ec19cf7467eb20135f 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -172,6 +172,13 @@
A constraint that matches the immediate constant -1."
(match_test "op == constm1_rtx"))
+(define_constraint "Ulc"
+ "@internal
+ A constraint that matches a constant integer whose bits are consecutive ones
+ from the MSB."
+ (and (match_code "const_int")
+ (match_test "aarch64_is_left_consecutive (ival)")))
+
(define_constraint "Usv"
"@internal
A constraint that matches a VG-based constant that can be loaded by
diff --git a/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c b/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c
new file mode 100644
index 0000000000000000000000000000000000000000..3bc1dd5b216477efe7494dbcdac7a5bf465af218
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c
@@ -0,0 +1,98 @@
+/* { dg-do run } */
+/* { dg-options "-O2 --save-temps" } */
+
+extern void abort(void);
+
+unsigned long long
+combine_balanced (unsigned long long a, unsigned long long b)
+{
+ return (a & 0xffffffff00000000ll) | (b & 0x00000000ffffffffll);
+}
+
+unsigned long long
+combine_minimal (unsigned long long a, unsigned long long b)
+{
+ return (a & 0xfffffffffffffffe) | (b & 0x0000000000000001);
+}
+
+unsigned long long
+combine_unbalanced (unsigned long long a, unsigned long long b)
+{
+ return (a & 0xffffffffff000000ll) | (b & 0x0000000000ffffffll);
+}
+
+unsigned int
+combine_balanced_int (unsigned int a, unsigned int b)
+{
+ return (a & 0xffff0000ll) | (b & 0x0000ffffll);
+}
+
+unsigned int
+combine_unbalanced_int (unsigned int a, unsigned int b)
+{
+ return (a & 0xffffff00ll) | (b & 0x000000ffll);
+}
+
+__attribute__ ((noinline)) void
+foo (unsigned long long a, unsigned long long b, unsigned long long *c,
+ unsigned long long *d)
+{
+ *c = combine_minimal(a, b);
+ *d = combine_minimal(b, a);
+}
+
+__attribute__ ((noinline)) void
+foo2 (unsigned long long a, unsigned long long b, unsigned long long *c,
+ unsigned long long *d)
+{
+ *c = combine_balanced (a, b);
+ *d = combine_balanced (b, a);
+}
+
+__attribute__ ((noinline)) void
+foo3 (unsigned long long a, unsigned long long b, unsigned long long *c,
+ unsigned long long *d)
+{
+ *c = combine_unbalanced (a, b);
+ *d = combine_unbalanced (b, a);
+}
+
+void
+foo4 (unsigned int a, unsigned int b, unsigned int *c, unsigned int *d)
+{
+ *c = combine_balanced_int(a, b);
+ *d = combine_balanced_int(b, a);
+}
+
+void
+foo5 (unsigned int a, unsigned int b, unsigned int *c, unsigned int *d)
+{
+ *c = combine_unbalanced_int(a, b);
+ *d = combine_unbalanced_int(b, a);
+}
+
+int
+main(void)
+{
+ unsigned long long a = 0x0123456789ABCDEF, b = 0xFEDCBA9876543210, c, d;
+ foo3(a, b, &c, &d);
+ if(c != 0x0123456789543210) abort();
+ if(d != 0xfedcba9876abcdef) abort();
+ foo2(a, b, &c, &d);
+ if(c != 0x0123456776543210) abort();
+ if(d != 0xfedcba9889abcdef) abort();
+ foo(a, b, &c, &d);
+ if(c != 0x0123456789abcdee) abort();
+ if(d != 0xfedcba9876543211) abort();
+
+ unsigned int a2 = 0x01234567, b2 = 0xFEDCBA98, c2, d2;
+ foo4(a2, b2, &c2, &d2);
+ if(c2 != 0x0123ba98) abort();
+ if(d2 != 0xfedc4567) abort();
+ foo5(a2, b2, &c2, &d2);
+ if(c2 != 0x01234598) abort();
+ if(d2 != 0xfedcba67) abort();
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "bfxil\\t" 10 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/combine_bfxil_2.c b/gcc/testsuite/gcc.target/aarch64/combine_bfxil_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..0fc140443bc67bcf12b93d72b7970e095620021e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/combine_bfxil_2.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned long long
+combine_non_consecutive (unsigned long long a, unsigned long long b)
+{
+ return (a & 0xfffffff200f00000ll) | (b & 0x00001000ffffffffll);
+}
+
+void
+foo4 (unsigned long long a, unsigned long long b, unsigned long long *c,
+ unsigned long long *d) {
+ /* { dg-final { scan-assembler-not "bfxil\\t" } } */
+ *c = combine_non_consecutive (a, b);
+ *d = combine_non_consecutive (b, a);
+}