This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[GCC][PATCH][Aarch64] Exploiting BFXIL when OR-ing two AND-operations with appropriate bitmasks


Hi all,

This patch adds an optimisation that exploits the AArch64 BFXIL instruction
when or-ing the result of two bitwise and operations with non-overlapping
bitmasks (e.g. (a & 0xFFFF0000) | (b & 0x0000FFFF)).

Example:

unsigned long long combine(unsigned long long a, unsigned long long b) {
  return (a & 0xffffffff00000000ll) | (b & 0x00000000ffffffffll);
}

void read2(unsigned long long a, unsigned long long b, unsigned long long *c,
  unsigned long long *d) {
  *c = combine(a, b); *d = combine(b, a);
}

When compiled with -O2, read2 would result in:

read2:
  and   x5, x1, #0xffffffff
  and   x4, x0, #0xffffffff00000000
  orr   x4, x4, x5
  and   x1, x1, #0xffffffff00000000
  and   x0, x0, #0xffffffff
  str   x4, [x2]
  orr   x0, x0, x1
  str   x0, [x3]
  ret

But with this patch results in:

read2:
  mov   x4, x1
  bfxil x4, x0, 0, 32
  str   x4, [x2]
  bfxil x0, x1, 0, 32
  str   x0, [x3]
  ret
  
Bootstrapped and regtested on aarch64-none-linux-gnu and aarch64-none-elf with no regressions.


gcc/
2018-07-11  Sam Tebbs  <sam.tebbs@arm.com>

        * config/aarch64/aarch64.md (*aarch64_bfxil, *aarch64_bfxil_alt):
        Define.
        * config/aarch64/aarch64-protos.h (aarch64_is_left_consecutive):
        Define.
        * config/aarch64/aarch64.c (aarch64_is_left_consecutive): New function.

gcc/testsuite
2018-07-11  Sam Tebbs  <sam.tebbs@arm.com>

        * gcc.target/aarch64/combine_bfxil.c: New file.
        * gcc.target/aarch64/combine_bfxil_2.c: New file.


   
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 514ddc4..b025cd6 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -558,4 +558,6 @@ rtl_opt_pass *make_pass_fma_steering (gcc::context *ctxt);
 
 poly_uint64 aarch64_regmode_natural_size (machine_mode);
 
+bool aarch64_is_left_consecutive (HOST_WIDE_INT);
+
 #endif /* GCC_AARCH64_PROTOS_H */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index d75d45f..884958b 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1439,6 +1439,14 @@ aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned,
     return SImode;
 }
 
+/* Implement IS_LEFT_CONSECUTIVE.  Check if an integer's bits are consecutive
+   ones from the MSB.  */
+bool
+aarch64_is_left_consecutive (HOST_WIDE_INT i)
+{
+  return (i | (i - 1)) == HOST_WIDE_INT_M1;
+}
+
 /* Implement TARGET_CONSTANT_ALIGNMENT.  Make strings word-aligned so
    that strcpy from constants will be faster.  */
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index a014a01..383d699 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4844,6 +4844,42 @@
   [(set_attr "type" "rev")]
 )
 
+(define_insn "*aarch64_bfxil"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+    (ior:DI (and:DI (match_operand:DI 1 "register_operand" "r")
+		    (match_operand 3 "const_int_operand"))
+	    (and:DI (match_operand:DI 2 "register_operand" "0")
+		    (match_operand 4 "const_int_operand"))))]
+  "INTVAL (operands[3]) == ~INTVAL (operands[4])
+    && aarch64_is_left_consecutive (INTVAL (operands[3]))"
+  {
+    HOST_WIDE_INT op4 = INTVAL (operands[4]);
+    operands[3] = GEN_INT (64 - ceil_log2 (op4));
+    output_asm_insn ("bfxil\\t%0, %1, 0, %3", operands);
+    return "";
+  }
+  [(set_attr "type" "bfx")]
+)
+
+; An alternate bfxil pattern where the second bitmask is the smallest, and so
+; the first register used is changed instead of the second
+(define_insn "*aarch64_bfxil_alt"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+    (ior:DI (and:DI (match_operand:DI 1 "register_operand" "0")
+		    (match_operand 3 "const_int_operand"))
+	    (and:DI (match_operand:DI 2 "register_operand" "r")
+		    (match_operand 4 "const_int_operand"))))]
+  "INTVAL (operands[3]) == ~INTVAL (operands[4])
+    && aarch64_is_left_consecutive (INTVAL (operands[4]))"
+  {
+    HOST_WIDE_INT op3 = INTVAL (operands[3]);
+    operands[3] = GEN_INT (64 - ceil_log2 (op3));
+    output_asm_insn ("bfxil\\t%0, %2, 0, %3", operands);
+    return "";
+  }
+  [(set_attr "type" "bfx")]
+)
+
 ;; There are no canonicalisation rules for the position of the lshiftrt, ashift
 ;; operations within an IOR/AND RTX, therefore we have two patterns matching
 ;; each valid permutation.
diff --git a/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c b/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c
new file mode 100644
index 0000000..a0c6be4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned long long
+combine_balanced (unsigned long long a, unsigned long long b)
+{
+  return (a & 0xffffffff00000000ll) | (b & 0x00000000ffffffffll);
+}
+
+
+unsigned long long
+combine_unbalanced (unsigned long long a, unsigned long long b)
+{
+  return (a & 0xffffffffff000000ll) | (b & 0x0000000000ffffffll);
+}
+
+void
+foo2 (unsigned long long a, unsigned long long b, unsigned long long *c,
+  unsigned long long *d)
+{
+  *c = combine_balanced(a, b);
+  *d = combine_balanced(b, a);
+}
+
+void
+foo3 (unsigned long long a, unsigned long long b, unsigned long long *c,
+  unsigned long long *d)
+{
+  *c = combine_unbalanced(a, b);
+  *d = combine_unbalanced(b, a);
+}
+
+/* { dg-final { scan-assembler-times "bfxil\\t" 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/combine_bfxil_2.c b/gcc/testsuite/gcc.target/aarch64/combine_bfxil_2.c
new file mode 100644
index 0000000..8237d94
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/combine_bfxil_2.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned long long
+combine_non_consecutive (unsigned long long a, unsigned long long b)
+{
+  return (a & 0xfffffff200f00000ll) | (b & 0x00001000ffffffffll);
+}
+
+void
+foo4 (unsigned long long a, unsigned long long b, unsigned long long *c,
+  unsigned long long *d) {
+  /* { dg-final { scan-assembler-not "bfxil\\t" } } */
+  *c = combine_non_consecutive(a, b);
+  *d = combine_non_consecutive(b, a);
+}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]