[AArch64] Add combine pattern to fuse AESE/AESMC instructions
author    Kyrylo Tkachov <kyrylo.tkachov@arm.com>
          Mon, 14 May 2018 16:29:13 +0000 (16:29 +0000)
committer Kyrylo Tkachov <ktkachov@gcc.gnu.org>
          Mon, 14 May 2018 16:29:13 +0000 (16:29 +0000)
When the AESE, AESD and AESMC, AESIMC instructions are generated through the appropriate arm_neon.h intrinsics
we really want to keep them together when the AESE feeds into an AESMC (or the AESD into an AESIMC) and fusion
is supported by the target CPU.
We have macro-fusion hooks and scheduling model forwarding paths defined to facilitate that,
but they are not always enough.

This patch adds another mechanism for doing that.
When we can detect during combine that the required dependency exists (AESE -> AESMC, AESD -> AESIMC)
we keep the two instructions together with a combine pattern throughout the rest of compilation.
We won't ever want to split them.
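
As an illustration (not part of the patch), a minimal sketch of the kind of
source that produces the fused pair; the function name here is hypothetical:

#include <arm_neon.h>

/* One AESE feeding one AESMC through the arm_neon.h intrinsics.  With this
   patch, combine merges the dependent pair into the single
   *aarch64_crypto_aese_fused insn so later passes cannot separate them.  */
uint8x16_t
aes_round (uint8x16_t state, uint8x16_t key)
{
  uint8x16_t t = vaeseq_u8 (state, key);  /* AESE */
  return vaesmcq_u8 (t);                  /* AESMC consumes the AESE result */
}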

The testcases generate 4 AESE(D) instructions in a block followed by 4 AES(I)MC instructions that
consume the corresponding results, with unrelated computations in between so that the
AESE and AESMC instructions are not trivially back-to-back, thus exercising the compiler's ability
to bring them together.

With this patch all 4 pairs are fused, whereas before a couple of fusions were missed due to the intervening
arithmetic and memory instructions.
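
The decryption direction is analogous (again an illustrative sketch with a
hypothetical function name).  Compiling either sketch with the options used by
the tests below (-O3 -mcpu=cortex-a72+crypto -dp) annotates the assembly with
insn pattern names, so the fused pairs show up as *aarch64_crypto_aese_fused
and *aarch64_crypto_aesd_fused:

#include <arm_neon.h>

/* One AESD feeding one AESIMC; combine merges the dependent pair into the
   single *aarch64_crypto_aesd_fused insn.  */
uint8x16_t
aes_inv_round (uint8x16_t state, uint8x16_t key)
{
  uint8x16_t t = vaesdq_u8 (state, key);  /* AESD */
  return vaesimcq_u8 (t);                 /* AESIMC consumes the AESD result */
}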

* config/aarch64/aarch64-simd.md (*aarch64_crypto_aese_fused):
New pattern.
(*aarch64_crypto_aesd_fused): Likewise.

* gcc.target/aarch64/crypto-fuse-1.c: New test.
* gcc.target/aarch64/crypto-fuse-2.c: Likewise.

From-SVN: r260234

gcc/ChangeLog
gcc/config/aarch64/aarch64-simd.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/crypto-fuse-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/crypto-fuse-2.c [new file with mode: 0644]

index 553c21e1370592691785ede3c602d4d17903339c..0ad13682cfd639ce67e857e0f5ab706cb8901c55 100644 (file)
@@ -1,3 +1,9 @@
+2018-05-14  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+       * config/aarch64/aarch64-simd.md (*aarch64_crypto_aese_fused):
+       New pattern.
+       (*aarch64_crypto_aesd_fused): Likewise.
+
 2018-05-14  Wilco Dijkstra  <wdijkstr@arm.com>
 
        * config/aarch64/aarch64.md (mov<mode>): Remove '*' in alternatives.
index 1154fc3d58deaa33413ea3050ff7feec37f092a6..9cfd4d30515a0162e071d4a934ef547e9beed8b6 100644 (file)
       (const_string "yes")])]
 )
 
+;; When AESE/AESMC fusion is enabled we really want to keep the two together
+;; and enforce the register dependency without scheduling or register
+;; allocation messing up the order or introducing moves in between.
+;; Mash the two together during combine.
+
+(define_insn "*aarch64_crypto_aese_fused"
+  [(set (match_operand:V16QI 0 "register_operand" "=&w")
+       (unspec:V16QI
+         [(unspec:V16QI
+           [(match_operand:V16QI 1 "register_operand" "0")
+            (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
+         ] UNSPEC_AESMC))]
+  "TARGET_SIMD && TARGET_AES
+   && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
+  "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
+  [(set_attr "type" "crypto_aese")
+   (set_attr "length" "8")]
+)
+
+;; When AESD/AESIMC fusion is enabled we really want to keep the two together
+;; and enforce the register dependency without scheduling or register
+;; allocation messing up the order or introducing moves in between.
+;; Mash the two together during combine.
+
+(define_insn "*aarch64_crypto_aesd_fused"
+  [(set (match_operand:V16QI 0 "register_operand" "=&w")
+       (unspec:V16QI
+         [(unspec:V16QI
+           [(match_operand:V16QI 1 "register_operand" "0")
+            (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
+         ] UNSPEC_AESIMC))]
+  "TARGET_SIMD && TARGET_AES
+   && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
+  "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
+  [(set_attr "type" "crypto_aese")
+   (set_attr "length" "8")]
+)
+
 ;; sha1
 
 (define_insn "aarch64_crypto_sha1hsi"
index 259d578eb0a3993c0860c59960ccb6a7f37a30cd..09d97e0364c03f241f7c7efc0749052eee5d0796 100644 (file)
@@ -1,3 +1,8 @@
+2018-05-14  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+       * gcc.target/aarch64/crypto-fuse-1.c: New test.
+       * gcc.target/aarch64/crypto-fuse-2.c: Likewise.
+
 2018-05-14  Wilco Dijkstra  <wdijkstr@arm.com>
 
        * gcc.target/aarch64/vmov_n_1.c: Update test.
diff --git a/gcc/testsuite/gcc.target/aarch64/crypto-fuse-1.c b/gcc/testsuite/gcc.target/aarch64/crypto-fuse-1.c
new file mode 100644 (file)
index 0000000..d8adc89
--- /dev/null
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mcpu=cortex-a72+crypto -dp" } */
+
+#include <arm_neon.h>
+
+#define AESE(r, v, key) (r = vaeseq_u8 ((v), (key)));
+#define AESMC(r, i) (r = vaesmcq_u8 (i))
+
+uint8x16_t dummy;
+uint8x16_t a;
+uint8x16_t b;
+uint8x16_t c;
+uint8x16_t d;
+uint8x16_t e;
+
+void
+foo (void)
+{
+  AESE (a, a, e);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESE (b, b, e);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESE (c, c, e);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESE (d, d, e);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+
+  AESMC (a, a);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESMC (b, b);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESMC (c, c);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESMC (d, d);
+}
+
+/* { dg-final { scan-assembler-times "crypto_aese_fused" 4 } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/crypto-fuse-2.c b/gcc/testsuite/gcc.target/aarch64/crypto-fuse-2.c
new file mode 100644 (file)
index 0000000..b12df2d
--- /dev/null
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mcpu=cortex-a72+crypto -dp" } */
+
+#include <arm_neon.h>
+
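+/* Note: these macros keep the AESE/AESMC names from crypto-fuse-1.c but wrap
+   the decrypt-side vaesdq_u8/vaesimcq_u8 intrinsics.  */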
+#define AESE(r, v, key) (r = vaesdq_u8 ((v), (key)));
+#define AESMC(r, i) (r = vaesimcq_u8 (i))
+
+uint8x16_t dummy;
+uint8x16_t a;
+uint8x16_t b;
+uint8x16_t c;
+uint8x16_t d;
+uint8x16_t e;
+
+void
+foo (void)
+{
+  AESE (a, a, e);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESE (b, b, e);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESE (c, c, e);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESE (d, d, e);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+
+  AESMC (a, a);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESMC (b, b);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESMC (c, c);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESMC (d, d);
+}
+
+/* { dg-final { scan-assembler-times "crypto_aesd_fused" 4 } } */
+