[PATCH] aarch64: gimple fold aes[ed] [PR114522]
Andrew Pinski
quic_apinski@quicinc.com
Wed Feb 5 16:04:53 GMT 2025
Instead of waiting for the combine/RTL optimizations to be fixed here, this fixes
the builtins at the gimple level. It should also give slightly faster compile times,
since we get the simplification earlier on.
Built and tested for aarch64-linux-gnu.
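For illustration, the pattern the new fold targets looks like this at the
source level (a minimal sketch using arm_neon.h intrinsics; the function
names are illustrative, and it needs something like -march=armv8-a+crypto
to build):

  #include <arm_neon.h>

  /* Before the fold: the round key is XORed in by hand and the AESE
     builtin receives a zero key, so gimple sees a BIT_XOR_EXPR feeding
     the builtin call.  */
  uint8x16_t
  aese_manual_xor (uint8x16_t data, uint8x16_t key)
  {
    return vaeseq_u8 (veorq_u8 (data, key), vdupq_n_u8 (0));
  }

  /* After the fold the call is rewritten to the equivalent of this,
     letting the AESE instruction perform the XOR itself.  */
  uint8x16_t
  aese_folded (uint8x16_t data, uint8x16_t key)
  {
    return vaeseq_u8 (data, key);
  }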
gcc/ChangeLog:
	PR target/114522
	* config/aarch64/aarch64-builtins.cc (aarch64_fold_aes_op): New function.
	(aarch64_general_gimple_fold_builtin): Call aarch64_fold_aes_op for
	crypto_aese and crypto_aesd.
Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
---
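A note for reviewers (below the ---, so not part of the commit message):
the fold is valid because AESE computes SubBytes (ShiftRows (data ^ key)),
so a zero key lets the other operand absorb a prior XOR; AESD behaves the
same way with the inverse operations. A standalone sanity check of that
identity (a sketch, not part of the patch; assumes a CPU with the Armv8
Crypto extension and a flag like -march=armv8-a+crypto):

  #include <arm_neon.h>
  #include <assert.h>
  #include <string.h>

  int
  main (void)
  {
    uint8x16_t x = vdupq_n_u8 (0x3a);
    uint8x16_t y = vdupq_n_u8 (0xc5);
    /* AESE XORs its two operands as its first step, so folding the
       explicit XOR into the key operand must not change the result.  */
    uint8x16_t a = vaeseq_u8 (veorq_u8 (x, y), vdupq_n_u8 (0));
    uint8x16_t b = vaeseq_u8 (x, y);
    assert (memcmp (&a, &b, sizeof a) == 0);
    return 0;
  }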
gcc/config/aarch64/aarch64-builtins.cc | 30 ++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index 6d5479c2e44..ab67194575d 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -4722,6 +4722,31 @@ aarch64_fold_combine (gcall *stmt)
   return gimple_build_assign (gimple_call_lhs (stmt), ctor);
 }
 
+/* Fold a call to vaeseq_u8 and vaesdq_u8.
+   That is, `vaeseq_u8 (x ^ y, 0)` gets folded
+   into `vaeseq_u8 (x, y)`.  */
+static gimple *
+aarch64_fold_aes_op (gcall *stmt)
+{
+  tree arg0 = gimple_call_arg (stmt, 0);
+  tree arg1 = gimple_call_arg (stmt, 1);
+  if (integer_zerop (arg0))
+    arg0 = arg1;
+  else if (!integer_zerop (arg1))
+    return nullptr;
+  if (TREE_CODE (arg0) != SSA_NAME)
+    return nullptr;
+  if (!has_single_use (arg0))
+    return nullptr;
+  gimple *s = SSA_NAME_DEF_STMT (arg0);
+  if (!is_gimple_assign (s)
+      || gimple_assign_rhs_code (s) != BIT_XOR_EXPR)
+    return nullptr;
+  gimple_call_set_arg (stmt, 0, gimple_assign_rhs1 (s));
+  gimple_call_set_arg (stmt, 1, gimple_assign_rhs2 (s));
+  return stmt;
+}
+
 /* Fold a call to vld1, given that it loads something of type TYPE.  */
 static gimple *
 aarch64_fold_load (gcall *stmt, tree type)
@@ -4983,6 +5008,11 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
         gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
         break;
 
+      VAR1 (BINOPU, crypto_aese, 0, DEFAULT, v16qi)
+      VAR1 (BINOPU, crypto_aesd, 0, DEFAULT, v16qi)
+        new_stmt = aarch64_fold_aes_op (stmt);
+        break;
+
       /* Lower sqrt builtins to gimple/internal function sqrt.  */
       BUILTIN_VHSDF_DF (UNOP, sqrt, 2, FP)
         new_stmt = gimple_build_call_internal (IFN_SQRT,
--
2.43.0