[AArch64] Enable generation of FRINTNZ instructions
Andre Vieira (lists)
andre.simoesdiasvieira@arm.com
Wed Nov 17 13:30:31 GMT 2021
On 16/11/2021 12:10, Richard Biener wrote:
> On Fri, 12 Nov 2021, Andre Simoes Dias Vieira wrote:
>
>> On 12/11/2021 10:56, Richard Biener wrote:
>>> On Thu, 11 Nov 2021, Andre Vieira (lists) wrote:
>>>
>>>> Hi,
>>>>
>>>> This patch introduces two IFN's FTRUNC32 and FTRUNC64, the corresponding
>>>> optabs and mappings. It also creates a backend pattern to implement them
>>>> for
>>>> aarch64 and a match.pd pattern to idiom recognize these.
>>>> These IFN's (and optabs) represent a truncation towards zero, as if
>>>> performed
>>>> by first casting it to a signed integer of 32 or 64 bits and then back to
>>>> the
>>>> same floating point type/mode.
>>>>
>>>> The match.pd pattern chooses to use these, when supported, regardless of
>>>> trapping math, since these new patterns mimic the original behavior of
>>>> truncating through an integer.
>>>>
>>>> I didn't think any of the existing IFN's represented these. I know it's a
>>>> bit
>>>> late in stage 1, but I thought this might be OK given it's only used by a
>>>> single target and should have very little impact on anything else.
>>>>
>>>> Bootstrapped on aarch64-none-linux.
>>>>
>>>> OK for trunk?
>>> On the RTL side ftrunc32/ftrunc64 would probably be better a conversion
>>> optab (with two modes), so not
>>>
>>> +OPTAB_D (ftrunc32_optab, "ftrunc$asi2")
>>> +OPTAB_D (ftrunc64_optab, "ftrunc$adi2")
>>>
>>> but
>>>
>>> OPTAB_CD (ftrunc_shrt_optab, "ftrunc$a$I$b2")
>>>
>>> or so? I know that gets somewhat awkward for the internal function,
>>> but IMHO we shouldn't tie our hands because of that?
>> I tried doing this originally, but indeed I couldn't find a way to correctly
>> tie the internal function to it.
>>
>> direct_optab_supported_p with multiple types expect those to be of the same
>> mode. I see convert_optab_supported_p does but I don't know how that is
>> used...
>>
>> Any ideas?
> No "nice" ones. The "usual" way is to provide fake arguments that
> specify the type/mode. We could use an integer argument directly
> specifying the mode (then the IL would look host dependent - ugh),
> or specify a constant zero in the intended mode (less visibly
> obvious - but at least with -gimple dumping you'd see the type...).
Hi,
So I reworked this to have a single optab and IFN. This required a bit
of fiddling with custom expander and supported_p functions for the IFN.
I decided to pass the maximum value of the intermediate signed integer type
as a second argument to the IFN, so that the type (and hence the size) of the
integer used in the intermediate cast is passed on. I tried a constant 0
first, but gcc was being too smart and just demoted it to an 'int' for
the long long test-cases.
Bootstrapped on aarch64-none-linux.
OK for trunk?
gcc/ChangeLog:
* config/aarch64/aarch64.md (ftrunc<mode><frintnz_mode>2): New
pattern.
* config/aarch64/iterators.md (FRINTNZ): New iterator.
(frintnz_mode): New int attribute.
* doc/md.texi: New entry for ftrunc pattern name.
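* internal-fn.c (ftrunc_int_direct): New define.
(expand_ftrunc_int_optab_fn): New function.
(direct_ftrunc_int_optab_supported_p): New define.
* internal-fn.def (FTRUNC_INT): New IFN.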
* match.pd: Add to the existing TRUNC pattern match.
* optabs.def (ftruncint_optab): New entry.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/merge_trunc1.c: Adapted to skip if frintNz
instruction available.
* lib/target-supports.exp: Added arm_v8_5a_frintnzx_ok target.
* gcc.target/aarch64/frintnz.c: New test.
-------------- next part --------------
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 4035e061706793849c68ae09bcb2e4b9580ab7b6..62adbc4cb6bbbe0c856f9fbe451aee08f2dea3b5 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -7345,6 +7345,14 @@ (define_insn "despeculate_simpleti"
(set_attr "speculation_barrier" "true")]
)
+(define_expand "ftrunc<mode><frintnz_mode>2"
+ [(set (match_operand:VSFDF 0 "register_operand" "=w")
+ (unspec:VSFDF [(match_operand:VSFDF 1 "register_operand" "w")]
+ FRINTNZ))]
+ "TARGET_FRINT && TARGET_FLOAT
+ && !(VECTOR_MODE_P (<MODE>mode) && !TARGET_SIMD)"
+)
+
(define_insn "aarch64_<frintnzs_op><mode>"
[(set (match_operand:VSFDF 0 "register_operand" "=w")
(unspec:VSFDF [(match_operand:VSFDF 1 "register_operand" "w")]
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index bdc8ba3576cf2c9b4ae96b45a382234e4e25b13f..49510488a2a800689e95c399f2e6c967b566516d 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -3067,6 +3067,8 @@ (define_int_iterator FCMLA [UNSPEC_FCMLA
(define_int_iterator FRINTNZX [UNSPEC_FRINT32Z UNSPEC_FRINT32X
UNSPEC_FRINT64Z UNSPEC_FRINT64X])
+(define_int_iterator FRINTNZ [UNSPEC_FRINT32Z UNSPEC_FRINT64Z])
+
(define_int_iterator SVE_BRK_UNARY [UNSPEC_BRKA UNSPEC_BRKB])
(define_int_iterator SVE_BRK_BINARY [UNSPEC_BRKN UNSPEC_BRKPA UNSPEC_BRKPB])
@@ -3482,6 +3484,8 @@ (define_int_attr f16mac1 [(UNSPEC_FMLAL "a") (UNSPEC_FMLSL "s")
(define_int_attr frintnzs_op [(UNSPEC_FRINT32Z "frint32z") (UNSPEC_FRINT32X "frint32x")
(UNSPEC_FRINT64Z "frint64z") (UNSPEC_FRINT64X "frint64x")])
+(define_int_attr frintnz_mode [(UNSPEC_FRINT32Z "si") (UNSPEC_FRINT64Z "di")])
+
;; The condition associated with an UNSPEC_COND_<xx>.
(define_int_attr cmp_op [(UNSPEC_COND_CMPEQ_WIDE "eq")
(UNSPEC_COND_CMPGE_WIDE "ge")
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 41f1850bf6e95005647ca97a495a97d7e184d137..7bd66818144e87e1dca2ef13bef1d6f21f239570 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6175,6 +6175,13 @@ operands; otherwise, it may not.
This pattern is not allowed to @code{FAIL}.
+@cindex @code{ftrunc@var{m}@var{n}2} instruction pattern
+@item @samp{ftrunc@var{m}@var{n}2}
+Truncate operand 1 towards zero, as if by converting it to a signed integer
+of mode @var{n} and back, and store the result in operand 0. Both operands
+have mode @var{m}, which is a scalar or vector floating-point mode.
+
+
@cindex @code{round@var{m}2} instruction pattern
@item @samp{round@var{m}2}
Round operand 1 to the nearest integer, rounding away from zero in the
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 0cba95411a63423484dda5b1251f47de24e926ba..d8306b50807609573c2ff612e2a83dcf1c55d1de 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -130,6 +130,7 @@ init_internal_fns ()
#define fold_left_direct { 1, 1, false }
#define mask_fold_left_direct { 1, 1, false }
#define check_ptrs_direct { 0, 0, false }
+#define ftrunc_int_direct { 0, 1, true }
const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
#define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct,
@@ -156,6 +157,29 @@ get_multi_vector_move (tree array_type, convert_optab optab)
return convert_optab_handler (optab, imode, vmode);
}
+/* Expand FTRUNC_INT call STMT using optab OPTAB. */
+
+static void
+expand_ftrunc_int_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+{
+ class expand_operand ops[2];
+ tree lhs, float_type, int_type;
+ rtx target, op;
+
+ lhs = gimple_call_lhs (stmt);
+ target = expand_normal (lhs);
+ op = expand_normal (gimple_call_arg (stmt, 0));
+
+ float_type = TREE_TYPE (lhs);
+ int_type = TREE_TYPE (gimple_call_arg (stmt, 1));
+
+ create_output_operand (&ops[0], target, TYPE_MODE (float_type));
+ create_input_operand (&ops[1], op, TYPE_MODE (float_type));
+
+ expand_insn (convert_optab_handler (optab, TYPE_MODE (float_type),
+ TYPE_MODE (int_type)), 2, ops);
+}
+
/* Expand LOAD_LANES call STMT using optab OPTAB. */
static void
@@ -3712,6 +3736,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
#define direct_mask_fold_left_optab_supported_p direct_optab_supported_p
#define direct_check_ptrs_optab_supported_p direct_optab_supported_p
#define direct_vec_set_optab_supported_p direct_optab_supported_p
+#define direct_ftrunc_int_optab_supported_p convert_optab_supported_p
/* Return the optab used by internal function FN. */
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index bb13c6cce1bf55633760bc14980402f1f0ac1689..fb97d37cecae17cdb6444e7f3391361b214f0712 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -269,6 +269,7 @@ DEF_INTERNAL_FLT_FLOATN_FN (RINT, ECF_CONST, rint, unary)
DEF_INTERNAL_FLT_FLOATN_FN (ROUND, ECF_CONST, round, unary)
DEF_INTERNAL_FLT_FLOATN_FN (ROUNDEVEN, ECF_CONST, roundeven, unary)
DEF_INTERNAL_FLT_FLOATN_FN (TRUNC, ECF_CONST, btrunc, unary)
+DEF_INTERNAL_OPTAB_FN (FTRUNC_INT, ECF_CONST, ftruncint, ftrunc_int)
/* Binary math functions. */
DEF_INTERNAL_FLT_FN (ATAN2, ECF_CONST, atan2, binary)
diff --git a/gcc/match.pd b/gcc/match.pd
index a319aefa8081ac177981ad425c461f8a771128f4..c37aa023b57838eba80c7a212ff1038eb6eed861 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3713,12 +3713,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
trapping behaviour, so require !flag_trapping_math. */
#if GIMPLE
(simplify
- (float (fix_trunc @0))
- (if (!flag_trapping_math
- && types_match (type, TREE_TYPE (@0))
- && direct_internal_fn_supported_p (IFN_TRUNC, type,
- OPTIMIZE_FOR_BOTH))
- (IFN_TRUNC @0)))
+ (float (fix_trunc@1 @0))
+ (if (types_match (type, TREE_TYPE (@0)))
+ (if (TYPE_SIGN (TREE_TYPE (@1)) == SIGNED
+ && direct_internal_fn_supported_p (IFN_FTRUNC_INT, type,
+ TREE_TYPE (@1), OPTIMIZE_FOR_BOTH))
+ (with {
+ tree int_type = TREE_TYPE (@1);
+ unsigned HOST_WIDE_INT max_int_c
+ = (1ULL << (element_precision (int_type) - 1)) - 1;
+ }
+ (IFN_FTRUNC_INT @0 { build_int_cst (int_type, max_int_c); }))
+ (if (!flag_trapping_math
+ && direct_internal_fn_supported_p (IFN_TRUNC, type,
+ OPTIMIZE_FOR_BOTH))
+ (IFN_TRUNC @0)))))
#endif
/* If we have a narrowing conversion to an integral type that is fed by a
diff --git a/gcc/optabs.def b/gcc/optabs.def
index b889ad2e5a08613db51d16d072080ac6cb48404f..57d259d33409265df3af1646d123e4ab216c34c8 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -63,6 +63,7 @@ OPTAB_CX(fractuns_optab, "fractuns$Q$b$I$a2")
OPTAB_CL(satfract_optab, "satfract$b$Q$a2", SAT_FRACT, "satfract", gen_satfract_conv_libfunc)
OPTAB_CL(satfractuns_optab, "satfractuns$I$b$Q$a2", UNSIGNED_SAT_FRACT, "satfractuns", gen_satfractuns_conv_libfunc)
+OPTAB_CD(ftruncint_optab, "ftrunc$a$b2")
OPTAB_CD(sfixtrunc_optab, "fix_trunc$F$b$I$a2")
OPTAB_CD(ufixtrunc_optab, "fixuns_trunc$F$b$I$a2")
diff --git a/gcc/testsuite/gcc.target/aarch64/frintnz.c b/gcc/testsuite/gcc.target/aarch64/frintnz.c
new file mode 100644
index 0000000000000000000000000000000000000000..2e1971f8aa11d8b95f454d03a03e050a3bf96747
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/frintnz.c
@@ -0,0 +1,88 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8.5-a" } */
+/* { dg-require-effective-target arm_v8_5a_frintnzx_ok } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** f1:
+** ...
+** frint32z s0, s0
+** ...
+*/
+float
+f1 (float x)
+{
+ int y = x;
+ return (float) y;
+}
+
+/*
+** f2:
+** ...
+** frint64z s0, s0
+** ...
+*/
+float
+f2 (float x)
+{
+ long long int y = x;
+ return (float) y;
+}
+
+/*
+** f3:
+** ...
+** frint32z d0, d0
+** ...
+*/
+double
+f3 (double x)
+{
+ int y = x;
+ return (double) y;
+}
+
+/*
+** f4:
+** ...
+** frint64z d0, d0
+** ...
+*/
+double
+f4 (double x)
+{
+ long long int y = x;
+ return (double) y;
+}
+
+float
+f1_dont (float x)
+{
+ unsigned int y = x;
+ return (float) y;
+}
+
+float
+f2_dont (float x)
+{
+ unsigned long long int y = x;
+ return (float) y;
+}
+
+double
+f3_dont (double x)
+{
+ unsigned int y = x;
+ return (double) y;
+}
+
+double
+f4_dont (double x)
+{
+ unsigned long long int y = x;
+ return (double) y;
+}
+
+/* Make sure the 'dont's don't generate any frintNz. */
+/* { dg-final { scan-assembler-times {frint32z} 2 } } */
+/* { dg-final { scan-assembler-times {frint64z} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c b/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c
index 07217064e2ba54fcf4f5edc440e6ec19ddae66e1..3b34dc3ad79f1406a41ec4c00db10347ba1ca2c4 100644
--- a/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c
+++ b/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c
@@ -1,5 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ffast-math" } */
+/* { dg-skip-if "" { arm_v8_5a_frintnzx_ok } } */
float
f1 (float x)
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 8cbda192fe0fae59ea208ee43696b4d22c43e61e..7fa1659ce734257f3cd96f1e2e50ace4d02dcf51 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -11365,6 +11365,33 @@ proc check_effective_target_arm_v8_3a_bkey_directive { } {
}]
}
+# Return 1 if the target supports ARMv8.5 scalar and Adv.Simd FRINT32[ZX]
+# and FRINT64[ZX] instructions, 0 otherwise. The test is valid for AArch64.
+# The check uses the current compiler flags; no extra options are added.
+
+proc check_effective_target_arm_v8_5a_frintnzx_ok_nocache { } {
+
+ if { ![istarget aarch64*-*-*] } {
+ return 0;
+ }
+
+ if { [check_no_compiler_messages_nocache \
+ arm_v8_5a_frintnzx_ok assembly {
+ #if !defined (__ARM_FEATURE_FRINT)
+ #error "__ARM_FEATURE_FRINT not defined"
+ #endif
+ } [current_compiler_flags]] } {
+ return 1;
+ }
+
+ return 0;
+}
+
+proc check_effective_target_arm_v8_5a_frintnzx_ok { } {
+ return [check_cached_effective_target arm_v8_5a_frintnzx_ok \
+ check_effective_target_arm_v8_5a_frintnzx_ok_nocache]
+}
+
# Return 1 if the target supports executing the Armv8.1-M Mainline Low
# Overhead Loop, 0 otherwise. The test is valid for ARM.
More information about the Gcc-patches
mailing list