[PATCHv2 2/2] aarch64: Add support for _BitInt

Richard Sandiford richard.sandiford@arm.com
Thu Mar 28 15:00:46 GMT 2024


"Andre Vieira (lists)" <andre.simoesdiasvieira@arm.com> writes:
> This patch adds support for C23's _BitInt for the AArch64 port when 
> compiling for little endianness.  Big Endianness requires further 
> target-agnostic support and we therefore disable it for now.
>
> The tests expose some suboptimal codegen for which I'll create PRs for 
> optimizations after this goes in.
>
> gcc/ChangeLog:
>
> 	* config/aarch64/aarch64.cc (TARGET_C_BITINT_TYPE_INFO): Declare MACRO.
> 	(aarch64_bitint_type_info): New function.
> 	(aarch64_return_in_memory_1): Return large _BitInt's in memory.
> 	(aarch64_function_arg_alignment): Adapt to correctly return the ABI
> 	mandated alignment of _BitInt(N) where N > 128 as the alignment of
> 	TImode.
> 	(aarch64_composite_type_p): Return true for _BitInt(N), where N > 128.
>
> libgcc/ChangeLog:
>
> 	* config/aarch64/t-softfp (softfp_extras): Add floatbitinthf,
> 	floatbitintbf, floatbitinttf and fixtfbitint.
> 	* config/aarch64/libgcc-softfp.ver (GCC_14.0.0): Add __floatbitinthf,
> 	__floatbitintbf, __floatbitinttf and __fixtfbitint.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/aarch64/bitint-alignments.c: New test.
> 	* gcc.target/aarch64/bitint-args.c: New test.
> 	* gcc.target/aarch64/bitint-sizes.c: New test.
> 	* gcc.target/aarch64/bitfield-bitint-abi.h: New header.
> 	* gcc.target/aarch64/bitfield-bitint-abi-align16.c: New test.
> 	* gcc.target/aarch64/bitfield-bitint-abi-align8.c: New test.

Since we don't support big-endian yet, I assume the tests should be
conditional on aarch64_little_endian.

> [...]
> diff --git a/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..048d04e4c1bf90215892aa0173f22226246a097d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c
> @@ -0,0 +1,378 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-stack-protector -save-temps -fno-schedule-insns -fno-schedule-insns2" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +#define ALIGN 16
> +#include "bitfield-bitint-abi.h"
> +
> +// f1-f16 are all the same
> +
> +/*
> +** f1:
> +**	and	x0, x2, 1
> +**	ret
> +*/
> +/*
> +** f8:
> +**	and	x0, x2, 1
> +**	ret
> +*/
> +/*
> +** f16:
> +**	and	x0, x2, 1
> +**	ret
> +*/
> +
> +/* fp seems to be unable to optimize away stack-usage, TODO: to fix.  */
> +
> +/*
> +** fp:
> +**...
> +**	and	x0, x1, 1
> +**...
> +**	ret
> +*/
> +
> +// all other f1p-f8p generate the same code, for f16p the value comes from x2
> +/*
> +** f1p:
> +**	and	x0, x1, 1
> +**	ret
> +*/
> +/*
> +** f8p:
> +**	and	x0, x1, 1
> +**	ret
> +*/
> +/*
> +** f16p:
> +**	and	x0, x2, 1
> +**	ret
> +*/
> +
> +// g1-g16 are all the same
> +/*
> +** g1:
> +**	mov	(x[0-9]+), x0
> +**	mov	w0, w1
> +**	and	x4, \1, 9223372036854775807
> +**	and	x2, \1, 1
> +**	mov	x3, 0
> +**	b	f1
> +*/
> +
> +/*
> +** g8:
> +**	mov	(x[0-9]+), x0
> +**	mov	w0, w1
> +**	and	x4, \1, 9223372036854775807
> +**	and	x2, \1, 1
> +**	mov	x3, 0
> +**	b	f8
> +*/
> +/*
> +** g16:
> +**	mov	(x[0-9]+), x0
> +**	mov	w0, w1
> +**	and	x4, \1, 9223372036854775807
> +**	and	x2, \1, 1
> +**	mov	x3, 0
> +**	b	f16
> +*/
> +
> +// again gp different from the rest
> +
> +/*
> +** gp:
> +**	sub	sp, sp, #16
> +**	mov	(x[0-9]+), x0
> +**	mov	w0, w1
> +**	sbfx	x([0-9]+), \1, 0, 63
> +**	mov	(w[0-9]+), 0
> +**	bfi	\3, w\2, 0, 1
> +**	and	x3, x\2, 9223372036854775807
> +**	mov	x2, 0
> +**	str	xzr, \[sp\]
> +**	strb	\3, \[sp\]
> +**	ldr	x1, \[sp\]
> +**	add	sp, sp, 16
> +**	b	fp
> +*/
> +
> +// g1p-g8p are all the same, g16p uses x2 to pass parameter to f16p
> +
> +/*
> +** g1p:
> +**	mov	(w[0-9]+), w1
> +**	and	x3, x0, 9223372036854775807
> +**	and	x1, x0, 1
> +**	mov	x2, 0
> +**	mov	w0, \1
> +**	b	f1p
> +*/
> +/*
> +** g8p:
> +**	mov	(w[0-9]+), w1
> +**	and	x3, x0, 9223372036854775807
> +**	and	x1, x0, 1
> +**	mov	x2, 0
> +**	mov	w0, \1
> +**	b	f8p
> +*/
> +/*
> +** g16p:
> +**	mov	(x[0-9]+), x0
> +**	mov	w0, w1
> +**	and	x4, \1, 9223372036854775807
> +**	and	x2, \1, 1
> +**	mov	x3, 0
> +**	b	f16p
> +*/
> +
> +// f*_stack are all the same
> +/*
> +** f1_stack:
> +**	ldr	(x[0-9]+), \[sp, 16\]
> +**	and	x0, \1, 1
> +**	ret
> +*/
> +/*
> +** f8_stack:
> +**	ldr	(x[0-9]+), \[sp, 16\]
> +**	and	x0, \1, 1
> +**	ret
> +*/
> +/*
> +** f16_stack:
> +**	ldr	(x[0-9]+), \[sp, 16\]
> +**	and	x0, \1, 1
> +**	ret
> +*/
> +
> +// fp{,1,8}_stack are all the same but fp16_stack loads from sp+16
> +/*
> +** fp_stack:
> +**	ldr	(x[0-9]+), \[sp, 8\]
> +**	and	x0, \1, 1
> +**	ret
> +*/
> +/*
> +** f1p_stack:
> +**	ldr	(x[0-9]+), \[sp, 8\]
> +**	and	x0, \1, 1
> +**	ret
> +*/
> +/*
> +** f8p_stack:
> +**	ldr	(x[0-9]+), \[sp, 8\]
> +**	and	x0, \1, 1
> +**	ret
> +*/
> +
> +/*
> +** f16p_stack:
> +**	ldr	(x[0-9]+), \[sp, 16\]
> +**	and	x0, \1, 1
> +**	ret
> +*/
> +
> +/*
> +** gp_stack:
> +**...
> +**	mov	x([0-9]+), x0
> +**	sxtw	(x[0-9]+), w1
> +**	mov	x0, \2
> +**	and	(x[0-9]+), \2, 9223372036854775807

I assume this is x7, is that right?  x7 doesn't seem to be set elsewhere.
If so, I think this is one case that we need to match as x7 rather than
capture.  Similarly for the other stack tests, including in *-align8.c.

> +**	mov	(w[0-9]+), 0
> +**	bfi	\4, w\1, 0, 1
> +**	strb	wzr, \[sp, 16\]
> +**	mov	x6, \3
> +**	mov	x5, \3
> +**	mov	x4, \3
> +**	mov	x3, \3
> +**	mov	x2, \3
> +**	str	xzr, \[sp, 48\]
> +**	strb	\4, \[sp, 48\]
> +**	ldr	(x[0-9]+), \[sp, 48\]
> +**	stp	\3, \5, \[sp\]
> +**	mov	x1, \3
> +**	bl	fp_stack
> +**	sbfx	x0, x0, 0, 63
> +**...
> +**	ret
> +*/
> [...]
> diff --git a/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align8.c b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align8.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..11f0580fd60c3d619126c5b41d646e22374c3593
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align8.c
> @@ -0,0 +1,380 @@
> [...]
> +/*
> +** g16:
> +**	mov	x2, x0

This set and...

> +**	mov	w0, w1
> +**	and	x4, x2, 9223372036854775807
> +**	and	x2, x2, 1

...these two reads should be captured.  (The destination of the last
instruction must be x2, of course.)

> +**	mov	x3, 0
> +**	b	f16
> +*/
> +
> +// again gp different from the rest
> +
> +/*
> +** gp:
> +**	sub	sp, sp, #16
> +**	mov	(x[0-9]+), x0
> +**	mov	w0, w1
> +**	sbfx	x([0-9]+), \1, 0, 63
> +**	mov	w1, 0
> +**	bfi	w1, w\2, 0, 1

The use of w1 in the last two instructions should be captured
similarly to the align16.c test.

> diff --git a/gcc/testsuite/gcc.target/aarch64/bitint-alignments.c b/gcc/testsuite/gcc.target/aarch64/bitint-alignments.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..4de31fe7ebd933247911c48ace01ab520fe194a3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/bitint-alignments.c
> @@ -0,0 +1,58 @@
> +/* { dg-do run } */
> +/* { dg-options "-std=c23" } */
> +
> +static long unsigned int
> +calc_size (int n)

Would be more natural as calc_align(of).

> +{
> +  if (n > 64)
> +    return alignof(__int128_t);
> +  if (n > 32)
> +    return alignof(long long);
> +  if (n > 16)
> +    return alignof(int);
> +  if (n > 8)
> +    return alignof(short);
> +  else
> +    return alignof(char);
> +}
> +
> +#define CHECK_ALIGNMENT(N) \
> +  if (alignof(_BitInt(N)) != calc_size(N)) \
> +    __builtin_abort ();

I'm relying on Jakub's previous LGTM for the libgcc changes :)

OK with those changes.  Thanks a lot for doing this.

Richard


More information about the Gcc-patches mailing list