[PATCH]AArch64 Fix the AAPCs for new partial and full SIMD structure types [PR103094]
Richard Sandiford
richard.sandiford@arm.com
Wed Dec 15 12:23:13 GMT 2021
Richard Sandiford via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> Tamar Christina <tamar.christina@arm.com> writes:
>> Hi All,
>>
>> The new partial and full vector types added to AArch64, e.g.
>>
>> int8x8x2_t with mode V2x8QI are incorrectly being defined as being short
>> vectors and not being composite types.
>>
>> This causes the layout code to incorrectly conclude that the registers are
>> packed. i.e. for V2x8QI it thinks those 16-bytes are in the same registers.
>>
>> Because of this the code under !aarch64_composite_type_p is unreachable but also
>> lacked any extra checks to see that nregs is what we expected it to be.
>>
>> I have also updated aarch64_advsimd_full_struct_mode_p and
>> aarch64_advsimd_partial_struct_mode_p to only consider vector types as struct
>> modes. Otherwise types such as OImode and friends would qualify leading to
>> incorrect results.
>
> How easy would it be to fix the bug without doing this last bit?
> The idea was that OI, CI and XI should continue to be structure
> modes until we remove them. aarch64_advsimd_partial_struct_mode_p
> and aarch64_advsimd_full_struct_mode_p are meant to be convenience
> wrappers and so they shouldn't make different decisions from the
> underlying aarch64_classify_vector_mode.
>
>>
>> This patch fixes up the issues and we now generate correct code.
>>
>> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>>
>> Ok for master?
>>
>> Thanks,
>> Tamar
>>
>>
>>
>> gcc/ChangeLog:
>>
>> PR target/103094
>> * config/aarch64/aarch64.c (aarch64_function_value, aarch64_layout_arg):
>> Fix unreachable code for partial vectors and re-order switch to perform
>> the simplest test first.
>> (aarch64_short_vector_p): Mark as not short vectors.
>> (aarch64_composite_type_p): Mark as composite types.
>> (aarch64_advsimd_partial_struct_mode_p,
>> aarch64_advsimd_full_struct_mode_p): Restrict to actual SIMD types.
>>
>> gcc/testsuite/ChangeLog:
>>
>> PR target/103094
>> * gcc.target/aarch64/pr103094.c: New test.
>>
>> --- inline copy of patch --
>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
>> index fdf05505846721b02059df494d6395ae9423a8ef..d9104ddac3cdd44f7c2290b8725d05be4fd6468f 100644
>> --- a/gcc/config/aarch64/aarch64.c
>> +++ b/gcc/config/aarch64/aarch64.c
>> @@ -3055,15 +3055,17 @@ aarch64_advsimd_struct_mode_p (machine_mode mode)
>> static bool
>> aarch64_advsimd_partial_struct_mode_p (machine_mode mode)
>> {
>> - return (aarch64_classify_vector_mode (mode)
>> - == (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL));
>> + return VECTOR_MODE_P (mode)
>> + && (aarch64_classify_vector_mode (mode)
>> + == (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL));
>> }
>>
>> /* Return true if MODE is an Advanced SIMD Q-register structure mode. */
>> static bool
>> aarch64_advsimd_full_struct_mode_p (machine_mode mode)
>> {
>> - return (aarch64_classify_vector_mode (mode) == (VEC_ADVSIMD | VEC_STRUCT));
>> + return VECTOR_MODE_P (mode)
>> + && (aarch64_classify_vector_mode (mode) == (VEC_ADVSIMD | VEC_STRUCT));
>> }
>>
>> /* Return true if MODE is any of the data vector modes, including
>> @@ -6468,17 +6470,21 @@ aarch64_function_value (const_tree type, const_tree func,
>> NULL, false))
>> {
>> gcc_assert (!sve_p);
>> - if (!aarch64_composite_type_p (type, mode))
>> + if (aarch64_advsimd_full_struct_mode_p (mode))
>> + {
>> + gcc_assert (known_eq (exact_div (GET_MODE_SIZE (mode), 16), count));
>> + return gen_rtx_REG (mode, V0_REGNUM);
>> + }
>> + else if (aarch64_advsimd_partial_struct_mode_p (mode))
>> + {
>> + gcc_assert (known_eq (exact_div (GET_MODE_SIZE (mode), 8), count));
>> + return gen_rtx_REG (mode, V0_REGNUM);
>> + }
>> + else if (!aarch64_composite_type_p (type, mode))
>> {
>> gcc_assert (count == 1 && mode == ag_mode);
>> return gen_rtx_REG (mode, V0_REGNUM);
>> }
>> - else if (aarch64_advsimd_full_struct_mode_p (mode)
>> - && known_eq (GET_MODE_SIZE (ag_mode), 16))
>> - return gen_rtx_REG (mode, V0_REGNUM);
>> - else if (aarch64_advsimd_partial_struct_mode_p (mode)
>> - && known_eq (GET_MODE_SIZE (ag_mode), 8))
>> - return gen_rtx_REG (mode, V0_REGNUM);
>> else
>> {
>> int i;
>> @@ -6745,6 +6751,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
>> /* No frontends can create types with variable-sized modes, so we
>> shouldn't be asked to pass or return them. */
>> size = GET_MODE_SIZE (mode).to_constant ();
>> +
>> size = ROUND_UP (size, UNITS_PER_WORD);
>>
>> allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
>> @@ -6769,17 +6776,21 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
>> if (nvrn + nregs <= NUM_FP_ARG_REGS)
>> {
>> pcum->aapcs_nextnvrn = nvrn + nregs;
>> - if (!aarch64_composite_type_p (type, mode))
>> + if (aarch64_advsimd_full_struct_mode_p (mode))
>> + {
>> + gcc_assert (nregs == size / 16);
>> + pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
>> + }
>> + else if (aarch64_advsimd_partial_struct_mode_p (mode))
>> + {
>> + gcc_assert (nregs == size / 8);
>> + pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
>> + }
>> + else if (!aarch64_composite_type_p (type, mode))
>> {
>> gcc_assert (nregs == 1);
>> pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
>> }
>> - else if (aarch64_advsimd_full_struct_mode_p (mode)
>> - && known_eq (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), 16))
>> - pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
>> - else if (aarch64_advsimd_partial_struct_mode_p (mode)
>> - && known_eq (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), 8))
>> - pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
>> else
>> {
>> rtx par;
>> @@ -19285,6 +19296,13 @@ aarch64_short_vector_p (const_tree type,
>> else
>> size = GET_MODE_SIZE (mode);
>> }
>> +
>> + /* If a Advanced SIMD partial or full aggregate vector type we aren't a short
>> + type. */
>> + if (aarch64_advsimd_partial_struct_mode_p (mode)
>> + || aarch64_advsimd_full_struct_mode_p (mode))
>> + return false;
>> +
>> if (known_eq (size, 8) || known_eq (size, 16))
>> {
>> /* 64-bit and 128-bit vectors should only acquire an SVE mode if
>
> I think the bug here is that we trust the mode even if we're
> given a conflicting type. In principle it would be OK to use,
> say, V4SI for an array of 4 ints, but that shouldn't suddenly
> make aarch64_short_vector_p true.
>
> Unfortunately that ship has sailed, so we e.g. treat:
>
> struct wrapper { int32x4_t x; int :0; };
>
> as a short vector too.
>
> So it feels like this a case of limiting the contagion and
> that the check should go in here:
>
> else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
> || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
> {
> /* Rely only on the type, not the mode, when processing SVE types. */
> if (type && aarch64_some_values_include_pst_objects_p (type))
> /* Leave later code to report an error if SVE is disabled. */
> gcc_assert (!TARGET_SVE || aarch64_sve_mode_p (mode));
> else
> size = GET_MODE_SIZE (mode);
> }
>
> where we needed similar protection for SVE. E.g. we could change the
> inner else to:
>
> else if (!aarch64_advsimd_struct_mode_p (mode))
>
> or keep it is an early-out (but within the outer “else if”)
> if that seems clearer.
Following some off-line discussion, I've committed the following
combined patch after testing on aarch64-linux-gnu.
Thanks,
Richard
In this PR we were wrongly classifying a pair of 8-byte vectors
as a 16-byte “short vector” (in the AAPCS64 sense). As the
comment in the patch says, this stems from an old condition
in aarch64_short_vector_p that is too loose, but that would
be difficult to tighten now.
We can still do the right thing for the newly-added modes though,
since there are no backwards compatibility concerns there.
Co-authored-by: Tamar Christina <tamar.christina@arm.com>
gcc/
PR target/103094
* config/aarch64/aarch64.c (aarch64_short_vector_p): Return false
for structure modes, rather than ignoring the type in that case.
gcc/testsuite/
PR target/103094
* gcc.target/aarch64/pr103094.c: New test.
---
gcc/config/aarch64/aarch64.c | 19 ++++++++++++++++--
gcc/testsuite/gcc.target/aarch64/pr103094.c | 22 +++++++++++++++++++++
2 files changed, 39 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/pr103094.c
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index f07330cff4f..ff4a808629b 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -19299,7 +19299,21 @@ aarch64_short_vector_p (const_tree type,
else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
|| GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
{
- /* Rely only on the type, not the mode, when processing SVE types. */
+ /* The containing "else if" is too loose: it means that we look at TYPE
+ if the type is a vector type (good), but that we otherwise ignore TYPE
+ and look only at the mode. This is wrong because the type describes
+ the language-level information whereas the mode is purely an internal
+ GCC concept. We can therefore reach here for types that are not
+ vectors in the AAPCS64 sense.
+
+ We can't "fix" that for the traditional Advanced SIMD vector modes
+ without breaking backwards compatibility. However, there's no such
+ baggage for the structure modes, which were introduced in GCC 12. */
+ if (aarch64_advsimd_struct_mode_p (mode))
+ return false;
+
+ /* For similar reasons, rely only on the type, not the mode, when
+ processing SVE types. */
if (type && aarch64_some_values_include_pst_objects_p (type))
/* Leave later code to report an error if SVE is disabled. */
gcc_assert (!TARGET_SVE || aarch64_sve_mode_p (mode));
@@ -19310,7 +19324,8 @@ aarch64_short_vector_p (const_tree type,
{
/* 64-bit and 128-bit vectors should only acquire an SVE mode if
they are being treated as scalable AAPCS64 types. */
- gcc_assert (!aarch64_sve_mode_p (mode));
+ gcc_assert (!aarch64_sve_mode_p (mode)
+ && !aarch64_advsimd_struct_mode_p (mode));
return true;
}
return false;
diff --git a/gcc/testsuite/gcc.target/aarch64/pr103094.c b/gcc/testsuite/gcc.target/aarch64/pr103094.c
new file mode 100644
index 00000000000..beda99dc1f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr103094.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fdump-rtl-expand -w" } */
+
+#include <arm_neon.h>
+
+void foo (uint8x8x2_t cols_01_23, uint8x8x2_t cols_45_67, uint16_t*
+outptr0) {
+ uint16x4x4_t cols_01_23_45_67 = { {
+ vreinterpret_u16_u8(cols_01_23.val[0]),
+ vreinterpret_u16_u8(cols_01_23.val[1]),
+ vreinterpret_u16_u8(cols_45_67.val[0]),
+ vreinterpret_u16_u8(cols_45_67.val[1])
+ } };
+
+ vst4_lane_u16(outptr0, cols_01_23_45_67, 0); }
+
+/* Check that we expand to v0 and v2 from the function arguments. */
+/* { dg-final { scan-rtl-dump {\(reg:V2x8QI \d+ v0 \[ cols_01_23
+\]\)} expand } } */
+/* { dg-final { scan-rtl-dump {\(reg:V2x8QI \d+ v2 \[ cols_45_67
+\]\)} expand } } */
+
--
2.25.1
More information about the Gcc-patches
mailing list