This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Re: [PATCH 9/15][AArch64] vld{2,3,4}{,_lane,_dup}, vcombine, vcreate
- From: James Greenhalgh <james dot greenhalgh at arm dot com>
- To: Alan Lawrence <Alan dot Lawrence at arm dot com>
- Cc: "gcc-patches at gcc dot gnu dot org" <gcc-patches at gcc dot gnu dot org>
- Date: Mon, 17 Aug 2015 14:19:53 +0100
- Subject: Re: [PATCH 9/15][AArch64] vld{2,3,4}{,_lane,_dup}, vcombine, vcreate
- Authentication-results: sourceware.org; auth=none
- References: <55B765DF dot 4040706 at arm dot com> <55B766C3 dot 4060601 at arm dot com> <20150729090841 dot GA25826 at arm dot com> <55C38B32 dot 4010506 at arm dot com>
On Thu, Aug 06, 2015 at 05:28:34PM +0100, Alan Lawrence wrote:
> Alan Lawrence wrote:
> > James Greenhalgh wrote:
> >> Hi Alan,
> >>
> >> The arm_neon.h portion of this patch does not apply after Charles' recent
> >> changes. Could you please rebase and resubmit the patch for review?
> >>
> >> Thanks,
> >> James
> >
> > Ah, indeed, thanks. Here's a rebased version, using Charles' new versions of
> > __(LD|ST)[234]_LANE_FUNC. I'll follow with a patch adding corresponding
> > lane_f16_indices tests in a separate email.
> >
> > (Changelog as before)
> >
> > Bootstrapped + check-gcc on aarch64-none-linux-gnu.
>
>
> Here, in fact. gcc/ChangeLog:
>
> * config/aarch64/aarch64.c (aarch64_split_simd_combine): Add V4HFmode.
> * config/aarch64/aarch64-builtins.c (VAR13, VAR14): New.
> (aarch64_scalar_builtin_types, aarch64_init_simd_builtin_scalar_types):
> Add __builtin_aarch64_simd_hf.
> * config/aarch64/arm_neon.h (float16x4x2_t, float16x8x2_t,
> float16x4x3_t, float16x8x3_t, float16x4x4_t, float16x8x4_t,
> vcombine_f16, vst2_lane_f16, vst2q_lane_f16, vst3_lane_f16,
> vst3q_lane_f16, vst4_lane_f16, vst4q_lane_f16, vld2_f16, vld2q_f16,
> vld3_f16, vld3q_f16, vld4_f16, vld4q_f16, vld2_dup_f16, vld2q_dup_f16,
> vld3_dup_f16, vld3q_dup_f16, vld4_dup_f16, vld4q_dup_f16,
> vld2_lane_f16, vld2q_lane_f16, vld3_lane_f16, vld3q_lane_f16,
> vld4_lane_f16, vld4q_lane_f16, vst2_f16, vst2q_f16, vst3_f16,
> vst3q_f16, vst4_f16, vst4q_f16, vcreate_f16): New.
>
> * config/aarch64/iterators.md (VALLDIF, Vtype, Vetype, Vbtype,
> V_cmp_result, v_cmp_result): Add cases for V4HF and V8HF.
> (VDC, Vdbl): Add V4HF.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/vldN_1.c: Add float16x4_t and float16x8_t cases.
> * gcc.target/aarch64/vldN_dup_1.c: Likewise.
> * gcc.target/aarch64/vldN_lane_1.c: Likewise.
> (main): Update orig_data to avoid float16 NaN on big-endian.
OK, but please clean up the stray newline pointed out inline below.
<arm_neon.h>
> @@ -15974,6 +16086,19 @@ vld4q_u64 (const uint64_t * __a)
> return ret;
> }
>
> +__extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__))
> +vld4q_f16 (const float16_t * __a)
> +{
> + float16x8x4_t ret;
> + __builtin_aarch64_simd_xi __o;
> + __o = __builtin_aarch64_ld4v8hf (__a);
> + ret.val[0] = __builtin_aarch64_get_qregxiv8hf (__o, 0);
> + ret.val[1] = __builtin_aarch64_get_qregxiv8hf (__o, 1);
> + ret.val[2] = __builtin_aarch64_get_qregxiv8hf (__o, 2);
> + ret.val[3] = __builtin_aarch64_get_qregxiv8hf (__o, 3);
> + return ret;
> +}
> +
> __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
> vld4q_f32 (const float32_t * __a)
> {
> @@ -16035,6 +16160,18 @@ vld2_dup_s32 (const int32_t * __a)
> return ret;
> }
>
> +
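Here: the blank line added just above is the stray newline; the hunk already has a blank context line before it, so please drop the extra one.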
> +__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
> +vld2_dup_f16 (const float16_t * __a)
> +{
> + float16x4x2_t ret;
> + __builtin_aarch64_simd_oi __o;
> + __o = __builtin_aarch64_ld2rv4hf ((const __builtin_aarch64_simd_hf *) __a);
> + ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0);
> + ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 1);
> + return ret;
> +}
> +
> __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
> vld2_dup_f32 (const float32_t * __a)
> {
Thanks,
James