This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH 9/15][AArch64] vld{2,3,4}{,_lane,_dup}, vcombine, vcreate


On Thu, Aug 06, 2015 at 05:28:34PM +0100, Alan Lawrence wrote:
> Alan Lawrence wrote:
>  > James Greenhalgh wrote:
>  >> Hi Alan,
>  >>
>  >> The arm_neon.h portion of this patch does not apply after Charles' recent
>  >> changes. Could you please rebase and resubmit the patch for review?
>  >>
>  >> Thanks,
>  >> James
>  >
>  > Ah, indeed, thanks. Here's a rebased version, using Charles' new versions of
>  > __(LD|ST)[234]_LANE_FUNC. I'll follow with a patch adding corresponding
>  > lane_f16_indices tests in a separate email.
>  >
>  > (Changelog as before)
>  >
>  > Bootstrapped + check-gcc on aarch64-none-linux-gnu.
> 
> 
> Here, in fact. gcc/ChangeLog:
> 
> 	* config/aarch64/aarch64.c (aarch64_split_simd_combine): Add V4HFmode.
> 	* config/aarch64/aarch64-builtins.c (VAR13, VAR14): New.
> 	(aarch64_scalar_builtin_types, aarch64_init_simd_builtin_scalar_types):
> 	Add __builtin_aarch64_simd_hf.
> 	* config/aarch64/arm_neon.h (float16x4x2_t, float16x8x2_t,
> 	float16x4x3_t, float16x8x3_t, float16x4x4_t, float16x8x4_t,
> 	vcombine_f16, vst2_lane_f16, vst2q_lane_f16, vst3_lane_f16,
> 	vst3q_lane_f16, vst4_lane_f16, vst4q_lane_f16, vld2_f16, vld2q_f16,
> 	vld3_f16, vld3q_f16, vld4_f16, vld4q_f16, vld2_dup_f16, vld2q_dup_f16,
> 	vld3_dup_f16, vld3q_dup_f16, vld4_dup_f16, vld4q_dup_f16,
> 	vld2_lane_f16, vld2q_lane_f16, vld3_lane_f16, vld3q_lane_f16,
> 	vld4_lane_f16, vld4q_lane_f16, vst2_f16, vst2q_f16, vst3_f16,
> 	vst3q_f16, vst4_f16, vst4q_f16, vcreate_f16): New.
> 
> 	* config/aarch64/iterators.md (VALLDIF, Vtype, Vetype, Vbtype,
> 	V_cmp_result, v_cmp_result): Add cases for V4HF and V8HF.
> 	(VDC, Vdbl): Add V4HF.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/aarch64/vldN_1.c: Add float16x4_t and float16x8_t cases.
> 	* gcc.target/aarch64/vldN_dup_1.c: Likewise.
> 	* gcc.target/aarch64/vldN_lane_1.c: Likewise.
> 	(main): Update orig_data to avoid float16 NaN on big-endian.

OK, but please clean up the stray newline marked below.

   <arm_neon.h>

> @@ -15974,6 +16086,19 @@ vld4q_u64 (const uint64_t * __a)
>    return ret;
>  }
>  
> +__extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__))
> +vld4q_f16 (const float16_t * __a)
> +{
> +  float16x8x4_t ret;
> +  __builtin_aarch64_simd_xi __o;
> +  __o = __builtin_aarch64_ld4v8hf (__a);
> +  ret.val[0] = __builtin_aarch64_get_qregxiv8hf (__o, 0);
> +  ret.val[1] = __builtin_aarch64_get_qregxiv8hf (__o, 1);
> +  ret.val[2] = __builtin_aarch64_get_qregxiv8hf (__o, 2);
> +  ret.val[3] = __builtin_aarch64_get_qregxiv8hf (__o, 3);
> +  return ret;
> +}
> +
>  __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
>  vld4q_f32 (const float32_t * __a)
>  {
> @@ -16035,6 +16160,18 @@ vld2_dup_s32 (const int32_t * __a)
>    return ret;
>  }
>  
> +

Here.

> +__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
> +vld2_dup_f16 (const float16_t * __a)
> +{
> +  float16x4x2_t ret;
> +  __builtin_aarch64_simd_oi __o;
> +  __o = __builtin_aarch64_ld2rv4hf ((const __builtin_aarch64_simd_hf *) __a);
> +  ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0);
> +  ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 1);
> +  return ret;
> +}
> +
>  __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
>  vld2_dup_f32 (const float32_t * __a)
>  {

Thanks,
James


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]