This patch supports the following MVE ACLE intrinsics for getting and setting a vector lane.
vsetq_lane_f16, vsetq_lane_f32, vsetq_lane_s16, vsetq_lane_s32, vsetq_lane_s8, vsetq_lane_s64, vsetq_lane_u8, vsetq_lane_u16, vsetq_lane_u32, vsetq_lane_u64, vgetq_lane_f16, vgetq_lane_f32, vgetq_lane_s16, vgetq_lane_s32, vgetq_lane_s8, vgetq_lane_s64, vgetq_lane_u8, vgetq_lane_u16, vgetq_lane_u32, vgetq_lane_u64.
Please refer to M-profile Vector Extension (MVE) intrinsics [1] for more details.
[1] https://developer.arm.com/architectures/instruction-sets/simd-isas/helium/mve-intrinsics
gcc/ChangeLog:
2020-03-23 Srinath Parvathaneni <srinath.parvathaneni@arm.com>
Andre Vieira <andre.simoesdiasvieira@arm.com>
Mihail Ionescu <mihail.ionescu@arm.com>
* config/arm/arm_mve.h (vsetq_lane_f16): Define macro.
(vsetq_lane_f32): Likewise.
(vsetq_lane_s16): Likewise.
(vsetq_lane_s32): Likewise.
(vsetq_lane_s8): Likewise.
(vsetq_lane_s64): Likewise.
(vsetq_lane_u8): Likewise.
(vsetq_lane_u16): Likewise.
(vsetq_lane_u32): Likewise.
(vsetq_lane_u64): Likewise.
(vgetq_lane_f16): Likewise.
(vgetq_lane_f32): Likewise.
(vgetq_lane_s16): Likewise.
(vgetq_lane_s32): Likewise.
(vgetq_lane_s8): Likewise.
(vgetq_lane_s64): Likewise.
(vgetq_lane_u8): Likewise.
(vgetq_lane_u16): Likewise.
(vgetq_lane_u32): Likewise.
(vgetq_lane_u64): Likewise.
(__ARM_NUM_LANES): Likewise.
(__ARM_LANEQ): Likewise.
(__ARM_CHECK_LANEQ): Likewise.
(__arm_vsetq_lane_s16): Define intrinsic.
(__arm_vsetq_lane_s32): Likewise.
(__arm_vsetq_lane_s8): Likewise.
(__arm_vsetq_lane_s64): Likewise.
(__arm_vsetq_lane_u8): Likewise.
(__arm_vsetq_lane_u16): Likewise.
(__arm_vsetq_lane_u32): Likewise.
(__arm_vsetq_lane_u64): Likewise.
(__arm_vgetq_lane_s16): Likewise.
(__arm_vgetq_lane_s32): Likewise.
(__arm_vgetq_lane_s8): Likewise.
(__arm_vgetq_lane_s64): Likewise.
(__arm_vgetq_lane_u8): Likewise.
(__arm_vgetq_lane_u16): Likewise.
(__arm_vgetq_lane_u32): Likewise.
(__arm_vgetq_lane_u64): Likewise.
(__arm_vsetq_lane_f16): Likewise.
(__arm_vsetq_lane_f32): Likewise.
(__arm_vgetq_lane_f16): Likewise.
(__arm_vgetq_lane_f32): Likewise.
(vgetq_lane): Define polymorphic variant.
(vsetq_lane): Likewise.
* config/arm/mve.md (mve_vec_extract<mode><V_elem_l>): Define RTL
pattern.
(mve_vec_extractv2didi): Likewise.
(mve_vec_extract_sext_internal<mode>): Likewise.
(mve_vec_extract_zext_internal<mode>): Likewise.
(mve_vec_set<mode>_internal): Likewise.
(mve_vec_setv2di_internal): Likewise.
* config/arm/neon.md (vec_set<mode>): Move RTL pattern to vec-common.md
file.
(vec_extract<mode><V_elem_l>): Rename to
"neon_vec_extract<mode><V_elem_l>".
(vec_extractv2didi): Rename to "neon_vec_extractv2didi".
* config/arm/vec-common.md (vec_extract<mode><V_elem_l>): Define RTL
pattern common for MVE and NEON.
(vec_set<mode>): Move RTL pattern from neon.md and modify to accept both
MVE and NEON.
gcc/testsuite/ChangeLog:
2020-03-23 Srinath Parvathaneni <srinath.parvathaneni@arm.com>
Andre Vieira <andre.simoesdiasvieira@arm.com>
Mihail Ionescu <mihail.ionescu@arm.com>
* gcc.target/arm/mve/intrinsics/vgetq_lane_f16.c: New test.
* gcc.target/arm/mve/intrinsics/vgetq_lane_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vgetq_lane_s16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vgetq_lane_s32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vgetq_lane_s64.c: Likewise.
* gcc.target/arm/mve/intrinsics/vgetq_lane_s8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vgetq_lane_u16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vgetq_lane_u32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vgetq_lane_u64.c: Likewise.
* gcc.target/arm/mve/intrinsics/vgetq_lane_u8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vsetq_lane_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vsetq_lane_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vsetq_lane_s16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vsetq_lane_s32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vsetq_lane_s64.c: Likewise.
* gcc.target/arm/mve/intrinsics/vsetq_lane_s8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vsetq_lane_u16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vsetq_lane_u32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vsetq_lane_u64.c: Likewise.
* gcc.target/arm/mve/intrinsics/vsetq_lane_u8.c: Likewise.
+2020-03-23 Srinath Parvathaneni <srinath.parvathaneni@arm.com>
+ Andre Vieira <andre.simoesdiasvieira@arm.com>
+ Mihail Ionescu <mihail.ionescu@arm.com>
+
+ * config/arm/arm_mve.h (vsetq_lane_f16): Define macro.
+ (vsetq_lane_f32): Likewise.
+ (vsetq_lane_s16): Likewise.
+ (vsetq_lane_s32): Likewise.
+ (vsetq_lane_s8): Likewise.
+ (vsetq_lane_s64): Likewise.
+ (vsetq_lane_u8): Likewise.
+ (vsetq_lane_u16): Likewise.
+ (vsetq_lane_u32): Likewise.
+ (vsetq_lane_u64): Likewise.
+ (vgetq_lane_f16): Likewise.
+ (vgetq_lane_f32): Likewise.
+ (vgetq_lane_s16): Likewise.
+ (vgetq_lane_s32): Likewise.
+ (vgetq_lane_s8): Likewise.
+ (vgetq_lane_s64): Likewise.
+ (vgetq_lane_u8): Likewise.
+ (vgetq_lane_u16): Likewise.
+ (vgetq_lane_u32): Likewise.
+ (vgetq_lane_u64): Likewise.
+ (__ARM_NUM_LANES): Likewise.
+ (__ARM_LANEQ): Likewise.
+ (__ARM_CHECK_LANEQ): Likewise.
+ (__arm_vsetq_lane_s16): Define intrinsic.
+ (__arm_vsetq_lane_s32): Likewise.
+ (__arm_vsetq_lane_s8): Likewise.
+ (__arm_vsetq_lane_s64): Likewise.
+ (__arm_vsetq_lane_u8): Likewise.
+ (__arm_vsetq_lane_u16): Likewise.
+ (__arm_vsetq_lane_u32): Likewise.
+ (__arm_vsetq_lane_u64): Likewise.
+ (__arm_vgetq_lane_s16): Likewise.
+ (__arm_vgetq_lane_s32): Likewise.
+ (__arm_vgetq_lane_s8): Likewise.
+ (__arm_vgetq_lane_s64): Likewise.
+ (__arm_vgetq_lane_u8): Likewise.
+ (__arm_vgetq_lane_u16): Likewise.
+ (__arm_vgetq_lane_u32): Likewise.
+ (__arm_vgetq_lane_u64): Likewise.
+ (__arm_vsetq_lane_f16): Likewise.
+ (__arm_vsetq_lane_f32): Likewise.
+ (__arm_vgetq_lane_f16): Likewise.
+ (__arm_vgetq_lane_f32): Likewise.
+ (vgetq_lane): Define polymorphic variant.
+ (vsetq_lane): Likewise.
+ * config/arm/mve.md (mve_vec_extract<mode><V_elem_l>): Define RTL
+ pattern.
+ (mve_vec_extractv2didi): Likewise.
+ (mve_vec_extract_sext_internal<mode>): Likewise.
+ (mve_vec_extract_zext_internal<mode>): Likewise.
+ (mve_vec_set<mode>_internal): Likewise.
+ (mve_vec_setv2di_internal): Likewise.
+ * config/arm/neon.md (vec_set<mode>): Move RTL pattern to vec-common.md
+ file.
+ (vec_extract<mode><V_elem_l>): Rename to
+ "neon_vec_extract<mode><V_elem_l>".
+ (vec_extractv2didi): Rename to "neon_vec_extractv2didi".
+ * config/arm/vec-common.md (vec_extract<mode><V_elem_l>): Define RTL
+ pattern common for MVE and NEON.
+ (vec_set<mode>): Move RTL pattern from neon.md and modify to accept both
+ MVE and NEON.
+
2020-03-23 Andre Vieira <andre.simoesdiasvieira@arm.com>
* config/arm/mve.md (earlyclobber_32): New mode attribute.
#define vld1q_z_f32(__base, __p) __arm_vld1q_z_f32(__base, __p)
#define vst2q_f32(__addr, __value) __arm_vst2q_f32(__addr, __value)
#define vst1q_p_f32(__addr, __value, __p) __arm_vst1q_p_f32(__addr, __value, __p)
+#define vsetq_lane_f16(__a, __b, __idx) __arm_vsetq_lane_f16(__a, __b, __idx)
+#define vsetq_lane_f32(__a, __b, __idx) __arm_vsetq_lane_f32(__a, __b, __idx)
+#define vsetq_lane_s16(__a, __b, __idx) __arm_vsetq_lane_s16(__a, __b, __idx)
+#define vsetq_lane_s32(__a, __b, __idx) __arm_vsetq_lane_s32(__a, __b, __idx)
+#define vsetq_lane_s8(__a, __b, __idx) __arm_vsetq_lane_s8(__a, __b, __idx)
+#define vsetq_lane_s64(__a, __b, __idx) __arm_vsetq_lane_s64(__a, __b, __idx)
+#define vsetq_lane_u8(__a, __b, __idx) __arm_vsetq_lane_u8(__a, __b, __idx)
+#define vsetq_lane_u16(__a, __b, __idx) __arm_vsetq_lane_u16(__a, __b, __idx)
+#define vsetq_lane_u32(__a, __b, __idx) __arm_vsetq_lane_u32(__a, __b, __idx)
+#define vsetq_lane_u64(__a, __b, __idx) __arm_vsetq_lane_u64(__a, __b, __idx)
+#define vgetq_lane_f16(__a, __idx) __arm_vgetq_lane_f16(__a, __idx)
+#define vgetq_lane_f32(__a, __idx) __arm_vgetq_lane_f32(__a, __idx)
+#define vgetq_lane_s16(__a, __idx) __arm_vgetq_lane_s16(__a, __idx)
+#define vgetq_lane_s32(__a, __idx) __arm_vgetq_lane_s32(__a, __idx)
+#define vgetq_lane_s8(__a, __idx) __arm_vgetq_lane_s8(__a, __idx)
+#define vgetq_lane_s64(__a, __idx) __arm_vgetq_lane_s64(__a, __idx)
+#define vgetq_lane_u8(__a, __idx) __arm_vgetq_lane_u8(__a, __idx)
+#define vgetq_lane_u16(__a, __idx) __arm_vgetq_lane_u16(__a, __idx)
+#define vgetq_lane_u32(__a, __idx) __arm_vgetq_lane_u32(__a, __idx)
+#define vgetq_lane_u64(__a, __idx) __arm_vgetq_lane_u64(__a, __idx)
#endif
+/* For big-endian, GCC's vector indices are reversed within each 64 bits
+ compared to the architectural lane indices used by MVE intrinsics. */
+#define __ARM_NUM_LANES(__v) (sizeof (__v) / sizeof (__v[0]))
+#ifdef __ARM_BIG_ENDIAN
+#define __ARM_LANEQ(__vec, __idx) (__idx ^ (__ARM_NUM_LANES(__vec)/2 - 1))
+#else
+#define __ARM_LANEQ(__vec, __idx) __idx
+#endif
+#define __ARM_CHECK_LANEQ(__vec, __idx) \
+ __builtin_arm_lane_check (__ARM_NUM_LANES(__vec), \
+ __ARM_LANEQ(__vec, __idx))
+
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vst4q_s8 (int8_t * __addr, int8x16x4_t __value)
return __rv.__i;
}
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vsetq_lane_s16 (int16_t __a, int16x8_t __b, const int __idx)
+{
+ __ARM_CHECK_LANEQ (__b, __idx);
+ __b[__ARM_LANEQ(__b,__idx)] = __a;
+ return __b;
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vsetq_lane_s32 (int32_t __a, int32x4_t __b, const int __idx)
+{
+ __ARM_CHECK_LANEQ (__b, __idx);
+ __b[__ARM_LANEQ(__b,__idx)] = __a;
+ return __b;
+}
+
+__extension__ extern __inline int8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vsetq_lane_s8 (int8_t __a, int8x16_t __b, const int __idx)
+{
+ __ARM_CHECK_LANEQ (__b, __idx);
+ __b[__ARM_LANEQ(__b,__idx)] = __a;
+ return __b;
+}
+
+__extension__ extern __inline int64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vsetq_lane_s64 (int64_t __a, int64x2_t __b, const int __idx)
+{
+ __ARM_CHECK_LANEQ (__b, __idx);
+ __b[__ARM_LANEQ(__b,__idx)] = __a;
+ return __b;
+}
+
+__extension__ extern __inline uint8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vsetq_lane_u8 (uint8_t __a, uint8x16_t __b, const int __idx)
+{
+ __ARM_CHECK_LANEQ (__b, __idx);
+ __b[__ARM_LANEQ(__b,__idx)] = __a;
+ return __b;
+}
+
+__extension__ extern __inline uint16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vsetq_lane_u16 (uint16_t __a, uint16x8_t __b, const int __idx)
+{
+ __ARM_CHECK_LANEQ (__b, __idx);
+ __b[__ARM_LANEQ(__b,__idx)] = __a;
+ return __b;
+}
+
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vsetq_lane_u32 (uint32_t __a, uint32x4_t __b, const int __idx)
+{
+ __ARM_CHECK_LANEQ (__b, __idx);
+ __b[__ARM_LANEQ(__b,__idx)] = __a;
+ return __b;
+}
+
+__extension__ extern __inline uint64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vsetq_lane_u64 (uint64_t __a, uint64x2_t __b, const int __idx)
+{
+ __ARM_CHECK_LANEQ (__b, __idx);
+ __b[__ARM_LANEQ(__b,__idx)] = __a;
+ return __b;
+}
+
+__extension__ extern __inline int16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vgetq_lane_s16 (int16x8_t __a, const int __idx)
+{
+ __ARM_CHECK_LANEQ (__a, __idx);
+ return __a[__ARM_LANEQ(__a,__idx)];
+}
+
+__extension__ extern __inline int32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vgetq_lane_s32 (int32x4_t __a, const int __idx)
+{
+ __ARM_CHECK_LANEQ (__a, __idx);
+ return __a[__ARM_LANEQ(__a,__idx)];
+}
+
+__extension__ extern __inline int8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vgetq_lane_s8 (int8x16_t __a, const int __idx)
+{
+ __ARM_CHECK_LANEQ (__a, __idx);
+ return __a[__ARM_LANEQ(__a,__idx)];
+}
+
+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vgetq_lane_s64 (int64x2_t __a, const int __idx)
+{
+ __ARM_CHECK_LANEQ (__a, __idx);
+ return __a[__ARM_LANEQ(__a,__idx)];
+}
+
+__extension__ extern __inline uint8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vgetq_lane_u8 (uint8x16_t __a, const int __idx)
+{
+ __ARM_CHECK_LANEQ (__a, __idx);
+ return __a[__ARM_LANEQ(__a,__idx)];
+}
+
+__extension__ extern __inline uint16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vgetq_lane_u16 (uint16x8_t __a, const int __idx)
+{
+ __ARM_CHECK_LANEQ (__a, __idx);
+ return __a[__ARM_LANEQ(__a,__idx)];
+}
+
+__extension__ extern __inline uint32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vgetq_lane_u32 (uint32x4_t __a, const int __idx)
+{
+ __ARM_CHECK_LANEQ (__a, __idx);
+ return __a[__ARM_LANEQ(__a,__idx)];
+}
+
+__extension__ extern __inline uint64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vgetq_lane_u64 (uint64x2_t __a, const int __idx)
+{
+ __ARM_CHECK_LANEQ (__a, __idx);
+ return __a[__ARM_LANEQ(__a,__idx)];
+}
+
#if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */
__extension__ extern __inline void
return vstrwq_p_f32 (__addr, __value, __p);
}
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vsetq_lane_f16 (float16_t __a, float16x8_t __b, const int __idx)
+{
+ __ARM_CHECK_LANEQ (__b, __idx);
+ __b[__ARM_LANEQ(__b,__idx)] = __a;
+ return __b;
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vsetq_lane_f32 (float32_t __a, float32x4_t __b, const int __idx)
+{
+ __ARM_CHECK_LANEQ (__b, __idx);
+ __b[__ARM_LANEQ(__b,__idx)] = __a;
+ return __b;
+}
+
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vgetq_lane_f16 (float16x8_t __a, const int __idx)
+{
+ __ARM_CHECK_LANEQ (__a, __idx);
+ return __a[__ARM_LANEQ(__a,__idx)];
+}
+
+__extension__ extern __inline float32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vgetq_lane_f32 (float32x4_t __a, const int __idx)
+{
+ __ARM_CHECK_LANEQ (__a, __idx);
+ return __a[__ARM_LANEQ(__a,__idx)];
+}
#endif
enum {
int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmulq_rot90_x_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmulq_rot90_x_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
+#define vgetq_lane(p0,p1) __arm_vgetq_lane(p0,p1)
+#define __arm_vgetq_lane(p0,p1) ({ __typeof(p0) __p0 = (p0); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
+ int (*)[__ARM_mve_type_int8x16_t]: __arm_vgetq_lane_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
+ int (*)[__ARM_mve_type_int16x8_t]: __arm_vgetq_lane_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
+ int (*)[__ARM_mve_type_int32x4_t]: __arm_vgetq_lane_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
+ int (*)[__ARM_mve_type_int64x2_t]: __arm_vgetq_lane_s64 (__ARM_mve_coerce(__p0, int64x2_t), p1), \
+ int (*)[__ARM_mve_type_uint8x16_t]: __arm_vgetq_lane_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
+ int (*)[__ARM_mve_type_uint16x8_t]: __arm_vgetq_lane_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
+ int (*)[__ARM_mve_type_uint32x4_t]: __arm_vgetq_lane_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1), \
+ int (*)[__ARM_mve_type_uint64x2_t]: __arm_vgetq_lane_u64 (__ARM_mve_coerce(__p0, uint64x2_t), p1), \
+ int (*)[__ARM_mve_type_float16x8_t]: __arm_vgetq_lane_f16 (__ARM_mve_coerce(__p0, float16x8_t), p1), \
+ int (*)[__ARM_mve_type_float32x4_t]: __arm_vgetq_lane_f32 (__ARM_mve_coerce(__p0, float32x4_t), p1));})
+
+#define vsetq_lane(p0,p1,p2) __arm_vsetq_lane(p0,p1,p2)
+#define __arm_vsetq_lane(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
+ int (*)[__ARM_mve_type_int8_t][__ARM_mve_type_int8x16_t]: __arm_vsetq_lane_s8 (__ARM_mve_coerce(__p0, int8_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
+ int (*)[__ARM_mve_type_int16_t][__ARM_mve_type_int16x8_t]: __arm_vsetq_lane_s16 (__ARM_mve_coerce(__p0, int16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
+ int (*)[__ARM_mve_type_int32_t][__ARM_mve_type_int32x4_t]: __arm_vsetq_lane_s32 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
+ int (*)[__ARM_mve_type_int64_t][__ARM_mve_type_int64x2_t]: __arm_vsetq_lane_s64 (__ARM_mve_coerce(__p0, int64_t), __ARM_mve_coerce(__p1, int64x2_t), p2), \
+ int (*)[__ARM_mve_type_uint8_t][__ARM_mve_type_uint8x16_t]: __arm_vsetq_lane_u8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
+ int (*)[__ARM_mve_type_uint16_t][__ARM_mve_type_uint16x8_t]: __arm_vsetq_lane_u16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
+ int (*)[__ARM_mve_type_uint32_t][__ARM_mve_type_uint32x4_t]: __arm_vsetq_lane_u32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
+ int (*)[__ARM_mve_type_uint64_t][__ARM_mve_type_uint64x2_t]: __arm_vsetq_lane_u64 (__ARM_mve_coerce(__p0, uint64_t), __ARM_mve_coerce(__p1, uint64x2_t), p2), \
+ int (*)[__ARM_mve_type_float16_t][__ARM_mve_type_float16x8_t]: __arm_vsetq_lane_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
+ int (*)[__ARM_mve_type_float32_t][__ARM_mve_type_float32x4_t]: __arm_vsetq_lane_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
+
#else /* MVE Integer. */
#define vstrwq_scatter_base_wb(p0,p1,p2) __arm_vstrwq_scatter_base_wb(p0,p1,p2)
int (*)[__ARM_mve_type_uint16_t_const_ptr]: __arm_vld4q_u16 (__ARM_mve_coerce(__p0, uint16_t const *)), \
int (*)[__ARM_mve_type_uint32_t_const_ptr]: __arm_vld4q_u32 (__ARM_mve_coerce(__p0, uint32_t const *)));})
+#define vgetq_lane(p0,p1) __arm_vgetq_lane(p0,p1)
+#define __arm_vgetq_lane(p0,p1) ({ __typeof(p0) __p0 = (p0); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
+ int (*)[__ARM_mve_type_int8x16_t]: __arm_vgetq_lane_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
+ int (*)[__ARM_mve_type_int16x8_t]: __arm_vgetq_lane_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
+ int (*)[__ARM_mve_type_int32x4_t]: __arm_vgetq_lane_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
+ int (*)[__ARM_mve_type_int64x2_t]: __arm_vgetq_lane_s64 (__ARM_mve_coerce(__p0, int64x2_t), p1), \
+ int (*)[__ARM_mve_type_uint8x16_t]: __arm_vgetq_lane_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
+ int (*)[__ARM_mve_type_uint16x8_t]: __arm_vgetq_lane_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
+ int (*)[__ARM_mve_type_uint32x4_t]: __arm_vgetq_lane_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1), \
+ int (*)[__ARM_mve_type_uint64x2_t]: __arm_vgetq_lane_u64 (__ARM_mve_coerce(__p0, uint64x2_t), p1));})
+
+#define vsetq_lane(p0,p1,p2) __arm_vsetq_lane(p0,p1,p2)
+#define __arm_vsetq_lane(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
+ int (*)[__ARM_mve_type_int8_t][__ARM_mve_type_int8x16_t]: __arm_vsetq_lane_s8 (__ARM_mve_coerce(__p0, int8_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
+ int (*)[__ARM_mve_type_int16_t][__ARM_mve_type_int16x8_t]: __arm_vsetq_lane_s16 (__ARM_mve_coerce(__p0, int16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
+ int (*)[__ARM_mve_type_int32_t][__ARM_mve_type_int32x4_t]: __arm_vsetq_lane_s32 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
+ int (*)[__ARM_mve_type_int64_t][__ARM_mve_type_int64x2_t]: __arm_vsetq_lane_s64 (__ARM_mve_coerce(__p0, int64_t), __ARM_mve_coerce(__p1, int64x2_t), p2), \
+ int (*)[__ARM_mve_type_uint8_t][__ARM_mve_type_uint8x16_t]: __arm_vsetq_lane_u8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
+ int (*)[__ARM_mve_type_uint16_t][__ARM_mve_type_uint16x8_t]: __arm_vsetq_lane_u16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
+ int (*)[__ARM_mve_type_uint32_t][__ARM_mve_type_uint32x4_t]: __arm_vsetq_lane_u32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
+ int (*)[__ARM_mve_type_uint64_t][__ARM_mve_type_uint64x2_t]: __arm_vsetq_lane_u64 (__ARM_mve_coerce(__p0, uint64_t), __ARM_mve_coerce(__p1, uint64x2_t), p2));})
+
#endif /* MVE Integer. */
#define vmvnq_x(p1,p2) __arm_vmvnq_x(p1,p2)
;; Quad-width vector modes plus 64-bit elements.
(define_mode_iterator VQX [V16QI V8HI V8HF V8BF V4SI V4SF V2DI])
+;; Quad-width vector modes plus 64-bit elements, excluding V8BF.
+(define_mode_iterator VQX_NOBF [V16QI V8HI V8HF V4SI V4SF V2DI])
+
;; Quad-width vector modes plus 64-bit elements and V8BF.
(define_mode_iterator VQXBF [V16QI V8HI V8HF (V8BF "TARGET_BF16_SIMD") V4SI V4SF V2DI])
(define_mode_attr MVE_H_ELEM [ (V8HI "V8HI") (V4SI "V4HI")])
(define_mode_attr V_sz_elem1 [(V16QI "b") (V8HI "h") (V4SI "w") (V8HF "h")
(V4SF "w")])
+(define_mode_attr V_extr_elem [(V16QI "u8") (V8HI "u16") (V4SI "32")
+ (V8HF "u16") (V4SF "32")])
+
(define_mode_attr earlyclobber_32 [(V16QI "=w") (V8HI "=w") (V4SI "=&w")
(V8HF "=w") (V4SF "=&w")])
return "";
}
[(set_attr "length" "16")])
+;;
+;; [vgetq_lane_u, vgetq_lane_s, vgetq_lane_f])
+;;
+(define_insn "mve_vec_extract<mode><V_elem_l>"
+ [(set (match_operand:<V_elem> 0 "s_register_operand" "=r")
+ (vec_select:<V_elem>
+ (match_operand:MVE_VLD_ST 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
+ "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
+ || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
+{
+ if (BYTES_BIG_ENDIAN)
+ {
+ int elt = INTVAL (operands[2]);
+ elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
+ operands[2] = GEN_INT (elt);
+ }
+ return "vmov.<V_extr_elem>\t%0, %q1[%c2]";
+}
+ [(set_attr "type" "mve_move")])
+
+(define_insn "mve_vec_extractv2didi"
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
+ (vec_select:DI
+ (match_operand:V2DI 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
+ "TARGET_HAVE_MVE"
+{
+ int elt = INTVAL (operands[2]);
+ if (BYTES_BIG_ENDIAN)
+ elt = 1 - elt;
+
+ if (elt == 0)
+ return "vmov\t%Q0, %R0, %e1";
+ else
+ return "vmov\t%J0, %K0, %f1";
+}
+ [(set_attr "type" "mve_move")])
+
+(define_insn "*mve_vec_extract_sext_internal<mode>"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (sign_extend:SI
+ (vec_select:<V_elem>
+ (match_operand:MVE_2 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
+ "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
+ || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
+{
+ if (BYTES_BIG_ENDIAN)
+ {
+ int elt = INTVAL (operands[2]);
+ elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
+ operands[2] = GEN_INT (elt);
+ }
+ return "vmov.s<V_sz_elem>\t%0, %q1[%c2]";
+}
+ [(set_attr "type" "mve_move")])
+
+(define_insn "*mve_vec_extract_zext_internal<mode>"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (zero_extend:SI
+ (vec_select:<V_elem>
+ (match_operand:MVE_2 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
+ "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
+ || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
+{
+ if (BYTES_BIG_ENDIAN)
+ {
+ int elt = INTVAL (operands[2]);
+ elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
+ operands[2] = GEN_INT (elt);
+ }
+ return "vmov.u<V_sz_elem>\t%0, %q1[%c2]";
+}
+ [(set_attr "type" "mve_move")])
+
+;;
+;; [vsetq_lane_u, vsetq_lane_s, vsetq_lane_f])
+;;
+(define_insn "mve_vec_set<mode>_internal"
+ [(set (match_operand:VQ2 0 "s_register_operand" "=w")
+ (vec_merge:VQ2
+ (vec_duplicate:VQ2
+ (match_operand:<V_elem> 1 "nonimmediate_operand" "r"))
+ (match_operand:VQ2 3 "s_register_operand" "0")
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
+ || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
+{
+ int elt = ffs ((int) INTVAL (operands[2])) - 1;
+ if (BYTES_BIG_ENDIAN)
+ elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
+ operands[2] = GEN_INT (elt);
+
+ return "vmov.<V_sz_elem>\t%q0[%c2], %1";
+}
+ [(set_attr "type" "mve_move")])
+
+(define_insn "mve_vec_setv2di_internal"
+ [(set (match_operand:V2DI 0 "s_register_operand" "=w")
+ (vec_merge:V2DI
+ (vec_duplicate:V2DI
+ (match_operand:DI 1 "nonimmediate_operand" "r"))
+ (match_operand:V2DI 3 "s_register_operand" "0")
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_HAVE_MVE"
+{
+ int elt = ffs ((int) INTVAL (operands[2])) - 1;
+ if (BYTES_BIG_ENDIAN)
+ elt = 1 - elt;
+
+ if (elt == 0)
+ return "vmov\t%e0, %Q1, %R1";
+ else
+ return "vmov\t%f0, %J1, %K1";
+}
+ [(set_attr "type" "mve_move")])
[(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
)
-(define_expand "vec_set<mode>"
- [(match_operand:VDQ 0 "s_register_operand")
- (match_operand:<V_elem> 1 "s_register_operand")
- (match_operand:SI 2 "immediate_operand")]
- "TARGET_NEON"
-{
- HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
- emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
- GEN_INT (elem), operands[0]));
- DONE;
-})
-
(define_insn "vec_extract<mode><V_elem_l>"
[(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
(vec_select:<V_elem>
[(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)
-(define_insn "vec_extract<mode><V_elem_l>"
+;; This pattern was renamed from "vec_extract<mode><V_elem_l>" to
+;; "neon_vec_extract<mode><V_elem_l>"; it is now invoked by the
+;; "vec_extract<mode><V_elem_l>" define_expand in vec-common.md.
+(define_insn "neon_vec_extract<mode><V_elem_l>"
[(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
(vec_select:<V_elem>
(match_operand:VQ2 1 "s_register_operand" "w,w")
[(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)
-(define_insn "vec_extractv2didi"
+;; This pattern was renamed from "vec_extractv2didi" to
+;; "neon_vec_extractv2didi"; it is now invoked by the define_expand
+;; in vec-common.md.
+(define_insn "neon_vec_extractv2didi"
[(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
(vec_select:DI
(match_operand:V2DI 1 "s_register_operand" "w,w")
arm_expand_vec_perm (operands[0], operands[1], operands[2], operands[3]);
DONE;
})
+
+(define_expand "vec_extract<mode><V_elem_l>"
+ [(match_operand:<V_elem> 0 "nonimmediate_operand")
+ (match_operand:VQX_NOBF 1 "s_register_operand")
+ (match_operand:SI 2 "immediate_operand")]
+ "TARGET_NEON || TARGET_HAVE_MVE"
+{
+ if (TARGET_NEON)
+ emit_insn (gen_neon_vec_extract<mode><V_elem_l> (operands[0], operands[1],
+ operands[2]));
+ else if (TARGET_HAVE_MVE)
+ emit_insn (gen_mve_vec_extract<mode><V_elem_l> (operands[0], operands[1],
+ operands[2]));
+ else
+ gcc_unreachable ();
+ DONE;
+})
+
+(define_expand "vec_set<mode>"
+ [(match_operand:VQX_NOBF 0 "s_register_operand" "")
+ (match_operand:<V_elem> 1 "s_register_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")]
+ "TARGET_NEON || TARGET_HAVE_MVE"
+{
+ HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
+ if (TARGET_NEON)
+ emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
+ GEN_INT (elem), operands[0]));
+ else
+ emit_insn (gen_mve_vec_set<mode>_internal (operands[0], operands[1],
+ GEN_INT (elem), operands[0]));
+ DONE;
+})
+2020-03-23 Srinath Parvathaneni <srinath.parvathaneni@arm.com>
+ Andre Vieira <andre.simoesdiasvieira@arm.com>
+ Mihail Ionescu <mihail.ionescu@arm.com>
+
+ * gcc.target/arm/mve/intrinsics/vgetq_lane_f16.c: New test.
+ * gcc.target/arm/mve/intrinsics/vgetq_lane_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vgetq_lane_s16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vgetq_lane_s32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vgetq_lane_s64.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vgetq_lane_s8.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vgetq_lane_u16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vgetq_lane_u32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vgetq_lane_u64.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vgetq_lane_u8.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vsetq_lane_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vsetq_lane_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vsetq_lane_s16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vsetq_lane_s32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vsetq_lane_s64.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vsetq_lane_s8.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vsetq_lane_u16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vsetq_lane_u32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vsetq_lane_u64.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vsetq_lane_u8.c: Likewise.
+
2020-03-23 Andrea Corallo <andrea.corallo@arm.com>
* jit.dg/all-non-failing-tests.h: Add test-long-string-literal.c.
--- /dev/null
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=soft" } {""} } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16_t
+foo (float16x8_t a)
+{
+ return vgetq_lane_f16 (a, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.u16" } } */
+
+float16_t
+foo1 (float16x8_t a)
+{
+ return vgetq_lane (a, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.u16" } } */
--- /dev/null
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=soft" } {""} } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32_t
+foo (float32x4_t a)
+{
+ return vgetq_lane_f32 (a, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.32" } } */
+
+float32_t
+foo1 (float32x4_t a)
+{
+ return vgetq_lane (a, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.32" } } */
--- /dev/null
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=soft" } {""} } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+int16_t
+foo (int16x8_t a)
+{
+ return vgetq_lane_s16 (a, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.s16" } } */
+
+int16_t
+foo1 (int16x8_t a)
+{
+ return vgetq_lane (a, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.s16" } } */
--- /dev/null
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=soft" } {""} } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+int32_t
+foo (int32x4_t a)
+{
+ return vgetq_lane_s32 (a, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.32" } } */
+
+int32_t
+foo1 (int32x4_t a)
+{
+ return vgetq_lane (a, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.32" } } */
--- /dev/null
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=soft" } {""} } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+int64_t
+foo (int64x2_t a)
+{
+ return vgetq_lane_s64 (a, 0);
+}
+
+/* { dg-final { scan-assembler {vmov\tr0, r1, d0} } } */
+
+int64_t
+foo1 (int64x2_t a)
+{
+ return vgetq_lane (a, 0);
+}
+
+/* { dg-final { scan-assembler {vmov\tr0, r1, d0} } } */
--- /dev/null
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=soft" } {""} } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+int8_t
+foo (int8x16_t a)
+{
+ return vgetq_lane_s8 (a, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.s8" } } */
+
+int8_t
+foo1 (int8x16_t a)
+{
+ return vgetq_lane (a, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.s8" } } */
--- /dev/null
+/* Test the MVE vgetq_lane_u16 intrinsic: read lane 0 of a uint16x8_t.
+   Expected to emit a zero-extending "vmov.u16" lane-to-register move.  */
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=soft" } {""} } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+uint16_t
+foo (uint16x8_t a)
+{
+ return vgetq_lane_u16 (a, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.u16" } } */
+
+/* Same extraction through the type-generic vgetq_lane overload.  */
+uint16_t
+foo1 (uint16x8_t a)
+{
+ return vgetq_lane (a, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.u16" } } */
--- /dev/null
+/* Test the MVE vgetq_lane_u32 intrinsic: read lane 0 of a uint32x4_t.
+   A full-width lane read needs no extension, so a plain "vmov.32" is
+   expected.  */
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=soft" } {""} } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+uint32_t
+foo (uint32x4_t a)
+{
+ return vgetq_lane_u32 (a, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.32" } } */
+
+/* Same extraction through the type-generic vgetq_lane overload.  */
+uint32_t
+foo1 (uint32x4_t a)
+{
+ return vgetq_lane (a, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.32" } } */
--- /dev/null
+/* Test the MVE vgetq_lane_u64 intrinsic: read lane 0 of a uint64x2_t.
+   A 64-bit lane moves out via a core-register pair, hence the expected
+   "vmov r0, r1, d0" form.  */
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=soft" } {""} } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+uint64_t
+foo (uint64x2_t a)
+{
+ return vgetq_lane_u64 (a, 0);
+}
+
+/* { dg-final { scan-assembler {vmov\tr0, r1, d0} } } */
+
+/* Same extraction through the type-generic vgetq_lane overload.  */
+uint64_t
+foo1 (uint64x2_t a)
+{
+ return vgetq_lane (a, 0);
+}
+
+/* { dg-final { scan-assembler {vmov\tr0, r1, d0} } } */
--- /dev/null
+/* Test the MVE vgetq_lane_u8 intrinsic: read lane 0 of a uint8x16_t.
+   Expected to emit a zero-extending "vmov.u8" lane-to-register move.  */
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=soft" } {""} } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+uint8_t
+foo (uint8x16_t a)
+{
+ return vgetq_lane_u8 (a, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.u8" } } */
+
+/* Same extraction through the type-generic vgetq_lane overload.  */
+uint8_t
+foo1 (uint8x16_t a)
+{
+ return vgetq_lane (a, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.u8" } } */
--- /dev/null
+/* Test the MVE vsetq_lane_f16 intrinsic: insert a float16_t into lane 0
+   of a float16x8_t.  Expected to emit a "vmov.16" register-to-lane move.
+   Requires the MVE floating-point extension.  */
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=soft" } {""} } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16_t a, float16x8_t b)
+{
+ return vsetq_lane_f16 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.16" } } */
+
+/* Same insertion through the type-generic vsetq_lane overload, matching
+   the foo/foo1 coverage used by the vgetq_lane tests.  */
+float16x8_t
+foo1 (float16_t a, float16x8_t b)
+{
+ return vsetq_lane (a, b, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.16" } } */
+
--- /dev/null
+/* Test the MVE vsetq_lane_f32 intrinsic: insert a float32_t into lane 0
+   of a float32x4_t.  Expected to emit a "vmov.32" register-to-lane move.
+   Requires the MVE floating-point extension.  */
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=soft" } {""} } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32_t a, float32x4_t b)
+{
+ return vsetq_lane_f32 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.32" } } */
+
+/* Same insertion through the type-generic vsetq_lane overload, matching
+   the foo/foo1 coverage used by the vgetq_lane tests.  */
+float32x4_t
+foo1 (float32_t a, float32x4_t b)
+{
+ return vsetq_lane (a, b, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.32" } } */
+
--- /dev/null
+/* Test the MVE vsetq_lane_s16 intrinsic: insert an int16_t into lane 0
+   of an int16x8_t.  Expected to emit a "vmov.16" register-to-lane move.  */
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=soft" } {""} } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+int16x8_t
+foo (int16_t a, int16x8_t b)
+{
+ return vsetq_lane_s16 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.16" } } */
+
+/* Same insertion through the type-generic vsetq_lane overload, matching
+   the foo/foo1 coverage used by the vgetq_lane tests.  */
+int16x8_t
+foo1 (int16_t a, int16x8_t b)
+{
+ return vsetq_lane (a, b, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.16" } } */
+
--- /dev/null
+/* Test the MVE vsetq_lane_s32 intrinsic: insert an int32_t into lane 0
+   of an int32x4_t.  Expected to emit a "vmov.32" register-to-lane move.  */
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=soft" } {""} } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+int32x4_t
+foo (int32_t a, int32x4_t b)
+{
+ return vsetq_lane_s32 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.32" } } */
+
+/* Same insertion through the type-generic vsetq_lane overload, matching
+   the foo/foo1 coverage used by the vgetq_lane tests.  */
+int32x4_t
+foo1 (int32_t a, int32x4_t b)
+{
+ return vsetq_lane (a, b, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.32" } } */
+
--- /dev/null
+/* Test the MVE vsetq_lane_s64 intrinsic: insert an int64_t into lane 0
+   of an int64x2_t.  A 64-bit lane is written from a core-register pair,
+   hence the expected "vmov dN, rA, rB" form.  */
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=soft" } {""} } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+int64x2_t
+foo (int64_t a, int64x2_t b)
+{
+ return vsetq_lane_s64 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler {vmov\td0, r[1-9]*[0-9], r[1-9]*[0-9]} } } */
+
+/* Same insertion through the type-generic vsetq_lane overload, matching
+   the foo/foo1 coverage used by the vgetq_lane tests.  */
+int64x2_t
+foo1 (int64_t a, int64x2_t b)
+{
+ return vsetq_lane (a, b, 0);
+}
+
+/* { dg-final { scan-assembler {vmov\td0, r[1-9]*[0-9], r[1-9]*[0-9]} } } */
+
--- /dev/null
+/* Test the MVE vsetq_lane_s8 intrinsic: insert an int8_t into lane 0
+   of an int8x16_t.  Expected to emit a "vmov.8" register-to-lane move.  */
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=soft" } {""} } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+int8x16_t
+foo (int8_t a, int8x16_t b)
+{
+ return vsetq_lane_s8 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.8" } } */
+
+/* Same insertion through the type-generic vsetq_lane overload, matching
+   the foo/foo1 coverage used by the vgetq_lane tests.  */
+int8x16_t
+foo1 (int8_t a, int8x16_t b)
+{
+ return vsetq_lane (a, b, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.8" } } */
+
--- /dev/null
+/* Test the MVE vsetq_lane_u16 intrinsic: insert a uint16_t into lane 0
+   of a uint16x8_t.  Expected to emit a "vmov.16" register-to-lane move.  */
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=soft" } {""} } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+uint16x8_t
+foo (uint16_t a, uint16x8_t b)
+{
+ return vsetq_lane_u16 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.16" } } */
+
+/* Same insertion through the type-generic vsetq_lane overload, matching
+   the foo/foo1 coverage used by the vgetq_lane tests.  */
+uint16x8_t
+foo1 (uint16_t a, uint16x8_t b)
+{
+ return vsetq_lane (a, b, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.16" } } */
+
--- /dev/null
+/* Test the MVE vsetq_lane_u32 intrinsic: insert a uint32_t into lane 0
+   of a uint32x4_t.  Expected to emit a "vmov.32" register-to-lane move.  */
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=soft" } {""} } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+uint32x4_t
+foo (uint32_t a, uint32x4_t b)
+{
+ return vsetq_lane_u32 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.32" } } */
+
+/* Same insertion through the type-generic vsetq_lane overload, matching
+   the foo/foo1 coverage used by the vgetq_lane tests.  */
+uint32x4_t
+foo1 (uint32_t a, uint32x4_t b)
+{
+ return vsetq_lane (a, b, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.32" } } */
+
--- /dev/null
+/* Test the MVE vsetq_lane_u64 intrinsic: insert a uint64_t into lane 0
+   of a uint64x2_t.  A 64-bit lane is written from a core-register pair,
+   hence the expected "vmov dN, rA, rB" form.  */
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=soft" } {""} } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+uint64x2_t
+foo (uint64_t a, uint64x2_t b)
+{
+ return vsetq_lane_u64 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler {vmov\td0, r[1-9]*[0-9], r[1-9]*[0-9]} } } */
+
+/* Same insertion through the type-generic vsetq_lane overload, matching
+   the foo/foo1 coverage used by the vgetq_lane tests.  */
+uint64x2_t
+foo1 (uint64_t a, uint64x2_t b)
+{
+ return vsetq_lane (a, b, 0);
+}
+
+/* { dg-final { scan-assembler {vmov\td0, r[1-9]*[0-9], r[1-9]*[0-9]} } } */
+
--- /dev/null
+/* Test the MVE vsetq_lane_u8 intrinsic: insert a uint8_t into lane 0
+   of a uint8x16_t.  Expected to emit a "vmov.8" register-to-lane move.  */
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=soft" } {""} } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+uint8x16_t
+foo (uint8_t a, uint8x16_t b)
+{
+ return vsetq_lane_u8 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.8" } } */
+
+/* Same insertion through the type-generic vsetq_lane overload, matching
+   the foo/foo1 coverage used by the vgetq_lane tests.  */
+uint8x16_t
+foo1 (uint8_t a, uint8x16_t b)
+{
+ return vsetq_lane (a, b, 0);
+}
+
+/* { dg-final { scan-assembler "vmov.8" } } */