This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH AArch64 2/2] Replace temporary inline assembler for vget_high
- From: Alan Lawrence <alan dot lawrence at arm dot com>
- To: "gcc-patches at gcc dot gnu dot org" <gcc-patches at gcc dot gnu dot org>
- Date: Tue, 12 Aug 2014 11:12:11 +0100
- Subject: [PATCH AArch64 2/2] Replace temporary inline assembler for vget_high
- Authentication-results: sourceware.org; auth=none
- References: <53E9E499 dot 4050806 at arm dot com>
This patch replaces the current inline assembler for the vget_high intrinsics in
arm_neon.h with a sequence of other calls, in a similar fashion to vget_low.
Unlike the assembler, these are all transparent to the front-end, so should
enable better optimization through the mid-end.
Tested check-gcc and check-g++ and aarch64-none-elf and aarch64_be-none-elf
(including new tests in previous patch!).
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 45f545779292bbd3ceb1b9e13c980988e52bc3ec..706e07c8cda5d1513a6ea9817f6054cfb2258d48 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -4322,6 +4322,85 @@ vget_low_u64 (uint64x2_t __a)
#undef __GET_LOW
+#define __GET_HIGH(__TYPE) \
+ uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \
+ uint64x1_t hi = vcreate_u64 (vgetq_lane_u64 (tmp, 1)); \
+ return vreinterpret_##__TYPE##_u64 (hi);
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vget_high_f32 (float32x4_t __a)
+{
+ __GET_HIGH (f32);
+}
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vget_high_f64 (float64x2_t __a)
+{
+ __GET_HIGH (f64);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vget_high_p8 (poly8x16_t __a)
+{
+ __GET_HIGH (p8);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vget_high_p16 (poly16x8_t __a)
+{
+ __GET_HIGH (p16);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vget_high_s8 (int8x16_t __a)
+{
+ __GET_HIGH (s8);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vget_high_s16 (int16x8_t __a)
+{
+ __GET_HIGH (s16);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vget_high_s32 (int32x4_t __a)
+{
+ __GET_HIGH (s32);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vget_high_s64 (int64x2_t __a)
+{
+ __GET_HIGH (s64);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vget_high_u8 (uint8x16_t __a)
+{
+ __GET_HIGH (u8);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vget_high_u16 (uint16x8_t __a)
+{
+ __GET_HIGH (u16);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vget_high_u32 (uint32x4_t __a)
+{
+ __GET_HIGH (u32);
+}
+
+#undef __GET_HIGH
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vget_high_u64 (uint64x2_t __a)
+{
+ return vcreate_u64 (vgetq_lane_u64 (__a, 1));
+}
+
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vcombine_s8 (int8x8_t __a, int8x8_t __b)
{
@@ -5764,138 +5843,6 @@ vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
return result;
}
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vget_high_f32 (float32x4_t a)
-{
- float32x2_t result;
- __asm__ ("ins %0.d[0], %1.d[1]"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
-vget_high_f64 (float64x2_t a)
-{
- float64x1_t result;
- __asm__ ("ins %0.d[0], %1.d[1]"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vget_high_p8 (poly8x16_t a)
-{
- poly8x8_t result;
- __asm__ ("ins %0.d[0], %1.d[1]"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-vget_high_p16 (poly16x8_t a)
-{
- poly16x4_t result;
- __asm__ ("ins %0.d[0], %1.d[1]"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vget_high_s8 (int8x16_t a)
-{
- int8x8_t result;
- __asm__ ("ins %0.d[0], %1.d[1]"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vget_high_s16 (int16x8_t a)
-{
- int16x4_t result;
- __asm__ ("ins %0.d[0], %1.d[1]"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vget_high_s32 (int32x4_t a)
-{
- int32x2_t result;
- __asm__ ("ins %0.d[0], %1.d[1]"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vget_high_s64 (int64x2_t a)
-{
- int64x1_t result;
- __asm__ ("ins %0.d[0], %1.d[1]"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vget_high_u8 (uint8x16_t a)
-{
- uint8x8_t result;
- __asm__ ("ins %0.d[0], %1.d[1]"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vget_high_u16 (uint16x8_t a)
-{
- uint16x4_t result;
- __asm__ ("ins %0.d[0], %1.d[1]"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vget_high_u32 (uint32x4_t a)
-{
- uint32x2_t result;
- __asm__ ("ins %0.d[0], %1.d[1]"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vget_high_u64 (uint64x2_t a)
-{
- uint64x1_t result;
- __asm__ ("ins %0.d[0], %1.d[1]"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vhsub_s8 (int8x8_t a, int8x8_t b)
{