This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[AArch64] Fix size of memory store for the vst<n>_lane intrinsics


Hi,

The vst<n>_lane_<lane_type> intrinsics should write
(sizeof (lane_type) * n) bytes to memory.

In their current form, their asm constraints suggest a write size of
(sizeof (vector_type) * n). This is anywhere from 1 to 16 times too
much data, and can cause huge headaches with dead store elimination.

This patch better models how much data we will be writing, which in
turn lets us eliminate the memory clobber. Together, these changes
avoid the problems with dead store elimination.

Tested with aarch64.exp. I also checked the C++ NEON mangling test,
which often breaks when you do these ugly casts.

OK?

Thanks,
James

---
gcc/

2013-10-29  James Greenhalgh  <james.greenhalgh@arm.com>

	* config/aarch64/arm_neon.h
	(__ST2_LANE_FUNC): Better model data size.
	(__ST3_LANE_FUNC): Likewise.
	(__ST4_LANE_FUNC): Likewise.
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 787ff15..7a63ea1 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -14704,16 +14704,19 @@ __LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
 
 #define __ST2_LANE_FUNC(intype, ptrtype, regsuffix,			\
 			lnsuffix, funcsuffix, Q)			\
+  typedef struct { ptrtype __x[2]; } __ST2_LANE_STRUCTURE_##intype;	\
   __extension__ static __inline void					\
   __attribute__ ((__always_inline__))					\
-  vst2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,		\
+  vst2 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr,			\
 				     intype b, const int c)		\
   {									\
+    __ST2_LANE_STRUCTURE_##intype *__p =				\
+				(__ST2_LANE_STRUCTURE_##intype *)ptr;	\
     __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t"	\
 	     "st2 {v16." #lnsuffix ", v17." #lnsuffix "}[%2], %0\n\t"	\
-	     : "=Q"(*(intype *) ptr)					\
+	     : "=Q"(*__p)						\
 	     : "Q"(b), "i"(c)						\
-	     : "memory", "v16", "v17");					\
+	     : "v16", "v17");						\
   }
 
 __ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,)
@@ -14743,16 +14746,19 @@ __ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
 
 #define __ST3_LANE_FUNC(intype, ptrtype, regsuffix,			\
 			lnsuffix, funcsuffix, Q)			\
+  typedef struct { ptrtype __x[3]; } __ST3_LANE_STRUCTURE_##intype;	\
   __extension__ static __inline void					\
   __attribute__ ((__always_inline__))					\
-  vst3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,		\
+  vst3 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr,			\
 				     intype b, const int c)		\
   {									\
+    __ST3_LANE_STRUCTURE_##intype *__p =				\
+				(__ST3_LANE_STRUCTURE_##intype *)ptr;	\
     __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t"	\
 	     "st3 {v16." #lnsuffix " - v18." #lnsuffix "}[%2], %0\n\t"	\
-	     : "=Q"(*(intype *) ptr)					\
+	     : "=Q"(*__p)						\
 	     : "Q"(b), "i"(c)						\
-	     : "memory", "v16", "v17", "v18");				\
+	     : "v16", "v17", "v18");					\
   }
 
 __ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,)
@@ -14782,16 +14788,19 @@ __ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
 
 #define __ST4_LANE_FUNC(intype, ptrtype, regsuffix,			\
 			lnsuffix, funcsuffix, Q)			\
+  typedef struct { ptrtype __x[4]; } __ST4_LANE_STRUCTURE_##intype;	\
   __extension__ static __inline void					\
   __attribute__ ((__always_inline__))					\
-  vst4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,		\
+  vst4 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr,			\
 				     intype b, const int c)		\
   {									\
+    __ST4_LANE_STRUCTURE_##intype *__p =				\
+				(__ST4_LANE_STRUCTURE_##intype *)ptr;	\
     __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t"	\
 	     "st4 {v16." #lnsuffix " - v19." #lnsuffix "}[%2], %0\n\t"	\
-	     : "=Q"(*(intype *) ptr)					\
+	     : "=Q"(*__p)						\
 	     : "Q"(b), "i"(c)						\
-	     : "memory", "v16", "v17", "v18", "v19");			\
+	     : "v16", "v17", "v18", "v19");				\
   }
 
 __ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,)

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]