This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[committed] patch for 24323


This patch was previously approved in this thread for 4.2:

http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00324.html

I've now committed the following for 4.2.

-eric

2005-11-29 Evan Cheng <evan.cheng@apple.com>

* config/i386/xmmintrin.h (_MM_TRANSPOSE4_PS): Rewrite using high/low
moves and unpack to speed up.


Index: xmmintrin.h
===================================================================
--- xmmintrin.h (revision 107699)
+++ xmmintrin.h (working copy)
@@ -1197,14 +1197,14 @@ _mm_pause (void)
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
do { \
__v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3); \
- __v4sf __t0 = __builtin_ia32_shufps (__r0, __r1, 0x44); \
- __v4sf __t2 = __builtin_ia32_shufps (__r0, __r1, 0xEE); \
- __v4sf __t1 = __builtin_ia32_shufps (__r2, __r3, 0x44); \
- __v4sf __t3 = __builtin_ia32_shufps (__r2, __r3, 0xEE); \
- (row0) = __builtin_ia32_shufps (__t0, __t1, 0x88); \
- (row1) = __builtin_ia32_shufps (__t0, __t1, 0xDD); \
- (row2) = __builtin_ia32_shufps (__t2, __t3, 0x88); \
- (row3) = __builtin_ia32_shufps (__t2, __t3, 0xDD); \
+ __v4sf __t0 = __builtin_ia32_unpcklps (__r0, __r1); \
+ __v4sf __t1 = __builtin_ia32_unpcklps (__r2, __r3); \
+ __v4sf __t2 = __builtin_ia32_unpckhps (__r0, __r1); \
+ __v4sf __t3 = __builtin_ia32_unpckhps (__r2, __r3); \
+ (row0) = __builtin_ia32_movlhps (__t0, __t1); \
+ (row1) = __builtin_ia32_movhlps (__t1, __t0); \
+ (row2) = __builtin_ia32_movlhps (__t2, __t3); \
+ (row3) = __builtin_ia32_movhlps (__t3, __t2); \
} while (0)



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]