This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH, i386]: Fix PR target/34435
- From: Uros Bizjak <ubizjak at gmail dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: Richard Guenther <rguenther at suse dot de>
- Date: Wed, 12 Dec 2007 22:36:53 +0100
- Subject: [PATCH, i386]: Fix PR target/34435
Hello!
Attached patch implements the solution to PR target/34435, suggested by
Richi (thanks!) in Comment #1 of the PR target/34435 audit trail [1].
The testcase:
--cut here--
#include <emmintrin.h>
class Vec {
__m128i vec;
public:
Vec(int mm) {
vec = _mm_set1_epi16(mm);
}
operator __m128i() const {
return vec;
}
};
int main() {
_mm_shuffle_epi32(Vec(5), _MM_SHUFFLE(3,3,3,3)); // error
}
--cut here--
fails to compile without optimizations due to the macro version of the
intrinsic, used without optimizations (macro version is necessary to
avoid failure in the check for immediate value in the non-optimized
compile):
"error: can't convert value to a vector".
Attached patch adds another cast of non-constant input value to __m128i,
so the macro becomes more similar to the inlined version of the intrinsics.
Patch was bootstrapped and regression tested on x86_64 {,-m32} without
new failures. If there are no comments to this approach, I plan to
commit the patch tomorrow.
2007-12-12 Uros Bizjak <ubizjak@gmail.com>
Richard Guenther <rguenther@suse.de>
PR target/34435
* config/i386/emmintrin.h (_mm_shuffle_pd, _mm_extract_epi16,
_mm_insert_epi16, _mm_shufflehi_epi16, _mm_shufflelo_epi16,
_mm_shuffle_epi32): Cast non-constant input values to either __m64,
__m128, __m128i or __m128d in a macro version of the intrinsic.
Cast constant input values to int.
* config/i386/ammintrin.h (_mm_extracti_si64, _mm_inserti_si64):
Ditto.
* config/i386/bmmintrin.h (_mm_roti_epi8, _mm_roti_epi16,
_mm_roti_epi32, _mm_roti_epi64): Ditto.
* config/i386/smmintrin.h (_mm_blend_epi16, _mm_blend_ps,
_mm_blend_pd,
_mm_dp_ps, _mm_dp_pd, _mm_insert_ps, _mm_extract_ps,
_mm_insert_epi8,
_mm_insert_epi32, _mm_insert_epi64, _mm_extract_epi8,
_mm_extract_epi32,
_mm_extract_epi64, _mm_mpsadbw_epu8, _mm_cmpistrm, _mm_cmpistri,
_mm_cmpestrm, _mm_cmpestri, _mm_cmpistra, _mm_cmpistrc,
_mm_cmpistro,
_mm_cmpistrs, _mm_cmpistrz, _mm_cmpestra, _mm_cmpestrc,
_mm_cmpestro,
_mm_cmpestrs, _mm_cmpestrz): Ditto.
* config/i386/tmmintrin.h (_mm_alignr_epi8, _mm_alignr_pi8): Ditto.
* config/i386/xmmintrin.h (_mm_shuffle_ps, _mm_extract_pi16,
_m_pextrw,
_mm_insert_pi16, _m_pinsrw, _mm_shuffle_pi16, _m_pshufw): Ditto.
* config/i386/mmintrin-common.h (_mm_round_pd, _mm_round_sd,
_mm_round_ps, _mm_round_ss): Ditto.
testsuite/ChangeLog:
2007-12-12 Uros Bizjak <ubizjak@gmail.com>
PR target/34435
* g++.dg/other/pr34435.C: New testcase.
Uros.
[1] http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34435
Index: testsuite/g++.dg/other/pr34435.C
===================================================================
--- testsuite/g++.dg/other/pr34435.C (revision 0)
+++ testsuite/g++.dg/other/pr34435.C (revision 0)
@@ -0,0 +1,19 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-msse2" } */
+
+#include <emmintrin.h>
+
+class Vec {
+ __m128i vec;
+public:
+ Vec(int mm) {
+ vec = _mm_set1_epi16(mm);
+ }
+ operator __m128i() const {
+ return vec;
+ }
+};
+
+int main() {
+ _mm_shuffle_epi32(Vec(5), _MM_SHUFFLE(3,3,3,3));
+}
Index: config/i386/bmmintrin.h
===================================================================
--- config/i386/bmmintrin.h (revision 130791)
+++ config/i386/bmmintrin.h (working copy)
@@ -352,33 +352,37 @@ _mm_rot_epi64(__m128i __A, __m128i __B)
/* Rotates - Immediate form */
#ifdef __OPTIMIZE__
static __inline __m128i __attribute__((__always_inline__, __artificial__))
-_mm_roti_epi8(__m128i __A, int __B)
+_mm_roti_epi8(__m128i __A, const int __B)
{
return (__m128i) __builtin_ia32_protbi ((__v16qi)__A, __B);
}
static __inline __m128i __attribute__((__always_inline__, __artificial__))
-_mm_roti_epi16(__m128i __A, int __B)
+_mm_roti_epi16(__m128i __A, const int __B)
{
return (__m128i) __builtin_ia32_protwi ((__v8hi)__A, __B);
}
static __inline __m128i __attribute__((__always_inline__, __artificial__))
-_mm_roti_epi32(__m128i __A, int __B)
+_mm_roti_epi32(__m128i __A, const int __B)
{
return (__m128i) __builtin_ia32_protdi ((__v4si)__A, __B);
}
static __inline __m128i __attribute__((__always_inline__, __artificial__))
-_mm_roti_epi64(__m128i __A, int __B)
+_mm_roti_epi64(__m128i __A, const int __B)
{
return (__m128i) __builtin_ia32_protqi ((__v2di)__A, __B);
}
#else
-#define _mm_roti_epi8(A, B) ((_m128i) __builtin_ia32_protbi ((__v16qi)(A), B)
-#define _mm_roti_epi16(A, B) ((_m128i) __builtin_ia32_protwi ((__v8hi)(A), B)
-#define _mm_roti_epi32(A, B) ((_m128i) __builtin_ia32_protdi ((__v4si)(A), B)
-#define _mm_roti_epi64(A, B) ((_m128i) __builtin_ia32_protqi ((__v2di)(A), B)
+#define _mm_roti_epi8(A, B) \
+ ((__m128i) __builtin_ia32_protbi ((__v16qi)(__m128i)(A), (int)(B)))
+#define _mm_roti_epi16(A, B) \
+ ((__m128i) __builtin_ia32_protwi ((__v8hi)(__m128i)(A), (int)(B)))
+#define _mm_roti_epi32(A, B) \
+ ((__m128i) __builtin_ia32_protdi ((__v4si)(__m128i)(A), (int)(B)))
+#define _mm_roti_epi64(A, B) \
+ ((__m128i) __builtin_ia32_protqi ((__v2di)(__m128i)(A), (int)(B)))
#endif
/* pshl */
Index: config/i386/smmintrin.h
===================================================================
--- config/i386/smmintrin.h (revision 130791)
+++ config/i386/smmintrin.h (working copy)
@@ -53,8 +53,9 @@ _mm_blend_epi16 (__m128i __X, __m128i __
__M);
}
#else
-#define _mm_blend_epi16(X, Y, M) \
- ((__m128i) __builtin_ia32_pblendw128 ((__v8hi)(X), (__v8hi)(Y), (M)))
+#define _mm_blend_epi16(X, Y, M) \
+ ((__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(X), \
+ (__v8hi)(__m128i)(Y), (int)(M)))
#endif
static __inline __m128i __attribute__((__always_inline__, __artificial__))
@@ -77,8 +78,9 @@ _mm_blend_ps (__m128 __X, __m128 __Y, co
__M);
}
#else
-#define _mm_blend_ps(X, Y, M) \
- ((__m128) __builtin_ia32_blendps ((__v4sf)(X), (__v4sf)(Y), (M)))
+#define _mm_blend_ps(X, Y, M) \
+ ((__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(M)))
#endif
static __inline __m128 __attribute__((__always_inline__, __artificial__))
@@ -101,8 +103,9 @@ _mm_blend_pd (__m128d __X, __m128d __Y,
__M);
}
#else
-#define _mm_blend_pd(X, Y, M) \
- ((__m128d) __builtin_ia32_blendpd ((__v2df)(X), (__v2df)(Y), (M)))
+#define _mm_blend_pd(X, Y, M) \
+ ((__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(M)))
#endif
static __inline __m128d __attribute__((__always_inline__, __artificial__))
@@ -133,11 +136,13 @@ _mm_dp_pd (__m128d __X, __m128d __Y, con
__M);
}
#else
-#define _mm_dp_ps(X, Y, M) \
- ((__m128) __builtin_ia32_dpps ((__v4sf)(X), (__v4sf)(Y), (M)))
-
-#define _mm_dp_pd(X, Y, M) \
- ((__m128d) __builtin_ia32_dppd ((__v2df)(X), (__v2df)(Y), (M)))
+#define _mm_dp_ps(X, Y, M) \
+ ((__m128) __builtin_ia32_dpps ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(M)))
+
+#define _mm_dp_pd(X, Y, M) \
+ ((__m128d) __builtin_ia32_dppd ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(M)))
#endif
/* Packed integer 64-bit comparison, zeroing or filling with ones
@@ -228,8 +233,9 @@ _mm_insert_ps (__m128 __D, __m128 __S, c
__N);
}
#else
-#define _mm_insert_ps(D, S, N) \
- ((__m128) __builtin_ia32_insertps128 ((__v4sf)(D), (__v4sf)(S), (N)))
+#define _mm_insert_ps(D, S, N) \
+ ((__m128) __builtin_ia32_insertps128 ((__v4sf)(__m128)(D), \
+ (__v4sf)(__m128)(S), (int)(N)))
#endif
/* Helper macro to create the N value for _mm_insert_ps. */
@@ -247,14 +253,13 @@ _mm_extract_ps (__m128 __X, const int __
return __tmp.i;
}
#else
-#define _mm_extract_ps(X, N) \
- (__extension__ \
- ({ \
- union { int i; float f; } __tmp; \
- __tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)(X), (N)); \
- __tmp.i; \
- }) \
- )
+#define _mm_extract_ps(X, N) \
+ (__extension__ \
+ ({ \
+ union { int i; float f; } __tmp; \
+ __tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)(__m128)(X), (int)(N)); \
+ __tmp.i; \
+ }))
#endif
/* Extract binary representation of single precision float into
@@ -296,15 +301,18 @@ _mm_insert_epi64 (__m128i __D, long long
}
#endif
#else
-#define _mm_insert_epi8(D, S, N) \
- ((__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)(D), (S), (N)))
-
-#define _mm_insert_epi32(D, S, N) \
- ((__m128i) __builtin_ia32_vec_set_v4si ((__v4si)(D), (S), (N)))
+#define _mm_insert_epi8(D, S, N) \
+ ((__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)(__m128i)(D), \
+ (int)(S), (int)(N)))
+
+#define _mm_insert_epi32(D, S, N) \
+ ((__m128i) __builtin_ia32_vec_set_v4si ((__v4si)(__m128i)(D), \
+ (int)(S), (int)(N)))
#ifdef __x86_64__
-#define _mm_insert_epi64(D, S, N) \
- ((__m128i) __builtin_ia32_vec_set_v2di ((__v2di)(D), (S), (N)))
+#define _mm_insert_epi64(D, S, N) \
+ ((__m128i) __builtin_ia32_vec_set_v2di ((__v2di)(__m128i)(D), \
+ (long long)(S), (int)(N)))
#endif
#endif
@@ -333,13 +341,13 @@ _mm_extract_epi64 (__m128i __X, const in
#endif
#else
#define _mm_extract_epi8(X, N) \
- __builtin_ia32_vec_ext_v16qi ((__v16qi) X, (N))
+ __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N))
#define _mm_extract_epi32(X, N) \
- __builtin_ia32_vec_ext_v4si ((__v4si) X, (N))
+ __builtin_ia32_vec_ext_v4si ((__v4si)(__m128i)(X), (int)(N))
#ifdef __x86_64__
#define _mm_extract_epi64(X, N) \
- ((long long) __builtin_ia32_vec_ext_v2di ((__v2di)(X), (N)))
+ ((long long) __builtin_ia32_vec_ext_v2di ((__v2di)(__m128i)(X), (int)(N)))
#endif
#endif
@@ -447,8 +455,9 @@ _mm_mpsadbw_epu8 (__m128i __X, __m128i _
(__v16qi)__Y, __M);
}
#else
-#define _mm_mpsadbw_epu8(X, Y, M) \
- ((__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)(X), (__v16qi)(Y), (M)))
+#define _mm_mpsadbw_epu8(X, Y, M) \
+ ((__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
#endif
/* Load double quadword using non-temporal aligned hint. */
@@ -521,17 +530,21 @@ _mm_cmpestri (__m128i __X, int __LX, __m
__M);
}
#else
-#define _mm_cmpistrm(X, Y, M) \
- ((__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)(X), (__v16qi)(Y), (M)))
-#define _mm_cmpistri(X, Y, M) \
- __builtin_ia32_pcmpistri128 ((__v16qi)(X), (__v16qi)(Y), (M))
-
-#define _mm_cmpestrm(X, LX, Y, LY, M) \
- ((__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)(X), (int)(LX), \
- (__v16qi)(Y), (int)(LY), (M)))
-#define _mm_cmpestri(X, LX, Y, LY, M) \
- __builtin_ia32_pcmpestri128 ((__v16qi)(X), (int)(LX), \
- (__v16qi)(Y), (int)(LY), (M))
+#define _mm_cmpistrm(X, Y, M) \
+ ((__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistri(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistri128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+
+#define _mm_cmpestrm(X, LX, Y, LY, M) \
+ ((__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)(__m128i)(X), \
+ (int)(LX), (__v16qi)(__m128i)(Y), \
+ (int)(LY), (int)(M)))
+#define _mm_cmpestri(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestri128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
#endif
/* Intrinsics for text/string processing and reading values of
@@ -618,32 +631,42 @@ _mm_cmpestrz (__m128i __X, int __LX, __m
__M);
}
#else
-#define _mm_cmpistra(X, Y, M) \
- __builtin_ia32_pcmpistria128 ((__v16qi)(X), (__v16qi)(Y), (M))
-#define _mm_cmpistrc(X, Y, M) \
- __builtin_ia32_pcmpistric128 ((__v16qi)(X), (__v16qi)(Y), (M))
-#define _mm_cmpistro(X, Y, M) \
- __builtin_ia32_pcmpistrio128 ((__v16qi)(X), (__v16qi)(Y), (M))
-#define _mm_cmpistrs(X, Y, M) \
- __builtin_ia32_pcmpistris128 ((__v16qi)(X), (__v16qi)(Y), (M))
-#define _mm_cmpistrz(X, Y, M) \
- __builtin_ia32_pcmpistriz128 ((__v16qi)(X), (__v16qi)(Y), (M))
-
-#define _mm_cmpestra(X, LX, Y, LY, M) \
- __builtin_ia32_pcmpestria128 ((__v16qi)(X), (int)(LX), \
- (__v16qi)(Y), (int)(LY), (M))
-#define _mm_cmpestrc(X, LX, Y, LY, M) \
- __builtin_ia32_pcmpestric128 ((__v16qi)(X), (int)(LX), \
- (__v16qi)(Y), (int)(LY), (M))
-#define _mm_cmpestro(X, LX, Y, LY, M) \
- __builtin_ia32_pcmpestrio128 ((__v16qi)(X), (int)(LX), \
- (__v16qi)(Y), (int)(LY), (M))
-#define _mm_cmpestrs(X, LX, Y, LY, M) \
- __builtin_ia32_pcmpestris128 ((__v16qi)(X), (int)(LX), \
- (__v16qi)(Y), (int)(LY), (M))
-#define _mm_cmpestrz(X, LX, Y, LY, M) \
- __builtin_ia32_pcmpestriz128 ((__v16qi)(X), (int)(LX), \
- (__v16qi)(Y), (int)(LY), (M))
+#define _mm_cmpistra(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistria128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistrc(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistric128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistro(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistrio128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistrs(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistris128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistrz(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistriz128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+
+#define _mm_cmpestra(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestria128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#define _mm_cmpestrc(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestric128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#define _mm_cmpestro(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestrio128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#define _mm_cmpestrs(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestris128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#define _mm_cmpestrz(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestriz128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
#endif
/* Packed integer 64-bit comparison, zeroing or filling with ones
Index: config/i386/tmmintrin.h
===================================================================
--- config/i386/tmmintrin.h (revision 130791)
+++ config/i386/tmmintrin.h (working copy)
@@ -185,18 +185,25 @@ _mm_sign_pi32 (__m64 __X, __m64 __Y)
static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_alignr_epi8(__m128i __X, __m128i __Y, const int __N)
{
- return (__m128i)__builtin_ia32_palignr128 ((__v2di)__X, (__v2di)__Y, __N * 8);}
+ return (__m128i) __builtin_ia32_palignr128 ((__v2di)__X,
+ (__v2di)__Y, __N * 8);
+}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_alignr_pi8(__m64 __X, __m64 __Y, const int __N)
{
- return (__m64)__builtin_ia32_palignr ((long long)__X, (long long)__Y, __N * 8);
+ return (__m64) __builtin_ia32_palignr ((long long)__X,
+ (long long)__Y, __N * 8);
}
#else
-#define _mm_alignr_epi8(__X, __Y, __N) \
- ((__m128i)__builtin_ia32_palignr128 ((__v2di) __X, (__v2di) __Y, (__N) * 8))
-#define _mm_alignr_pi8(__X, __Y, __N) \
- ((__m64)__builtin_ia32_palignr ((long long) (__X), (long long) (__Y), (__N) * 8))
+#define _mm_alignr_epi8(X, Y, N) \
+ ((__m128i) __builtin_ia32_palignr128 ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), \
+ (int)(N) * 8))
+#define _mm_alignr_pi8(X, Y, N) \
+ ((__m64) __builtin_ia32_palignr ((long long)(__m64)(X), \
+ (long long)(__m64)(Y), \
+ (int)(N) * 8))
#endif
static __inline __m128i __attribute__((__always_inline__, __artificial__))
Index: config/i386/xmmintrin.h
===================================================================
--- config/i386/xmmintrin.h (revision 130791)
+++ config/i386/xmmintrin.h (working copy)
@@ -723,8 +723,9 @@ _mm_shuffle_ps (__m128 __A, __m128 __B,
return (__m128) __builtin_ia32_shufps ((__v4sf)__A, (__v4sf)__B, __mask);
}
#else
-#define _mm_shuffle_ps(A, B, MASK) \
- ((__m128) __builtin_ia32_shufps ((__v4sf)(A), (__v4sf)(B), (MASK)))
+#define _mm_shuffle_ps(A, B, MASK) \
+ ((__m128) __builtin_ia32_shufps ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(MASK)))
#endif
/* Selects and interleaves the upper two SPFP values from A and B. */
@@ -1004,8 +1005,10 @@ _m_pextrw (__m64 const __A, int const __
return _mm_extract_pi16 (__A, __N);
}
#else
-#define _mm_extract_pi16(A, N) __builtin_ia32_vec_ext_v4hi ((__v4hi)(A), (N))
-#define _m_pextrw(A, N) _mm_extract_pi16((A), (N))
+#define _mm_extract_pi16(A, N) \
+ ((int) __builtin_ia32_vec_ext_v4hi ((__v4hi)(__m64)(A), (int)(N)))
+#define _m_pextrw(A, N) \
+ ((int) _mm_extract_pi16((__m64)(A),(int)(N)))
#endif
/* Inserts word D into one of four words of A. The selector N must be
@@ -1023,9 +1026,11 @@ _m_pinsrw (__m64 const __A, int const __
return _mm_insert_pi16 (__A, __D, __N);
}
#else
-#define _mm_insert_pi16(A, D, N) \
- ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(A), (D), (N)))
-#define _m_pinsrw(A, D, N) _mm_insert_pi16((A), (D), (N))
+#define _mm_insert_pi16(A, D, N) \
+ ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(__m64)(A), \
+ (int)(D), (int)(N)))
+#define _m_pinsrw(A, D, N) \
+ ((__m64) _mm_insert_pi16((__m64)(A), (int)(D), (int)(N)))
#endif
/* Compute the element-wise maximum of signed 16-bit values. */
@@ -1123,8 +1128,9 @@ _m_pshufw (__m64 __A, int const __N)
}
#else
#define _mm_shuffle_pi16(A, N) \
- ((__m64) __builtin_ia32_pshufw ((__v4hi)(A), (N)))
-#define _m_pshufw(A, N) _mm_shuffle_pi16 ((A), (N))
+ ((__m64) __builtin_ia32_pshufw ((__v4hi)(__m64)(A), (int)(N)))
+#define _m_pshufw(A, N) \
+ ((__m64) _mm_shuffle_pi16 ((__m64)(A), (int)(N)))
#endif
/* Conditionally store byte elements of A into P. The high bit of each
Index: config/i386/mmintrin-common.h
===================================================================
--- config/i386/mmintrin-common.h (revision 130791)
+++ config/i386/mmintrin-common.h (working copy)
@@ -108,10 +108,11 @@ _mm_round_sd(__m128d __D, __m128d __V, c
}
#else
#define _mm_round_pd(V, M) \
- ((__m128d) __builtin_ia32_roundpd ((__v2df)(V), (M)))
+ ((__m128d) __builtin_ia32_roundpd ((__v2df)(__m128d)(V), (int)(M)))
-#define _mm_round_sd(D, V, M) \
- ((__m128d) __builtin_ia32_roundsd ((__v2df)(D), (__v2df)(V), (M)))
+#define _mm_round_sd(D, V, M) \
+ ((__m128d) __builtin_ia32_roundsd ((__v2df)(__m128d)(D), \
+ (__v2df)(__m128d)(V), (int)(M)))
#endif
/* Packed/scalar single precision floating point rounding. */
@@ -132,10 +133,11 @@ _mm_round_ss (__m128 __D, __m128 __V, co
}
#else
#define _mm_round_ps(V, M) \
- ((__m128) __builtin_ia32_roundps ((__v4sf)(V), (M)))
+ ((__m128) __builtin_ia32_roundps ((__v4sf)(__m128)(V), (int)(M)))
-#define _mm_round_ss(D, V, M) \
- ((__m128) __builtin_ia32_roundss ((__v4sf)(D), (__v4sf)(V), (M)))
+#define _mm_round_ss(D, V, M) \
+ ((__m128) __builtin_ia32_roundss ((__v4sf)(__m128)(D), \
+ (__v4sf)(__m128)(V), (int)(M)))
#endif
/* Macros for ceil/floor intrinsics. */
Index: config/i386/ammintrin.h
===================================================================
--- config/i386/ammintrin.h (revision 130791)
+++ config/i386/ammintrin.h (working copy)
@@ -62,8 +62,9 @@ _mm_extracti_si64 (__m128i __X, unsigned
return (__m128i) __builtin_ia32_extrqi ((__v2di) __X, __I, __L);
}
#else
-#define _mm_extracti_si64(X, I, L) \
- ((__m128i) __builtin_ia32_extrqi ((__v2di)(X), I, L))
+#define _mm_extracti_si64(X, I, L) \
+ ((__m128i) __builtin_ia32_extrqi ((__v2di)(__m128i)(X), \
+ (unsigned int)(I), (unsigned int)(L)))
#endif
static __inline __m128i __attribute__((__always_inline__, __artificial__))
@@ -79,8 +80,10 @@ _mm_inserti_si64(__m128i __X, __m128i __
return (__m128i) __builtin_ia32_insertqi ((__v2di)__X, (__v2di)__Y, __I, __L);
}
#else
-#define _mm_inserti_si64(X, Y, I, L) \
- ((__m128i) __builtin_ia32_insertqi ((__v2di)(X), (__v2di)(Y), I, L))
+#define _mm_inserti_si64(X, Y, I, L) \
+ ((__m128i) __builtin_ia32_insertqi ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), \
+ (unsigned int)(I), (unsigned int)(L)))
#endif
#endif /* __SSE4A__ */
Index: config/i386/emmintrin.h
===================================================================
--- config/i386/emmintrin.h (revision 130791)
+++ config/i386/emmintrin.h (working copy)
@@ -887,8 +887,9 @@ _mm_shuffle_pd(__m128d __A, __m128d __B,
return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask);
}
#else
-#define _mm_shuffle_pd(__A, __B, __C) \
- ((__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, (__C)))
+#define _mm_shuffle_pd(__A, __B, __C) \
+ ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)__A, \
+ (__v2df)(__m128d)__B, (int)(__C)))
#endif
static __inline __m128d __attribute__((__always_inline__, __artificial__))
@@ -1320,9 +1321,10 @@ _mm_insert_epi16 (__m128i const __A, int
}
#else
#define _mm_extract_epi16(A, N) \
- ((int) __builtin_ia32_vec_ext_v8hi ((__v8hi)(A), (N)))
-#define _mm_insert_epi16(A, D, N) \
- ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(A), (D), (N)))
+ ((int) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N)))
+#define _mm_insert_epi16(A, D, N) \
+ ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A), \
+ (int)(D), (int)(N)))
#endif
static __inline __m128i __attribute__((__always_inline__, __artificial__))
@@ -1381,11 +1383,11 @@ _mm_shuffle_epi32 (__m128i __A, const in
}
#else
#define _mm_shufflehi_epi16(__A, __B) \
- ((__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __B))
+ ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)__A, (int)__B))
#define _mm_shufflelo_epi16(__A, __B) \
- ((__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __B))
+ ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)__A, (int)__B))
#define _mm_shuffle_epi32(__A, __B) \
- ((__m128i)__builtin_ia32_pshufd ((__v4si)__A, __B))
+ ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)__A, (int)__B))
#endif
static __inline void __attribute__((__always_inline__, __artificial__))