This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH, i386]: Fix PR target/34435
- From: Uros Bizjak <ubizjak at gmail dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: Richard Guenther <rguenther at suse dot de>
- Date: Wed, 12 Dec 2007 22:36:53 +0100
- Subject: [PATCH, i386]: Fix PR target/34435
Hello!
Attached patch implements the solution to PR target/34435, suggested by
Richi (thanks!) in Comment #1 of the PR target/34435 audit trail [1].
The testcase:
--cut here--
#include <emmintrin.h>
class Vec {
__m128i vec;
public:
Vec(int mm) {
vec = _mm_set1_epi16(mm);
}
operator __m128i() const {
return vec;
}
};
int main() {
_mm_shuffle_epi32(Vec(5), _MM_SHUFFLE(3,3,3,3)); // error
}
--cut here--
fails to compile without optimizations due to the macro version of the
intrinsic, used without optimizations (macro version is necessary to
avoid failure in the check for immediate value in the non-optimized
compile):
"error: can't convert value to a vector".
Attached patch adds another cast of non-constant input value to __m128i,
so the macro becomes more similar to the inlined version of the intrinsics.
Patch was bootstrapped and regression tested on x86_64 {,-m32} without
new failures. If there are no comments to this approach, I plan to
commit the patch tomorrow.
2007-12-12 Uros Bizjak <ubizjak@gmail.com>
Richard Guenther <rguenther@suse.de>
PR target/34435
* config/i386/emmintrin.h (_mm_shuffle_pd, _mm_extract_epi16,
_mm_insert_epi16, _mm_shufflehi_epi16, _mm_shufflelo_epi16,
_mm_shuffle_epi32): Cast non-constant input values to either __m64,
__m128, __m128i or __m128d in a macro version of the intrinsic.
Cast constant input values to int.
* config/i386/ammintrin.h (_mm_extracti_si64, _mm_inserti_si64):
Ditto.
* config/i386/bmmintrin.h (_mm_roti_epi8, _mm_roti_epi16,
_mm_roti_epi32, _mm_roti_epi64): Ditto.
* config/i386/smmintrin.h (_mm_blend_epi16, _mm_blend_ps,
_mm_blend_pd,
_mm_dp_ps, _mm_dp_pd, _mm_insert_ps, _mm_extract_ps,
_mm_insert_epi8,
_mm_insert_epi32, _mm_insert_epi64, _mm_extract_epi8,
_mm_extract_epi32,
_mm_extract_epi64, _mm_mpsadbw_epu8, _mm_cmpistrm, _mm_cmpistri,
_mm_cmpestrm, _mm_cmpestri, _mm_cmpistra, _mm_cmpistrc,
_mm_cmpistro,
_mm_cmpistrs, _mm_cmpistrz, _mm_cmpestra, _mm_cmpestrc,
_mm_cmpestro,
_mm_cmpestrs, _mm_cmpestrz): Ditto.
* config/i386/tmmintrin.h (_mm_alignr_epi8, _mm_alignr_pi8): Ditto.
* config/i386/xmmintrin.h (_mm_shuffle_ps, _mm_extract_pi16,
_m_pextrw,
_mm_insert_pi16, _m_pinsrw, _mm_shuffle_pi16, _m_pshufw): Ditto.
* config/i386/mmintrin-common.h (_mm_round_pd, _mm_round_sd,
_mm_round_ps, _mm_round_ss): Ditto.
testsuite/ChangeLog:
2007-12-12 Uros Bizjak <ubizjak@gmail.com>
PR target/34435
* g++.dg/other/pr34435.C: New testcase.
Uros.
[1] http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34435
Index: testsuite/g++.dg/other/pr34435.C
===================================================================
--- testsuite/g++.dg/other/pr34435.C (revision 0)
+++ testsuite/g++.dg/other/pr34435.C (revision 0)
@@ -0,0 +1,19 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-msse2" } */
+
+#include <emmintrin.h>
+
+class Vec {
+ __m128i vec;
+public:
+ Vec(int mm) {
+ vec = _mm_set1_epi16(mm);
+ }
+ operator __m128i() const {
+ return vec;
+ }
+};
+
+int main() {
+ _mm_shuffle_epi32(Vec(5), _MM_SHUFFLE(3,3,3,3));
+}
Index: config/i386/bmmintrin.h
===================================================================
--- config/i386/bmmintrin.h (revision 130791)
+++ config/i386/bmmintrin.h (working copy)
@@ -352,33 +352,37 @@ _mm_rot_epi64(__m128i __A, __m128i __B)
/* Rotates - Immediate form */
#ifdef __OPTIMIZE__
static __inline __m128i __attribute__((__always_inline__, __artificial__))
-_mm_roti_epi8(__m128i __A, int __B)
+_mm_roti_epi8(__m128i __A, const int __B)
{
return (__m128i) __builtin_ia32_protbi ((__v16qi)__A, __B);
}
static __inline __m128i __attribute__((__always_inline__, __artificial__))
-_mm_roti_epi16(__m128i __A, int __B)
+_mm_roti_epi16(__m128i __A, const int __B)
{
return (__m128i) __builtin_ia32_protwi ((__v8hi)__A, __B);
}
static __inline __m128i __attribute__((__always_inline__, __artificial__))
-_mm_roti_epi32(__m128i __A, int __B)
+_mm_roti_epi32(__m128i __A, const int __B)
{
return (__m128i) __builtin_ia32_protdi ((__v4si)__A, __B);
}
static __inline __m128i __attribute__((__always_inline__, __artificial__))
-_mm_roti_epi64(__m128i __A, int __B)
+_mm_roti_epi64(__m128i __A, const int __B)
{
return (__m128i) __builtin_ia32_protqi ((__v2di)__A, __B);
}
#else
-#define _mm_roti_epi8(A, B) ((_m128i) __builtin_ia32_protbi ((__v16qi)(A), B)
-#define _mm_roti_epi16(A, B) ((_m128i) __builtin_ia32_protwi ((__v8hi)(A), B)
-#define _mm_roti_epi32(A, B) ((_m128i) __builtin_ia32_protdi ((__v4si)(A), B)
-#define _mm_roti_epi64(A, B) ((_m128i) __builtin_ia32_protqi ((__v2di)(A), B)
+#define _mm_roti_epi8(A, B) \
+ ((__m128i) __builtin_ia32_protbi ((__v16qi)(__m128i)(A), (int)(B)))
+#define _mm_roti_epi16(A, B) \
+ ((__m128i) __builtin_ia32_protwi ((__v8hi)(__m128i)(A), (int)(B)))
+#define _mm_roti_epi32(A, B) \
+ ((__m128i) __builtin_ia32_protdi ((__v4si)(__m128i)(A), (int)(B)))
+#define _mm_roti_epi64(A, B) \
+ ((__m128i) __builtin_ia32_protqi ((__v2di)(__m128i)(A), (int)(B)))
#endif
/* pshl */
Index: config/i386/smmintrin.h
===================================================================
--- config/i386/smmintrin.h (revision 130791)
+++ config/i386/smmintrin.h (working copy)
@@ -53,8 +53,9 @@ _mm_blend_epi16 (__m128i __X, __m128i __
__M);
}
#else
-#define _mm_blend_epi16(X, Y, M) \
- ((__m128i) __builtin_ia32_pblendw128 ((__v8hi)(X), (__v8hi)(Y), (M)))
+#define _mm_blend_epi16(X, Y, M) \
+ ((__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(X), \
+ (__v8hi)(__m128i)(Y), (int)(M)))
#endif
static __inline __m128i __attribute__((__always_inline__, __artificial__))
@@ -77,8 +78,9 @@ _mm_blend_ps (__m128 __X, __m128 __Y, co
__M);
}
#else
-#define _mm_blend_ps(X, Y, M) \
- ((__m128) __builtin_ia32_blendps ((__v4sf)(X), (__v4sf)(Y), (M)))
+#define _mm_blend_ps(X, Y, M) \
+ ((__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(M)))
#endif
static __inline __m128 __attribute__((__always_inline__, __artificial__))
@@ -101,8 +103,9 @@ _mm_blend_pd (__m128d __X, __m128d __Y,
__M);
}
#else
-#define _mm_blend_pd(X, Y, M) \
- ((__m128d) __builtin_ia32_blendpd ((__v2df)(X), (__v2df)(Y), (M)))
+#define _mm_blend_pd(X, Y, M) \
+ ((__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(M)))
#endif
static __inline __m128d __attribute__((__always_inline__, __artificial__))
@@ -133,11 +136,13 @@ _mm_dp_pd (__m128d __X, __m128d __Y, con
__M);
}
#else
-#define _mm_dp_ps(X, Y, M) \
- ((__m128) __builtin_ia32_dpps ((__v4sf)(X), (__v4sf)(Y), (M)))
-
-#define _mm_dp_pd(X, Y, M) \
- ((__m128d) __builtin_ia32_dppd ((__v2df)(X), (__v2df)(Y), (M)))
+#define _mm_dp_ps(X, Y, M) \
+ ((__m128) __builtin_ia32_dpps ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(M)))
+
+#define _mm_dp_pd(X, Y, M) \
+ ((__m128d) __builtin_ia32_dppd ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(M)))
#endif
/* Packed integer 64-bit comparison, zeroing or filling with ones
@@ -228,8 +233,9 @@ _mm_insert_ps (__m128 __D, __m128 __S, c
__N);
}
#else
-#define _mm_insert_ps(D, S, N) \
- ((__m128) __builtin_ia32_insertps128 ((__v4sf)(D), (__v4sf)(S), (N)))
+#define _mm_insert_ps(D, S, N) \
+ ((__m128) __builtin_ia32_insertps128 ((__v4sf)(__m128)(D), \
+ (__v4sf)(__m128)(S), (int)(N)))
#endif
/* Helper macro to create the N value for _mm_insert_ps. */
@@ -247,14 +253,13 @@ _mm_extract_ps (__m128 __X, const int __
return __tmp.i;
}
#else
-#define _mm_extract_ps(X, N) \
- (__extension__ \
- ({ \
- union { int i; float f; } __tmp; \
- __tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)(X), (N)); \
- __tmp.i; \
- }) \
- )
+#define _mm_extract_ps(X, N) \
+ (__extension__ \
+ ({ \
+ union { int i; float f; } __tmp; \
+ __tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)(__m128)(X), (int)(N)); \
+ __tmp.i; \
+ }))
#endif
/* Extract binary representation of single precision float into
@@ -296,15 +301,18 @@ _mm_insert_epi64 (__m128i __D, long long
}
#endif
#else
-#define _mm_insert_epi8(D, S, N) \
- ((__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)(D), (S), (N)))
-
-#define _mm_insert_epi32(D, S, N) \
- ((__m128i) __builtin_ia32_vec_set_v4si ((__v4si)(D), (S), (N)))
+#define _mm_insert_epi8(D, S, N) \
+ ((__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)(__m128i)(D), \
+ (int)(S), (int)(N)))
+
+#define _mm_insert_epi32(D, S, N) \
+ ((__m128i) __builtin_ia32_vec_set_v4si ((__v4si)(__m128i)(D), \
+ (int)(S), (int)(N)))
#ifdef __x86_64__
-#define _mm_insert_epi64(D, S, N) \
- ((__m128i) __builtin_ia32_vec_set_v2di ((__v2di)(D), (S), (N)))
+#define _mm_insert_epi64(D, S, N) \
+ ((__m128i) __builtin_ia32_vec_set_v2di ((__v2di)(__m128i)(D), \
+ (long long)(S), (int)(N)))
#endif
#endif
@@ -333,13 +341,13 @@ _mm_extract_epi64 (__m128i __X, const in
#endif
#else
#define _mm_extract_epi8(X, N) \
- __builtin_ia32_vec_ext_v16qi ((__v16qi) X, (N))
+ __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N))
#define _mm_extract_epi32(X, N) \
- __builtin_ia32_vec_ext_v4si ((__v4si) X, (N))
+ __builtin_ia32_vec_ext_v4si ((__v4si)(__m128i)(X), (int)(N))
#ifdef __x86_64__
#define _mm_extract_epi64(X, N) \
- ((long long) __builtin_ia32_vec_ext_v2di ((__v2di)(X), (N)))
+ ((long long) __builtin_ia32_vec_ext_v2di ((__v2di)(__m128i)(X), (int)(N)))
#endif
#endif
@@ -447,8 +455,9 @@ _mm_mpsadbw_epu8 (__m128i __X, __m128i _
(__v16qi)__Y, __M);
}
#else
-#define _mm_mpsadbw_epu8(X, Y, M) \
- ((__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)(X), (__v16qi)(Y), (M)))
+#define _mm_mpsadbw_epu8(X, Y, M) \
+ ((__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
#endif
/* Load double quadword using non-temporal aligned hint. */
@@ -521,17 +530,21 @@ _mm_cmpestri (__m128i __X, int __LX, __m
__M);
}
#else
-#define _mm_cmpistrm(X, Y, M) \
- ((__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)(X), (__v16qi)(Y), (M)))
-#define _mm_cmpistri(X, Y, M) \
- __builtin_ia32_pcmpistri128 ((__v16qi)(X), (__v16qi)(Y), (M))
-
-#define _mm_cmpestrm(X, LX, Y, LY, M) \
- ((__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)(X), (int)(LX), \
- (__v16qi)(Y), (int)(LY), (M)))
-#define _mm_cmpestri(X, LX, Y, LY, M) \
- __builtin_ia32_pcmpestri128 ((__v16qi)(X), (int)(LX), \
- (__v16qi)(Y), (int)(LY), (M))
+#define _mm_cmpistrm(X, Y, M) \
+ ((__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistri(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistri128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+
+#define _mm_cmpestrm(X, LX, Y, LY, M) \
+ ((__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)(__m128i)(X), \
+ (int)(LX), (__v16qi)(__m128i)(Y), \
+ (int)(LY), (int)(M)))
+#define _mm_cmpestri(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestri128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
#endif
/* Intrinsics for text/string processing and reading values of
@@ -618,32 +631,42 @@ _mm_cmpestrz (__m128i __X, int __LX, __m
__M);
}
#else
-#define _mm_cmpistra(X, Y, M) \
- __builtin_ia32_pcmpistria128 ((__v16qi)(X), (__v16qi)(Y), (M))
-#define _mm_cmpistrc(X, Y, M) \
- __builtin_ia32_pcmpistric128 ((__v16qi)(X), (__v16qi)(Y), (M))
-#define _mm_cmpistro(X, Y, M) \
- __builtin_ia32_pcmpistrio128 ((__v16qi)(X), (__v16qi)(Y), (M))
-#define _mm_cmpistrs(X, Y, M) \
- __builtin_ia32_pcmpistris128 ((__v16qi)(X), (__v16qi)(Y), (M))
-#define _mm_cmpistrz(X, Y, M) \
- __builtin_ia32_pcmpistriz128 ((__v16qi)(X), (__v16qi)(Y), (M))
-
-#define _mm_cmpestra(X, LX, Y, LY, M) \
- __builtin_ia32_pcmpestria128 ((__v16qi)(X), (int)(LX), \
- (__v16qi)(Y), (int)(LY), (M))
-#define _mm_cmpestrc(X, LX, Y, LY, M) \
- __builtin_ia32_pcmpestric128 ((__v16qi)(X), (int)(LX), \
- (__v16qi)(Y), (int)(LY), (M))
-#define _mm_cmpestro(X, LX, Y, LY, M) \
- __builtin_ia32_pcmpestrio128 ((__v16qi)(X), (int)(LX), \
- (__v16qi)(Y), (int)(LY), (M))
-#define _mm_cmpestrs(X, LX, Y, LY, M) \
- __builtin_ia32_pcmpestris128 ((__v16qi)(X), (int)(LX), \
- (__v16qi)(Y), (int)(LY), (M))
-#define _mm_cmpestrz(X, LX, Y, LY, M) \
- __builtin_ia32_pcmpestriz128 ((__v16qi)(X), (int)(LX), \
- (__v16qi)(Y), (int)(LY), (M))
+#define _mm_cmpistra(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistria128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistrc(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistric128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistro(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistrio128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistrs(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistris128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistrz(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistriz128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+
+#define _mm_cmpestra(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestria128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#define _mm_cmpestrc(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestric128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#define _mm_cmpestro(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestrio128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#define _mm_cmpestrs(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestris128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#define _mm_cmpestrz(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestriz128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
#endif
/* Packed integer 64-bit comparison, zeroing or filling with ones
Index: config/i386/tmmintrin.h
===================================================================
--- config/i386/tmmintrin.h (revision 130791)
+++ config/i386/tmmintrin.h (working copy)
@@ -185,18 +185,25 @@ _mm_sign_pi32 (__m64 __X, __m64 __Y)
static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_alignr_epi8(__m128i __X, __m128i __Y, const int __N)
{
- return (__m128i)__builtin_ia32_palignr128 ((__v2di)__X, (__v2di)__Y, __N * 8);}
+ return (__m128i) __builtin_ia32_palignr128 ((__v2di)__X,
+ (__v2di)__Y, __N * 8);
+}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_alignr_pi8(__m64 __X, __m64 __Y, const int __N)
{
- return (__m64)__builtin_ia32_palignr ((long long)__X, (long long)__Y, __N * 8);
+ return (__m64) __builtin_ia32_palignr ((long long)__X,
+ (long long)__Y, __N * 8);
}
#else
-#define _mm_alignr_epi8(__X, __Y, __N) \
- ((__m128i)__builtin_ia32_palignr128 ((__v2di) __X, (__v2di) __Y, (__N) * 8))
-#define _mm_alignr_pi8(__X, __Y, __N) \
- ((__m64)__builtin_ia32_palignr ((long long) (__X), (long long) (__Y), (__N) * 8))
+#define _mm_alignr_epi8(X, Y, N) \
+ ((__m128i) __builtin_ia32_palignr128 ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), \
+ (int)(N) * 8))
+#define _mm_alignr_pi8(X, Y, N) \
+ ((__m64) __builtin_ia32_palignr ((long long)(__m64)(X), \
+ (long long)(__m64)(Y), \
+ (int)(N) * 8))
#endif
static __inline __m128i __attribute__((__always_inline__, __artificial__))
Index: config/i386/xmmintrin.h
===================================================================
--- config/i386/xmmintrin.h (revision 130791)
+++ config/i386/xmmintrin.h (working copy)
@@ -723,8 +723,9 @@ _mm_shuffle_ps (__m128 __A, __m128 __B,
return (__m128) __builtin_ia32_shufps ((__v4sf)__A, (__v4sf)__B, __mask);
}
#else
-#define _mm_shuffle_ps(A, B, MASK) \
- ((__m128) __builtin_ia32_shufps ((__v4sf)(A), (__v4sf)(B), (MASK)))
+#define _mm_shuffle_ps(A, B, MASK) \
+ ((__m128) __builtin_ia32_shufps ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(MASK)))
#endif
/* Selects and interleaves the upper two SPFP values from A and B. */
@@ -1004,8 +1005,10 @@ _m_pextrw (__m64 const __A, int const __
return _mm_extract_pi16 (__A, __N);
}
#else
-#define _mm_extract_pi16(A, N) __builtin_ia32_vec_ext_v4hi ((__v4hi)(A), (N))
-#define _m_pextrw(A, N) _mm_extract_pi16((A), (N))
+#define _mm_extract_pi16(A, N) \
+ ((int) __builtin_ia32_vec_ext_v4hi ((__v4hi)(__m64)(A), (int)(N)))
+#define _m_pextrw(A, N) \
+ ((int) _mm_extract_pi16((__m64)(A),(int)(N)))
#endif
/* Inserts word D into one of four words of A. The selector N must be
@@ -1023,9 +1026,11 @@ _m_pinsrw (__m64 const __A, int const __
return _mm_insert_pi16 (__A, __D, __N);
}
#else
-#define _mm_insert_pi16(A, D, N) \
- ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(A), (D), (N)))
-#define _m_pinsrw(A, D, N) _mm_insert_pi16((A), (D), (N))
+#define _mm_insert_pi16(A, D, N) \
+ ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(__m64)(A), \
+ (int)(D), (int)(N)))
+#define _m_pinsrw(A, D, N) \
+ ((__m64) _mm_insert_pi16((__m64)(A), (int)(D), (int)(N)))
#endif
/* Compute the element-wise maximum of signed 16-bit values. */
@@ -1123,8 +1128,9 @@ _m_pshufw (__m64 __A, int const __N)
}
#else
#define _mm_shuffle_pi16(A, N) \
- ((__m64) __builtin_ia32_pshufw ((__v4hi)(A), (N)))
-#define _m_pshufw(A, N) _mm_shuffle_pi16 ((A), (N))
+ ((__m64) __builtin_ia32_pshufw ((__v4hi)(__m64)(A), (int)(N)))
+#define _m_pshufw(A, N) \
+ ((__m64) _mm_shuffle_pi16 ((__m64)(A), (int)(N)))
#endif
/* Conditionally store byte elements of A into P. The high bit of each
Index: config/i386/mmintrin-common.h
===================================================================
--- config/i386/mmintrin-common.h (revision 130791)
+++ config/i386/mmintrin-common.h (working copy)
@@ -108,10 +108,11 @@ _mm_round_sd(__m128d __D, __m128d __V, c
}
#else
#define _mm_round_pd(V, M) \
- ((__m128d) __builtin_ia32_roundpd ((__v2df)(V), (M)))
+ ((__m128d) __builtin_ia32_roundpd ((__v2df)(__m128d)(V), (int)(M)))
-#define _mm_round_sd(D, V, M) \
- ((__m128d) __builtin_ia32_roundsd ((__v2df)(D), (__v2df)(V), (M)))
+#define _mm_round_sd(D, V, M) \
+ ((__m128d) __builtin_ia32_roundsd ((__v2df)(__m128d)(D), \
+ (__v2df)(__m128d)(V), (int)(M)))
#endif
/* Packed/scalar single precision floating point rounding. */
@@ -132,10 +133,11 @@ _mm_round_ss (__m128 __D, __m128 __V, co
}
#else
#define _mm_round_ps(V, M) \
- ((__m128) __builtin_ia32_roundps ((__v4sf)(V), (M)))
+ ((__m128) __builtin_ia32_roundps ((__v4sf)(__m128)(V), (int)(M)))
-#define _mm_round_ss(D, V, M) \
- ((__m128) __builtin_ia32_roundss ((__v4sf)(D), (__v4sf)(V), (M)))
+#define _mm_round_ss(D, V, M) \
+ ((__m128) __builtin_ia32_roundss ((__v4sf)(__m128)(D), \
+ (__v4sf)(__m128)(V), (int)(M)))
#endif
/* Macros for ceil/floor intrinsics. */
Index: config/i386/ammintrin.h
===================================================================
--- config/i386/ammintrin.h (revision 130791)
+++ config/i386/ammintrin.h (working copy)
@@ -62,8 +62,9 @@ _mm_extracti_si64 (__m128i __X, unsigned
return (__m128i) __builtin_ia32_extrqi ((__v2di) __X, __I, __L);
}
#else
-#define _mm_extracti_si64(X, I, L) \
- ((__m128i) __builtin_ia32_extrqi ((__v2di)(X), I, L))
+#define _mm_extracti_si64(X, I, L) \
+ ((__m128i) __builtin_ia32_extrqi ((__v2di)(__m128i)(X), \
+ (unsigned int)(I), (unsigned int)(L)))
#endif
static __inline __m128i __attribute__((__always_inline__, __artificial__))
@@ -79,8 +80,10 @@ _mm_inserti_si64(__m128i __X, __m128i __
return (__m128i) __builtin_ia32_insertqi ((__v2di)__X, (__v2di)__Y, __I, __L);
}
#else
-#define _mm_inserti_si64(X, Y, I, L) \
- ((__m128i) __builtin_ia32_insertqi ((__v2di)(X), (__v2di)(Y), I, L))
+#define _mm_inserti_si64(X, Y, I, L) \
+ ((__m128i) __builtin_ia32_insertqi ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), \
+ (unsigned int)(I), (unsigned int)(L)))
#endif
#endif /* __SSE4A__ */
Index: config/i386/emmintrin.h
===================================================================
--- config/i386/emmintrin.h (revision 130791)
+++ config/i386/emmintrin.h (working copy)
@@ -887,8 +887,9 @@ _mm_shuffle_pd(__m128d __A, __m128d __B,
return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask);
}
#else
-#define _mm_shuffle_pd(__A, __B, __C) \
- ((__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, (__C)))
+#define _mm_shuffle_pd(__A, __B, __C) \
+ ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)__A, \
+ (__v2df)(__m128d)__B, (int)(__C)))
#endif
static __inline __m128d __attribute__((__always_inline__, __artificial__))
@@ -1320,9 +1321,10 @@ _mm_insert_epi16 (__m128i const __A, int
}
#else
#define _mm_extract_epi16(A, N) \
- ((int) __builtin_ia32_vec_ext_v8hi ((__v8hi)(A), (N)))
-#define _mm_insert_epi16(A, D, N) \
- ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(A), (D), (N)))
+ ((int) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N)))
+#define _mm_insert_epi16(A, D, N) \
+ ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A), \
+ (int)(D), (int)(N)))
#endif
static __inline __m128i __attribute__((__always_inline__, __artificial__))
@@ -1381,11 +1383,11 @@ _mm_shuffle_epi32 (__m128i __A, const in
}
#else
#define _mm_shufflehi_epi16(__A, __B) \
- ((__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __B))
+ ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)__A, (int)__B))
#define _mm_shufflelo_epi16(__A, __B) \
- ((__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __B))
+ ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)__A, (int)__B))
#define _mm_shuffle_epi32(__A, __B) \
- ((__m128i)__builtin_ia32_pshufd ((__v4si)__A, __B))
+ ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)__A, (int)__B))
#endif
static __inline void __attribute__((__always_inline__, __artificial__))