This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: PATCH: Add SSE4.1 support


On Tue, May 22, 2007 at 08:29:04PM +0200, Uros Bizjak wrote:
> H. J. Lu wrote:
> 
> >I am testing the following patch. I will check it in after bootstrap
> >and tests have finished without regressions on Linux/ia32 and
> >Linux/Intel64.
> >
> >
> > /* Test that {,x,e,p,t,a}mmintrin.h, mm3dnow.h and mm_malloc.h are
> >    usable with -O -pedantic-errors.  */
> > /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> >-/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mssse3 -msse4a" } */
> >+/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mssse3 -msse4.1 -msse4a" } */
> > 
> > #include <ammintrin.h>
> >+#include <smmintrin.h>
> > #include <tmmintrin.h>
> > #include <mm3dnow.h>
> >  
> smmintrin.h already includes tmmintrin.h, so it is enough to include 
> smmintrin.h _instead_ of tmmintrin.h to include the full chain of SSE 
> intrinsic headers. Also, IIRC -msse4.1 implies -mssse3, so it is enough 
> to add -msse4.1 _instead_ of -mssse3. Oh, and the comment should be 
> updated, as a new header is included.
> 
> Patch is OK with this change to all tests. But be aware that these four 
> tests are the ones that test your headers ;)
> 

Here is the updated patch. I will check it in shortly.

Thanks.


H.J.
-----
2007-05-22  H.J. Lu  <hongjiu.lu@intel.com>

	* g++.dg/other/i386-2.C: Update comments on header files tested.
	Use -msse4.1 instead of -mssse3.  Include <smmintrin.h> instead
	of <tmmintrin.h>.
	* gcc.target/i386/sse-12.c: Likewise.
	* gcc.target/i386/sse-13.c: Likewise.
	* gcc.target/i386/sse-14.c: Likewise.

	* gcc.target/i386/sse-13.c (__builtin_ia32_pblendw128): New.
	Redefined to test with immediate operand.
	(__builtin_ia32_blendps): Likewise.
	(__builtin_ia32_blendpd): Likewise.
	(__builtin_ia32_dpps): Likewise.
	(__builtin_ia32_dppd): Likewise.
	(__builtin_ia32_insertps128): Likewise.
	(__builtin_ia32_vec_ext_v4sf): Likewise.
	(__builtin_ia32_vec_set_v16qi): Likewise.
	(__builtin_ia32_vec_set_v4si): Likewise.
	(__builtin_ia32_vec_set_v2di): Likewise.
	(__builtin_ia32_vec_ext_v16qi): Likewise.
	(__builtin_ia32_vec_ext_v4si): Likewise.
	(__builtin_ia32_vec_ext_v2di): Likewise.
	(__builtin_ia32_roundpd): Likewise.
	(__builtin_ia32_roundsd): Likewise.
	(__builtin_ia32_roundps): Likewise.
	(__builtin_ia32_roundss): Likewise.
	(__builtin_ia32_mpsadbw128): Likewise.

--- gcc/testsuite/g++.dg/other/i386-2.C.sse41	2007-05-22 06:39:41.000000000 -0700
+++ gcc/testsuite/g++.dg/other/i386-2.C	2007-05-22 12:52:59.000000000 -0700
@@ -1,10 +1,10 @@
-/* Test that {,x,e,p,t,a}mmintrin.h, mm3dnow.h and mm_malloc.h are
+/* Test that {,x,e,p,t,s,a}mmintrin.h, mm3dnow.h and mm_malloc.h are
    usable with -O -pedantic-errors.  */
 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mssse3 -msse4a" } */
+/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -msse4.1 -msse4a" } */
 
 #include <ammintrin.h>
-#include <tmmintrin.h>
+#include <smmintrin.h>
 #include <mm3dnow.h>
 
 int dummy;
--- gcc/testsuite/gcc.target/i386/sse-12.c.sse41	2007-05-22 06:39:41.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/sse-12.c	2007-05-22 12:53:11.000000000 -0700
@@ -1,10 +1,10 @@
-/* Test that {,x,e,p,t,a}mmintrin.h, mm3dnow.h and mm_malloc.h are
+/* Test that {,x,e,p,t,s,a}mmintrin.h, mm3dnow.h and mm_malloc.h are
    usable with -O -std=c89 -pedantic-errors.  */
 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mssse3 -msse4a" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -msse4.1 -msse4a" } */
 
 #include <ammintrin.h>
-#include <tmmintrin.h>
+#include <smmintrin.h>
 #include <mm3dnow.h>
 
 int dummy;
--- gcc/testsuite/gcc.target/i386/sse-13.c.sse41	2007-05-20 09:09:13.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/sse-13.c	2007-05-22 12:53:57.000000000 -0700
@@ -1,10 +1,10 @@
 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O2 -mssse3 -msse4a" } */
+/* { dg-options "-O2 -msse4.1 -msse4a" } */
 
 /* Test that the intrinsics compile with optimization.  All of them are
-   defined as inline functions in mmintrin.h that reference the proper
-   builtin functions.  Defining away "static" and "__inline" results in
-   all of them being compiled as proper functions.  */
+   defined as inline functions in {,x,e,p,t,s,a}mmintrin.h that reference
+   the proper builtin functions.  Defining away "static" and "__inline"
+   results in all of them being compiled as proper functions.  */
 
 #define static
 #define __inline
@@ -15,6 +15,26 @@
 #define __builtin_ia32_extrqi(X, I, L)  __builtin_ia32_extrqi(X, 1, 1)
 #define __builtin_ia32_insertqi(X, Y, I, L) __builtin_ia32_insertqi(X, Y, 1, 1)
 
+/* smmintrin.h */
+#define __builtin_ia32_pblendw128(X, Y, M) __builtin_ia32_pblendw128 (X, Y, 1)
+#define __builtin_ia32_blendps(X, Y, M) __builtin_ia32_blendps(X, Y, 1)
+#define __builtin_ia32_blendpd(X, Y, M) __builtin_ia32_blendpd(X, Y, 1)
+#define __builtin_ia32_dpps(X, Y, M) __builtin_ia32_dpps(X, Y, 1)
+#define __builtin_ia32_dppd(X, Y, M) __builtin_ia32_dppd(X, Y, 1)
+#define __builtin_ia32_insertps128(D, S, N) __builtin_ia32_insertps128(D, S, 1)
+#define __builtin_ia32_vec_ext_v4sf(X, N) __builtin_ia32_vec_ext_v4sf(X, 1)
+#define __builtin_ia32_vec_set_v16qi(D, S, N) __builtin_ia32_vec_set_v16qi(D, S, 1)
+#define __builtin_ia32_vec_set_v4si(D, S, N) __builtin_ia32_vec_set_v4si(D, S, 1)
+#define __builtin_ia32_vec_set_v2di(D, S, N) __builtin_ia32_vec_set_v2di(D, S, 1)
+#define __builtin_ia32_vec_ext_v16qi(X, N) __builtin_ia32_vec_ext_v16qi(X, 1)
+#define __builtin_ia32_vec_ext_v4si(X, N) __builtin_ia32_vec_ext_v4si(X, 1)
+#define __builtin_ia32_vec_ext_v2di(X, N) __builtin_ia32_vec_ext_v2di(X, 1)
+#define __builtin_ia32_roundpd(V, M) __builtin_ia32_roundpd(V, 1)
+#define __builtin_ia32_roundsd(D, V, M) __builtin_ia32_roundsd(D, V, 1)
+#define __builtin_ia32_roundps(V, M) __builtin_ia32_roundps(V, 1)
+#define __builtin_ia32_roundss(D, V, M) __builtin_ia32_roundss(D, V, 1)
+#define __builtin_ia32_mpsadbw128(X, Y, M) __builtin_ia32_mpsadbw128(X, Y, 1)
+
 /* tmmintrin.h */
 #define __builtin_ia32_palignr128(X, Y, N) __builtin_ia32_palignr128(X, Y, 8)
 #define __builtin_ia32_palignr(X, Y, N) __builtin_ia32_palignr(X, Y, 8)
@@ -47,4 +67,4 @@
 #define __builtin_ia32_shufps(A, B, N) __builtin_ia32_shufps(A, B, 0)
 
 #include <ammintrin.h>
-#include <tmmintrin.h>
+#include <smmintrin.h>
--- gcc/testsuite/gcc.target/i386/sse-14.c.sse41	2007-05-22 06:39:41.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/sse-14.c	2007-05-22 12:54:12.000000000 -0700
@@ -1,14 +1,13 @@
 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O0 -mssse3 -msse4a" } */
+/* { dg-options "-O0 -msse4.1 -msse4a" } */
 
 /* Test that the intrinsics compile without optimization.  All of them are
-   defined as inline functions in mmintrin.h that reference the proper
-   builtin functions.  Defining away "static" and "__inline" results in
-   all of them being compiled as proper functions.  */
+   defined as inline functions in {,x,e,p,t,s,a}mmintrin.h that reference
+   the proper builtin functions.  Defining away "static" and "__inline"
+   results in all of them being compiled as proper functions.  */
 
 #define static
 #define __inline
 
 #include <ammintrin.h>
-#include <tmmintrin.h>
-
+#include <smmintrin.h>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]