This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
PATCH: Remove SSE5 support
- From: Harsha Jagasia <harsha dot jagasia at amd dot com>
- To: Harsha Jagasia <harsha dot jagasia at amd dot com>, gcc-patches at gcc dot gnu dot org, hubicka at ucw dot cz, rth at redhat dot com, dwarak dot rajagopal at amd dot com, christophe dot harle at amd dot com
- Cc: Harsha Jagasia <harsha dot jagasia at amd dot com>
- Date: Wed, 23 Sep 2009 20:40:39 -0500
- Subject: PATCH: Remove SSE5 support
Hi,
This patch takes care of some of the omissions from the SSE5 removal patch
that Honza committed.
This patch has been pre-reviewed by Honza as a part of a larger patch.
Bootstrap and make check pass. OK for check-in?
Thanks,
Harsha
* doc/extend.texi (x86 intrinsics): Remove SSE5 flags
and builtins.
* gcc.target/i386/i386.exp: Remove check_effective_target_sse5.
* gcc.target/i386/isa-1.c: Remove SSE5.
* gcc.target/i386/isa-5.c: Ditto.
* gcc.target/i386/isa-6.c: Ditto.
* gcc.target/i386/funcspec-4.c: Remove error conditions related to SSE5.
* gcc.target/i386/avx-1.c: Remove comments referring to mmintrin-common.h.
* gcc.target/i386/avx-2.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/funcspec-9.c: Delete.
* config.gcc (i[34567]86-*-*): Remove mmintrin-common.h.
(x86_64-*-*): Ditto.
* config/i386/smmintrin.h: Move the macro and intrinsic definitions
from mmintrin-common.h back into smmintrin.h.
* config/i386/cpuid.h (bit_SSE5): Remove.
* config/i386/x86intrin.h: Remove SSE5.
* config/i386/mmintrin-common.h: Delete file.
Index: doc/extend.texi
===================================================================
--- doc/extend.texi (revision 151949)
+++ doc/extend.texi (working copy)
@@ -3168,11 +3168,6 @@ Enable/disable the generation of the sse
@cindex @code{target("sse4a")} attribute
Enable/disable the generation of the SSE4A instructions.
-@item sse5
-@itemx no-sse5
-@cindex @code{target("sse5")} attribute
-Enable/disable the generation of the SSE5 instructions.
-
@item ssse3
@itemx no-ssse3
@cindex @code{target("ssse3")} attribute
@@ -3244,8 +3239,8 @@ options, or you can separate the option
On the 386, the inliner will not inline a function that has different
target options than the caller, unless the callee has a subset of the
target options of the caller. For example a function declared with
-@code{target("sse5")} can inline a function with
-@code{target("sse2")}, since @code{-msse5} implies @code{-msse2}.
+@code{target("sse3")} can inline a function with
+@code{target("sse2")}, since @code{-msse3} implies @code{-msse2}.
The @code{target} attribute is not implemented in GCC versions earlier
than 4.4, and at present only the 386 uses it.
@@ -8893,223 +8888,6 @@ v2di __builtin_ia32_insertq (v2di, v2di)
v2di __builtin_ia32_insertqi (v2di, v2di, const unsigned int, const unsigned int)
@end smallexample
-The following built-in functions are available when @option{-msse5} is used.
-All of them generate the machine instruction that is part of the name
-with MMX registers.
-
-@smallexample
-v2df __builtin_ia32_comeqpd (v2df, v2df)
-v2df __builtin_ia32_comeqps (v2df, v2df)
-v4sf __builtin_ia32_comeqsd (v4sf, v4sf)
-v4sf __builtin_ia32_comeqss (v4sf, v4sf)
-v2df __builtin_ia32_comfalsepd (v2df, v2df)
-v2df __builtin_ia32_comfalseps (v2df, v2df)
-v4sf __builtin_ia32_comfalsesd (v4sf, v4sf)
-v4sf __builtin_ia32_comfalsess (v4sf, v4sf)
-v2df __builtin_ia32_comgepd (v2df, v2df)
-v2df __builtin_ia32_comgeps (v2df, v2df)
-v4sf __builtin_ia32_comgesd (v4sf, v4sf)
-v4sf __builtin_ia32_comgess (v4sf, v4sf)
-v2df __builtin_ia32_comgtpd (v2df, v2df)
-v2df __builtin_ia32_comgtps (v2df, v2df)
-v4sf __builtin_ia32_comgtsd (v4sf, v4sf)
-v4sf __builtin_ia32_comgtss (v4sf, v4sf)
-v2df __builtin_ia32_comlepd (v2df, v2df)
-v2df __builtin_ia32_comleps (v2df, v2df)
-v4sf __builtin_ia32_comlesd (v4sf, v4sf)
-v4sf __builtin_ia32_comless (v4sf, v4sf)
-v2df __builtin_ia32_comltpd (v2df, v2df)
-v2df __builtin_ia32_comltps (v2df, v2df)
-v4sf __builtin_ia32_comltsd (v4sf, v4sf)
-v4sf __builtin_ia32_comltss (v4sf, v4sf)
-v2df __builtin_ia32_comnepd (v2df, v2df)
-v2df __builtin_ia32_comneps (v2df, v2df)
-v4sf __builtin_ia32_comnesd (v4sf, v4sf)
-v4sf __builtin_ia32_comness (v4sf, v4sf)
-v2df __builtin_ia32_comordpd (v2df, v2df)
-v2df __builtin_ia32_comordps (v2df, v2df)
-v4sf __builtin_ia32_comordsd (v4sf, v4sf)
-v4sf __builtin_ia32_comordss (v4sf, v4sf)
-v2df __builtin_ia32_comtruepd (v2df, v2df)
-v2df __builtin_ia32_comtrueps (v2df, v2df)
-v4sf __builtin_ia32_comtruesd (v4sf, v4sf)
-v4sf __builtin_ia32_comtruess (v4sf, v4sf)
-v2df __builtin_ia32_comueqpd (v2df, v2df)
-v2df __builtin_ia32_comueqps (v2df, v2df)
-v4sf __builtin_ia32_comueqsd (v4sf, v4sf)
-v4sf __builtin_ia32_comueqss (v4sf, v4sf)
-v2df __builtin_ia32_comugepd (v2df, v2df)
-v2df __builtin_ia32_comugeps (v2df, v2df)
-v4sf __builtin_ia32_comugesd (v4sf, v4sf)
-v4sf __builtin_ia32_comugess (v4sf, v4sf)
-v2df __builtin_ia32_comugtpd (v2df, v2df)
-v2df __builtin_ia32_comugtps (v2df, v2df)
-v4sf __builtin_ia32_comugtsd (v4sf, v4sf)
-v4sf __builtin_ia32_comugtss (v4sf, v4sf)
-v2df __builtin_ia32_comulepd (v2df, v2df)
-v2df __builtin_ia32_comuleps (v2df, v2df)
-v4sf __builtin_ia32_comulesd (v4sf, v4sf)
-v4sf __builtin_ia32_comuless (v4sf, v4sf)
-v2df __builtin_ia32_comultpd (v2df, v2df)
-v2df __builtin_ia32_comultps (v2df, v2df)
-v4sf __builtin_ia32_comultsd (v4sf, v4sf)
-v4sf __builtin_ia32_comultss (v4sf, v4sf)
-v2df __builtin_ia32_comunepd (v2df, v2df)
-v2df __builtin_ia32_comuneps (v2df, v2df)
-v4sf __builtin_ia32_comunesd (v4sf, v4sf)
-v4sf __builtin_ia32_comuness (v4sf, v4sf)
-v2df __builtin_ia32_comunordpd (v2df, v2df)
-v2df __builtin_ia32_comunordps (v2df, v2df)
-v4sf __builtin_ia32_comunordsd (v4sf, v4sf)
-v4sf __builtin_ia32_comunordss (v4sf, v4sf)
-v2df __builtin_ia32_fmaddpd (v2df, v2df, v2df)
-v4sf __builtin_ia32_fmaddps (v4sf, v4sf, v4sf)
-v2df __builtin_ia32_fmaddsd (v2df, v2df, v2df)
-v4sf __builtin_ia32_fmaddss (v4sf, v4sf, v4sf)
-v2df __builtin_ia32_fmsubpd (v2df, v2df, v2df)
-v4sf __builtin_ia32_fmsubps (v4sf, v4sf, v4sf)
-v2df __builtin_ia32_fmsubsd (v2df, v2df, v2df)
-v4sf __builtin_ia32_fmsubss (v4sf, v4sf, v4sf)
-v2df __builtin_ia32_fnmaddpd (v2df, v2df, v2df)
-v4sf __builtin_ia32_fnmaddps (v4sf, v4sf, v4sf)
-v2df __builtin_ia32_fnmaddsd (v2df, v2df, v2df)
-v4sf __builtin_ia32_fnmaddss (v4sf, v4sf, v4sf)
-v2df __builtin_ia32_fnmsubpd (v2df, v2df, v2df)
-v4sf __builtin_ia32_fnmsubps (v4sf, v4sf, v4sf)
-v2df __builtin_ia32_fnmsubsd (v2df, v2df, v2df)
-v4sf __builtin_ia32_fnmsubss (v4sf, v4sf, v4sf)
-v2df __builtin_ia32_frczpd (v2df)
-v4sf __builtin_ia32_frczps (v4sf)
-v2df __builtin_ia32_frczsd (v2df, v2df)
-v4sf __builtin_ia32_frczss (v4sf, v4sf)
-v2di __builtin_ia32_pcmov (v2di, v2di, v2di)
-v2di __builtin_ia32_pcmov_v2di (v2di, v2di, v2di)
-v4si __builtin_ia32_pcmov_v4si (v4si, v4si, v4si)
-v8hi __builtin_ia32_pcmov_v8hi (v8hi, v8hi, v8hi)
-v16qi __builtin_ia32_pcmov_v16qi (v16qi, v16qi, v16qi)
-v2df __builtin_ia32_pcmov_v2df (v2df, v2df, v2df)
-v4sf __builtin_ia32_pcmov_v4sf (v4sf, v4sf, v4sf)
-v16qi __builtin_ia32_pcomeqb (v16qi, v16qi)
-v8hi __builtin_ia32_pcomeqw (v8hi, v8hi)
-v4si __builtin_ia32_pcomeqd (v4si, v4si)
-v2di __builtin_ia32_pcomeqq (v2di, v2di)
-v16qi __builtin_ia32_pcomequb (v16qi, v16qi)
-v4si __builtin_ia32_pcomequd (v4si, v4si)
-v2di __builtin_ia32_pcomequq (v2di, v2di)
-v8hi __builtin_ia32_pcomequw (v8hi, v8hi)
-v8hi __builtin_ia32_pcomeqw (v8hi, v8hi)
-v16qi __builtin_ia32_pcomfalseb (v16qi, v16qi)
-v4si __builtin_ia32_pcomfalsed (v4si, v4si)
-v2di __builtin_ia32_pcomfalseq (v2di, v2di)
-v16qi __builtin_ia32_pcomfalseub (v16qi, v16qi)
-v4si __builtin_ia32_pcomfalseud (v4si, v4si)
-v2di __builtin_ia32_pcomfalseuq (v2di, v2di)
-v8hi __builtin_ia32_pcomfalseuw (v8hi, v8hi)
-v8hi __builtin_ia32_pcomfalsew (v8hi, v8hi)
-v16qi __builtin_ia32_pcomgeb (v16qi, v16qi)
-v4si __builtin_ia32_pcomged (v4si, v4si)
-v2di __builtin_ia32_pcomgeq (v2di, v2di)
-v16qi __builtin_ia32_pcomgeub (v16qi, v16qi)
-v4si __builtin_ia32_pcomgeud (v4si, v4si)
-v2di __builtin_ia32_pcomgeuq (v2di, v2di)
-v8hi __builtin_ia32_pcomgeuw (v8hi, v8hi)
-v8hi __builtin_ia32_pcomgew (v8hi, v8hi)
-v16qi __builtin_ia32_pcomgtb (v16qi, v16qi)
-v4si __builtin_ia32_pcomgtd (v4si, v4si)
-v2di __builtin_ia32_pcomgtq (v2di, v2di)
-v16qi __builtin_ia32_pcomgtub (v16qi, v16qi)
-v4si __builtin_ia32_pcomgtud (v4si, v4si)
-v2di __builtin_ia32_pcomgtuq (v2di, v2di)
-v8hi __builtin_ia32_pcomgtuw (v8hi, v8hi)
-v8hi __builtin_ia32_pcomgtw (v8hi, v8hi)
-v16qi __builtin_ia32_pcomleb (v16qi, v16qi)
-v4si __builtin_ia32_pcomled (v4si, v4si)
-v2di __builtin_ia32_pcomleq (v2di, v2di)
-v16qi __builtin_ia32_pcomleub (v16qi, v16qi)
-v4si __builtin_ia32_pcomleud (v4si, v4si)
-v2di __builtin_ia32_pcomleuq (v2di, v2di)
-v8hi __builtin_ia32_pcomleuw (v8hi, v8hi)
-v8hi __builtin_ia32_pcomlew (v8hi, v8hi)
-v16qi __builtin_ia32_pcomltb (v16qi, v16qi)
-v4si __builtin_ia32_pcomltd (v4si, v4si)
-v2di __builtin_ia32_pcomltq (v2di, v2di)
-v16qi __builtin_ia32_pcomltub (v16qi, v16qi)
-v4si __builtin_ia32_pcomltud (v4si, v4si)
-v2di __builtin_ia32_pcomltuq (v2di, v2di)
-v8hi __builtin_ia32_pcomltuw (v8hi, v8hi)
-v8hi __builtin_ia32_pcomltw (v8hi, v8hi)
-v16qi __builtin_ia32_pcomneb (v16qi, v16qi)
-v4si __builtin_ia32_pcomned (v4si, v4si)
-v2di __builtin_ia32_pcomneq (v2di, v2di)
-v16qi __builtin_ia32_pcomneub (v16qi, v16qi)
-v4si __builtin_ia32_pcomneud (v4si, v4si)
-v2di __builtin_ia32_pcomneuq (v2di, v2di)
-v8hi __builtin_ia32_pcomneuw (v8hi, v8hi)
-v8hi __builtin_ia32_pcomnew (v8hi, v8hi)
-v16qi __builtin_ia32_pcomtrueb (v16qi, v16qi)
-v4si __builtin_ia32_pcomtrued (v4si, v4si)
-v2di __builtin_ia32_pcomtrueq (v2di, v2di)
-v16qi __builtin_ia32_pcomtrueub (v16qi, v16qi)
-v4si __builtin_ia32_pcomtrueud (v4si, v4si)
-v2di __builtin_ia32_pcomtrueuq (v2di, v2di)
-v8hi __builtin_ia32_pcomtrueuw (v8hi, v8hi)
-v8hi __builtin_ia32_pcomtruew (v8hi, v8hi)
-v4df __builtin_ia32_permpd (v2df, v2df, v16qi)
-v4sf __builtin_ia32_permps (v4sf, v4sf, v16qi)
-v4si __builtin_ia32_phaddbd (v16qi)
-v2di __builtin_ia32_phaddbq (v16qi)
-v8hi __builtin_ia32_phaddbw (v16qi)
-v2di __builtin_ia32_phadddq (v4si)
-v4si __builtin_ia32_phaddubd (v16qi)
-v2di __builtin_ia32_phaddubq (v16qi)
-v8hi __builtin_ia32_phaddubw (v16qi)
-v2di __builtin_ia32_phaddudq (v4si)
-v4si __builtin_ia32_phadduwd (v8hi)
-v2di __builtin_ia32_phadduwq (v8hi)
-v4si __builtin_ia32_phaddwd (v8hi)
-v2di __builtin_ia32_phaddwq (v8hi)
-v8hi __builtin_ia32_phsubbw (v16qi)
-v2di __builtin_ia32_phsubdq (v4si)
-v4si __builtin_ia32_phsubwd (v8hi)
-v4si __builtin_ia32_pmacsdd (v4si, v4si, v4si)
-v2di __builtin_ia32_pmacsdqh (v4si, v4si, v2di)
-v2di __builtin_ia32_pmacsdql (v4si, v4si, v2di)
-v4si __builtin_ia32_pmacssdd (v4si, v4si, v4si)
-v2di __builtin_ia32_pmacssdqh (v4si, v4si, v2di)
-v2di __builtin_ia32_pmacssdql (v4si, v4si, v2di)
-v4si __builtin_ia32_pmacsswd (v8hi, v8hi, v4si)
-v8hi __builtin_ia32_pmacssww (v8hi, v8hi, v8hi)
-v4si __builtin_ia32_pmacswd (v8hi, v8hi, v4si)
-v8hi __builtin_ia32_pmacsww (v8hi, v8hi, v8hi)
-v4si __builtin_ia32_pmadcsswd (v8hi, v8hi, v4si)
-v4si __builtin_ia32_pmadcswd (v8hi, v8hi, v4si)
-v16qi __builtin_ia32_pperm (v16qi, v16qi, v16qi)
-v16qi __builtin_ia32_protb (v16qi, v16qi)
-v4si __builtin_ia32_protd (v4si, v4si)
-v2di __builtin_ia32_protq (v2di, v2di)
-v8hi __builtin_ia32_protw (v8hi, v8hi)
-v16qi __builtin_ia32_pshab (v16qi, v16qi)
-v4si __builtin_ia32_pshad (v4si, v4si)
-v2di __builtin_ia32_pshaq (v2di, v2di)
-v8hi __builtin_ia32_pshaw (v8hi, v8hi)
-v16qi __builtin_ia32_pshlb (v16qi, v16qi)
-v4si __builtin_ia32_pshld (v4si, v4si)
-v2di __builtin_ia32_pshlq (v2di, v2di)
-v8hi __builtin_ia32_pshlw (v8hi, v8hi)
-@end smallexample
-
-The following builtin-in functions are available when @option{-msse5}
-is used. The second argument must be an integer constant and generate
-the machine instruction that is part of the name with the @samp{_imm}
-suffix removed.
-
-@smallexample
-v16qi __builtin_ia32_protb_imm (v16qi, int)
-v4si __builtin_ia32_protd_imm (v4si, int)
-v2di __builtin_ia32_protq_imm (v2di, int)
-v8hi __builtin_ia32_protw_imm (v8hi, int)
-@end smallexample
-
The following built-in functions are available when @option{-m3dnow} is used.
All of them generate the machine instruction that is part of the name.
Index: testsuite/gcc.target/i386/i386.exp
===================================================================
--- testsuite/gcc.target/i386/i386.exp (revision 151949)
+++ testsuite/gcc.target/i386/i386.exp (working copy)
@@ -120,21 +120,6 @@ proc check_effective_target_sse4a { } {
} "-O2 -msse4a" ]
}
-# Return 1 if sse5 instructions can be compiled.
-proc check_effective_target_sse5 { } {
- return [check_no_compiler_messages sse5 object {
- typedef long long __m128i __attribute__ ((__vector_size__ (16)));
- typedef short __v8hi __attribute__ ((__vector_size__ (16)));
-
- __m128i _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
- {
- return (__m128i) __builtin_ia32_pmacssww ((__v8hi)__A,
- (__v8hi)__B,
- (__v8hi)__C);
- }
- } "-O2 -msse5" ]
-}
-
# If a testcase doesn't have special options, use these.
global DEFAULT_CFLAGS
if ![info exists DEFAULT_CFLAGS] then {
Index: testsuite/gcc.target/i386/isa-1.c
===================================================================
--- testsuite/gcc.target/i386/isa-1.c (revision 151949)
+++ testsuite/gcc.target/i386/isa-1.c (working copy)
@@ -27,8 +27,5 @@ main ()
#if defined __SSE4A__
abort ();
#endif
-#if defined __SSE5__
- abort ();
-#endif
return 0;
}
Index: testsuite/gcc.target/i386/isa-5.c
===================================================================
--- testsuite/gcc.target/i386/isa-5.c (revision 151949)
+++ testsuite/gcc.target/i386/isa-5.c (working copy)
@@ -27,8 +27,5 @@ main ()
#if !defined __SSE4A__
abort ();
#endif
-#if defined __SSE5__
- abort ();
-#endif
return 0;
}
Index: testsuite/gcc.target/i386/funcspec-4.c
===================================================================
--- testsuite/gcc.target/i386/funcspec-4.c (revision 151949)
+++ testsuite/gcc.target/i386/funcspec-4.c (working copy)
@@ -1,9 +1,6 @@
/* Test some error conditions with function specific options. */
/* { dg-do compile } */
-/* no sse500 switch */
-extern void error1 (void) __attribute__((__target__("sse500"))); /* { dg-error "unknown" } */
-
/* Multiple arch switches */
extern void error2 (void) __attribute__((__target__("arch=core2,arch=k8"))); /* { dg-error "already specified" } */
Index: testsuite/gcc.target/i386/avx-2.c
===================================================================
--- testsuite/gcc.target/i386/avx-2.c (revision 151949)
+++ testsuite/gcc.target/i386/avx-2.c (working copy)
@@ -97,13 +97,12 @@ test_1 (_mm256_round_ps, __m256, __m256,
test_1 (_mm_aeskeygenassist_si128, __m128i, __m128i, 1)
test_2 (_mm_clmulepi64_si128, __m128i, __m128i, __m128i, 1)
-/* mmintrin-common.h */
+/* smmintrin.h */
test_1 (_mm_round_pd, __m128d, __m128d, 1)
test_1 (_mm_round_ps, __m128, __m128, 1)
test_2 (_mm_round_sd, __m128d, __m128d, __m128d, 1)
test_2 (_mm_round_ss, __m128, __m128, __m128, 1)
-/* smmintrin.h */
test_2 (_mm_blend_epi16, __m128i, __m128i, __m128i, 1)
test_2 (_mm_blend_ps, __m128, __m128, __m128, 1)
test_2 (_mm_blend_pd, __m128d, __m128d, __m128d, 1)
Index: testsuite/gcc.target/i386/sse-23.c
===================================================================
--- testsuite/gcc.target/i386/sse-23.c (revision 151949)
+++ testsuite/gcc.target/i386/sse-23.c (working copy)
@@ -21,13 +21,12 @@
#define __builtin_ia32_aeskeygenassist128(X, C) __builtin_ia32_aeskeygenassist128(X, 1)
#define __builtin_ia32_pclmulqdq128(X, Y, I) __builtin_ia32_pclmulqdq128(X, Y, 1)
-/* mmintrin-common.h */
+/* smmintrin.h */
#define __builtin_ia32_roundpd(V, M) __builtin_ia32_roundpd(V, 1)
#define __builtin_ia32_roundsd(D, V, M) __builtin_ia32_roundsd(D, V, 1)
#define __builtin_ia32_roundps(V, M) __builtin_ia32_roundps(V, 1)
#define __builtin_ia32_roundss(D, V, M) __builtin_ia32_roundss(D, V, 1)
-/* smmintrin.h */
#define __builtin_ia32_pblendw128(X, Y, M) __builtin_ia32_pblendw128 (X, Y, 1)
#define __builtin_ia32_blendps(X, Y, M) __builtin_ia32_blendps(X, Y, 1)
#define __builtin_ia32_blendpd(X, Y, M) __builtin_ia32_blendpd(X, Y, 1)
Index: testsuite/gcc.target/i386/isa-6.c
===================================================================
--- testsuite/gcc.target/i386/isa-6.c (revision 151949)
+++ testsuite/gcc.target/i386/isa-6.c (working copy)
@@ -28,8 +28,5 @@ main ()
#if !defined __SSE4A__
abort ();
#endif
-#if defined __SSE5__
- abort ();
-#endif
return 0;
}
Index: testsuite/gcc.target/i386/avx-1.c
===================================================================
--- testsuite/gcc.target/i386/avx-1.c (revision 151949)
+++ testsuite/gcc.target/i386/avx-1.c (working copy)
@@ -54,13 +54,12 @@
#define __builtin_ia32_aeskeygenassist128(X, C) __builtin_ia32_aeskeygenassist128(X, 1)
#define __builtin_ia32_pclmulqdq128(X, Y, I) __builtin_ia32_pclmulqdq128(X, Y, 1)
-/* mmintrin-common.h */
+/* smmintrin.h */
#define __builtin_ia32_roundpd(V, M) __builtin_ia32_roundpd(V, 1)
#define __builtin_ia32_roundsd(D, V, M) __builtin_ia32_roundsd(D, V, 1)
#define __builtin_ia32_roundps(V, M) __builtin_ia32_roundps(V, 1)
#define __builtin_ia32_roundss(D, V, M) __builtin_ia32_roundss(D, V, 1)
-/* smmintrin.h */
#define __builtin_ia32_pblendw128(X, Y, M) __builtin_ia32_pblendw128 (X, Y, 1)
#define __builtin_ia32_blendps(X, Y, M) __builtin_ia32_blendps(X, Y, 1)
#define __builtin_ia32_blendpd(X, Y, M) __builtin_ia32_blendpd(X, Y, 1)
Index: testsuite/gcc.target/i386/funcspec-9.c
===================================================================
--- testsuite/gcc.target/i386/funcspec-9.c (revision 151949)
+++ testsuite/gcc.target/i386/funcspec-9.c (working copy)
@@ -1,36 +0,0 @@
-/* Test whether using target specific options, we can generate SSE5 code. */
-/* { dg-do compile } */
-/* { dg-options "-O2 -march=k8 -mfpmath=sse -msse2" } */
-
-extern void exit (int);
-
-#ifdef __SSE4A__
-#warning "__SSE4A__ should not be defined before #pragma GCC target."
-#endif
-
-#pragma GCC push_options
-#pragma GCC target ("sse4a")
-
-#ifndef __SSE4A__
-#warning "__SSE4A__ should have be defined after #pragma GCC target."
-#endif
-
-float
-flt_mul_add (float a, float b, float c)
-{
- return (a * b) + c;
-}
-
-#pragma GCC pop_options
-#ifdef __SSE4A__
-#warning "__SSE4A__ should not be defined after #pragma GCC pop target."
-#endif
-
-double
-dbl_mul_add (double a, double b, double c)
-{
- return (a * b) + c;
-}
-
-/* We used to generate fused-madd with SSE5 support, but don't do that anymore. */
-/* { dg-final { scan-assembler "addsd" } } */
Index: config.gcc
===================================================================
--- config.gcc (revision 151949)
+++ config.gcc (working copy)
@@ -286,9 +286,8 @@ i[34567]86-*-*)
cxx_target_objs="i386-c.o"
extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
- nmmintrin.h bmmintrin.h mmintrin-common.h
- wmmintrin.h immintrin.h x86intrin.h avxintrin.h
- ia32intrin.h cross-stdarg.h"
+ nmmintrin.h bmmintrin.h wmmintrin.h immintrin.h
+ x86intrin.h avxintrin.h ia32intrin.h cross-stdarg.h"
;;
x86_64-*-*)
cpu_type=i386
@@ -296,9 +295,8 @@ x86_64-*-*)
cxx_target_objs="i386-c.o"
extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
- nmmintrin.h bmmintrin.h mmintrin-common.h
- wmmintrin.h immintrin.h x86intrin.h avxintrin.h
- ia32intrin.h cross-stdarg.h"
+ nmmintrin.h bmmintrin.h wmmintrin.h immintrin.h
+ x86intrin.h avxintrin.h ia32intrin.h cross-stdarg.h"
need_64bit_hwint=yes
;;
ia64-*-*)
Index: config/i386/smmintrin.h
===================================================================
--- config/i386/smmintrin.h (revision 151949)
+++ config/i386/smmintrin.h (working copy)
@@ -35,7 +35,125 @@
/* We need definitions from the SSSE3, SSE3, SSE2 and SSE header
files. */
#include <tmmintrin.h>
-#include <mmintrin-common.h>
+
+/* Rounding mode macros. */
+#define _MM_FROUND_TO_NEAREST_INT 0x00
+#define _MM_FROUND_TO_NEG_INF 0x01
+#define _MM_FROUND_TO_POS_INF 0x02
+#define _MM_FROUND_TO_ZERO 0x03
+#define _MM_FROUND_CUR_DIRECTION 0x04
+
+#define _MM_FROUND_RAISE_EXC 0x00
+#define _MM_FROUND_NO_EXC 0x08
+
+#define _MM_FROUND_NINT \
+ (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_FLOOR \
+ (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_CEIL \
+ (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_TRUNC \
+ (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_RINT \
+ (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_NEARBYINT \
+ (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)
+
+/* Test Instruction */
+/* Packed integer 128-bit bitwise comparison. Return 1 if
+ (__V & __M) == 0. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testz_si128 (__m128i __M, __m128i __V)
+{
+ return __builtin_ia32_ptestz128 ((__v2di)__M, (__v2di)__V);
+}
+
+/* Packed integer 128-bit bitwise comparison. Return 1 if
+ (__V & ~__M) == 0. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testc_si128 (__m128i __M, __m128i __V)
+{
+ return __builtin_ia32_ptestc128 ((__v2di)__M, (__v2di)__V);
+}
+
+/* Packed integer 128-bit bitwise comparison. Return 1 if
+ (__V & __M) != 0 && (__V & ~__M) != 0. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testnzc_si128 (__m128i __M, __m128i __V)
+{
+ return __builtin_ia32_ptestnzc128 ((__v2di)__M, (__v2di)__V);
+}
+
+/* Macros for packed integer 128-bit comparison intrinsics. */
+#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))
+
+#define _mm_test_all_ones(V) \
+ _mm_testc_si128 ((V), _mm_cmpeq_epi32 ((V), (V)))
+
+#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V))
+
+/* Packed/scalar double precision floating point rounding. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_round_pd (__m128d __V, const int __M)
+{
+ return (__m128d) __builtin_ia32_roundpd ((__v2df)__V, __M);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_round_sd(__m128d __D, __m128d __V, const int __M)
+{
+ return (__m128d) __builtin_ia32_roundsd ((__v2df)__D,
+ (__v2df)__V,
+ __M);
+}
+#else
+#define _mm_round_pd(V, M) \
+ ((__m128d) __builtin_ia32_roundpd ((__v2df)(__m128d)(V), (int)(M)))
+
+#define _mm_round_sd(D, V, M) \
+ ((__m128d) __builtin_ia32_roundsd ((__v2df)(__m128d)(D), \
+ (__v2df)(__m128d)(V), (int)(M)))
+#endif
+
+/* Packed/scalar single precision floating point rounding. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_round_ps (__m128 __V, const int __M)
+{
+ return (__m128) __builtin_ia32_roundps ((__v4sf)__V, __M);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_round_ss (__m128 __D, __m128 __V, const int __M)
+{
+ return (__m128) __builtin_ia32_roundss ((__v4sf)__D,
+ (__v4sf)__V,
+ __M);
+}
+#else
+#define _mm_round_ps(V, M) \
+ ((__m128) __builtin_ia32_roundps ((__v4sf)(__m128)(V), (int)(M)))
+
+#define _mm_round_ss(D, V, M) \
+ ((__m128) __builtin_ia32_roundss ((__v4sf)(__m128)(D), \
+ (__v4sf)(__m128)(V), (int)(M)))
+#endif
+
+/* Macros for ceil/floor intrinsics. */
+#define _mm_ceil_pd(V) _mm_round_pd ((V), _MM_FROUND_CEIL)
+#define _mm_ceil_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_CEIL)
+
+#define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR)
+#define _mm_floor_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_FLOOR)
+
+#define _mm_ceil_ps(V) _mm_round_ps ((V), _MM_FROUND_CEIL)
+#define _mm_ceil_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_CEIL)
+
+#define _mm_floor_ps(V) _mm_round_ps ((V), _MM_FROUND_FLOOR)
+#define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR)
/* SSE4.1 */
Index: config/i386/cpuid.h
===================================================================
--- config/i386/cpuid.h (revision 151949)
+++ config/i386/cpuid.h (working copy)
@@ -48,7 +48,6 @@
/* %ecx */
#define bit_LAHF_LM (1 << 0)
#define bit_SSE4a (1 << 6)
-#define bit_SSE5 (1 << 11)
/* %edx */
#define bit_LM (1 << 29)
Index: config/i386/x86intrin.h
===================================================================
--- config/i386/x86intrin.h (revision 151949)
+++ config/i386/x86intrin.h (working copy)
@@ -54,10 +54,6 @@
#include <smmintrin.h>
#endif
-#ifdef __SSE5__
-#include <bmmintrin.h>
-#endif
-
#if defined (__AES__) || defined (__PCLMUL__)
#include <wmmintrin.h>
#endif
Index: config/i386/mmintrin-common.h
===================================================================
--- config/i386/mmintrin-common.h (revision 151949)
+++ config/i386/mmintrin-common.h (working copy)
@@ -1,154 +0,0 @@
-/* Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- GCC is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-/* Common definition of the ROUND and PTEST intrinsics, SSE4.1. */
-
-#ifndef _MMINTRIN_COMMON_H_INCLUDED
-#define _MMINTRIN_COMMON_H_INCLUDED
-
-#if !defined(__SSE4_1__)
-# error "SSE4.1 instruction set not enabled"
-#else
-
-/* Rounding mode macros. */
-#define _MM_FROUND_TO_NEAREST_INT 0x00
-#define _MM_FROUND_TO_NEG_INF 0x01
-#define _MM_FROUND_TO_POS_INF 0x02
-#define _MM_FROUND_TO_ZERO 0x03
-#define _MM_FROUND_CUR_DIRECTION 0x04
-
-#define _MM_FROUND_RAISE_EXC 0x00
-#define _MM_FROUND_NO_EXC 0x08
-
-#define _MM_FROUND_NINT \
- (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
-#define _MM_FROUND_FLOOR \
- (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
-#define _MM_FROUND_CEIL \
- (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
-#define _MM_FROUND_TRUNC \
- (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
-#define _MM_FROUND_RINT \
- (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
-#define _MM_FROUND_NEARBYINT \
- (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)
-
-/* Test Instruction */
-/* Packed integer 128-bit bitwise comparison. Return 1 if
- (__V & __M) == 0. */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_testz_si128 (__m128i __M, __m128i __V)
-{
- return __builtin_ia32_ptestz128 ((__v2di)__M, (__v2di)__V);
-}
-
-/* Packed integer 128-bit bitwise comparison. Return 1 if
- (__V & ~__M) == 0. */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_testc_si128 (__m128i __M, __m128i __V)
-{
- return __builtin_ia32_ptestc128 ((__v2di)__M, (__v2di)__V);
-}
-
-/* Packed integer 128-bit bitwise comparison. Return 1 if
- (__V & __M) != 0 && (__V & ~__M) != 0. */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_testnzc_si128 (__m128i __M, __m128i __V)
-{
- return __builtin_ia32_ptestnzc128 ((__v2di)__M, (__v2di)__V);
-}
-
-/* Macros for packed integer 128-bit comparison intrinsics. */
-#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))
-
-#define _mm_test_all_ones(V) \
- _mm_testc_si128 ((V), _mm_cmpeq_epi32 ((V), (V)))
-
-#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V))
-
-/* Packed/scalar double precision floating point rounding. */
-
-#ifdef __OPTIMIZE__
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_round_pd (__m128d __V, const int __M)
-{
- return (__m128d) __builtin_ia32_roundpd ((__v2df)__V, __M);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_round_sd(__m128d __D, __m128d __V, const int __M)
-{
- return (__m128d) __builtin_ia32_roundsd ((__v2df)__D,
- (__v2df)__V,
- __M);
-}
-#else
-#define _mm_round_pd(V, M) \
- ((__m128d) __builtin_ia32_roundpd ((__v2df)(__m128d)(V), (int)(M)))
-
-#define _mm_round_sd(D, V, M) \
- ((__m128d) __builtin_ia32_roundsd ((__v2df)(__m128d)(D), \
- (__v2df)(__m128d)(V), (int)(M)))
-#endif
-
-/* Packed/scalar single precision floating point rounding. */
-
-#ifdef __OPTIMIZE__
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_round_ps (__m128 __V, const int __M)
-{
- return (__m128) __builtin_ia32_roundps ((__v4sf)__V, __M);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_round_ss (__m128 __D, __m128 __V, const int __M)
-{
- return (__m128) __builtin_ia32_roundss ((__v4sf)__D,
- (__v4sf)__V,
- __M);
-}
-#else
-#define _mm_round_ps(V, M) \
- ((__m128) __builtin_ia32_roundps ((__v4sf)(__m128)(V), (int)(M)))
-
-#define _mm_round_ss(D, V, M) \
- ((__m128) __builtin_ia32_roundss ((__v4sf)(__m128)(D), \
- (__v4sf)(__m128)(V), (int)(M)))
-#endif
-
-/* Macros for ceil/floor intrinsics. */
-#define _mm_ceil_pd(V) _mm_round_pd ((V), _MM_FROUND_CEIL)
-#define _mm_ceil_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_CEIL)
-
-#define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR)
-#define _mm_floor_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_FLOOR)
-
-#define _mm_ceil_ps(V) _mm_round_ps ((V), _MM_FROUND_CEIL)
-#define _mm_ceil_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_CEIL)
-
-#define _mm_floor_ps(V) _mm_round_ps ((V), _MM_FROUND_FLOOR)
-#define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR)
-
-#endif /* __SSE4_1__ */
-
-#endif /* _MMINTRIN_COMMON_H_INCLUDED */