This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
define MMX/SSE intrinsics without requiring cmdline isa flags
- From: Xinliang David Li <davidxl at google dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Date: Wed, 18 Aug 2010 17:05:15 -0700
- Subject: define MMX/SSE intrinsics without requiring cmdline isa flags
Hi, I have received many complaints from users that gcc's asm intrinsics
are hard to use, because they must specify the command-line -mxxx option
to use them even though the enclosing function has the right target
attribute:
#include <smmintrin.h>
int foo(...) __attribute__((__target__("sse4")));
int foo(...)
{
...
_mm_testc_si128 (...
...
}
The reason is that in gcc those intrinsics are defined as wrappers to
the gcc builtins and their definitions are guarded by ISA macros that
are defined via command line option or target pragmas.
There are workarounds of the problem:
1. Move the function into a separate file and compile it with the -mxxx option.
2. Use gcc builtins directly. The drawback is that the code won't be
portable; and some data types such as __m128i are also guarded by the
macros.
3. Use Pragma target
#pragma GCC push_options
#pragma GCC target ("sse4")
#include <smmintrin.h>
int foo(...)
{
...
}
# pragma GCC pop_options
Or use the pragma simply to wrap the header inclusion:
my_smmintrin.h:
#pragma GCC push_options
#pragma GCC target ("sse4")
#include <smmintrin.h>
#pragma GCC pop_options
#include "my_smmintrin.h"
int foo(...) __attribute__(...);
int foo(...)
{
...
}
None of the above are ideal:
1. There is an ongoing effort to support optimization of calls to
multi-versioned functions (via runtime dispatch) via predicate
hoisting and caller cloning -- the end result is that it enables
inlining of target-specific clones into callers which are also cloned.
This makes it important to avoid putting the target function into a
separate file -- otherwise cross-module optimization (CMO) is needed.
2. Using pragmas is ugly, and push_options and pop_options seem broken at
the moment (reset_options works fine).
So the question becomes why not allow the definitions of those
wrappers in the first place --- if the user includes the header, he
then intends to use them. See attached patch that does this. GCC
bootstrapping shows that there is NO visible compile time change at
all with this patch.
Comments?
Thanks,
David
Index: gcc/testsuite/gcc.target/i386/crc32-4.c
===================================================================
--- gcc/testsuite/gcc.target/i386/crc32-4.c (revision 163086)
+++ gcc/testsuite/gcc.target/i386/crc32-4.c (working copy)
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mno-sse4.2 -mno-crc32" } */
+/* { dg-options "-O2 -mno-sse4.2 -mno-crc32 -mno-define-all-isa-intrin" } */
/* { dg-final { scan-assembler "__builtin_ia32_crc32di" } } */
unsigned long long
Index: gcc/testsuite/gcc.target/i386/isa-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/isa-1.c (revision 163086)
+++ gcc/testsuite/gcc.target/i386/isa-1.c (working copy)
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-march=x86-64 -msse4" } */
+/* { dg-options "-march=x86-64 -msse4 -mno-define-all-isa-intrin" } */
extern void abort (void);
Index: gcc/testsuite/gcc.target/i386/isa-12.c
===================================================================
--- gcc/testsuite/gcc.target/i386/isa-12.c (revision 163086)
+++ gcc/testsuite/gcc.target/i386/isa-12.c (working copy)
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-march=x86-64 -mfma4 -mno-sse3" } */
+/* { dg-options "-march=x86-64 -mfma4 -mno-sse3 -mno-define-all-isa-intrin" } */
extern void abort (void);
Index: gcc/testsuite/gcc.target/i386/isa-14.c
===================================================================
--- gcc/testsuite/gcc.target/i386/isa-14.c (revision 163086)
+++ gcc/testsuite/gcc.target/i386/isa-14.c (working copy)
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-march=x86-64 -msse4a -mfma4 -mno-sse" } */
+/* { dg-options "-march=x86-64 -msse4a -mfma4 -mno-sse -mno-define-all-isa-intrin" } */
extern void abort (void);
Index: gcc/testsuite/gcc.target/i386/isa-5.c
===================================================================
--- gcc/testsuite/gcc.target/i386/isa-5.c (revision 163086)
+++ gcc/testsuite/gcc.target/i386/isa-5.c (working copy)
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-march=core2 -msse4a -mno-sse4" } */
+/* { dg-options "-march=core2 -msse4a -mno-sse4 -mno-define-all-isa-intrin" } */
extern void abort (void);
Index: gcc/testsuite/gcc.target/i386/isa-7.c
===================================================================
--- gcc/testsuite/gcc.target/i386/isa-7.c (revision 163086)
+++ gcc/testsuite/gcc.target/i386/isa-7.c (working copy)
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-march=amdfam10 -mfma4 -mno-sse4" } */
+/* { dg-options "-march=amdfam10 -mfma4 -mno-sse4 -mno-define-all-isa-intrin" } */
extern void abort (void);
Index: gcc/testsuite/gcc.target/i386/isa-9.c
===================================================================
--- gcc/testsuite/gcc.target/i386/isa-9.c (revision 163086)
+++ gcc/testsuite/gcc.target/i386/isa-9.c (working copy)
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-march=amdfam10 -mno-fma4" } */
+/* { dg-options "-march=amdfam10 -mno-fma4 -mno-define-all-isa-intrin" } */
extern void abort (void);
Index: gcc/testsuite/gcc.target/i386/defines-2.c
===================================================================
--- gcc/testsuite/gcc.target/i386/defines-2.c (revision 163086)
+++ gcc/testsuite/gcc.target/i386/defines-2.c (working copy)
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=athlon64 -mno-mmx" } */
+/* { dg-options "-march=athlon64 -mno-mmx -mno-define-all-isa-intrin" } */
#if defined(__MMX__) || defined(__3dNOW__) || defined(__3dNOW_A__)
#error
Index: gcc/testsuite/gcc.target/i386/funcspec-8.c
===================================================================
--- gcc/testsuite/gcc.target/i386/funcspec-8.c (revision 163086)
+++ gcc/testsuite/gcc.target/i386/funcspec-8.c (working copy)
@@ -2,7 +2,7 @@
functions in functions with the appropriate function specific options. */
/* { dg-do compile } */
/* { dg-skip-if "" { i?86-*-* x86_64-*-* } { "-march=*" } { "-march=k8" } } */
-/* { dg-options "-O2 -march=k8 -mfpmath=sse" } */
+/* { dg-options "-O2 -march=k8 -mfpmath=sse -mno-define-all-isa-intrin" } */
typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
Index: gcc/testsuite/gcc.target/i386/3dnow-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/3dnow-1.c (revision 163086)
+++ gcc/testsuite/gcc.target/i386/3dnow-1.c (working copy)
@@ -1,6 +1,6 @@
/* { dg-do assemble } */
/* { dg-require-effective-target 3dnow } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -m3dnow" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -m3dnow -mno-define-all-isa-intrin" } */
/* Test that the intrinsics compile with optimization. All of them are
defined as inline functions in mmintrin.h that reference the proper
Index: gcc/testsuite/gcc.target/i386/avx-2.c
===================================================================
--- gcc/testsuite/gcc.target/i386/avx-2.c (revision 163086)
+++ gcc/testsuite/gcc.target/i386/avx-2.c (working copy)
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -msse4a -maes -mpclmul" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -msse4a -maes -mpclmul -mno-define-all-isa-intrin" } */
#include <mm_malloc.h>
Index: gcc/testsuite/gcc.target/i386/isa-11.c
===================================================================
--- gcc/testsuite/gcc.target/i386/isa-11.c (revision 163086)
+++ gcc/testsuite/gcc.target/i386/isa-11.c (working copy)
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-march=x86-64 -mfma4 -mno-ssse3" } */
+/* { dg-options "-march=x86-64 -mfma4 -mno-ssse3 -mno-define-all-isa-intrin" } */
extern void abort (void);
Index: gcc/testsuite/gcc.target/i386/isa-13.c
===================================================================
--- gcc/testsuite/gcc.target/i386/isa-13.c (revision 163086)
+++ gcc/testsuite/gcc.target/i386/isa-13.c (working copy)
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-march=x86-64 -mfma4 -mno-sse2" } */
+/* { dg-options "-march=x86-64 -mfma4 -mno-sse2 -mno-define-all-isa-intrin" } */
extern void abort (void);
Index: gcc/testsuite/gcc.target/i386/isa-4.c
===================================================================
--- gcc/testsuite/gcc.target/i386/isa-4.c (revision 163086)
+++ gcc/testsuite/gcc.target/i386/isa-4.c (working copy)
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-march=core2 -mfma4 -mno-sse4" } */
+/* { dg-options "-march=core2 -mfma4 -mno-sse4 -mno-define-all-isa-intrin" } */
extern void abort (void);
Index: gcc/testsuite/gcc.target/i386/isa-6.c
===================================================================
--- gcc/testsuite/gcc.target/i386/isa-6.c (revision 163086)
+++ gcc/testsuite/gcc.target/i386/isa-6.c (working copy)
@@ -1,6 +1,6 @@
/* { dg-do run } */
/* { dg-skip-if "" { i?86-*-* x86_64-*-* } { "-march=*" } { "-march=amdfam10" } } */
-/* { dg-options "-march=amdfam10 -mno-sse4" } */
+/* { dg-options "-march=amdfam10 -mno-sse4 -mno-define-all-isa-intrin" } */
extern void abort (void);
Index: gcc/testsuite/gcc.target/i386/isa-8.c
===================================================================
--- gcc/testsuite/gcc.target/i386/isa-8.c (revision 163086)
+++ gcc/testsuite/gcc.target/i386/isa-8.c (working copy)
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-march=amdfam10 -mfma4 -mno-sse4a" } */
+/* { dg-options "-march=amdfam10 -mfma4 -mno-sse4a -mno-define-all-isa-intrin" } */
extern void abort (void);
Index: gcc/testsuite/gcc.target/i386/defines-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/defines-1.c (revision 163086)
+++ gcc/testsuite/gcc.target/i386/defines-1.c (working copy)
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=nocona -mno-sse" } */
+/* { dg-options "-march=nocona -mno-sse -mno-define-all-isa-intrin" } */
#if defined(__SSE__) || defined(__SSE2__) || defined(__SSE3__)
#error
Index: gcc/testsuite/gcc.target/i386/avx-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/avx-1.c (revision 163086)
+++ gcc/testsuite/gcc.target/i386/avx-1.c (working copy)
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -maes -mpclmul" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -maes -mpclmul -mno-define-all-isa-intrin" } */
#include <mm_malloc.h>
Index: gcc/testsuite/gcc.target/i386/funcspec-9.c
===================================================================
--- gcc/testsuite/gcc.target/i386/funcspec-9.c (revision 163086)
+++ gcc/testsuite/gcc.target/i386/funcspec-9.c (working copy)
@@ -1,6 +1,6 @@
/* Test whether using target specific options, we can generate FMA4 code. */
/* { dg-do compile } */
-/* { dg-options "-O2 -march=k8 -mfpmath=sse -msse2" } */
+/* { dg-options "-O2 -march=k8 -mfpmath=sse -msse2 -mno-define-all-isa-intrin" } */
extern void exit (int);
Index: gcc/testsuite/gcc.target/i386/isa-10.c
===================================================================
--- gcc/testsuite/gcc.target/i386/isa-10.c (revision 163086)
+++ gcc/testsuite/gcc.target/i386/isa-10.c (working copy)
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-march=x86-64 -mfma4 -mno-sse4" } */
+/* { dg-options "-march=x86-64 -mfma4 -mno-sse4 -mno-define-all-isa-intrin" } */
extern void abort (void);
Index: gcc/testsuite/gcc.target/i386/3dnow-2.c
===================================================================
--- gcc/testsuite/gcc.target/i386/3dnow-2.c (revision 163086)
+++ gcc/testsuite/gcc.target/i386/3dnow-2.c (working copy)
@@ -1,6 +1,6 @@
/* { dg-do assemble } */
/* { dg-require-effective-target 3dnow } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -m3dnow" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -m3dnow -mno-define-all-isa-intrin" } */
/* Test that the intrinsics compile without optimization. All of them are
defined as inline functions in mmintrin.h that reference the proper
Index: gcc/testsuite/gcc.target/i386/sse-22.c
===================================================================
--- gcc/testsuite/gcc.target/i386/sse-22.c (revision 163086)
+++ gcc/testsuite/gcc.target/i386/sse-22.c (working copy)
@@ -1,6 +1,6 @@
/* Same as sse-14, except converted to use #pragma GCC option. */
/* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -mno-define-all-isa-intrin" } */
#include <mm_malloc.h>
Index: gcc/testsuite/gcc.target/x86_64/abi/abi-x86_64.exp
===================================================================
--- gcc/testsuite/gcc.target/x86_64/abi/abi-x86_64.exp (revision 163086)
+++ gcc/testsuite/gcc.target/x86_64/abi/abi-x86_64.exp (working copy)
@@ -29,7 +29,7 @@ if { (![istarget x86_64-*-*] && ![istarg
torture-init
set-torture-options $C_TORTURE_OPTIONS
-set additional_flags "-W -Wall -Wno-abi"
+set additional_flags "-W -Wall -Wno-abi -mno-define-all-isa-intrin"
foreach src [lsort [glob -nocomplain $srcdir/$subdir/test_*.c]] {
if {[runtest_file_p $runtests $src]} {
Index: gcc/config/i386/i386.opt
===================================================================
--- gcc/config/i386/i386.opt (revision 163086)
+++ gcc/config/i386/i386.opt (working copy)
@@ -112,6 +112,11 @@ mcmodel=
Target RejectNegative Joined Var(ix86_cmodel_string)
Use given x86-64 code model
+mdefine-all-isa-intrin
+Target Report Var(ix86_define_all_isa_intrin) Init(1)
+Enable inline definitions of all ISA specific intrinsics in the header files
+
+
mfancy-math-387
Target RejectNegative Report InverseMask(NO_FANCY_MATH_387, USE_FANCY_MATH_387) Save
Generate sin, cos, sqrt for FPU
Index: gcc/config/i386/i386-c.c
===================================================================
--- gcc/config/i386/i386-c.c (revision 163086)
+++ gcc/config/i386/i386-c.c (working copy)
@@ -208,53 +208,56 @@ ix86_target_macros_internal (int isa_fla
break;
}
- if (isa_flag & OPTION_MASK_ISA_MMX)
+ #define CHECK_ISA_FLAGS(mask) \
+ ((isa_flag & (mask)) || ix86_define_all_isa_intrin) /* Outer parens keep the || from rebinding when used next to && or !.  */
+
+ if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_MMX))
def_or_undef (parse_in, "__MMX__");
- if (isa_flag & OPTION_MASK_ISA_3DNOW)
+ if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_3DNOW))
def_or_undef (parse_in, "__3dNOW__");
- if (isa_flag & OPTION_MASK_ISA_3DNOW_A)
+ if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_3DNOW_A))
def_or_undef (parse_in, "__3dNOW_A__");
- if (isa_flag & OPTION_MASK_ISA_SSE)
+ if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_SSE))
def_or_undef (parse_in, "__SSE__");
- if (isa_flag & OPTION_MASK_ISA_SSE2)
+ if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_SSE2))
def_or_undef (parse_in, "__SSE2__");
- if (isa_flag & OPTION_MASK_ISA_SSE3)
+ if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_SSE3))
def_or_undef (parse_in, "__SSE3__");
- if (isa_flag & OPTION_MASK_ISA_SSSE3)
+ if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_SSSE3))
def_or_undef (parse_in, "__SSSE3__");
- if (isa_flag & OPTION_MASK_ISA_SSE4_1)
+ if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_SSE4_1))
def_or_undef (parse_in, "__SSE4_1__");
- if (isa_flag & OPTION_MASK_ISA_SSE4_2)
+ if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_SSE4_2))
def_or_undef (parse_in, "__SSE4_2__");
- if (isa_flag & OPTION_MASK_ISA_AES)
+ if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_AES))
def_or_undef (parse_in, "__AES__");
- if (isa_flag & OPTION_MASK_ISA_PCLMUL)
+ if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_PCLMUL))
def_or_undef (parse_in, "__PCLMUL__");
- if (isa_flag & OPTION_MASK_ISA_AVX)
+ if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_AVX))
def_or_undef (parse_in, "__AVX__");
- if (isa_flag & OPTION_MASK_ISA_FMA)
+ if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_FMA))
def_or_undef (parse_in, "__FMA__");
- if (isa_flag & OPTION_MASK_ISA_SSE4A)
+ if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_SSE4A))
def_or_undef (parse_in, "__SSE4A__");
- if (isa_flag & OPTION_MASK_ISA_FMA4)
+ if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_FMA4))
def_or_undef (parse_in, "__FMA4__");
- if (isa_flag & OPTION_MASK_ISA_XOP)
+ if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_XOP))
def_or_undef (parse_in, "__XOP__");
- if (isa_flag & OPTION_MASK_ISA_LWP)
+ if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_LWP))
def_or_undef (parse_in, "__LWP__");
- if (isa_flag & OPTION_MASK_ISA_ABM)
+ if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_ABM))
def_or_undef (parse_in, "__ABM__");
- if (isa_flag & OPTION_MASK_ISA_POPCNT)
+ if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_POPCNT))
def_or_undef (parse_in, "__POPCNT__");
- if (isa_flag & OPTION_MASK_ISA_FSGSBASE)
+ if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_FSGSBASE))
def_or_undef (parse_in, "__FSGSBASE__");
- if (isa_flag & OPTION_MASK_ISA_RDRND)
+ if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_RDRND))
def_or_undef (parse_in, "__RDRND__");
- if (isa_flag & OPTION_MASK_ISA_F16C)
+ if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_F16C))
def_or_undef (parse_in, "__F16C__");
- if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE))
+ if ((fpmath & FPMATH_SSE) && (CHECK_ISA_FLAGS (OPTION_MASK_ISA_SSE)))
def_or_undef (parse_in, "__SSE_MATH__");
- if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2))
+ if ((fpmath & FPMATH_SSE) && (CHECK_ISA_FLAGS (OPTION_MASK_ISA_SSE2)))
def_or_undef (parse_in, "__SSE2_MATH__");
}
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c (revision 163086)
+++ gcc/config/i386/i386.c (working copy)
@@ -22383,6 +22383,7 @@ def_builtin (int mask, const char *name,
mask &= ~OPTION_MASK_ISA_64BIT;
if (mask == 0
+ || ix86_define_all_isa_intrin
|| (mask & ix86_isa_flags) != 0
|| (lang_hooks.builtin_function
== lang_hooks.builtin_function_ext_scope))
@@ -25181,14 +25182,17 @@ ix86_expand_builtin (tree exp, rtx targe
rtx op0, op1, op2, pat;
enum machine_mode mode0, mode1, mode2;
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+ int builtin_isa_flags = ix86_builtins_isa[fcode].isa;
+ if (builtin_isa_flags != OPTION_MASK_ISA_64BIT)
+ builtin_isa_flags &= ~OPTION_MASK_ISA_64BIT;
/* Determine whether the builtin function is available under the current ISA.
Originally the builtin was not created if it wasn't applicable to the
current ISA based on the command line switches. With function specific
options, we need to check in the context of the function making the call
whether it is supported. */
- if (ix86_builtins_isa[fcode].isa
- && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
+ if (builtin_isa_flags
+ && !(builtin_isa_flags & ix86_isa_flags))
{
char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
NULL, NULL, false);