This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

define MMX/SSE intrinsics without requiring cmdline isa flags


Hi, I have received many complaints from users that gcc's asm intrinsics are
hard to use -- because they need to specify a command line -mxxx option
to use them even though the enclosing function has the right target
attribute:

#include <smmintrin.h>

int foo(...) __attribute__((__target__("sse4")));

int foo(...)
{
   ...
    _mm_testc_si128 (...
  ...
}

The reason is that in gcc those intrinsics are defined as wrappers to
the gcc builtins and their definitions are guarded by ISA macros that
are defined via command line option or target pragmas.

There are workarounds of the problem:

1. Move the function into a separate file and compile it with the -mxxx option.

2. Use gcc builtins directly. The drawback is that the code won't be
portable, and some data types such as __m128i are also guarded by the
macros.

3. Use Pragma target

    #pragma GCC push_options
    #pragma GCC target ("sse4")
   #include <smmintrin.h>
   int foo(...)
    {
      ...
    }
   # pragma GCC  pop_options

  Or use the pragma simply to wrap the header inclusion:

   my_smmintrin.h:

   #pragma GCC push_options
   #pragma GCC target ("sse4")
   #include <smmintrin.h>
  #pragma GCC pop_options

  #include "my_smmintrin.h"
  int foo(...) __attribute__(...);
  int foo(...)
   {
    ...
   }


None of the above are ideal:
1. There is an ongoing effort to support optimization of calls to
multi-versioned functions (via runtime dispatch) via predicate
hoisting and caller cloning -- the end result is that it enables
inlining of target specific clones into callers which are also cloned.
This makes it important to avoid putting the target function into a
separate file -- otherwise CMO is needed.
2. Using pragmas is ugly, and push_options and pop_options seem broken at
the moment (reset_options works fine).


So the question becomes why not allow the definitions of those
wrappers in the first place --- if the user includes the header, he
then intends to use them. See attached patch that does this.  GCC
bootstrapping shows that there is NO visible compile time change at
all with this patch.

Comments?

Thanks,

David
Index: gcc/testsuite/gcc.target/i386/crc32-4.c
===================================================================
--- gcc/testsuite/gcc.target/i386/crc32-4.c	(revision 163086)
+++ gcc/testsuite/gcc.target/i386/crc32-4.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-sse4.2 -mno-crc32" } */
+/* { dg-options "-O2 -mno-sse4.2 -mno-crc32 -mno-define-all-isa-intrin" } */
 /* { dg-final { scan-assembler "__builtin_ia32_crc32di" } } */
 
 unsigned long long
Index: gcc/testsuite/gcc.target/i386/isa-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/isa-1.c	(revision 163086)
+++ gcc/testsuite/gcc.target/i386/isa-1.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-march=x86-64 -msse4" } */
+/* { dg-options "-march=x86-64 -msse4 -mno-define-all-isa-intrin" } */
 
 extern void abort (void);
 
Index: gcc/testsuite/gcc.target/i386/isa-12.c
===================================================================
--- gcc/testsuite/gcc.target/i386/isa-12.c	(revision 163086)
+++ gcc/testsuite/gcc.target/i386/isa-12.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-march=x86-64 -mfma4 -mno-sse3" } */
+/* { dg-options "-march=x86-64 -mfma4 -mno-sse3 -mno-define-all-isa-intrin" } */
 
 extern void abort (void);
 
Index: gcc/testsuite/gcc.target/i386/isa-14.c
===================================================================
--- gcc/testsuite/gcc.target/i386/isa-14.c	(revision 163086)
+++ gcc/testsuite/gcc.target/i386/isa-14.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-march=x86-64 -msse4a -mfma4 -mno-sse" } */
+/* { dg-options "-march=x86-64 -msse4a -mfma4 -mno-sse -mno-define-all-isa-intrin" } */
 
 extern void abort (void);
 
Index: gcc/testsuite/gcc.target/i386/isa-5.c
===================================================================
--- gcc/testsuite/gcc.target/i386/isa-5.c	(revision 163086)
+++ gcc/testsuite/gcc.target/i386/isa-5.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-march=core2 -msse4a -mno-sse4" } */
+/* { dg-options "-march=core2 -msse4a -mno-sse4 -mno-define-all-isa-intrin" } */
 
 extern void abort (void);
 
Index: gcc/testsuite/gcc.target/i386/isa-7.c
===================================================================
--- gcc/testsuite/gcc.target/i386/isa-7.c	(revision 163086)
+++ gcc/testsuite/gcc.target/i386/isa-7.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-march=amdfam10 -mfma4 -mno-sse4" } */
+/* { dg-options "-march=amdfam10 -mfma4 -mno-sse4 -mno-define-all-isa-intrin" } */
 
 extern void abort (void);
 
Index: gcc/testsuite/gcc.target/i386/isa-9.c
===================================================================
--- gcc/testsuite/gcc.target/i386/isa-9.c	(revision 163086)
+++ gcc/testsuite/gcc.target/i386/isa-9.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-march=amdfam10 -mno-fma4" } */
+/* { dg-options "-march=amdfam10 -mno-fma4 -mno-define-all-isa-intrin" } */
 
 extern void abort (void);
 
Index: gcc/testsuite/gcc.target/i386/defines-2.c
===================================================================
--- gcc/testsuite/gcc.target/i386/defines-2.c	(revision 163086)
+++ gcc/testsuite/gcc.target/i386/defines-2.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=athlon64 -mno-mmx" } */
+/* { dg-options "-march=athlon64 -mno-mmx -mno-define-all-isa-intrin" } */
 
 #if defined(__MMX__) || defined(__3dNOW__) || defined(__3dNOW_A__)
 #error
Index: gcc/testsuite/gcc.target/i386/funcspec-8.c
===================================================================
--- gcc/testsuite/gcc.target/i386/funcspec-8.c	(revision 163086)
+++ gcc/testsuite/gcc.target/i386/funcspec-8.c	(working copy)
@@ -2,7 +2,7 @@
    functions in functions with the appropriate function specific options.  */
 /* { dg-do compile } */
 /* { dg-skip-if "" { i?86-*-* x86_64-*-* } { "-march=*" } { "-march=k8" } } */
-/* { dg-options "-O2 -march=k8 -mfpmath=sse" } */
+/* { dg-options "-O2 -march=k8 -mfpmath=sse -mno-define-all-isa-intrin" } */
 
 typedef float     __m128  __attribute__ ((__vector_size__ (16), __may_alias__));
 typedef double    __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
Index: gcc/testsuite/gcc.target/i386/3dnow-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/3dnow-1.c	(revision 163086)
+++ gcc/testsuite/gcc.target/i386/3dnow-1.c	(working copy)
@@ -1,6 +1,6 @@
 /* { dg-do assemble } */
 /* { dg-require-effective-target 3dnow } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -m3dnow" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -m3dnow -mno-define-all-isa-intrin" } */
 
 /* Test that the intrinsics compile with optimization.  All of them are
    defined as inline functions in mmintrin.h that reference the proper
Index: gcc/testsuite/gcc.target/i386/avx-2.c
===================================================================
--- gcc/testsuite/gcc.target/i386/avx-2.c	(revision 163086)
+++ gcc/testsuite/gcc.target/i386/avx-2.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -msse4a -maes -mpclmul" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -msse4a -maes -mpclmul -mno-define-all-isa-intrin" } */
 
 #include <mm_malloc.h>
 
Index: gcc/testsuite/gcc.target/i386/isa-11.c
===================================================================
--- gcc/testsuite/gcc.target/i386/isa-11.c	(revision 163086)
+++ gcc/testsuite/gcc.target/i386/isa-11.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-march=x86-64 -mfma4 -mno-ssse3" } */
+/* { dg-options "-march=x86-64 -mfma4 -mno-ssse3 -mno-define-all-isa-intrin" } */
 
 extern void abort (void);
 
Index: gcc/testsuite/gcc.target/i386/isa-13.c
===================================================================
--- gcc/testsuite/gcc.target/i386/isa-13.c	(revision 163086)
+++ gcc/testsuite/gcc.target/i386/isa-13.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-march=x86-64 -mfma4 -mno-sse2" } */
+/* { dg-options "-march=x86-64 -mfma4 -mno-sse2 -mno-define-all-isa-intrin" } */
 
 extern void abort (void);
 
Index: gcc/testsuite/gcc.target/i386/isa-4.c
===================================================================
--- gcc/testsuite/gcc.target/i386/isa-4.c	(revision 163086)
+++ gcc/testsuite/gcc.target/i386/isa-4.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-march=core2 -mfma4 -mno-sse4" } */
+/* { dg-options "-march=core2 -mfma4 -mno-sse4 -mno-define-all-isa-intrin" } */
 
 extern void abort (void);
 
Index: gcc/testsuite/gcc.target/i386/isa-6.c
===================================================================
--- gcc/testsuite/gcc.target/i386/isa-6.c	(revision 163086)
+++ gcc/testsuite/gcc.target/i386/isa-6.c	(working copy)
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-skip-if "" { i?86-*-* x86_64-*-* } { "-march=*" } { "-march=amdfam10" } } */
-/* { dg-options "-march=amdfam10 -mno-sse4" } */
+/* { dg-options "-march=amdfam10 -mno-sse4 -mno-define-all-isa-intrin" } */
 
 extern void abort (void);
 
Index: gcc/testsuite/gcc.target/i386/isa-8.c
===================================================================
--- gcc/testsuite/gcc.target/i386/isa-8.c	(revision 163086)
+++ gcc/testsuite/gcc.target/i386/isa-8.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-march=amdfam10 -mfma4 -mno-sse4a" } */
+/* { dg-options "-march=amdfam10 -mfma4 -mno-sse4a -mno-define-all-isa-intrin" } */
 
 extern void abort (void);
 
Index: gcc/testsuite/gcc.target/i386/defines-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/defines-1.c	(revision 163086)
+++ gcc/testsuite/gcc.target/i386/defines-1.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=nocona -mno-sse" } */
+/* { dg-options "-march=nocona -mno-sse -mno-define-all-isa-intrin" } */
 
 #if defined(__SSE__) || defined(__SSE2__) || defined(__SSE3__)
 #error
Index: gcc/testsuite/gcc.target/i386/avx-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/avx-1.c	(revision 163086)
+++ gcc/testsuite/gcc.target/i386/avx-1.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx  -maes -mpclmul" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx  -maes -mpclmul -mno-define-all-isa-intrin" } */
 
 #include <mm_malloc.h>
 
Index: gcc/testsuite/gcc.target/i386/funcspec-9.c
===================================================================
--- gcc/testsuite/gcc.target/i386/funcspec-9.c	(revision 163086)
+++ gcc/testsuite/gcc.target/i386/funcspec-9.c	(working copy)
@@ -1,6 +1,6 @@
 /* Test whether using target specific options, we can generate FMA4 code.  */
 /* { dg-do compile } */
-/* { dg-options "-O2 -march=k8 -mfpmath=sse -msse2" } */
+/* { dg-options "-O2 -march=k8 -mfpmath=sse -msse2 -mno-define-all-isa-intrin" } */
 
 extern void exit (int);
 
Index: gcc/testsuite/gcc.target/i386/isa-10.c
===================================================================
--- gcc/testsuite/gcc.target/i386/isa-10.c	(revision 163086)
+++ gcc/testsuite/gcc.target/i386/isa-10.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-march=x86-64 -mfma4 -mno-sse4" } */
+/* { dg-options "-march=x86-64 -mfma4 -mno-sse4 -mno-define-all-isa-intrin" } */
 
 extern void abort (void);
 
Index: gcc/testsuite/gcc.target/i386/3dnow-2.c
===================================================================
--- gcc/testsuite/gcc.target/i386/3dnow-2.c	(revision 163086)
+++ gcc/testsuite/gcc.target/i386/3dnow-2.c	(working copy)
@@ -1,6 +1,6 @@
 /* { dg-do assemble } */
 /* { dg-require-effective-target 3dnow } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -m3dnow" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -m3dnow -mno-define-all-isa-intrin" } */
 
 /* Test that the intrinsics compile without optimization.  All of them are
    defined as inline functions in mmintrin.h that reference the proper
Index: gcc/testsuite/gcc.target/i386/sse-22.c
===================================================================
--- gcc/testsuite/gcc.target/i386/sse-22.c	(revision 163086)
+++ gcc/testsuite/gcc.target/i386/sse-22.c	(working copy)
@@ -1,6 +1,6 @@
 /* Same as sse-14, except converted to use #pragma GCC option.  */
 /* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -mno-define-all-isa-intrin" } */
 
 #include <mm_malloc.h>
 
Index: gcc/testsuite/gcc.target/x86_64/abi/abi-x86_64.exp
===================================================================
--- gcc/testsuite/gcc.target/x86_64/abi/abi-x86_64.exp	(revision 163086)
+++ gcc/testsuite/gcc.target/x86_64/abi/abi-x86_64.exp	(working copy)
@@ -29,7 +29,7 @@ if { (![istarget x86_64-*-*] && ![istarg
 
 torture-init
 set-torture-options $C_TORTURE_OPTIONS
-set additional_flags "-W -Wall -Wno-abi"
+set additional_flags "-W -Wall -Wno-abi -mno-define-all-isa-intrin"
 
 foreach src [lsort [glob -nocomplain $srcdir/$subdir/test_*.c]] {
     if {[runtest_file_p $runtests $src]} {
Index: gcc/config/i386/i386.opt
===================================================================
--- gcc/config/i386/i386.opt	(revision 163086)
+++ gcc/config/i386/i386.opt	(working copy)
@@ -112,6 +112,11 @@ mcmodel=
 Target RejectNegative Joined Var(ix86_cmodel_string)
 Use given x86-64 code model
 
+mdefine-all-isa-intrin
+Target Report Var(ix86_define_all_isa_intrin) Init(1)
+Enable inline definitions of all ISA specific intrinsics in the header files
+
+
 mfancy-math-387
 Target RejectNegative Report InverseMask(NO_FANCY_MATH_387, USE_FANCY_MATH_387) Save
 Generate sin, cos, sqrt for FPU
Index: gcc/config/i386/i386-c.c
===================================================================
--- gcc/config/i386/i386-c.c	(revision 163086)
+++ gcc/config/i386/i386-c.c	(working copy)
@@ -208,53 +208,56 @@ ix86_target_macros_internal (int isa_fla
       break;
     }
 
-  if (isa_flag & OPTION_MASK_ISA_MMX)
+  #define CHECK_ISA_FLAGS(mask) \
+        (isa_flag & (mask)) || ix86_define_all_isa_intrin
+
+  if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_MMX))
     def_or_undef (parse_in, "__MMX__");
-  if (isa_flag & OPTION_MASK_ISA_3DNOW)
+  if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_3DNOW))
     def_or_undef (parse_in, "__3dNOW__");
-  if (isa_flag & OPTION_MASK_ISA_3DNOW_A)
+  if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_3DNOW_A))
     def_or_undef (parse_in, "__3dNOW_A__");
-  if (isa_flag & OPTION_MASK_ISA_SSE)
+  if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_SSE))
     def_or_undef (parse_in, "__SSE__");
-  if (isa_flag & OPTION_MASK_ISA_SSE2)
+  if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_SSE2))
     def_or_undef (parse_in, "__SSE2__");
-  if (isa_flag & OPTION_MASK_ISA_SSE3)
+  if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_SSE3))
     def_or_undef (parse_in, "__SSE3__");
-  if (isa_flag & OPTION_MASK_ISA_SSSE3)
+  if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_SSSE3))
     def_or_undef (parse_in, "__SSSE3__");
-  if (isa_flag & OPTION_MASK_ISA_SSE4_1)
+  if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_SSE4_1))
     def_or_undef (parse_in, "__SSE4_1__");
-  if (isa_flag & OPTION_MASK_ISA_SSE4_2)
+  if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_SSE4_2))
     def_or_undef (parse_in, "__SSE4_2__");
-  if (isa_flag & OPTION_MASK_ISA_AES)
+  if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_AES))
     def_or_undef (parse_in, "__AES__");
-  if (isa_flag & OPTION_MASK_ISA_PCLMUL)
+  if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_PCLMUL))
     def_or_undef (parse_in, "__PCLMUL__");
-  if (isa_flag & OPTION_MASK_ISA_AVX)
+  if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_AVX))
     def_or_undef (parse_in, "__AVX__");
-  if (isa_flag & OPTION_MASK_ISA_FMA)
+  if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_FMA))
     def_or_undef (parse_in, "__FMA__");
-  if (isa_flag & OPTION_MASK_ISA_SSE4A)
+  if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_SSE4A))
     def_or_undef (parse_in, "__SSE4A__");
-  if (isa_flag & OPTION_MASK_ISA_FMA4)
+  if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_FMA4))
     def_or_undef (parse_in, "__FMA4__");
-  if (isa_flag & OPTION_MASK_ISA_XOP)
+  if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_XOP))
     def_or_undef (parse_in, "__XOP__");
-  if (isa_flag & OPTION_MASK_ISA_LWP)
+  if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_LWP))
     def_or_undef (parse_in, "__LWP__");
-  if (isa_flag & OPTION_MASK_ISA_ABM)
+  if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_ABM))
     def_or_undef (parse_in, "__ABM__");
-  if (isa_flag & OPTION_MASK_ISA_POPCNT)
+  if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_POPCNT))
     def_or_undef (parse_in, "__POPCNT__");
-  if (isa_flag & OPTION_MASK_ISA_FSGSBASE)
+  if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_FSGSBASE))
     def_or_undef (parse_in, "__FSGSBASE__");
-  if (isa_flag & OPTION_MASK_ISA_RDRND)
+  if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_RDRND))
     def_or_undef (parse_in, "__RDRND__");
-  if (isa_flag & OPTION_MASK_ISA_F16C)
+  if (CHECK_ISA_FLAGS (OPTION_MASK_ISA_F16C))
     def_or_undef (parse_in, "__F16C__");
-  if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE))
+  if ((fpmath & FPMATH_SSE) && (CHECK_ISA_FLAGS (OPTION_MASK_ISA_SSE)))
     def_or_undef (parse_in, "__SSE_MATH__");
-  if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2))
+  if ((fpmath & FPMATH_SSE) && (CHECK_ISA_FLAGS (OPTION_MASK_ISA_SSE2)))
     def_or_undef (parse_in, "__SSE2_MATH__");
 }
 
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c	(revision 163086)
+++ gcc/config/i386/i386.c	(working copy)
@@ -22383,6 +22383,7 @@ def_builtin (int mask, const char *name,
 
       mask &= ~OPTION_MASK_ISA_64BIT;
       if (mask == 0
+          || ix86_define_all_isa_intrin
 	  || (mask & ix86_isa_flags) != 0
 	  || (lang_hooks.builtin_function
 	      == lang_hooks.builtin_function_ext_scope))
@@ -25181,14 +25182,17 @@ ix86_expand_builtin (tree exp, rtx targe
   rtx op0, op1, op2, pat;
   enum machine_mode mode0, mode1, mode2;
   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+  int builtin_isa_flags = ix86_builtins_isa[fcode].isa;
+  if (builtin_isa_flags != OPTION_MASK_ISA_64BIT)
+    builtin_isa_flags &= ~OPTION_MASK_ISA_64BIT;
 
   /* Determine whether the builtin function is available under the current ISA.
      Originally the builtin was not created if it wasn't applicable to the
      current ISA based on the command line switches.  With function specific
      options, we need to check in the context of the function making the call
      whether it is supported.  */
-  if (ix86_builtins_isa[fcode].isa
-      && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
+  if (builtin_isa_flags
+      && !(builtin_isa_flags & ix86_isa_flags))
     {
       char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
 				       NULL, NULL, false);

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]