This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [RFC] Enable SSE2 math intrinsics for fpmath=sse and fast-math
- From: Jan Hubicka <hubicka at ucw dot cz>
- To: Richard Guenther <rguenth at tat dot physik dot uni-tuebingen dot de>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Wed, 8 Jun 2005 19:42:22 +0200
- Subject: Re: [RFC] Enable SSE2 math intrinsics for fpmath=sse and fast-math
- References: <42A6EF39.4080209@tat.physik.uni-tuebingen.de>
> On top of the sseregparm patch this adds support for emitting calls
> to Intel libimf.a SSE2 intrinsics. They are enabled if the target
> supports SSE2 and unsafe math optimizations are on.
>
> Of course, this helps you only if you have Intel libimf.a around.
> And it raises the question of where to put our own SSE2 intrinsics.
> There are four options:
> 1. in libgcc
I would prefer this choice, so we have no compatibility issues across
non-glibc targets and we can control them closely.... We already have
support for dropping target-specific functions into libgcc while I
realize that those are probably somewhat more complex than what the average
target does at the moment, but probably not the worst example either ;)
Honza
> 2. in glibc libm
> 3. in both
> 4. in to-be-created gcc libm
>
> Where for option 3 we could play linker tricks to have simple
> SSE ABI wrappers to libm in libgcc and have them overridden by
> true implementations in libm, if available (does this work?).
>
> I'm open for suggestions, though in the long term I'd like 4 most.
>
> I'm also curious about benchmarks compared to Intel on, say, PovRay.
>
> Thanks,
> Richard.
> 2005-06-08 Richard Guenther <rguenth@gcc.gnu.org>
>
> * c-common.c (builtin_function_2): Add *idecl argument
> to return 2nd created decl.
> (c_common_nodes_and_builtins): Store both, builtin and
> implicit decl.
> * config/i386/i386.c (ix86_init_builtins): With SSE2
> support and fpmath=sse change (some) builtin math
> functions to be sseregparm and redirect their assembler_name
> to an appropriate library routine.
>
>
> Index: c-common.c
> ===================================================================
> RCS file: /cvs/gcc/gcc/gcc/c-common.c,v
> retrieving revision 1.634
> diff -c -3 -p -r1.634 c-common.c
> *** c-common.c 6 Jun 2005 19:31:24 -0000 1.634
> --- c-common.c 8 Jun 2005 12:34:55 -0000
> *************** static tree builtin_function_2 (const ch
> *** 2499,2505 ****
> enum built_in_function function_code,
> enum built_in_class cl, int library_name_p,
> bool nonansi_p,
> ! tree attrs);
>
> /* Make a variant type in the proper way for C/C++, propagating qualifiers
> down to the element type of an array. */
> --- 2502,2508 ----
> enum built_in_function function_code,
> enum built_in_class cl, int library_name_p,
> bool nonansi_p,
> ! tree attrs, tree *idecl);
>
> /* Make a variant type in the proper way for C/C++, propagating qualifiers
> down to the element type of an array. */
> *************** c_common_nodes_and_builtins (void)
> *** 3236,3249 ****
> NONANSI_P, ATTRS, IMPLICIT, COND) \
> if (NAME && COND) \
> { \
> ! tree decl; \
> \
> gcc_assert ((!BOTH_P && !FALLBACK_P) \
> || !strncmp (NAME, "__builtin_", \
> strlen ("__builtin_"))); \
> \
> if (!BOTH_P) \
> ! decl = lang_hooks.builtin_function (NAME, builtin_types[TYPE], \
> ENUM, \
> CLASS, \
> (FALLBACK_P \
> --- 3239,3253 ----
> NONANSI_P, ATTRS, IMPLICIT, COND) \
> if (NAME && COND) \
> { \
> ! tree decl, idecl; \
> \
> gcc_assert ((!BOTH_P && !FALLBACK_P) \
> || !strncmp (NAME, "__builtin_", \
> strlen ("__builtin_"))); \
> \
> if (!BOTH_P) \
> ! idecl = decl \
> ! = lang_hooks.builtin_function (NAME, builtin_types[TYPE], \
> ENUM, \
> CLASS, \
> (FALLBACK_P \
> *************** c_common_nodes_and_builtins (void)
> *** 3259,3269 ****
> CLASS, \
> FALLBACK_P, \
> NONANSI_P, \
> ! built_in_attributes[(int) ATTRS]); \
> \
> built_in_decls[(int) ENUM] = decl; \
> if (IMPLICIT) \
> ! implicit_built_in_decls[(int) ENUM] = decl; \
> }
> #include "builtins.def"
> #undef DEF_BUILTIN
> --- 3263,3274 ----
> CLASS, \
> FALLBACK_P, \
> NONANSI_P, \
> ! built_in_attributes[(int) ATTRS], \
> ! &idecl); \
> \
> built_in_decls[(int) ENUM] = decl; \
> if (IMPLICIT) \
> ! implicit_built_in_decls[(int) ENUM] = idecl; \
> }
> #include "builtins.def"
> #undef DEF_BUILTIN
> *************** builtin_function_2 (const char *builtin_
> *** 3374,3380 ****
> tree builtin_type, tree type,
> enum built_in_function function_code,
> enum built_in_class cl, int library_name_p,
> ! bool nonansi_p, tree attrs)
> {
> tree bdecl = NULL_TREE;
> tree decl = NULL_TREE;
> --- 3379,3385 ----
> tree builtin_type, tree type,
> enum built_in_function function_code,
> enum built_in_class cl, int library_name_p,
> ! bool nonansi_p, tree attrs, tree *idecl)
> {
> tree bdecl = NULL_TREE;
> tree decl = NULL_TREE;
> *************** builtin_function_2 (const char *builtin_
> *** 3389,3394 ****
> --- 3394,3400 ----
> decl = lang_hooks.builtin_function (name, type, function_code, cl,
> NULL, attrs);
>
> + *idecl = decl;
> return (bdecl != 0 ? bdecl : decl);
> }
>
> Index: config/i386/i386.c
> ===================================================================
> RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
> retrieving revision 1.826
> diff -c -3 -p -r1.826 i386.c
> *** config/i386/i386.c 8 Jun 2005 05:05:18 -0000 1.826
> --- config/i386/i386.c 8 Jun 2005 13:08:39 -0000
> *************** ix86_init_builtins (void)
> *** 13620,13625 ****
> --- 13681,13735 ----
> {
> if (TARGET_MMX)
> ix86_init_mmx_sse_builtins ();
> +
> + if (flag_unsafe_math_optimizations
> + && ix86_fpmath == FPMATH_SSE)
> + {
> + /* Patch some builtins with sseregparm attribute and
> + redirect their assembler_name. Note that we need
> + to change assembler_name for both __builtin_NAME
> + and NAME, while the (shared) type needs to be fixed
> + only once. */
> + #define PATCH_SSE_BUILTIN(fncode, symbol) \
> + { \
> + tree sym = get_identifier (symbol); \
> + tree fn = built_in_decls[fncode]; \
> + TYPE_ATTRIBUTES (TREE_TYPE (fn)) \
> + = tree_cons (get_identifier ("sseregparm"), \
> + NULL_TREE, \
> + TYPE_ATTRIBUTES (TREE_TYPE (fn))); \
> + SET_DECL_ASSEMBLER_NAME (fn, sym); \
> + fn = implicit_built_in_decls[fncode]; \
> + if (fn) \
> + SET_DECL_ASSEMBLER_NAME (fn, sym); \
> + }
> +
> + PATCH_SSE_BUILTIN (BUILT_IN_ACOS, "__libm_sse2_acos")
> + PATCH_SSE_BUILTIN (BUILT_IN_ACOSF, "__libm_sse2_acosf")
> + PATCH_SSE_BUILTIN (BUILT_IN_ASIN, "__libm_sse2_asin")
> + PATCH_SSE_BUILTIN (BUILT_IN_ASINF, "__libm_sse2_asinf")
> + PATCH_SSE_BUILTIN (BUILT_IN_ATAN2, "__libm_sse2_atan2")
> + PATCH_SSE_BUILTIN (BUILT_IN_ATAN, "__libm_sse2_atan")
> + PATCH_SSE_BUILTIN (BUILT_IN_ATANF, "__libm_sse2_atanf")
> + PATCH_SSE_BUILTIN (BUILT_IN_COS, "__libm_sse2_cos")
> + PATCH_SSE_BUILTIN (BUILT_IN_COSF, "__libm_sse2_cosf")
> + PATCH_SSE_BUILTIN (BUILT_IN_EXP, "__libm_sse2_exp")
> + PATCH_SSE_BUILTIN (BUILT_IN_EXPF, "__libm_sse2_expf")
> + PATCH_SSE_BUILTIN (BUILT_IN_LOG10, "__libm_sse2_log10")
> + PATCH_SSE_BUILTIN (BUILT_IN_LOG10F, "__libm_sse2_log10f")
> + PATCH_SSE_BUILTIN (BUILT_IN_LOG, "__libm_sse2_log")
> + PATCH_SSE_BUILTIN (BUILT_IN_LOGF, "__libm_sse2_logf")
> + PATCH_SSE_BUILTIN (BUILT_IN_POW, "__libm_sse2_pow")
> + PATCH_SSE_BUILTIN (BUILT_IN_POWF, "__libm_sse2_powf")
> + PATCH_SSE_BUILTIN (BUILT_IN_SIN, "__libm_sse2_sin")
> + PATCH_SSE_BUILTIN (BUILT_IN_SINCOS, "__libm_sse2_sincos")
> + PATCH_SSE_BUILTIN (BUILT_IN_SINCOSF, "__libm_sse2_sincosf")
> + PATCH_SSE_BUILTIN (BUILT_IN_SINF, "__libm_sse2_sinf")
> + PATCH_SSE_BUILTIN (BUILT_IN_TAN, "__libm_sse2_tan")
> + PATCH_SSE_BUILTIN (BUILT_IN_TANF, "__libm_sse2_tanf")
> +
> + #undef PATCH_SSE_BUILTIN
> + }
> }
>
> /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX