[PATCH] (3/3) Enable use of libgcc-math vectorized functions in the x86_64 backend [2nd try]
Richard Guenther
rguenther@suse.de
Tue Nov 21 16:03:00 GMT 2006
This implements the builtin_vectorized_function target hook for x86_64
and uses it to allow vectorization of all the routines found in
libgcc-math. It also enables linking against libgcc-math if
-ftree-vectorize is specified, with --as-needed if that is supported.
Bootstrapped and tested on x86_64-unknown-linux-gnu (all 3 patches).
Ok for mainline?
I'd also like to ping patch (2/3), which is unchanged in this 2nd try and
adds support for vectorizing function calls to the vectorizer:
[PATCH] (2/3) Add vectorization of builtin functions
http://gcc.gnu.org/ml/gcc-patches/2006-11/msg01144.html
Patch (2/3) is independent of whether libgcc-math goes in or not, since with
it we can easily vectorize sqrt, for example.
Thanks,
Richard.
2006-11-16 Richard Guenther <rguenther@suse.de>
Zdenek Dvorak <dvorakz@suse.cz>
* config/i386/i386.c (TARGET_BUILTIN_VECTORIZED_FUNCTION): Define.
(ix86_builtin_vectorized_function): Target hook for
vectorized_function.
* config/i386/linux64.h (LIB_SPEC): Copy from config/linux.h.
Link with libgcc-math as needed, if building with -ftree-vectorize.
Index: config/i386/i386.c
===================================================================
*** config/i386/i386.c (revision 119056)
--- config/i386/i386.c (working copy)
*************** static bool ix86_pass_by_reference (CUMU
*** 1275,1280 ****
--- 1275,1281 ----
tree, bool);
static void ix86_init_builtins (void);
static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+ static tree ix86_builtin_vectorized_function (enum built_in_function, tree);
static const char *ix86_mangle_fundamental_type (tree);
static tree ix86_stack_protect_fail (void);
static rtx ix86_internal_arg_pointer (void);
*************** static section *x86_64_elf_select_sectio
*** 1339,1344 ****
--- 1340,1347 ----
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
+ #undef TARGET_BUILTIN_VECTORIZED_FUNCTION
+ #define TARGET_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
*************** ix86_expand_builtin (tree exp, rtx targe
*** 16866,16871 ****
--- 16869,16944 ----
gcc_unreachable ();
}
+ /* Returns builtin for a vectorized version of the builtin function
+ with builtin function code FN in the vector type TYPE, or NULL_TREE
+ if it is not available.
+
+ NULL_TREE is returned unless TARGET_64BIT and
+ flag_unsafe_math_optimizations are both set, TYPE satisfies
+ VECTOR_FLOAT_TYPE_P with either two DFmode or four SFmode elements,
+ and FN is one of the codes listed in the switch below.
+
+ Otherwise a fresh FUNCTION_DECL is built on every call.  Its name is
+ "__vrd2_" or "__vrs4_" followed by the name of the scalar builtin with
+ its first ten characters dropped (presumably the "__builtin_" prefix),
+ and its return value and its one or two arguments all have type TYPE.
+ The decl is marked TREE_PUBLIC, DECL_EXTERNAL and DECL_IS_NOVOPS.
+
+ NOTE(review): the "d2"/"s4" tag is chosen from FN alone; nothing here
+ checks that the element mode of TYPE matches it, so the caller is
+ presumably expected to pass a matching TYPE -- confirm. */
+
+ static tree
+ ix86_builtin_vectorized_function (enum built_in_function fn, tree type)
+ {
+ char name[20] = "__vr.._"; /* Template; name[4] and name[5] are set below. */
+ tree fntype, new_fndecl, args;
+ unsigned arity;
+ const char *bname;
+
+ if (!TARGET_64BIT
+ || !flag_unsafe_math_optimizations)
+ return NULL_TREE;
+
+ /* We can handle canonical vector mode argument builtins only. */
+ if (!VECTOR_FLOAT_TYPE_P (type)
+ || !((TYPE_MODE (TREE_TYPE (type)) == DFmode
+ && TYPE_VECTOR_SUBPARTS (type) == 2)
+ || (TYPE_MODE (TREE_TYPE (type)) == SFmode
+ && TYPE_VECTOR_SUBPARTS (type) == 4)))
+ return NULL_TREE;
+
+ switch (fn)
+ {
+ case BUILT_IN_SIN:
+ case BUILT_IN_COS:
+ case BUILT_IN_EXP:
+ case BUILT_IN_LOG:
+ case BUILT_IN_LOG2:
+ case BUILT_IN_LOG10:
+ name[4] = 'd'; /* Name becomes "__vrd2_...". */
+ name[5] = '2';
+ break;
+ case BUILT_IN_SINF:
+ case BUILT_IN_COSF:
+ case BUILT_IN_EXPF:
+ case BUILT_IN_POWF: /* NOTE(review): there is no BUILT_IN_POW case above. */
+ case BUILT_IN_LOGF:
+ case BUILT_IN_LOG2F:
+ case BUILT_IN_LOG10F:
+ name[4] = 's'; /* Name becomes "__vrs4_...". */
+ name[5] = '4';
+ break;
+ default:
+ return NULL_TREE;
+ }
+
+ bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
+ sprintf (name + 7, "%s", bname+10); /* Append BNAME minus its first 10 chars. */
+
+ arity = 0; /* Count the entries on the builtin's DECL_ARGUMENTS chain. */
+ for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
+ args = TREE_CHAIN (args))
+ arity++; /* NOTE(review): if this chain is empty, arity stays 0 and the
+ two-argument type below is used -- confirm DECL_ARGUMENTS is
+ populated for these decls. */
+
+ if (arity == 1)
+ fntype = build_function_type_list (type, type, NULL);
+ else /* Any arity other than 1 gets the two-argument signature. */
+ fntype = build_function_type_list (type, type, type, NULL);
+
+ new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
+ TREE_PUBLIC (new_fndecl) = 1;
+ DECL_EXTERNAL (new_fndecl) = 1;
+ DECL_IS_NOVOPS (new_fndecl) = 1;
+
+ return new_fndecl;
+ }
+
/* Store OPERAND to the memory after reload is completed. This means
that we can't easily use assign_stack_local. */
rtx
Index: config/i386/linux64.h
===================================================================
*** config/i386/linux64.h (revision 119056)
--- config/i386/linux64.h (working copy)
*************** Boston, MA 02110-1301, USA. */
*** 74,79 ****
--- 74,94 ----
%{" SPEC_64 ":%{!dynamic-linker:-dynamic-linker " LINUX_DYNAMIC_LINKER64 "}}} \
%{static:-static}}"
+ #undef LIB_SPEC
+ #ifdef HAVE_LD_AS_NEEDED /* Wrap -lgcc-math in --as-needed/--no-as-needed. */
+ #define LIB_SPEC \
+ "%{pthread:-lpthread} \
+ %{shared:-lc} \
+ %{!shared:%{mieee-fp:-lieee} %{profile:-lc_p}%{!profile:-lc}} \
+ %{ftree-vectorize:--as-needed -lgcc-math --no-as-needed}"
+ #else /* !HAVE_LD_AS_NEEDED: pass plain -lgcc-math under -ftree-vectorize. */
+ #define LIB_SPEC \
+ "%{pthread:-lpthread} \
+ %{shared:-lc} \
+ %{!shared:%{mieee-fp:-lieee} %{profile:-lc_p}%{!profile:-lc}} \
+ %{ftree-vectorize:-lgcc-math}"
+ #endif /* HAVE_LD_AS_NEEDED */
+
/* Similar to standard Linux, but adding -ffast-math support. */
#undef ENDFILE_SPEC
#define ENDFILE_SPEC \
More information about the Gcc-patches
mailing list