This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] (3/3) Enable use of libgcc-math vectorized functions in the x86_64 backend [2nd try]


This implements the builtin_vectorized_function target hook for x86_64
and uses it to allow vectorization of all the routines found in 
libgcc-math.  It also enables linking against libgcc-math if
-ftree-vectorize is specified, with --as-needed if that is supported.

Bootstrapped and tested on x86_64-unknown-linux-gnu (all 3 patches).

Ok for mainline?

I'd also like to ping patch (2/3), which is unchanged in this 2nd try; it
adds support for vectorizing calls to builtin functions to the vectorizer:

[PATCH] (2/3) Add vectorization of builtin functions
http://gcc.gnu.org/ml/gcc-patches/2006-11/msg01144.html

Patch (2/3) is independent of whether libgcc-math goes in or not: with it
alone we can already easily vectorize sqrt, for example.

Thanks,
Richard.


2006-11-16  Richard Guenther  <rguenther@suse.de>
	Zdenek Dvorak <dvorakz@suse.cz>

	* config/i386/i386.c (TARGET_BUILTIN_VECTORIZED_FUNCTION): Define.
	(ix86_builtin_vectorized_function): New function implementing the
	builtin_vectorized_function target hook.
	* config/i386/linux64.h (LIB_SPEC): Copy from config/linux.h.
	Link with libgcc-math as needed, if building with -ftree-vectorize.

Index: config/i386/i386.c
===================================================================
*** config/i386/i386.c	(revision 119056)
--- config/i386/i386.c	(working copy)
*************** static bool ix86_pass_by_reference (CUMU
*** 1275,1280 ****
--- 1275,1281 ----
  				    tree, bool);
  static void ix86_init_builtins (void);
  static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+ static tree ix86_builtin_vectorized_function (enum built_in_function, tree);
  static const char *ix86_mangle_fundamental_type (tree);
  static tree ix86_stack_protect_fail (void);
  static rtx ix86_internal_arg_pointer (void);
*************** static section *x86_64_elf_select_sectio
*** 1339,1344 ****
--- 1340,1347 ----
  #define TARGET_INIT_BUILTINS ix86_init_builtins
  #undef TARGET_EXPAND_BUILTIN
  #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
+ #undef TARGET_BUILTIN_VECTORIZED_FUNCTION
+ #define TARGET_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
  
  #undef TARGET_ASM_FUNCTION_EPILOGUE
  #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
*************** ix86_expand_builtin (tree exp, rtx targe
*** 16866,16871 ****
--- 16869,16944 ----
    gcc_unreachable ();
  }
  
+ /* Return a new public, external FUNCTION_DECL named "__vr" + "d2"/"s4" + "_"
+    + <tail of FN's builtin name>, taking and returning vector type TYPE, or
+    NULL_TREE if the target, the flags, TYPE or FN is not handled below.  */
+ 
+ static tree
+ ix86_builtin_vectorized_function (enum built_in_function fn, tree type)
+ {
+   char name[20] = "__vr.._";  /* The ".." is overwritten with "d2" or "s4".  */
+   tree fntype, new_fndecl, args;
+   unsigned arity;  /* Length of FN's DECL_ARGUMENTS chain.  */
+   const char *bname;
+ 
+   if (!TARGET_64BIT
+       || !flag_unsafe_math_optimizations)
+     return NULL_TREE;
+ 
+   /* Accept only float vector types that are 2 x DFmode or 4 x SFmode.  */
+   if (!VECTOR_FLOAT_TYPE_P (type)
+       || !((TYPE_MODE (TREE_TYPE (type)) == DFmode
+ 	    &&  TYPE_VECTOR_SUBPARTS (type) == 2)
+ 	   || (TYPE_MODE (TREE_TYPE (type)) == SFmode
+ 	       &&  TYPE_VECTOR_SUBPARTS (type) == 4)))
+     return NULL_TREE;
+ 
+   switch (fn)  /* NOTE(review): infix is chosen from FN alone; FN is never checked against TYPE's element mode -- confirm callers guarantee they agree.  */
+     {
+     case BUILT_IN_SIN:
+     case BUILT_IN_COS:
+     case BUILT_IN_EXP:
+     case BUILT_IN_LOG:
+     case BUILT_IN_LOG2:
+     case BUILT_IN_LOG10:
+       name[4] = 'd';
+       name[5] = '2';  /* Prefix is now "__vrd2_".  */
+       break;
+     case BUILT_IN_SINF:
+     case BUILT_IN_COSF:
+     case BUILT_IN_EXPF:
+     case BUILT_IN_POWF:  /* NOTE(review): there is no BUILT_IN_POW case in the group above -- confirm this is intended.  */
+     case BUILT_IN_LOGF:
+     case BUILT_IN_LOG2F:
+     case BUILT_IN_LOG10F:
+       name[4] = 's';
+       name[5] = '4';  /* Prefix is now "__vrs4_".  */
+       break;
+      default:
+       return NULL_TREE;  /* Any other builtin has no vectorized variant here.  */
+     }
+ 
+   bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
+   sprintf (name + 7, "%s", bname+10);  /* Append BNAME past its first 10 chars, presumably the "__builtin_" prefix -- TODO confirm.  */
+ 
+   arity = 0;
+   for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
+        args = TREE_CHAIN (args))
+     arity++;
+ 
+   if (arity == 1)
+     fntype = build_function_type_list (type, type, NULL);
+   else  /* Any other count, including 0, gets the two-argument signature.  */
+     fntype = build_function_type_list (type, type, type, NULL);
+ 
+   new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
+   TREE_PUBLIC (new_fndecl) = 1;
+   DECL_EXTERNAL (new_fndecl) = 1;
+   DECL_IS_NOVOPS (new_fndecl) = 1;
+ 
+   return new_fndecl;
+ }
+ 
  /* Store OPERAND to the memory after reload is completed.  This means
     that we can't easily use assign_stack_local.  */
  rtx
Index: config/i386/linux64.h
===================================================================
*** config/i386/linux64.h	(revision 119056)
--- config/i386/linux64.h	(working copy)
*************** Boston, MA 02110-1301, USA.  */
*** 74,79 ****
--- 74,94 ----
        %{" SPEC_64 ":%{!dynamic-linker:-dynamic-linker " LINUX_DYNAMIC_LINKER64 "}}} \
      %{static:-static}}"
  
+ #undef  LIB_SPEC  /* Redefined below to add -lgcc-math under -ftree-vectorize.  */
+ #ifdef HAVE_LD_AS_NEEDED  /* Bracket -lgcc-math with --as-needed/--no-as-needed.  */
+ #define LIB_SPEC \
+   "%{pthread:-lpthread} \
+    %{shared:-lc} \
+    %{!shared:%{mieee-fp:-lieee} %{profile:-lc_p}%{!profile:-lc}} \
+    %{ftree-vectorize:--as-needed -lgcc-math --no-as-needed}"
+ #else  /* !HAVE_LD_AS_NEEDED: plain -lgcc-math whenever -ftree-vectorize is given.  */
+ #define LIB_SPEC \
+   "%{pthread:-lpthread} \
+    %{shared:-lc} \
+    %{!shared:%{mieee-fp:-lieee} %{profile:-lc_p}%{!profile:-lc}} \
+    %{ftree-vectorize:-lgcc-math}"
+ #endif  /* HAVE_LD_AS_NEEDED */
+ 
  /* Similar to standard Linux, but adding -ffast-math support.  */
  #undef  ENDFILE_SPEC
  #define ENDFILE_SPEC \


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]