[PATCH] (2/3) Add vectorization of builtin functions

Richard Guenther rguenther@suse.de
Thu Nov 16 12:50:00 GMT 2006


This adds vectorization of builtin functions to the vectorizer.  It does
so by adding the target hook

  tree builtin_vectorized_function (enum built_in_function fn, tree type)

which is supposed to return a FUNCTION_DECL for a vectorized variant of
the builtin function FN for the vector type TYPE.

Bootstrapped and regtested on x86_64-unknown-linux-gnu.

Ok for mainline?

Thanks,
Richard.

:ADDPATCH vectorizer:

2006-11-16  Richard Guenther  <rguenther@suse.de>
	Zdenek Dvorak <dvorakz@suse.cz>

	* target.h (struct gcc_target): Add builtin_vectorized_function
	target hook.
	* target-def.h (TARGET_VECTORIZE): Likewise.
	* targhooks.h (default_builtin_vectorized_function): Declare.
	* targhooks.c (default_builtin_vectorized_function): Define.
	* tree-vectorizer.h (stmt_vec_info_type): Add call_vec_info_type.
	(vectorizable_call): Declare.
	* tree-vect-analyze.c (vect_analyze_operations): Call
	vectorizable_call.
	* tree-vect-transform.c (vectorizable_function): New static function.
	(build_vectorized_function_call): Likewise.
	(vectorizable_call): New function.
	(vect_transform_stmt): Handle vectorizable calls.  Rename is_store
	to remove_stmt.
	(vect_transform_loop): Rename is_store to remove_stmt.

Index: targhooks.c
===================================================================
*** targhooks.c	(revision 118884)
--- targhooks.c	(working copy)
*************** default_invalid_within_doloop (rtx insn)
*** 319,324 ****
--- 319,333 ----
    return NULL;
  }
  
+ /* Default builtin_vectorized_function hook: no vectorized variants.  */
+ 
+ tree
+ default_builtin_vectorized_function (enum built_in_function fn ATTRIBUTE_UNUSED,
+ 				     tree type ATTRIBUTE_UNUSED)
+ {
+   return NULL_TREE;
+ }
+ 
  bool
  hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false (
  	CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
Index: targhooks.h
===================================================================
*** targhooks.h	(revision 118884)
--- targhooks.h	(working copy)
*************** extern const char * default_invalid_with
*** 57,62 ****
--- 57,64 ----
  
  extern bool default_narrow_bitfield (void);
  
+ extern tree default_builtin_vectorized_function (enum built_in_function, tree);
+ 
  /* These are here, and not in hooks.[ch], because not all users of
     hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS.  */
  
Index: target.h
===================================================================
*** target.h	(revision 118884)
--- target.h	(working copy)
*************** struct gcc_target
*** 370,375 ****
--- 370,379 ----
         function.  */
      tree (* builtin_mask_for_load) (void);
  
+     /* Returns a decl of the builtin that realizes the vectorized version
+        of a builtin function, or NULL_TREE if not available.  */
+     tree (* builtin_vectorized_function) (unsigned, tree);
+ 
      /* Target builtin that implements vector widening multiplication.
         builtin_mul_widen_eve computes the element-by-element products 
         for the even elements, and builtin_mul_widen_odd computes the
Index: tree-vectorizer.h
===================================================================
*** tree-vectorizer.h	(revision 118884)
--- tree-vectorizer.h	(working copy)
*************** enum stmt_vec_info_type {
*** 163,168 ****
--- 163,169 ----
    load_vec_info_type,
    store_vec_info_type,
    op_vec_info_type,
+   call_vec_info_type,
    assignment_vec_info_type,
    condition_vec_info_type,
    reduc_vec_info_type,
*************** extern bool vectorizable_operation (tree
*** 368,373 ****
--- 369,375 ----
  extern bool vectorizable_type_promotion (tree, block_stmt_iterator *, tree *);
  extern bool vectorizable_type_demotion (tree, block_stmt_iterator *, tree *);
  extern bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
+ extern bool vectorizable_call (tree, block_stmt_iterator *, tree *);
  extern bool vectorizable_condition (tree, block_stmt_iterator *, tree *);
  extern bool vectorizable_live_operation (tree, block_stmt_iterator *, tree *);
  extern bool vectorizable_reduction (tree, block_stmt_iterator *, tree *);
Index: tree-vect-analyze.c
===================================================================
*** tree-vect-analyze.c	(revision 118884)
--- tree-vect-analyze.c	(working copy)
*************** vect_analyze_operations (loop_vec_info l
*** 300,305 ****
--- 300,306 ----
  		    || vectorizable_operation (stmt, NULL, NULL)
  		    || vectorizable_assignment (stmt, NULL, NULL)
  		    || vectorizable_load (stmt, NULL, NULL)
+ 		    || vectorizable_call (stmt, NULL, NULL)
  		    || vectorizable_store (stmt, NULL, NULL)
  		    || vectorizable_condition (stmt, NULL, NULL));
  
Index: target-def.h
===================================================================
*** target-def.h	(revision 118884)
--- target-def.h	(working copy)
*************** Foundation, 51 Franklin Street, Fifth Fl
*** 332,344 ****
     TARGET_SCHED_SET_SCHED_FLAGS}
  
  #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD 0
  #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0
  #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0
  
  #define TARGET_VECTORIZE                                                \
!   {TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD,				\
!    TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN,                             \
!    TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD}
  
  #define TARGET_DEFAULT_TARGET_FLAGS 0
  
--- 332,348 ----
     TARGET_SCHED_SET_SCHED_FLAGS}
  
  #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD 0
+ #define TARGET_BUILTIN_VECTORIZED_FUNCTION default_builtin_vectorized_function
  #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0
  #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0
  
  #define TARGET_VECTORIZE                                                \
!   {									\
!     TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD,				\
!     TARGET_BUILTIN_VECTORIZED_FUNCTION,					\
!     TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN,                            \
!     TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD				\
!   }
  
  #define TARGET_DEFAULT_TARGET_FLAGS 0
  
Index: tree-vect-transform.c
===================================================================
*** tree-vect-transform.c	(revision 118884)
--- tree-vect-transform.c	(working copy)
*************** vectorizable_reduction (tree stmt, block
*** 1549,1554 ****
--- 1549,1700 ----
    return true;
  }
  
+ /* Checks if CALL can be vectorized in type VECTYPE.  Returns true only
+    if CALL is a const or novops call to a builtin for which the target
+    provides a vectorized version in VECTYPE, and false otherwise.  */
+ 
+ static bool
+ vectorizable_function (tree call, tree vectype)
+ {
+   tree fndecl = get_callee_fndecl (call);
+ 
+   /* We only handle functions that do not read or clobber memory -- i.e.
+      const or novops ones.  */
+   if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
+     return false;
+ 
+   if (!fndecl
+       || TREE_CODE (fndecl) != FUNCTION_DECL
+       || !DECL_BUILT_IN (fndecl))
+     return false;
+ 
+   if (targetm.vectorize.builtin_vectorized_function (DECL_FUNCTION_CODE (fndecl), vectype))
+     return true;
+ 
+   return false;
+ }
+ 
+ /* Returns an expression that performs a call to vectorized version
+    of FNDECL in type VECTYPE, with the arguments given by ARGS.
+    The target must provide the vectorized builtin; callers are
+    expected to have checked this with vectorizable_function.  */
+ 
+ static tree
+ build_vectorized_function_call (tree fndecl,
+ 				tree vectype, tree args)
+ {
+   tree vfndecl;
+   enum built_in_function code = DECL_FUNCTION_CODE (fndecl);
+ 
+   /* The target specific builtin should be available.  */
+   vfndecl = targetm.vectorize.builtin_vectorized_function (code, vectype);
+   gcc_assert (vfndecl != NULL_TREE);
+ 
+   return build_function_call_expr (vfndecl, args);
+ }
+ 
+ /* Function vectorizable_call.
+ 
+    Check if STMT performs a function call that can be vectorized.
+    If VEC_STMT is NULL, only analyze and record the stmt kind.  Otherwise
+    vectorize the STMT: create a vectorized stmt, put it in VEC_STMT, and
+    insert it at BSI.  Return FALSE if not vectorizable, TRUE otherwise.  */
+ 
+ bool
+ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
+ {
+   tree vec_dest;
+   tree scalar_dest;
+   tree operation;
+   tree op, args, type;
+   tree vec_oprnd, vargs, *pvargs_end;
+   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+   tree fndecl, rhs, new_temp, def, def_stmt;
+   enum vect_def_type dt;
+ 
+   /* Is STMT an assignment of a call result to an SSA name?  */
+   if (TREE_CODE (stmt) != MODIFY_EXPR)
+     return false;
+ 
+   if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
+     return false;
+ 
+   operation = TREE_OPERAND (stmt, 1);
+   if (TREE_CODE (operation) != CALL_EXPR)
+     return false;
+    
+   /* For now, we only vectorize functions if a target specific builtin
+      is available.  TODO -- in some cases, it might be profitable to
+      insert the calls for pieces of the vector, in order to be able
+      to vectorize other operations in the loop.  */
+   if (!vectorizable_function (operation, vectype))
+     {
+       if (vect_print_dump_info (REPORT_DETAILS))
+ 	fprintf (vect_dump, "function is not vectorizable.");
+ 
+       return false;
+     }
+   gcc_assert (!stmt_references_memory_p (stmt));
+ 
+   for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args))
+     {
+       op = TREE_VALUE (args);
+ 
+       if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
+ 	{
+ 	  if (vect_print_dump_info (REPORT_DETAILS))
+ 	    fprintf (vect_dump, "use not simple.");
+ 	  return false;
+ 	}
+     }
+ 
+   if (!vec_stmt) /* transformation not required.  */
+     {
+       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
+       return true;
+     }
+ 
+   /** Transform.  **/
+ 
+   if (vect_print_dump_info (REPORT_DETAILS))
+     fprintf (vect_dump, "transform operation.");
+ 
+   /* Handle def: create the vector variable that receives the result.  */
+   scalar_dest = TREE_OPERAND (stmt, 0);
+   vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ 
+   /* Handle uses: chain the vector defs of the arguments in call order.  */
+   vargs = NULL_TREE;
+   pvargs_end = &vargs;
+   for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args))
+     {
+       op = TREE_VALUE (args);
+       vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
+ 	  
+       *pvargs_end = tree_cons (NULL_TREE, vec_oprnd, NULL_TREE);
+       pvargs_end = &TREE_CHAIN (*pvargs_end);
+     }
+ 
+   fndecl = get_callee_fndecl (operation);
+   rhs = build_vectorized_function_call (fndecl, vectype, vargs);
+   *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, rhs);
+   new_temp = make_ssa_name (vec_dest, *vec_stmt);
+   TREE_OPERAND (*vec_stmt, 0) = new_temp;
+ 
+   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
+ 
+   /* The call in STMT might prevent it from being removed by DCE.  However,
+      we cannot remove STMT here, because of the way the SSA name it defines
+      is mapped to the new definition.  So just replace the rhs of the
+      statement with something harmless (a zero of the scalar type).  */
+   type = TREE_TYPE (scalar_dest);
+   TREE_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
+ 
+   return true;
+ }
+ 
  
  /* Function vectorizable_assignment.
  
*************** vectorizable_condition (tree stmt, block
*** 3011,3017 ****
  bool
  vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
  {
!   bool is_store = false;
    tree vec_stmt = NULL_TREE;
    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
    tree orig_stmt_in_pattern;
--- 3157,3163 ----
  bool
  vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
  {
!   bool remove_stmt = false;
    tree vec_stmt = NULL_TREE;
    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
    tree orig_stmt_in_pattern;
*************** vect_transform_stmt (tree stmt, block_st
*** 3049,3055 ****
        case store_vec_info_type:
  	done = vectorizable_store (stmt, bsi, &vec_stmt);
  	gcc_assert (done);
! 	is_store = true;
  	break;
  
        case condition_vec_info_type:
--- 3195,3205 ----
        case store_vec_info_type:
  	done = vectorizable_store (stmt, bsi, &vec_stmt);
  	gcc_assert (done);
! 	remove_stmt = true;
! 	break;
! 
!       case call_vec_info_type:
! 	done = vectorizable_call (stmt, bsi, &vec_stmt);
  	break;
  
        case condition_vec_info_type:
*************** vect_transform_stmt (tree stmt, block_st
*** 3099,3105 ****
        }
      }
  
!   return is_store; 
  }
  
  
--- 3249,3255 ----
        }
      }
  
!   return remove_stmt; 
  }
  
  
*************** vect_transform_loop (loop_vec_info loop_
*** 3907,3913 ****
  	{
  	  tree stmt = bsi_stmt (si);
  	  stmt_vec_info stmt_info;
! 	  bool is_store;
  
  	  if (vect_print_dump_info (REPORT_DETAILS))
  	    {
--- 4057,4063 ----
  	{
  	  tree stmt = bsi_stmt (si);
  	  stmt_vec_info stmt_info;
! 	  bool remove_stmt;
  
  	  if (vect_print_dump_info (REPORT_DETAILS))
  	    {
*************** vect_transform_loop (loop_vec_info loop_
*** 3932,3939 ****
  	  if (vect_print_dump_info (REPORT_DETAILS))
  	    fprintf (vect_dump, "transform statement.");
  
! 	  is_store = vect_transform_stmt (stmt, &si);
! 	  if (is_store)
  	    {
  	      /* Free the attached stmt_vec_info and remove the stmt.  */
  	      stmt_ann_t ann = stmt_ann (stmt);
--- 4082,4089 ----
  	  if (vect_print_dump_info (REPORT_DETAILS))
  	    fprintf (vect_dump, "transform statement.");
  
! 	  remove_stmt = vect_transform_stmt (stmt, &si);
! 	  if (remove_stmt)
  	    {
  	      /* Free the attached stmt_vec_info and remove the stmt.  */
  	      stmt_ann_t ann = stmt_ann (stmt);



More information about the Gcc-patches mailing list