This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH][PING] Add vectorization of builtin functions


Updated to apply after the latest vect merges.  This adds the necessary
target hook and vectorizer changes to support vectorization of calls to
builtin functions.

Bootstrapped and regtested on x86_64-unknown-linux-gnu.

Ok for mainline?

Thanks,
Richard.

:ADDPATCH middle-end:

2006-11-16  Richard Guenther  <rguenther@suse.de>
	Zdenek Dvorak <dvorakz@suse.cz>

	* target.h (struct gcc_target): Add builtin_vectorized_function
	target hook.
	* target-def.h (TARGET_VECTORIZE): Likewise.
	* targhooks.h (default_builtin_vectorized_function): Declare.
	* targhooks.c (default_builtin_vectorized_function): Define.
	* tree-vectorizer.h (stmt_vec_info_type): Add call_vec_info_type.
	(vectorizable_call): Declare.
	* tree-vect-analyze.c (vect_analyze_operations): Call
	vectorizable_call.
	* tree-vect-transform.c (vectorizable_function): New static function.
	(build_vectorized_function_call): Likewise.
	(vectorizable_call): New function.
	(vect_transform_stmt): Handle vectorizable calls.

Index: targhooks.c
===================================================================
*** targhooks.c	(revision 118884)
--- targhooks.c	(working copy)
*************** default_invalid_within_doloop (rtx insn)
*** 319,324 ****
--- 319,333 ----
    return NULL;
  }
  
+ /* Default hook: no builtin has a vectorized variant; return NULL_TREE.  */
+ 
+ tree
+ default_builtin_vectorized_function (enum built_in_function fn ATTRIBUTE_UNUSED,
+ 				     tree type ATTRIBUTE_UNUSED)
+ {
+   return NULL_TREE;
+ }
+ 
  bool
  hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false (
  	CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
Index: targhooks.h
===================================================================
*** targhooks.h	(revision 118884)
--- targhooks.h	(working copy)
*************** extern const char * default_invalid_with
*** 57,62 ****
--- 57,64 ----
  
  extern bool default_narrow_bitfield (void);
  
+ extern tree default_builtin_vectorized_function (enum built_in_function, tree);
+ 
  /* These are here, and not in hooks.[ch], because not all users of
     hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS.  */
  
Index: target.h
===================================================================
*** target.h	(revision 118884)
--- target.h	(working copy)
*************** struct gcc_target
*** 370,375 ****
--- 370,379 ----
         function.  */
      tree (* builtin_mask_for_load) (void);
  
+     /* Returns the decl of a builtin that implements the vectorized version
+        of the given builtin function, or NULL_TREE if not available.  */
+     tree (* builtin_vectorized_function) (unsigned, tree);
+ 
      /* Target builtin that implements vector widening multiplication.
         builtin_mul_widen_eve computes the element-by-element products 
         for the even elements, and builtin_mul_widen_odd computes the
Index: tree-vectorizer.h
===================================================================
*** tree-vectorizer.h	(revision 118884)
--- tree-vectorizer.h	(working copy)
*************** enum stmt_vec_info_type {
*** 163,168 ****
--- 163,169 ----
    load_vec_info_type,
    store_vec_info_type,
    op_vec_info_type,
+   call_vec_info_type,
    assignment_vec_info_type,
    condition_vec_info_type,
    reduc_vec_info_type,
*************** extern bool vectorizable_operation (tree
*** 368,373 ****
--- 369,375 ----
  extern bool vectorizable_type_promotion (tree, block_stmt_iterator *, tree *);
  extern bool vectorizable_type_demotion (tree, block_stmt_iterator *, tree *);
  extern bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
+ extern bool vectorizable_call (tree, block_stmt_iterator *, tree *);
  extern bool vectorizable_condition (tree, block_stmt_iterator *, tree *);
  extern bool vectorizable_live_operation (tree, block_stmt_iterator *, tree *);
  extern bool vectorizable_reduction (tree, block_stmt_iterator *, tree *);
Index: tree-vect-analyze.c
===================================================================
*** tree-vect-analyze.c	(revision 118884)
--- tree-vect-analyze.c	(working copy)
*************** vect_analyze_operations (loop_vec_info l
*** 300,305 ****
--- 300,306 ----
  		    || vectorizable_operation (stmt, NULL, NULL)
  		    || vectorizable_assignment (stmt, NULL, NULL)
  		    || vectorizable_load (stmt, NULL, NULL)
+ 		    || vectorizable_call (stmt, NULL, NULL)
  		    || vectorizable_store (stmt, NULL, NULL)
  		    || vectorizable_condition (stmt, NULL, NULL));
  
Index: target-def.h
===================================================================
*** target-def.h	(revision 118884)
--- target-def.h	(working copy)
*************** Foundation, 51 Franklin Street, Fifth Fl
*** 332,344 ****
     TARGET_SCHED_SET_SCHED_FLAGS}
  
  #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD 0
  #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0
  #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0
  
  #define TARGET_VECTORIZE                                                \
!   {TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD,				\
!    TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN,                             \
!    TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD}
  
  #define TARGET_DEFAULT_TARGET_FLAGS 0
  
--- 332,348 ----
     TARGET_SCHED_SET_SCHED_FLAGS}
  
  #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD 0
+ #define TARGET_BUILTIN_VECTORIZED_FUNCTION default_builtin_vectorized_function
  #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0
  #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0
  
  #define TARGET_VECTORIZE                                                \
!   {									\
!     TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD,				\
!     TARGET_BUILTIN_VECTORIZED_FUNCTION,					\
!     TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN,                            \
!     TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD				\
!   }
  
  #define TARGET_DEFAULT_TARGET_FLAGS 0
  
Index: tree-vect-transform.c
===================================================================
*** tree-vect-transform.c	(revision 119115)
--- tree-vect-transform.c	(working copy)
*************** vectorizable_reduction (tree stmt, block
*** 1561,1566 ****
--- 1561,1712 ----
    return true;
  }
  
+ /* Checks if CALL can be vectorized in type VECTYPE.  Returns true if CALL
+    is a const or novops call to a builtin function for which the target
+    provides a vectorized variant in VECTYPE, and false otherwise.  */
+ 
+ static bool
+ vectorizable_function (tree call, tree vectype)
+ {
+   tree fndecl = get_callee_fndecl (call);
+ 
+   /* We only handle functions that do not read or clobber memory -- i.e.
+      const or novops ones.  */
+   if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
+     return false;
+ 
+   if (!fndecl
+       || TREE_CODE (fndecl) != FUNCTION_DECL
+       || !DECL_BUILT_IN (fndecl))
+     return false;
+ 
+   if (targetm.vectorize.builtin_vectorized_function (DECL_FUNCTION_CODE (fndecl), vectype))
+     return true;
+ 
+   return false;
+ }
+ 
+ /* Returns an expression that performs a call to the vectorized version
+    of builtin FNDECL in type VECTYPE, with the arguments given by ARGS.
+    The target must provide such a vectorized version; this is asserted,
+    so callers should check vectorizable_function first.  */
+ 
+ static tree
+ build_vectorized_function_call (tree fndecl,
+ 				tree vectype, tree args)
+ {
+   tree vfndecl;
+   enum built_in_function code = DECL_FUNCTION_CODE (fndecl);
+ 
+   /* The target specific builtin should be available.  */
+   vfndecl = targetm.vectorize.builtin_vectorized_function (code, vectype);
+   gcc_assert (vfndecl != NULL_TREE);
+ 
+   return build_function_call_expr (vfndecl, args);
+ }
+ 
+ /* Function vectorizable_call.
+ 
+    Check if STMT is an assignment of a vectorizable call to an SSA name.
+    If VEC_STMT is NULL, only record the stmt as call_vec_info_type.
+    Otherwise also create the vectorized stmt, put it in VEC_STMT and insert
+    it at BSI.  Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
+ 
+ bool
+ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
+ {
+   tree vec_dest;
+   tree scalar_dest;
+   tree operation;
+   tree op, args, type;
+   tree vec_oprnd, vargs, *pvargs_end;
+   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+   tree fndecl, rhs, new_temp, def, def_stmt;
+   enum vect_def_type dt;
+ 
+   /* Is STMT a vectorizable call?  */
+   if (TREE_CODE (stmt) != MODIFY_EXPR)
+     return false;
+ 
+   if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
+     return false;
+ 
+   operation = TREE_OPERAND (stmt, 1);
+   if (TREE_CODE (operation) != CALL_EXPR)
+     return false;
+    
+   /* For now, we only vectorize functions if a target specific builtin
+      is available.  TODO -- in some cases, it might be profitable to
+      insert the calls for pieces of the vector, in order to be able
+      to vectorize other operations in the loop.  */
+   if (!vectorizable_function (operation, vectype))
+     {
+       if (vect_print_dump_info (REPORT_DETAILS))
+ 	fprintf (vect_dump, "function is not vectorizable.");
+ 
+       return false;
+     }
+   gcc_assert (!stmt_references_memory_p (stmt));
+ 
+   for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args))
+     {
+       op = TREE_VALUE (args);
+ 
+       if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
+ 	{
+ 	  if (vect_print_dump_info (REPORT_DETAILS))
+ 	    fprintf (vect_dump, "use not simple.");
+ 	  return false;
+ 	}
+     }
+ 
+   if (!vec_stmt) /* transformation not required.  */
+     {
+       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
+       return true;
+     }
+ 
+   /** Transform.  **/
+ 
+   if (vect_print_dump_info (REPORT_DETAILS))
+     fprintf (vect_dump, "transform operation.");
+ 
+   /* Handle def: create the vector variable that receives the result.  */
+   scalar_dest = TREE_OPERAND (stmt, 0);
+   vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ 
+   /* Handle uses: collect the vector defs of all arguments, in order.  */
+   vargs = NULL_TREE;
+   pvargs_end = &vargs;
+   for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args))
+     {
+       op = TREE_VALUE (args);
+       vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
+ 	  
+       *pvargs_end = tree_cons (NULL_TREE, vec_oprnd, NULL_TREE);
+       pvargs_end = &TREE_CHAIN (*pvargs_end);
+     }
+ 
+   fndecl = get_callee_fndecl (operation);
+   rhs = build_vectorized_function_call (fndecl, vectype, vargs);
+   *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, rhs);
+   new_temp = make_ssa_name (vec_dest, *vec_stmt);
+   TREE_OPERAND (*vec_stmt, 0) = new_temp;
+ 
+   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
+ 
+   /* The call in STMT might prevent it from being removed in dce.  We however
+      cannot remove it here, due to the way the ssa name it defines is mapped
+      to the new definition.  So just replace rhs of the statement with something
+      harmless.  */
+   type = TREE_TYPE (scalar_dest);
+   TREE_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
+ 
+   return true;
+ }
+ 
  
  /* Function vectorizable_assignment.
  
*************** vect_transform_stmt (tree stmt, block_st
*** 3713,3718 ****
--- 3859,3868 ----
  	gcc_assert (done);
  	break;
  
+       case call_vec_info_type:
+ 	done = vectorizable_call (stmt, bsi, &vec_stmt);
+ 	break;
+ 
        default:
  	if (vect_print_dump_info (REPORT_DETAILS))
  	  fprintf (vect_dump, "stmt not supported.");


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]