[PATCH] (2/3) Add vectorization of builtin functions

Dorit Nuzman DORIT@il.ibm.com
Thu Nov 16 16:13:00 GMT 2006


>
> This adds vectorization of builtin functions to the vectorizer.  It does

looks good to me...

thanks,

dorit

> so by adding the target hook
>
>   tree vectorized_function (enum built_in_function fn, tree type)
>
> which is supposed to return a FUNCTION_DECL for a vectorized variant of
> the builtin function FN for the vector type TYPE.
>
> Bootstrapped and regtested on x86_64-unknown-linux-gnu.
>
> Ok for mainline?
>
> Thanks,
> Richard.
>
> :ADDPATCH vectorizer:
>
> 2006-11-16  Richard Guenther  <rguenther@suse.de>
>    Zdenek Dvorak <dvorakz@suse.cz>
>
>    * target.h (struct gcc_target): Add builtin_vectorized_function
>    target hook.
>    * target-def.h (TARGET_VECTORIZE): Likewise.
>    * targhooks.h (default_builtin_vectorized_function): Declare.
>    * targhooks.c (default_builtin_vectorized_function): Define.
>    * tree-vectorizer.h (stmt_vec_info_type): Add call_vec_info_type.
>    (vectorizable_call): Declare.
>    * tree-vect-analyze.c (vect_analyze_operations): Call
>    vectorizable_call.
>    * tree-vect-transform.c (vectorizable_function): New static function.
>    (build_vectorized_function_call): Likewise.
>    (vectorizable_call): New function.
>    (vect_transform_stmt): Handle vectorizable calls.
>    (vect_transform_loop): Rename is_store to remove_stmt.
>
> Index: targhooks.c
> ===================================================================
> *** targhooks.c   (revision 118884)
> --- targhooks.c   (working copy)
> *************** default_invalid_within_doloop (rtx insn)
> *** 319,324 ****
> --- 319,333 ----
>     return NULL;
>   }
>
> + /* Mapping of builtin functions to vectorized variants.  */
> +
> + tree
> + default_builtin_vectorized_function (enum built_in_function fn
> ATTRIBUTE_UNUSED,
> +                  tree type ATTRIBUTE_UNUSED)
> + {
> +   return NULL_TREE;
> + }
> +
>   bool
>   hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false (
>      CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
> Index: targhooks.h
> ===================================================================
> *** targhooks.h   (revision 118884)
> --- targhooks.h   (working copy)
> *************** extern const char * default_invalid_with
> *** 57,62 ****
> --- 57,64 ----
>
>   extern bool default_narrow_bitfield (void);
>
> + extern tree default_builtin_vectorized_function (enum
> built_in_function, tree);
> +
>   /* These are here, and not in hooks.[ch], because not all users of
>      hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS.  */
>
> Index: target.h
> ===================================================================
> *** target.h   (revision 118884)
> --- target.h   (working copy)
> *************** struct gcc_target
> *** 370,375 ****
> --- 370,379 ----
>          function.  */
>       tree (* builtin_mask_for_load) (void);
>
> +     /* Returns the decl of a builtin that implements a vectorized
> +        version of the given builtin function for the given vector type,
> +        or NULL_TREE if no such builtin is available.  */
> +     tree (* builtin_vectorized_function) (unsigned, tree);
> +
>       /* Target builtin that implements vector widening multiplication.
>          builtin_mul_widen_eve computes the element-by-element products
>          for the even elements, and builtin_mul_widen_odd computes the
> Index: tree-vectorizer.h
> ===================================================================
> *** tree-vectorizer.h   (revision 118884)
> --- tree-vectorizer.h   (working copy)
> *************** enum stmt_vec_info_type {
> *** 163,168 ****
> --- 163,169 ----
>     load_vec_info_type,
>     store_vec_info_type,
>     op_vec_info_type,
> +   call_vec_info_type,
>     assignment_vec_info_type,
>     condition_vec_info_type,
>     reduc_vec_info_type,
> *************** extern bool vectorizable_operation (tree
> *** 368,373 ****
> --- 369,375 ----
>   extern bool vectorizable_type_promotion (tree, block_stmt_iterator
> *, tree *);
>   extern bool vectorizable_type_demotion (tree, block_stmt_iterator
> *, tree *);
>   extern bool vectorizable_assignment (tree, block_stmt_iterator *, tree
*);
> + extern bool vectorizable_call (tree, block_stmt_iterator *, tree *);
>   extern bool vectorizable_condition (tree, block_stmt_iterator *, tree
*);
>   extern bool vectorizable_live_operation (tree, block_stmt_iterator
> *, tree *);
>   extern bool vectorizable_reduction (tree, block_stmt_iterator *, tree
*);
> Index: tree-vect-analyze.c
> ===================================================================
> *** tree-vect-analyze.c   (revision 118884)
> --- tree-vect-analyze.c   (working copy)
> *************** vect_analyze_operations (loop_vec_info l
> *** 300,305 ****
> --- 300,306 ----
>             || vectorizable_operation (stmt, NULL, NULL)
>             || vectorizable_assignment (stmt, NULL, NULL)
>             || vectorizable_load (stmt, NULL, NULL)
> +           || vectorizable_call (stmt, NULL, NULL)
>             || vectorizable_store (stmt, NULL, NULL)
>             || vectorizable_condition (stmt, NULL, NULL));
>
> Index: target-def.h
> ===================================================================
> *** target-def.h   (revision 118884)
> --- target-def.h   (working copy)
> *************** Foundation, 51 Franklin Street, Fifth Fl
> *** 332,344 ****
>      TARGET_SCHED_SET_SCHED_FLAGS}
>
>   #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD 0
>   #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0
>   #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0
>
>   #define TARGET_VECTORIZE
\
> !   {TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD,            \
> !    TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN,
\
> !    TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD}
>
>   #define TARGET_DEFAULT_TARGET_FLAGS 0
>
> --- 332,348 ----
>      TARGET_SCHED_SET_SCHED_FLAGS}
>
>   #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD 0
> + #define TARGET_BUILTIN_VECTORIZED_FUNCTION
> default_builtin_vectorized_function
>   #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0
>   #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0
>
>   #define TARGET_VECTORIZE
\
> !   {                           \
> !     TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD,            \
> !     TARGET_BUILTIN_VECTORIZED_FUNCTION,               \
> !     TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN,
\
> !     TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD            \
> !   }
>
>   #define TARGET_DEFAULT_TARGET_FLAGS 0
>
> Index: tree-vect-transform.c
> ===================================================================
> *** tree-vect-transform.c   (revision 118884)
> --- tree-vect-transform.c   (working copy)
> *************** vectorizable_reduction (tree stmt, block
> *** 1549,1554 ****
> --- 1549,1700 ----
>     return true;
>   }
>
> + /* Checks if CALL can be vectorized in type VECTYPE.  Returns
> +    true if the target has a vectorized version of the function,
> +    or false if the function cannot be vectorized.  */
> +
> + static bool
> + vectorizable_function (tree call, tree vectype)
> + {
> +   tree fndecl = get_callee_fndecl (call);
> +
> +   /* We only handle functions that do not read or clobber memory --
> +      i.e. const or novops ones.  */
> +   if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
> +     return false;
> +
> +   if (!fndecl
> +       || TREE_CODE (fndecl) != FUNCTION_DECL
> +       || !DECL_BUILT_IN (fndecl))
> +     return false;
> +
> +   if (targetm.vectorize.builtin_vectorized_function
> (DECL_FUNCTION_CODE (fndecl), vectype))
> +     return true;
> +
> +   return false;
> + }
> +
> + /* Returns an expression that performs a call to vectorized version
> +    of FNDECL in type VECTYPE, with the arguments given by ARGS.
> +    If extra statements need to be generated, they are inserted
> +    before BSI.  */
> +
> + static tree
> + build_vectorized_function_call (tree fndecl,
> +             tree vectype, tree args)
> + {
> +   tree vfndecl;
> +   enum built_in_function code = DECL_FUNCTION_CODE (fndecl);
> +
> +   /* The target specific builtin should be available.  */
> +   vfndecl = targetm.vectorize.builtin_vectorized_function (code,
vectype);
> +   gcc_assert (vfndecl != NULL_TREE);
> +
> +   return build_function_call_expr (vfndecl, args);
> + }
> +
> + /* Function vectorizable_call.
> +
> +    Check if STMT performs a function call that can be vectorized.
> +    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
> +    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
> +    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
> +
> + bool
> + vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
> + {
> +   tree vec_dest;
> +   tree scalar_dest;
> +   tree operation;
> +   tree op, args, type;
> +   tree vec_oprnd, vargs, *pvargs_end;
> +   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> +   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> +   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
> +   tree fndecl, rhs, new_temp, def, def_stmt;
> +   enum vect_def_type dt;
> +
> +   /* Is STMT a vectorizable call?   */
> +   if (TREE_CODE (stmt) != MODIFY_EXPR)
> +     return false;
> +
> +   if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
> +     return false;
> +
> +   operation = TREE_OPERAND (stmt, 1);
> +   if (TREE_CODE (operation) != CALL_EXPR)
> +     return false;
> +
> +   /* For now, we only vectorize functions if a target specific builtin
> +      is available.  TODO -- in some cases, it might be profitable to
> +      insert the calls for pieces of the vector, in order to be able
> +      to vectorize other operations in the loop.  */
> +   if (!vectorizable_function (operation, vectype))
> +     {
> +       if (vect_print_dump_info (REPORT_DETAILS))
> +    fprintf (vect_dump, "function is not vectorizable.");
> +
> +       return false;
> +     }
> +   gcc_assert (!stmt_references_memory_p (stmt));
> +
> +   for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN
(args))
> +     {
> +       op = TREE_VALUE (args);
> +
> +       if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
> +    {
> +      if (vect_print_dump_info (REPORT_DETAILS))
> +        fprintf (vect_dump, "use not simple.");
> +      return false;
> +    }
> +     }
> +
> +   if (!vec_stmt) /* transformation not required.  */
> +     {
> +       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
> +       return true;
> +     }
> +
> +   /** Transform.  **/
> +
> +   if (vect_print_dump_info (REPORT_DETAILS))
> +     fprintf (vect_dump, "transform operation.");
> +
> +   /* Handle def.  */
> +   scalar_dest = TREE_OPERAND (stmt, 0);
> +   vec_dest = vect_create_destination_var (scalar_dest, vectype);
> +
> +   /* Handle uses.  */
> +   vargs = NULL_TREE;
> +   pvargs_end = &vargs;
> +   for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN
(args))
> +     {
> +       op = TREE_VALUE (args);
> +       vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
> +
> +       *pvargs_end = tree_cons (NULL_TREE, vec_oprnd, NULL_TREE);
> +       pvargs_end = &TREE_CHAIN (*pvargs_end);
> +     }
> +
> +   fndecl = get_callee_fndecl (operation);
> +   rhs = build_vectorized_function_call (fndecl, vectype, vargs);
> +   *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, rhs);
> +   new_temp = make_ssa_name (vec_dest, *vec_stmt);
> +   TREE_OPERAND (*vec_stmt, 0) = new_temp;
> +
> +   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
> +
> +   /* The call in STMT might prevent it from being removed in dce.  We
> +      however cannot remove it here, due to the way the ssa name it
> +      defines is mapped to the new definition.  So just replace rhs of
> +      the statement with something harmless.  */
> +   type = TREE_TYPE (scalar_dest);
> +   TREE_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
> +
> +   return true;
> + }
> +
>
>   /* Function vectorizable_assignment.
>
> *************** vectorizable_condition (tree stmt, block
> *** 3011,3017 ****
>   bool
>   vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
>   {
> !   bool is_store = false;
>     tree vec_stmt = NULL_TREE;
>     stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
>     tree orig_stmt_in_pattern;
> --- 3157,3163 ----
>   bool
>   vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
>   {
> !   bool remove_stmt = false;
>     tree vec_stmt = NULL_TREE;
>     stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
>     tree orig_stmt_in_pattern;
> *************** vect_transform_stmt (tree stmt, block_st
> *** 3049,3055 ****
>         case store_vec_info_type:
>      done = vectorizable_store (stmt, bsi, &vec_stmt);
>      gcc_assert (done);
> !    is_store = true;
>      break;
>
>         case condition_vec_info_type:
> --- 3195,3205 ----
>         case store_vec_info_type:
>      done = vectorizable_store (stmt, bsi, &vec_stmt);
>      gcc_assert (done);
> !    remove_stmt = true;
> !    break;
> !
> !       case call_vec_info_type:
> !    done = vectorizable_call (stmt, bsi, &vec_stmt);
>      break;
>
>         case condition_vec_info_type:
> *************** vect_transform_stmt (tree stmt, block_st
> *** 3099,3105 ****
>         }
>       }
>
> !   return is_store;
>   }
>
>
> --- 3249,3255 ----
>         }
>       }
>
> !   return remove_stmt;
>   }
>
>
> *************** vect_transform_loop (loop_vec_info loop_
> *** 3907,3913 ****
>      {
>        tree stmt = bsi_stmt (si);
>        stmt_vec_info stmt_info;
> !      bool is_store;
>
>        if (vect_print_dump_info (REPORT_DETAILS))
>          {
> --- 4057,4063 ----
>      {
>        tree stmt = bsi_stmt (si);
>        stmt_vec_info stmt_info;
> !      bool remove_stmt;
>
>        if (vect_print_dump_info (REPORT_DETAILS))
>          {
> *************** vect_transform_loop (loop_vec_info loop_
> *** 3932,3939 ****
>        if (vect_print_dump_info (REPORT_DETAILS))
>          fprintf (vect_dump, "transform statement.");
>
> !      is_store = vect_transform_stmt (stmt, &si);
> !      if (is_store)
>          {
>            /* Free the attached stmt_vec_info and remove the stmt.  */
>            stmt_ann_t ann = stmt_ann (stmt);
> --- 4082,4089 ----
>        if (vect_print_dump_info (REPORT_DETAILS))
>          fprintf (vect_dump, "transform statement.");
>
> !      remove_stmt = vect_transform_stmt (stmt, &si);
> !      if (remove_stmt)
>          {
>            /* Free the attached stmt_vec_info and remove the stmt.  */
>            stmt_ann_t ann = stmt_ann (stmt);
>



More information about the Gcc-patches mailing list