[PATCH] (2/3) Add vectorization of builtin functions

Dorit Nuzman DORIT@il.ibm.com
Sat Nov 18 18:14:00 GMT 2006


> On Thu, 16 Nov 2006, Dorit Nuzman wrote:
>
> > >
> > > This adds vectorization of builtin functions to the vectorizer.  It does
> >
> > looks good to me...
>
> This patch^Whack on top of it enables vectorization of lrint.
>
...
>
>
> just as an example - the x86 builtins need to survive in an array
> to be accessible later.  We also need to address that most of the
> x86 SSE builtins lack const or pure attributes (so the DECL_NOVOPS
> hack on top of the keep-the-decl hack ;)).
>

I think that we have the same problem with the Altivec builtins, and that
Andrew Pinski is going to fix this
(http://gcc.gnu.org/bugzilla/show_bug.cgi?id=29779#c5).

> Just FYI...  (we might be able to avoid too many new tree-codes by
> resorting to builtins where appropriate)
>

The trade-off is having function calls in the code, which may hinder some
optimizations (?). The limited number of available tree-codes is indeed a
problem, but the solution is probably to increase it...

Anyhow - having the general ability to vectorize function calls is very
useful - thanks for that!

dorit

> Richard.
>
> Index: config/i386/i386.c
> ===================================================================
> *** config/i386/i386.c   (revision 118927)
> --- config/i386/i386.c   (working copy)
> *************** static bool ix86_pass_by_reference (CUMU
> *** 1222,1227 ****
> --- 1222,1228 ----
>                   tree, bool);
>   static void ix86_init_builtins (void);
>   static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
> + static tree ix86_builtin_vectorized_function (enum built_in_function, tree);
>   static const char *ix86_mangle_fundamental_type (tree);
>   static tree ix86_stack_protect_fail (void);
>   static rtx ix86_internal_arg_pointer (void);
> *************** static section *x86_64_elf_select_sectio
> *** 1286,1291 ****
> --- 1287,1294 ----
>   #define TARGET_INIT_BUILTINS ix86_init_builtins
>   #undef TARGET_EXPAND_BUILTIN
>   #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
> + #undef TARGET_BUILTIN_VECTORIZED_FUNCTION
> + #define TARGET_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
>
>   #undef TARGET_ASM_FUNCTION_EPILOGUE
>   #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
> *************** enum ix86_builtins
> *** 14775,14787 ****
>     IX86_BUILTIN_MAX
>   };
>
> ! #define def_builtin(MASK, NAME, TYPE, CODE)            \
> ! do {                           \
> !   if ((MASK) & target_flags                  \
> !       && (!((MASK) & MASK_64BIT) || TARGET_64BIT))         \
> !     add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,   \
> !           NULL, NULL_TREE);         \
> ! } while (0)
>
>   /* Bits for builtin_description.flag.  */
>
> --- 14778,14801 ----
>     IX86_BUILTIN_MAX
>   };
>
> ! /* Add a ix86 target builtin function with CODE, NAME and TYPE.  Do so,
> !    if the target_flags include one of MASK.
> !    Returns the function decl or NULL_TREE, if the builtin was not added.  */
> !
> ! static inline tree
> ! def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
> ! {
> !   tree decl = NULL_TREE;
> !   if (mask & target_flags
> !       && (!(mask & MASK_64BIT) || TARGET_64BIT))
> !     decl = add_builtin_function (name, type, code, BUILT_IN_MD,
> !              NULL, NULL_TREE);
> !   return decl;
> ! }
> !
> ! /* FIXME.  */
> ! static tree cvtpd2dq_builtin = NULL_TREE;
> ! static tree cvtps2pi_builtin = NULL_TREE;
>
>   /* Bits for builtin_description.flag.  */
>
> *************** ix86_init_mmx_sse_builtins (void)
> *** 15649,15655 ****
>     def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr",
> void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
>     def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr",
> unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
>     def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps",
> v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
> !   def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi",
> v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
>     def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss",
> v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
>     def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss",
> v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
>     def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si",
> int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
> --- 15663,15671 ----
>     def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr",
> void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
>     def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr",
> unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
>     def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps",
> v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
> !   cvtps2pi_builtin = def_builtin (MASK_SSE,
> "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
> !   if (cvtps2pi_builtin)
> !     DECL_IS_NOVOPS (cvtps2pi_builtin) = 1;
>     def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss",
> v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
>     def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss",
> v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
>     def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si",
> int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
> *************** ix86_init_mmx_sse_builtins (void)
> *** 15744,15750 ****
>     def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd",
> v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
>     def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps",
> v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
>
> !   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq",
> v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
>     def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi",
> v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
>     def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps",
> v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
>     def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq",
> v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
> --- 15760,15768 ----
>     def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd",
> v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
>     def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps",
> v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
>
> !   cvtpd2dq_builtin = def_builtin (MASK_SSE2,
> "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
> !   if (cvtpd2dq_builtin)
> !     DECL_IS_NOVOPS (cvtpd2dq_builtin) = 1;
>     def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi",
> v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
>     def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps",
> v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
>     def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq",
> v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
> *************** ix86_expand_builtin (tree exp, rtx targe
> *** 16778,16783 ****
> --- 16796,16841 ----
>     gcc_unreachable ();
>   }
>
> + /* Returns builtin for a vectorized version of the builtin function
> +    with builtin function code FN in the vector type TYPE, or NULL_TREE
> +    if it is not available.  */
> +
> + static tree
> + ix86_builtin_vectorized_function (enum built_in_function fn, tree type)
> + {
> +   char name[20] = "__vr.._";
> +   tree fntype, new_fndecl, args;
> +   unsigned arity;
> +   const char *bname;
> +
> +   if (!TARGET_64BIT
> +       || !flag_unsafe_math_optimizations)
> +     return NULL_TREE;
> +
> +   /* We can handle canonical vector mode argument builtins only.  */
> + /*  if (!VECTOR_FLOAT_TYPE_P (type)
> +       || !((TYPE_MODE (TREE_TYPE (type)) == DFmode
> +        &&  TYPE_VECTOR_SUBPARTS (type) == 2)
> +       || (TYPE_MODE (TREE_TYPE (type)) == SFmode
> +           &&  TYPE_VECTOR_SUBPARTS (type) == 4)))
> +     return NULL_TREE; */
> +
> +   switch (fn)
> +     {
> +     case BUILT_IN_LRINT:
> +     case BUILT_IN_LLRINT:
> +       return cvtpd2dq_builtin;
> +
> +     case BUILT_IN_LRINTF:
> +     case BUILT_IN_LLRINTF:
> +       return cvtps2pi_builtin;
> +
> +     default:
> +       ;
> +     }
> +   return NULL_TREE;
> + }
> +
>   /* Store OPERAND to the memory after reload is completed.  This means
>      that we can't easily use assign_stack_local.  */
>   rtx



More information about the Gcc-patches mailing list