This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] (2/3) Add vectorization of builtin functions
- From: Dorit Nuzman <DORIT at il dot ibm dot com>
- To: Richard Guenther <rguenther at suse dot de>
- Cc: gcc-patches at gcc dot gnu dot org, Zdenek Dvorak <rakdver at atrey dot karlin dot mff dot cuni dot cz>, pinskia at gcc dot gnu dot org
- Date: Sat, 18 Nov 2006 20:11:08 +0200
- Subject: Re: [PATCH] (2/3) Add vectorization of builtin functions
> On Thu, 16 Nov 2006, Dorit Nuzman wrote:
>
> > >
> > > This adds vectorization of builtin functions to the vectorizer. It does
> >
> > looks good to me...
>
> This patch^Whack on top of it enables vectorization of lrint.
>
...
>
>
> just as an example - the x86 builtins need to survive in an array
> to be accessible later. We also need to address that most of the
> x86 SSE builtins lack const or pure attributes (so the DECL_NOVOPS
> hack on top of the keep-the-decl hack ;)).
>
I think that we have the same problem with the Altivec builtins, and that
Andrew Pinski is going to fix this (
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=29779#c5)
> Just FYI... (we might be able to avoid too many new tree-codes by
> resorting to builtins where appropriate)
>
the trade-off is having function calls in the code, which may hinder some
optimizations (?). The limited number of available tree-codes is indeed a
problem, but probably the solution is to increase it...
Anyhow - having the general ability to vectorize function calls is very
useful - thanks for that!
dorit
> Richard.
>
> Index: config/i386/i386.c
> ===================================================================
> *** config/i386/i386.c (revision 118927)
> --- config/i386/i386.c (working copy)
> *************** static bool ix86_pass_by_reference (CUMU
> *** 1222,1227 ****
> --- 1222,1228 ----
> tree, bool);
> static void ix86_init_builtins (void);
> static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
> + static tree ix86_builtin_vectorized_function (enum built_in_function, tree);
> static const char *ix86_mangle_fundamental_type (tree);
> static tree ix86_stack_protect_fail (void);
> static rtx ix86_internal_arg_pointer (void);
> *************** static section *x86_64_elf_select_sectio
> *** 1286,1291 ****
> --- 1287,1294 ----
> #define TARGET_INIT_BUILTINS ix86_init_builtins
> #undef TARGET_EXPAND_BUILTIN
> #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
> + #undef TARGET_BUILTIN_VECTORIZED_FUNCTION
> + #define TARGET_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
>
> #undef TARGET_ASM_FUNCTION_EPILOGUE
> #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
> *************** enum ix86_builtins
> *** 14775,14787 ****
> IX86_BUILTIN_MAX
> };
>
> ! #define def_builtin(MASK, NAME, TYPE, CODE) \
> ! do { \
> ! if ((MASK) & target_flags \
> ! && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
> ! add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
> ! NULL, NULL_TREE); \
> ! } while (0)
>
> /* Bits for builtin_description.flag. */
>
> --- 14778,14801 ----
> IX86_BUILTIN_MAX
> };
>
> ! /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
> ! only if the target_flags include one of MASK.
> ! Returns the function decl, or NULL_TREE if the builtin was not added. */
> !
> ! static inline tree
> ! def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
> ! {
> ! tree decl = NULL_TREE;
> ! if (mask & target_flags
> ! && (!(mask & MASK_64BIT) || TARGET_64BIT))
> ! decl = add_builtin_function (name, type, code, BUILT_IN_MD,
> ! NULL, NULL_TREE);
> ! return decl;
> ! }
> !
> ! /* FIXME. */
> ! static tree cvtpd2dq_builtin = NULL_TREE;
> ! static tree cvtps2pi_builtin = NULL_TREE;
>
> /* Bits for builtin_description.flag. */
>
> *************** ix86_init_mmx_sse_builtins (void)
> *** 15649,15655 ****
> def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr",
> void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
> def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr",
> unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
> def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps",
> v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
> ! def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi",
> v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
> def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss",
> v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
> def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss",
> v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
> def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si",
> int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
> --- 15663,15671 ----
> def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr",
> void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
> def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr",
> unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
> def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps",
> v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
> ! cvtps2pi_builtin = def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
> ! if (cvtps2pi_builtin)
> ! DECL_IS_NOVOPS (cvtps2pi_builtin) = 1;
> def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss",
> v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
> def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss",
> v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
> def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si",
> int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
> *************** ix86_init_mmx_sse_builtins (void)
> *** 15744,15750 ****
> def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd",
> v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
> def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps",
> v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
>
> ! def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq",
> v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
> def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi",
> v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
> def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps",
> v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
> def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq",
> v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
> --- 15760,15768 ----
> def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd",
> v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
> def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps",
> v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
>
> ! cvtpd2dq_builtin = def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
> ! if (cvtpd2dq_builtin)
> ! DECL_IS_NOVOPS (cvtpd2dq_builtin) = 1;
> def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi",
> v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
> def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps",
> v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
> def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq",
> v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
> *************** ix86_expand_builtin (tree exp, rtx targe
> *** 16778,16783 ****
> --- 16796,16841 ----
> gcc_unreachable ();
> }
>
> + /* Returns builtin for a vectorized version of the builtin function
> + with builtin function code FN in the vector type TYPE, or NULL_TREE
> + if it is not available. */
> +
> + static tree
> + ix86_builtin_vectorized_function (enum built_in_function fn, tree type)
> + {
> + char name[20] = "__vr.._";
> + tree fntype, new_fndecl, args;
> + unsigned arity;
> + const char *bname;
> +
> + if (!TARGET_64BIT
> + || !flag_unsafe_math_optimizations)
> + return NULL_TREE;
> +
> + /* We can handle canonical vector mode argument builtins only. */
> + /* if (!VECTOR_FLOAT_TYPE_P (type)
> + || !((TYPE_MODE (TREE_TYPE (type)) == DFmode
> + && TYPE_VECTOR_SUBPARTS (type) == 2)
> + || (TYPE_MODE (TREE_TYPE (type)) == SFmode
> + && TYPE_VECTOR_SUBPARTS (type) == 4)))
> + return NULL_TREE; */
> +
> + switch (fn)
> + {
> + case BUILT_IN_LRINT:
> + case BUILT_IN_LLRINT:
> + return cvtpd2dq_builtin;
> +
> + case BUILT_IN_LRINTF:
> + case BUILT_IN_LLRINTF:
> + return cvtps2pi_builtin;
> +
> + default:
> + ;
> + }
> + return NULL_TREE;
> + }
> +
> /* Store OPERAND to the memory after reload is completed. This means
> that we can't easily use assign_stack_local. */
> rtx