[PATCH] (2/3) Add vectorization of builtin functions

Richard Guenther rguenther@suse.de
Fri Nov 17 14:44:00 GMT 2006


On Thu, 16 Nov 2006, Dorit Nuzman wrote:

> >
> > This adds vectorization of builtin functions to the vectorizer.  It does
> 
> looks good to me...

This patch^Whack on top of it enables vectorization of lrint.

double foo[1024];
long bar[1024];

void foobar(void)
{
  int i;
  for (i=0; i<1024; ++i)
    bar[i] = __builtin_lrint (foo[i]);
}

# Compiler output for the foobar test case above (x86-64, AT&T syntax).
foobar:
.LFB2:
        # First 16 bytes are handled ahead of the loop: convert the two
        # packed doubles at foo and store the 16-byte result at bar.
        cvtpd2dq        foo(%rip), %xmm0
        movdqa  %xmm0, bar(%rip)
        # %rax is the byte offset used for both arrays; it starts at 16.
        movl    $16, %eax
        .p2align 4,,7
.L2:
        # Loop body: the same convert-and-store at byte offset %rax.
        # NOTE(review): bar is declared as long in the C source, while
        # cvtpd2dq produces 32-bit integer lanes -- confirm that the
        # stored layout is what is intended.
        cvtpd2dq        foo(%rax), %xmm0
        movdqa  %xmm0, bar(%rax)
        # Advance by 16 bytes; 8192 is 1024 doubles of 8 bytes each.
        addq    $16, %rax
        cmpq    $8192, %rax
        jne     .L2
        rep ; ret


This is just an example - the x86 builtin decls need to be kept in an
array so that they are accessible later.  We also need to address that
most of the x86 SSE builtins lack const or pure attributes (hence the
DECL_IS_NOVOPS hack on top of the keep-the-decl hack ;)).

Just FYI...  (we might be able to avoid too many new tree-codes by
resorting to builtins where appropriate)

Richard.

Index: config/i386/i386.c
===================================================================
*** config/i386/i386.c	(revision 118927)
--- config/i386/i386.c	(working copy)
*************** static bool ix86_pass_by_reference (CUMU
*** 1222,1227 ****
--- 1222,1228 ----
  				    tree, bool);
  static void ix86_init_builtins (void);
  static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+ static tree ix86_builtin_vectorized_function (enum built_in_function, tree);
  static const char *ix86_mangle_fundamental_type (tree);
  static tree ix86_stack_protect_fail (void);
  static rtx ix86_internal_arg_pointer (void);
*************** static section *x86_64_elf_select_sectio
*** 1286,1291 ****
--- 1287,1294 ----
  #define TARGET_INIT_BUILTINS ix86_init_builtins
  #undef TARGET_EXPAND_BUILTIN
  #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
+ #undef TARGET_BUILTIN_VECTORIZED_FUNCTION
+ #define TARGET_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
  
  #undef TARGET_ASM_FUNCTION_EPILOGUE
  #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
*************** enum ix86_builtins
*** 14775,14787 ****
    IX86_BUILTIN_MAX
  };
  
! #define def_builtin(MASK, NAME, TYPE, CODE)				\
! do {									\
!   if ((MASK) & target_flags						\
!       && (!((MASK) & MASK_64BIT) || TARGET_64BIT))			\
!     add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
! 			 NULL, NULL_TREE);			\
! } while (0)
  
  /* Bits for builtin_description.flag.  */
  
--- 14778,14801 ----
    IX86_BUILTIN_MAX
  };
  
! /* Register the ix86 target builtin NAME with function type TYPE and
!    builtin code CODE, provided at least one bit of MASK is enabled in
!    target_flags and, when MASK includes MASK_64BIT, TARGET_64BIT holds.
!    Returns the function decl, or NULL_TREE if the builtin was not added.  */
! 
! static inline tree
! def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
! {
!   if (!(mask & target_flags))
!     return NULL_TREE;
!   if ((mask & MASK_64BIT) && !TARGET_64BIT)
!     return NULL_TREE;
!   return add_builtin_function (name, type, code, BUILT_IN_MD,
! 			       NULL, NULL_TREE);
! }
! 
! /* FIXME: decls kept for ix86_builtin_vectorized_function; use an array.  */
! static tree cvtpd2dq_builtin = NULL_TREE;
! static tree cvtps2pi_builtin = NULL_TREE;
  
  /* Bits for builtin_description.flag.  */
  
*************** ix86_init_mmx_sse_builtins (void)
*** 15649,15655 ****
    def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
    def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
    def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
!   def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
    def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
    def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
    def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
--- 15663,15671 ----
    def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
    def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
    def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
!   cvtps2pi_builtin = def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
!   if (cvtps2pi_builtin)
!     DECL_IS_NOVOPS (cvtps2pi_builtin) = 1;
    def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
    def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
    def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
*************** ix86_init_mmx_sse_builtins (void)
*** 15744,15750 ****
    def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
    def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
  
!   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
    def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
    def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
    def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
--- 15760,15768 ----
    def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
    def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
  
!   cvtpd2dq_builtin = def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
!   if (cvtpd2dq_builtin)
!     DECL_IS_NOVOPS (cvtpd2dq_builtin) = 1;
    def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
    def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
    def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
*************** ix86_expand_builtin (tree exp, rtx targe
*** 16778,16783 ****
--- 16796,16841 ----
    gcc_unreachable ();
  }
  
+ /* Returns builtin for a vectorized version of the builtin function
+    with builtin function code FN in the vector type TYPE, or NULL_TREE
+    if it is not available.  Nothing is offered unless the target is
+    64-bit and flag_unsafe_math_optimizations is set.  TYPE is not
+    examined yet because the type check below is still disabled.  */
+ 
+ static tree
+ ix86_builtin_vectorized_function (enum built_in_function fn, tree type)
+ {
+   if (!TARGET_64BIT
+       || !flag_unsafe_math_optimizations)
+     return NULL_TREE;
+ 
+   /* We can handle canonical vector mode argument builtins only.  */
+ /*  if (!VECTOR_FLOAT_TYPE_P (type)
+       || !((TYPE_MODE (TREE_TYPE (type)) == DFmode
+ 	    &&  TYPE_VECTOR_SUBPARTS (type) == 2)
+ 	   || (TYPE_MODE (TREE_TYPE (type)) == SFmode
+ 	       &&  TYPE_VECTOR_SUBPARTS (type) == 4)))
+     return NULL_TREE; */
+ 
+   /* Either cached decl may be NULL_TREE if its builtin was not added.
+      NOTE(review): the vector result types (V4SI, V2SI) are not checked
+      against the width of long / long long here -- confirm.  */
+   switch (fn)
+     {
+     case BUILT_IN_LRINT:
+     case BUILT_IN_LLRINT:
+       return cvtpd2dq_builtin;
+ 
+     case BUILT_IN_LRINTF:
+     case BUILT_IN_LLRINTF:
+       return cvtps2pi_builtin;
+ 
+     default:
+       ;
+     }
+   return NULL_TREE;
+ }
+ 
  /* Store OPERAND to the memory after reload is completed.  This means
     that we can't easily use assign_stack_local.  */
  rtx



More information about the Gcc-patches mailing list