This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH][RFC] Do some vectorizer-friendly canonicalization before vectorization


On Thu, 23 Nov 2006, Richard Guenther wrote:

> On Thu, 23 Nov 2006, Dorit Nuzman wrote:
> 
> > Richard Guenther <rguenther@suse.de> wrote on 23/11/2006 12:39:40:
> > 
> > > The following passes for me on x86_64-unknown-linux-gnu.  Can you check
> > > it works on ppc?
> > >
> Ok, so consider the pow (x, 0.5) pattern matching removed until the
> function vectorizing is approved (where we then can check for support).

Or try this one - it adds this capability.

Richard.

Index: targhooks.c
===================================================================
*** targhooks.c	(revision 119115)
--- targhooks.c	(working copy)
*************** default_invalid_within_doloop (rtx insn)
*** 319,324 ****
--- 319,333 ----
    return NULL;
  }
  
+ /* Default hook: no builtin has a vectorized variant; return NULL_TREE.  */
+ 
+ tree
+ default_builtin_vectorized_function (enum built_in_function fn ATTRIBUTE_UNUSED,
+ 				     tree type ATTRIBUTE_UNUSED)
+ {
+   return NULL_TREE;
+ }
+ 
  bool
  hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false (
  	CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
Index: targhooks.h
===================================================================
*** targhooks.h	(revision 119115)
--- targhooks.h	(working copy)
*************** extern const char * default_invalid_with
*** 57,62 ****
--- 57,64 ----
  
  extern bool default_narrow_bitfield (void);
  
+ extern tree default_builtin_vectorized_function (enum built_in_function, tree);
+ 
  /* These are here, and not in hooks.[ch], because not all users of
     hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS.  */
  
Index: target.h
===================================================================
*** target.h	(revision 119115)
--- target.h	(working copy)
*************** struct gcc_target
*** 370,375 ****
--- 370,379 ----
         function.  */
      tree (* builtin_mask_for_load) (void);
  
+     /* Returns the target builtin that implements the vectorized version
+        of a builtin function for a vector type, or NULL_TREE if none.  */
+     tree (* builtin_vectorized_function) (unsigned, tree);
+ 
      /* Target builtin that implements vector widening multiplication.
         builtin_mul_widen_eve computes the element-by-element products 
         for the even elements, and builtin_mul_widen_odd computes the
Index: testsuite/gcc.dg/vect/vect-pow-1.c
===================================================================
*** testsuite/gcc.dg/vect/vect-pow-1.c	(revision 119115)
--- testsuite/gcc.dg/vect/vect-pow-1.c	(working copy)
***************
*** 1,14 ****
- /* { dg-do compile } */
- /* { dg-options "-O2 -ftree-vectorize -ffast-math -fdump-tree-vect-details" } */
- 
- double x[256];
- 
- void foo(void)
- {
-   int i;
-   for (i=0; i<256; ++i)
-     x[i] = x[i] * x[i];
- }
- 
- /* { dg-final { scan-tree-dump "pattern recognized" "vect" } } */
- /* { dg-final { cleanup-tree-dump "vect" } } */
--- 0 ----
Index: testsuite/gcc.dg/vect/fast-math-vect-pow-1.c
===================================================================
*** testsuite/gcc.dg/vect/fast-math-vect-pow-1.c	(revision 119115)
--- testsuite/gcc.dg/vect/fast-math-vect-pow-1.c	(working copy)
***************
*** 1,7 ****
  /* { dg-do compile } */
! /* { dg-options "-O2 -ftree-vectorize -ffast-math -fdump-tree-vect-details" } */
  
! double x[256];
  
  void foo(void)
  {
--- 1,7 ----
  /* { dg-do compile } */
! /* { dg-require-effective-target vect_float } */
  
! float x[256];
  
  void foo(void)
  {
*************** void foo(void)
*** 10,14 ****
      x[i] = x[i] * x[i];
  }
  
! /* { dg-final { scan-tree-dump "pattern recognized" "vect" } } */
  /* { dg-final { cleanup-tree-dump "vect" } } */
--- 10,14 ----
      x[i] = x[i] * x[i];
  }
  
! /* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
  /* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-pow-2.c
===================================================================
*** testsuite/gcc.dg/vect/vect-pow-2.c	(revision 119115)
--- testsuite/gcc.dg/vect/vect-pow-2.c	(working copy)
***************
*** 1,14 ****
- /* { dg-do compile } */
- /* { dg-options "-O2 -ftree-vectorize -fno-math-errno -fdump-tree-vect-details" } */
- 
- double x[256];
- 
- void foo(void)
- {
-   int i;
-   for (i=0; i<256; ++i)
-     x[i] = __builtin_pow (x[i], 0.5);
- }
- 
- /* { dg-final { scan-tree-dump "pattern recognized" "vect" } } */
- /* { dg-final { cleanup-tree-dump "vect" } } */
--- 0 ----
Index: testsuite/gcc.dg/vect/vect.exp
===================================================================
*** testsuite/gcc.dg/vect/vect.exp	(revision 119115)
--- testsuite/gcc.dg/vect/vect.exp	(working copy)
*************** lappend DEFAULT_VECTCFLAGS "-ffast-math"
*** 97,102 ****
--- 97,108 ----
  dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/fast-math-vect*.\[cS\]]]  \
  	"" $DEFAULT_VECTCFLAGS
  
+ # -fno-math-errno tests
+ set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
+ lappend DEFAULT_VECTCFLAGS "-fno-math-errno"
+ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-math-errno-vect*.\[cS\]]]  \
+ 	"" $DEFAULT_VECTCFLAGS
+ 
  # -fwrapv tests
  set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
  lappend DEFAULT_VECTCFLAGS "-fwrapv"
Index: testsuite/gcc.dg/vect/no-math-errno-vect-pow-1.c
===================================================================
*** testsuite/gcc.dg/vect/no-math-errno-vect-pow-1.c	(revision 119115)
--- testsuite/gcc.dg/vect/no-math-errno-vect-pow-1.c	(working copy)
***************
*** 1,13 ****
  /* { dg-do compile } */
! /* { dg-options "-O2 -ftree-vectorize -fno-math-errno -fdump-tree-vect-details" } */
  
! double x[256];
  
  void foo(void)
  {
    int i;
    for (i=0; i<256; ++i)
!     x[i] = __builtin_pow (x[i], 0.5);
  }
  
  /* { dg-final { scan-tree-dump "pattern recognized" "vect" } } */
--- 1,13 ----
  /* { dg-do compile } */
! /* { dg-require-effective-target vect_float } */
  
! float x[256];
  
  void foo(void)
  {
    int i;
    for (i=0; i<256; ++i)
!     x[i] = __builtin_powf (x[i], 0.5);
  }
  
  /* { dg-final { scan-tree-dump "pattern recognized" "vect" } } */
Index: tree-vectorizer.h
===================================================================
*** tree-vectorizer.h	(revision 119115)
--- tree-vectorizer.h	(working copy)
*************** enum stmt_vec_info_type {
*** 163,168 ****
--- 163,169 ----
    load_vec_info_type,
    store_vec_info_type,
    op_vec_info_type,
+   call_vec_info_type,
    assignment_vec_info_type,
    condition_vec_info_type,
    reduc_vec_info_type,
*************** extern bool vectorizable_operation (tree
*** 397,402 ****
--- 398,405 ----
  extern bool vectorizable_type_promotion (tree, block_stmt_iterator *, tree *);
  extern bool vectorizable_type_demotion (tree, block_stmt_iterator *, tree *);
  extern bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
+ extern bool vectorizable_function (tree, tree);
+ extern bool vectorizable_call (tree, block_stmt_iterator *, tree *);
  extern bool vectorizable_condition (tree, block_stmt_iterator *, tree *);
  extern bool vectorizable_live_operation (tree, block_stmt_iterator *, tree *);
  extern bool vectorizable_reduction (tree, block_stmt_iterator *, tree *);
Index: tree-vect-analyze.c
===================================================================
*** tree-vect-analyze.c	(revision 119115)
--- tree-vect-analyze.c	(working copy)
*************** vect_analyze_operations (loop_vec_info l
*** 301,306 ****
--- 301,307 ----
  		    || vectorizable_operation (stmt, NULL, NULL)
  		    || vectorizable_assignment (stmt, NULL, NULL)
  		    || vectorizable_load (stmt, NULL, NULL)
+ 		    || vectorizable_call (stmt, NULL, NULL)
  		    || vectorizable_store (stmt, NULL, NULL)
  		    || vectorizable_condition (stmt, NULL, NULL));
  
Index: tree-vect-patterns.c
===================================================================
*** tree-vect-patterns.c	(revision 119115)
--- tree-vect-patterns.c	(working copy)
*************** vect_recog_pow_pattern (tree last_stmt, 
*** 466,472 ****
    /* We now have a pow or powi builtin function call with a constant
       exponent.  */
  
-   *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
    *type_out = NULL_TREE;
  
    /* Catch squaring.  */
--- 466,471 ----
*************** vect_recog_pow_pattern (tree last_stmt, 
*** 474,480 ****
         && tree_low_cst (exp, 0) == 2)
        || (TREE_CODE (exp) == REAL_CST
            && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
!     return build2 (MULT_EXPR, TREE_TYPE (base), base, base);
  
    /* Catch square root.  */
    if (TREE_CODE (exp) == REAL_CST
--- 473,482 ----
         && tree_low_cst (exp, 0) == 2)
        || (TREE_CODE (exp) == REAL_CST
            && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
!     {
!       *type_in = TREE_TYPE (base);
!       return build2 (MULT_EXPR, TREE_TYPE (base), base, base);
!     }
  
    /* Catch square root.  */
    if (TREE_CODE (exp) == REAL_CST
*************** vect_recog_pow_pattern (tree last_stmt, 
*** 482,488 ****
      {
        tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
        tree newarglist = build_tree_list (NULL_TREE, base);
!       return build_function_call_expr (newfn, newarglist);
      }
  
    return NULL_TREE;
--- 484,496 ----
      {
        tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
        tree newarglist = build_tree_list (NULL_TREE, base);
!       *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
!       if (*type_in)
! 	{
! 	  newfn = build_function_call_expr (newfn, newarglist);
! 	  if (vectorizable_function (newfn, *type_in))
! 	    return newfn;
! 	}
      }
  
    return NULL_TREE;
Index: target-def.h
===================================================================
*** target-def.h	(revision 119115)
--- target-def.h	(working copy)
*************** Foundation, 51 Franklin Street, Fifth Fl
*** 332,344 ****
     TARGET_SCHED_SET_SCHED_FLAGS}
  
  #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD 0
  #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0
  #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0
  
  #define TARGET_VECTORIZE                                                \
!   {TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD,				\
!    TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN,                             \
!    TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD}
  
  #define TARGET_DEFAULT_TARGET_FLAGS 0
  
--- 332,348 ----
     TARGET_SCHED_SET_SCHED_FLAGS}
  
  #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD 0
+ #define TARGET_BUILTIN_VECTORIZED_FUNCTION default_builtin_vectorized_function
  #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0
  #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0
  
  #define TARGET_VECTORIZE                                                \
!   {									\
!     TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD,				\
!     TARGET_BUILTIN_VECTORIZED_FUNCTION,					\
!     TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN,                            \
!     TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD				\
!   }
  
  #define TARGET_DEFAULT_TARGET_FLAGS 0
  
Index: tree-vect-transform.c
===================================================================
*** tree-vect-transform.c	(revision 119115)
--- tree-vect-transform.c	(working copy)
*************** vectorizable_reduction (tree stmt, block
*** 1561,1566 ****
--- 1561,1712 ----
    return true;
  }
  
+ /* Returns true if CALL is a const or novops call to a builtin function
+    for which the target hook provides a vectorized version in type
+    VECTYPE, and false otherwise.  */
+ 
+ bool
+ vectorizable_function (tree call, tree vectype)
+ {
+   tree fndecl = get_callee_fndecl (call);
+ 
+   /* We only handle functions that do not read or clobber memory -- i.e.
+      const or novops ones.  */
+   if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
+     return false;
+ 
+   if (!fndecl
+       || TREE_CODE (fndecl) != FUNCTION_DECL
+       || !DECL_BUILT_IN (fndecl))
+     return false;
+ 
+   if (targetm.vectorize.builtin_vectorized_function (DECL_FUNCTION_CODE (fndecl), vectype))
+     return true;
+ 
+   return false;
+ }
+ 
+ /* Returns an expression that performs a call to vectorized version
+    of FNDECL in type VECTYPE, with the arguments given by ARGS.
+    The target must provide that vectorized builtin; its absence is
+    treated as an internal error (asserted), not reported to callers.  */
+ 
+ static tree
+ build_vectorized_function_call (tree fndecl,
+ 				tree vectype, tree args)
+ {
+   tree vfndecl;
+   enum built_in_function code = DECL_FUNCTION_CODE (fndecl);
+ 
+   /* The target specific builtin should be available.  */
+   vfndecl = targetm.vectorize.builtin_vectorized_function (code, vectype);
+   gcc_assert (vfndecl != NULL_TREE);
+ 
+   return build_function_call_expr (vfndecl, args);
+ }
+ 
+ /* Function vectorizable_call.
+ 
+    Check if STMT assigns the result of a vectorizable call to an SSA_NAME.
+    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
+    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
+    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
+ 
+ bool
+ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
+ {
+   tree vec_dest;
+   tree scalar_dest;
+   tree operation;
+   tree op, args, type;
+   tree vec_oprnd, vargs, *pvargs_end;
+   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+   tree fndecl, rhs, new_temp, def, def_stmt;
+   enum vect_def_type dt;
+ 
+   /* Is STMT a vectorizable call?   */
+   if (TREE_CODE (stmt) != MODIFY_EXPR)
+     return false;
+ 
+   if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
+     return false;
+ 
+   operation = TREE_OPERAND (stmt, 1);
+   if (TREE_CODE (operation) != CALL_EXPR)
+     return false;
+    
+   /* For now, we only vectorize functions if a target specific builtin
+      is available.  TODO -- in some cases, it might be profitable to
+      insert the calls for pieces of the vector, in order to be able
+      to vectorize other operations in the loop.  */
+   if (!vectorizable_function (operation, vectype))
+     {
+       if (vect_print_dump_info (REPORT_DETAILS))
+ 	fprintf (vect_dump, "function is not vectorizable.");
+ 
+       return false;
+     }
+   gcc_assert (!stmt_references_memory_p (stmt));
+ 
+   for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args))
+     {
+       op = TREE_VALUE (args);
+ 
+       if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
+ 	{
+ 	  if (vect_print_dump_info (REPORT_DETAILS))
+ 	    fprintf (vect_dump, "use not simple.");
+ 	  return false;
+ 	}
+     }
+ 
+   if (!vec_stmt) /* transformation not required.  */
+     {
+       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
+       return true;
+     }
+ 
+   /** Transform.  **/
+ 
+   if (vect_print_dump_info (REPORT_DETAILS))
+     fprintf (vect_dump, "transform operation.");
+ 
+   /* Handle def: create the vector destination variable.  */
+   scalar_dest = TREE_OPERAND (stmt, 0);
+   vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ 
+   /* Handle uses: build the list of vector operands in argument order.  */
+   vargs = NULL_TREE;
+   pvargs_end = &vargs;
+   for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args))
+     {
+       op = TREE_VALUE (args);
+       vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
+ 	  
+       *pvargs_end = tree_cons (NULL_TREE, vec_oprnd, NULL_TREE);
+       pvargs_end = &TREE_CHAIN (*pvargs_end);
+     }
+ 
+   fndecl = get_callee_fndecl (operation);
+   rhs = build_vectorized_function_call (fndecl, vectype, vargs);
+   *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, rhs);
+   new_temp = make_ssa_name (vec_dest, *vec_stmt);
+   TREE_OPERAND (*vec_stmt, 0) = new_temp;
+ 
+   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
+ 
+   /* The call in STMT might prevent it from being removed in dce.  We however
+      cannot remove it here, due to the way the ssa name it defines is mapped
+      to the new definition.  So just replace rhs of the statement with something
+      harmless.  */
+   type = TREE_TYPE (scalar_dest);
+   TREE_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
+ 
+   return true;
+ }
+ 
  
  /* Function vectorizable_assignment.
  
*************** vect_transform_stmt (tree stmt, block_st
*** 3713,3718 ****
--- 3859,3868 ----
  	gcc_assert (done);
  	break;
  
+       case call_vec_info_type:
+ 	done = vectorizable_call (stmt, bsi, &vec_stmt);
+ 	break;
+ 
        default:
  	if (vect_print_dump_info (REPORT_DETAILS))
  	  fprintf (vect_dump, "stmt not supported.");


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]