This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH, 4.6] Backport fixes for PR50031, PR50969


This patch backports the two recent trunk fixes for powerpc64
vectorization degradations.  The fixes are largely identical to their
4.7 counterparts except that (a) the logic for
STMT_VINFO_PATTERN_DEF_SEQ does not apply in 4.6, and (b) the changes to
vectorizable_conversion in 4.7 correspond to changes in
vectorizable_type_demotion and vectorizable_type_promotion in 4.6.

Bootstrapped on powerpc64-linux and tested there for both regressions
and performance.  OK to commit to the 4.6 branch after the trunk patch
has had a few days of burn-in?

Thanks,
Bill


2012-02-10  Bill Schmidt <wschmidt@linux.vnet.ibm.com>
	    Ira Rosen <irar@il.ibm.com>

	PR tree-optimization/50031
	PR tree-optimization/50969
	* targhooks.c (default_builtin_vectorization_cost): Handle
	vec_promote_demote.
	* target.h (enum vect_cost_for_stmt): Add vec_promote_demote.
	* tree-vect-loop.c (vect_get_single_scalar_iteraion_cost): Handle
	all types of reduction and pattern statements.
	(vect_estimate_min_profitable_iters): Likewise.
	* tree-vect-stmts.c (vect_model_promotion_demotion_cost): New function.
	(vect_model_store_cost): Use vec_perm rather than vector_stmt for
	statement cost.
	(vect_model_load_cost): Likewise.
	(vect_get_load_cost): Likewise; add dump logic for explicit realigns.
	(vectorizable_type_demotion): Call vect_model_promotion_demotion_cost.
	(vectorizable_type_promotion): Likewise.
	* config/spu/spu.c (spu_builtin_vectorization_cost): Handle
	vec_promote_demote.
	* config/i386/i386.c (ix86_builtin_vectorization_cost): Likewise.
	* config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost): Update
	vec_perm for VSX and handle vec_promote_demote.


Index: gcc/targhooks.c
===================================================================
--- gcc/targhooks.c	(revision 184047)
+++ gcc/targhooks.c	(working copy)
@@ -529,6 +529,7 @@ default_builtin_vectorization_cost (enum vect_cost
       case scalar_to_vec:
       case cond_branch_not_taken:
       case vec_perm:
+      case vec_promote_demote:
         return 1;
 
       case unaligned_load:
Index: gcc/target.h
===================================================================
--- gcc/target.h	(revision 184047)
+++ gcc/target.h	(working copy)
@@ -128,7 +128,8 @@ enum vect_cost_for_stmt
   scalar_to_vec,
   cond_branch_not_taken,
   cond_branch_taken,
-  vec_perm
+  vec_perm,
+  vec_promote_demote
 };
 
 /* Sets of optimization levels at which an option may be enabled by
Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	(revision 184047)
+++ gcc/tree-vect-loop.c	(working copy)
@@ -2104,7 +2104,8 @@ vect_get_single_scalar_iteraion_cost (loop_vec_inf
           if (stmt_info
               && !STMT_VINFO_RELEVANT_P (stmt_info)
               && (!STMT_VINFO_LIVE_P (stmt_info)
-                  || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
+                  || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
+	      && !STMT_VINFO_IN_PATTERN_P (stmt_info))
             continue;
 
           if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
@@ -2251,11 +2252,19 @@ vect_estimate_min_profitable_iters (loop_vec_info
 	{
 	  gimple stmt = gsi_stmt (si);
 	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+
+	  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
+	    {
+	      stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+	      stmt_info = vinfo_for_stmt (stmt);
+	    }
+
 	  /* Skip stmts that are not vectorized inside the loop.  */
 	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
 	      && (!STMT_VINFO_LIVE_P (stmt_info)
-		  || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
+		  || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))))
 	    continue;
+
 	  vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
 	  /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
 	     some of the "outside" costs are generated inside the outer-loop.  */
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c	(revision 184047)
+++ gcc/tree-vect-stmts.c	(working copy)
@@ -623,6 +623,46 @@ vect_model_simple_cost (stmt_vec_info stmt_info, i
 }
 
 
+/* Model cost for type demotion and promotion operations.  PWR is normally
+   zero for single-step promotions and demotions.  It will be one if 
+   two-step promotion/demotion is required, and so on.  Each additional
+   step doubles the number of instructions required.  */
+
+static void
+vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
+				    enum vect_def_type *dt, int pwr)
+{
+  int i, tmp;
+  int inside_cost = 0, outside_cost = 0, single_stmt_cost;
+
+  /* The SLP costs were already calculated during SLP tree build.  */
+  if (PURE_SLP_STMT (stmt_info))
+    return;
+
+  single_stmt_cost = vect_get_stmt_cost (vec_promote_demote);
+  for (i = 0; i < pwr + 1; i++)
+    {
+      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
+	(i + 1) : i;
+      inside_cost += vect_pow2 (tmp) * single_stmt_cost;
+    }
+
+  /* FORNOW: Assuming maximum 2 args per stmts.  */
+  for (i = 0; i < 2; i++)
+    {
+      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
+        outside_cost += vect_get_stmt_cost (vector_stmt);
+    }
+
+  if (vect_print_dump_info (REPORT_COST))
+    fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, "
+             "outside_cost = %d .", inside_cost, outside_cost);
+
+  /* Set the costs in STMT_INFO.  */
+  stmt_vinfo_set_inside_of_loop_cost (stmt_info, NULL, inside_cost);
+  stmt_vinfo_set_outside_of_loop_cost (stmt_info, NULL, outside_cost);
+}
+
 /* Function vect_cost_strided_group_size
 
    For strided load or store, return the group_size only if it is the first
@@ -691,7 +731,7 @@ vect_model_store_cost (stmt_vec_info stmt_info, in
     {
       /* Uses a high and low interleave operation for each needed permute.  */
       inside_cost = ncopies * exact_log2(group_size) * group_size
-        * vect_get_stmt_cost (vector_stmt);
+        * vect_get_stmt_cost (vec_perm);
 
       if (vect_print_dump_info (REPORT_COST))
         fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
@@ -795,7 +835,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int
     {
       /* Uses an even and odd extract operations for each needed permute.  */
       inside_cost = ncopies * exact_log2(group_size) * group_size
-	* vect_get_stmt_cost (vector_stmt);
+	* vect_get_stmt_cost (vec_perm);
 
       if (vect_print_dump_info (REPORT_COST))
         fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
@@ -855,7 +895,7 @@ vect_get_load_cost (struct data_reference *dr, int
     case dr_explicit_realign:
       {
         *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
-           + vect_get_stmt_cost (vector_stmt));
+				   + vect_get_stmt_cost (vec_perm));
 
         /* FIXME: If the misalignment remains fixed across the iterations of
            the containing loop, the following cost should be added to the
@@ -863,6 +903,9 @@ vect_get_load_cost (struct data_reference *dr, int
         if (targetm.vectorize.builtin_mask_for_load)
           *inside_cost += vect_get_stmt_cost (vector_stmt);
 
+        if (vect_print_dump_info (REPORT_COST))
+          fprintf (vect_dump, "vect_model_load_cost: explicit realign");
+
         break;
       }
     case dr_explicit_realign_optimized:
@@ -886,7 +929,12 @@ vect_get_load_cost (struct data_reference *dr, int
           }
 
         *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
-          + vect_get_stmt_cost (vector_stmt));
+				   + vect_get_stmt_cost (vec_perm));
+
+        if (vect_print_dump_info (REPORT_COST))
+          fprintf (vect_dump,
+		   "vect_model_load_cost: explicit realign optimized");
+
         break;
       }
 
@@ -2919,7 +2967,7 @@ vectorizable_type_demotion (gimple stmt, gimple_st
       STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "=== vectorizable_demotion ===");
-      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
+      vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
       return true;
     }
 
@@ -3217,7 +3265,7 @@ vectorizable_type_promotion (gimple stmt, gimple_s
       STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "=== vectorizable_promotion ===");
-      vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
+      vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
       return true;
     }
 
Index: gcc/config/spu/spu.c
===================================================================
--- gcc/config/spu/spu.c	(revision 184047)
+++ gcc/config/spu/spu.c	(working copy)
@@ -6794,6 +6794,7 @@ spu_builtin_vectorization_cost (enum vect_cost_for
       case scalar_to_vec:
       case cond_branch_not_taken:
       case vec_perm:
+      case vec_promote_demote:
         return 1;
 
       case scalar_store:
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c	(revision 184047)
+++ gcc/config/i386/i386.c	(working copy)
@@ -32816,7 +32816,8 @@ ix86_builtin_vectorization_cost (enum vect_cost_fo
         return ix86_cost->cond_not_taken_branch_cost;
 
       case vec_perm:
-        return 1;
+      case vec_promote_demote:
+        return ix86_cost->vec_stmt_cost;
 
       default:
         gcc_unreachable ();
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 184047)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -3695,12 +3695,23 @@ rs6000_builtin_vectorization_cost (enum vect_cost_
       case vec_to_scalar:
       case scalar_to_vec:
       case cond_branch_not_taken:
-      case vec_perm:
         return 1;
 
       case cond_branch_taken:
         return 3;
 
+      case vec_perm:
+	if (TARGET_VSX)
+	  return 4;
+	else
+	  return 1;
+
+      case vec_promote_demote:
+	if (TARGET_VSX)
+	  return 5;
+	else
+	  return 1;
+
       case unaligned_load:
         if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
           {



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]