[gcc r15-1727] vect: Determine input vectype for multiple lane-reducing operations

Feng Xue fxue@gcc.gnu.org
Sun Jun 30 07:07:41 GMT 2024


https://gcc.gnu.org/g:3aa004f1db327d5728a8fd0afcfed24e767f0499

commit r15-1727-g3aa004f1db327d5728a8fd0afcfed24e767f0499
Author: Feng Xue <fxue@os.amperecomputing.com>
Date:   Sun Jun 16 13:00:32 2024 +0800

    vect: Determine input vectype for multiple lane-reducing operations
    
    The input vectype of the reduction PHI statement must be determined before
    vect cost computation for the reduction.  Since a lane-reducing operation
    has a different input vectype from a normal one, we need to traverse all
    reduction statements to find the input vectype with the least lanes, and
    record that on the PHI statement.
    
    2024-06-16 Feng Xue <fxue@os.amperecomputing.com>
    
    gcc/
            * tree-vect-loop.cc (vectorizable_reduction): Determine input vectype
            during traversal of reduction statements.

Diff:
---
 gcc/tree-vect-loop.cc | 79 ++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 56 insertions(+), 23 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 6f32867f85a..3095ff5ab6b 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -7643,7 +7643,9 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
     {
       stmt_vec_info def = loop_vinfo->lookup_def (reduc_def);
       stmt_vec_info vdef = vect_stmt_to_vectorize (def);
-      if (STMT_VINFO_REDUC_IDX (vdef) == -1)
+      int reduc_idx = STMT_VINFO_REDUC_IDX (vdef);
+
+      if (reduc_idx == -1)
 	{
 	  if (dump_enabled_p ())
 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -7686,10 +7688,57 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
 	      return false;
 	    }
 	}
-      else if (!stmt_info)
-	/* First non-conversion stmt.  */
-	stmt_info = vdef;
-      reduc_def = op.ops[STMT_VINFO_REDUC_IDX (vdef)];
+      else
+	{
+	  /* First non-conversion stmt.  */
+	  if (!stmt_info)
+	    stmt_info = vdef;
+
+	  if (lane_reducing_op_p (op.code))
+	    {
+	      enum vect_def_type dt;
+	      tree vectype_op;
+
+	      /* The last operand of lane-reducing operation is for
+		 reduction.  */
+	      gcc_assert (reduc_idx > 0 && reduc_idx == (int) op.num_ops - 1);
+
+	      if (!vect_is_simple_use (op.ops[0], loop_vinfo, &dt, &vectype_op))
+		return false;
+	      
+	      tree type_op = TREE_TYPE (op.ops[0]);
+
+	      if (!vectype_op)
+		{	      
+		  vectype_op = get_vectype_for_scalar_type (loop_vinfo,
+							    type_op);
+		  if (!vectype_op)
+		    return false;
+		}
+
+	      /* For lane-reducing operation vectorizable analysis needs the
+		 reduction PHI information */
+	      STMT_VINFO_REDUC_DEF (def) = phi_info;
+
+	      /* Each lane-reducing operation has its own input vectype, while
+		 reduction PHI will record the input vectype with the least
+		 lanes.  */
+	      STMT_VINFO_REDUC_VECTYPE_IN (vdef) = vectype_op;
+
+	      /* To accommodate lane-reducing operations of mixed input
+		 vectypes, choose input vectype with the least lanes for the
+		 reduction PHI statement, which would result in the most
+		 ncopies for vectorized reduction results.  */
+	      if (!vectype_in
+		  || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in)))
+		       < GET_MODE_SIZE (SCALAR_TYPE_MODE (type_op))))
+		vectype_in = vectype_op;
+	    }
+	  else
+	    vectype_in = STMT_VINFO_VECTYPE (phi_info);
+	}
+
+      reduc_def = op.ops[reduc_idx];
       reduc_chain_length++;
       if (!stmt_info && slp_node)
 	slp_for_stmt_info = SLP_TREE_CHILDREN (slp_for_stmt_info)[0];
@@ -7747,6 +7796,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
 
   tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
   STMT_VINFO_REDUC_VECTYPE (reduc_info) = vectype_out;
+  STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in;
+
   gimple_match_op op;
   if (!gimple_extract_op (stmt_info->stmt, &op))
     gcc_unreachable ();
@@ -7831,16 +7882,6 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
 	  = get_vectype_for_scalar_type (loop_vinfo,
 					 TREE_TYPE (op.ops[i]), slp_op[i]);
 
-      /* To properly compute ncopies we are interested in the widest
-	 non-reduction input type in case we're looking at a widening
-	 accumulation that we later handle in vect_transform_reduction.  */
-      if (lane_reducing
-	  && vectype_op[i]
-	  && (!vectype_in
-	      || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in)))
-		  < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_op[i]))))))
-	vectype_in = vectype_op[i];
-
       /* Record how the non-reduction-def value of COND_EXPR is defined.
 	 ???  For a chain of multiple CONDs we'd have to match them up all.  */
       if (op.code == COND_EXPR && reduc_chain_length == 1)
@@ -7859,14 +7900,6 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
 	    }
 	}
     }
-  if (!vectype_in)
-    vectype_in = STMT_VINFO_VECTYPE (phi_info);
-  STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in;
-
-  /* Each lane-reducing operation has its own input vectype, while reduction
-     PHI records the input vectype with least lanes.  */
-  if (lane_reducing)
-    STMT_VINFO_REDUC_VECTYPE_IN (stmt_info) = vectype_in;
 
   enum vect_reduction_type reduction_type = STMT_VINFO_REDUC_TYPE (phi_info);
   STMT_VINFO_REDUC_TYPE (reduc_info) = reduction_type;


More information about the Gcc-cvs mailing list