[committed] openmp: Improve composite triangular loop lowering and expansion

Jakub Jelinek jakub@redhat.com
Tue Oct 13 07:35:44 GMT 2020


Hi!

This propagates the needed values from the point where the number of
iterations is calculated for composite loops to the places where that
information is needed, so that the more efficient square-root-based discovery
can be used to compute the starting iterator values from the logical iteration number.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2020-10-13  Jakub Jelinek  <jakub@redhat.com>

	* omp-low.c (add_taskreg_looptemp_clauses): For triangular loops
	with non-constant number of iterations add another 4 _looptemp_
	clauses before the (optional) one for lastprivate.
	(lower_omp_for_lastprivate): Skip those clauses when looking for
	the lastprivate clause.
	(lower_omp_for): For triangular loops with non-constant number of
	iterations add another 4 _looptemp_ clauses.
	* omp-expand.c (expand_omp_for_init_counts): For triangular loops
	with non-constant number of iterations set counts[0],
	fd->first_inner_iterations, fd->factor and fd->adjn1 from the newly
	added _looptemp_ clauses.
	(expand_omp_for_init_vars): Initialize the newly added _looptemp_
	clauses.
	(find_lastprivate_looptemp): New function.
	(expand_omp_for_static_nochunk, expand_omp_for_static_chunk,
	expand_omp_taskloop_for_outer): Use it instead of manually skipping
	_looptemp_ clauses.

--- gcc/omp-low.c.jj	2020-10-08 11:10:24.109546260 +0200
+++ gcc/omp-low.c	2020-10-12 19:43:10.670666202 +0200
@@ -1919,12 +1919,38 @@ add_taskreg_looptemp_clauses (enum gf_ma
 	     GIMPLE_OMP_FOR, add one more temporaries for the total number
 	     of iterations (product of count1 ... countN-1).  */
 	  if (omp_find_clause (gimple_omp_for_clauses (for_stmt),
-			       OMP_CLAUSE_LASTPRIVATE))
-	    count++;
-	  else if (msk == GF_OMP_FOR_KIND_FOR
-		   && omp_find_clause (gimple_omp_parallel_clauses (stmt),
-				       OMP_CLAUSE_LASTPRIVATE))
-	    count++;
+			       OMP_CLAUSE_LASTPRIVATE)
+	      || (msk == GF_OMP_FOR_KIND_FOR
+		  && omp_find_clause (gimple_omp_parallel_clauses (stmt),
+				      OMP_CLAUSE_LASTPRIVATE)))
+	    {
+	      tree temp = create_tmp_var (type);
+	      tree c = build_omp_clause (UNKNOWN_LOCATION,
+					 OMP_CLAUSE__LOOPTEMP_);
+	      insert_decl_map (&outer_ctx->cb, temp, temp);
+	      OMP_CLAUSE_DECL (c) = temp;
+	      OMP_CLAUSE_CHAIN (c) = gimple_omp_taskreg_clauses (stmt);
+	      gimple_omp_taskreg_set_clauses (stmt, c);
+	    }
+	  if (fd.non_rect
+	      && fd.last_nonrect == fd.first_nonrect + 1)
+	    if (tree v = gimple_omp_for_index (for_stmt, fd.last_nonrect))
+	      if (!TYPE_UNSIGNED (TREE_TYPE (v)))
+		{
+		  v = gimple_omp_for_index (for_stmt, fd.first_nonrect);
+		  tree type2 = TREE_TYPE (v);
+		  count++;
+		  for (i = 0; i < 3; i++)
+		    {
+		      tree temp = create_tmp_var (type2);
+		      tree c = build_omp_clause (UNKNOWN_LOCATION,
+						 OMP_CLAUSE__LOOPTEMP_);
+		      insert_decl_map (&outer_ctx->cb, temp, temp);
+		      OMP_CLAUSE_DECL (c) = temp;
+		      OMP_CLAUSE_CHAIN (c) = gimple_omp_taskreg_clauses (stmt);
+		      gimple_omp_taskreg_set_clauses (stmt, c);
+		    }
+		}
 	}
       for (i = 0; i < count; i++)
 	{
@@ -9530,7 +9556,13 @@ lower_omp_for_lastprivate (struct omp_fo
 	  tree innerc = omp_find_clause (taskreg_clauses,
 					 OMP_CLAUSE__LOOPTEMP_);
 	  gcc_assert (innerc);
-	  for (i = 0; i < fd->collapse; i++)
+	  int count = fd->collapse;
+	  if (fd->non_rect
+	      && fd->last_nonrect == fd->first_nonrect + 1)
+	    if (tree v = gimple_omp_for_index (fd->for_stmt, fd->last_nonrect))
+	      if (!TYPE_UNSIGNED (TREE_TYPE (v)))
+		count += 4;
+	  for (i = 0; i < count; i++)
 	    {
 	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
 					OMP_CLAUSE__LOOPTEMP_);
@@ -10453,12 +10485,26 @@ lower_omp_for (gimple_stmt_iterator *gsi
       if (fd.collapse > 1
 	  && TREE_CODE (fd.loop.n2) != INTEGER_CST)
 	count += fd.collapse - 1;
+      size_t count2 = 0;
+      tree type2 = NULL_TREE;
       bool taskreg_for
 	= (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_FOR
 	   || gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_TASKLOOP);
       tree outerc = NULL, *pc = gimple_omp_for_clauses_ptr (stmt);
       tree simtc = NULL;
       tree clauses = *pc;
+      if (fd.collapse > 1
+	  && fd.non_rect
+	  && fd.last_nonrect == fd.first_nonrect + 1
+	  && TREE_CODE (fd.loop.n2) != INTEGER_CST)
+	if (tree v = gimple_omp_for_index (stmt, fd.last_nonrect))
+	  if (!TYPE_UNSIGNED (TREE_TYPE (v)))
+	    {
+	      v = gimple_omp_for_index (stmt, fd.first_nonrect);
+	      type2 = TREE_TYPE (v);
+	      count++;
+	      count2 = 3;
+	    }
       if (taskreg_for)
 	outerc
 	  = omp_find_clause (gimple_omp_taskreg_clauses (ctx->outer->stmt),
@@ -10466,7 +10512,7 @@ lower_omp_for (gimple_stmt_iterator *gsi
       if (ctx->simt_stmt)
 	simtc = omp_find_clause (gimple_omp_for_clauses (ctx->simt_stmt),
 				 OMP_CLAUSE__LOOPTEMP_);
-      for (i = 0; i < count; i++)
+      for (i = 0; i < count + count2; i++)
 	{
 	  tree temp;
 	  if (taskreg_for)
@@ -10485,7 +10531,7 @@ lower_omp_for (gimple_stmt_iterator *gsi
 	      if (ctx->simt_stmt)
 		temp = OMP_CLAUSE_DECL (simtc);
 	      else
-		temp = create_tmp_var (type);
+		temp = create_tmp_var (i >= count ? type2 : type);
 	      insert_decl_map (&ctx->outer->cb, temp, temp);
 	    }
 	  *pc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__LOOPTEMP_);
--- gcc/omp-expand.c.jj	2020-10-07 10:49:28.345534230 +0200
+++ gcc/omp-expand.c	2020-10-12 19:21:48.942242673 +0200
@@ -1790,6 +1790,23 @@ expand_omp_for_init_counts (struct omp_f
 	  else
 	    counts[0] = NULL_TREE;
 	}
+      if (fd->non_rect
+	  && fd->last_nonrect == fd->first_nonrect + 1
+	  && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
+	{
+	  tree c[4];
+	  for (i = 0; i < 4; i++)
+	    {
+	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
+					OMP_CLAUSE__LOOPTEMP_);
+	      gcc_assert (innerc);
+	      c[i] = OMP_CLAUSE_DECL (innerc);
+	    }
+	  counts[0] = c[0];
+	  fd->first_inner_iterations = c[1];
+	  fd->factor = c[2];
+	  fd->adjn1 = c[3];
+	}
       return;
     }
 
@@ -2434,7 +2451,12 @@ expand_omp_for_init_vars (struct omp_for
 	 use it.  */
       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
       gcc_assert (innerc);
-      for (i = 0; i < fd->collapse; i++)
+      int count = 0;
+      if (fd->non_rect
+	  && fd->last_nonrect == fd->first_nonrect + 1
+	  && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
+	count = 4;
+      for (i = 0; i < fd->collapse + count; i++)
 	{
 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
 				    OMP_CLAUSE__LOOPTEMP_);
@@ -2442,7 +2464,19 @@ expand_omp_for_init_vars (struct omp_for
 	  if (i)
 	    {
 	      tree tem = OMP_CLAUSE_DECL (innerc);
-	      tree t = fold_convert (TREE_TYPE (tem), counts[i]);
+	      tree t;
+	      if (i < fd->collapse)
+		t = counts[i];
+	      else
+		switch (i - fd->collapse)
+		  {
+		  case 0: t = counts[0]; break;
+		  case 1: t = fd->first_inner_iterations; break;
+		  case 2: t = fd->factor; break;
+		  case 3: t = fd->adjn1; break;
+		  default: gcc_unreachable ();
+		  }
+	      t = fold_convert (TREE_TYPE (tem), t);
 	      t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
 					    false, GSI_CONTINUE_LINKING);
 	      gassign *stmt = gimple_build_assign (tem, t);
@@ -2478,10 +2512,7 @@ expand_omp_for_init_vars (struct omp_for
 	  basic_block bb_triang = NULL, bb_triang_dom = NULL;
 	  if (fd->first_nonrect + 1 == fd->last_nonrect
 	      && (TREE_CODE (fd->loop.n2) == INTEGER_CST
-		  || (fd->first_inner_iterations
-		      /* For now.  Later add clauses to propagate the
-			 values.  */
-		      && !gimple_omp_for_combined_into_p (fd->for_stmt)))
+		  || fd->first_inner_iterations)
 	      && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
 		  != CODE_FOR_nothing))
 	    {
@@ -4641,6 +4672,35 @@ expand_omp_scantemp_alloc (tree clauses,
     return ptr;
 }
 
+/* Return the last _looptemp_ clause if one has been created for
+   lastprivate on distribute parallel for{, simd} or taskloop, or a
+   null tree if there is none.  FD is the loop data and INNERC should be
+   the second _looptemp_ clause (the one holding the end of the range).
+   That clause is followed by collapse - 1 _looptemp_ clauses for
+   counts[1] and up, and for triangular loops whose inner iterator is
+   signed by 4 further _looptemp_ clauses (for counts[0],
+   first_inner_iterations, factor and adjn1).  After these there is
+   optionally one more _looptemp_ clause, which this function returns.  */
+
+static tree
+find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
+{
+  gcc_assert (innerc);
+  int count = fd->collapse - 1;
+  if (fd->non_rect
+      && fd->last_nonrect == fd->first_nonrect + 1
+      && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
+    count += 4;
+  for (int i = 0; i < count; i++)
+    {
+      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
+				OMP_CLAUSE__LOOPTEMP_);
+      gcc_assert (innerc);
+    }
+  return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
+			  OMP_CLAUSE__LOOPTEMP_);
+}
+
 /* A subroutine of expand_omp_for.  Generate code for a parallel
    loop with static schedule and no specified chunk size.  Given
    parameters:
@@ -5065,15 +5125,7 @@ expand_omp_for_static_nochunk (struct om
       if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
 	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
 	{
-	  int i;
-	  for (i = 1; i < fd->collapse; i++)
-	    {
-	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
-					OMP_CLAUSE__LOOPTEMP_);
-	      gcc_assert (innerc);
-	    }
-	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
-				    OMP_CLAUSE__LOOPTEMP_);
+	  innerc = find_lastprivate_looptemp (fd, innerc);
 	  if (innerc)
 	    {
 	      /* If needed (distribute parallel for with lastprivate),
@@ -5790,15 +5842,7 @@ expand_omp_for_static_chunk (struct omp_
       if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
 	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
 	{
-	  int i;
-	  for (i = 1; i < fd->collapse; i++)
-	    {
-	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
-					OMP_CLAUSE__LOOPTEMP_);
-	      gcc_assert (innerc);
-	    }
-	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
-				    OMP_CLAUSE__LOOPTEMP_);
+	  innerc = find_lastprivate_looptemp (fd, innerc);
 	  if (innerc)
 	    {
 	      /* If needed (distribute parallel for with lastprivate),
@@ -7041,15 +7085,7 @@ expand_omp_taskloop_for_outer (struct om
   tree endvar = OMP_CLAUSE_DECL (innerc);
   if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
     {
-      gcc_assert (innerc);
-      for (i = 1; i < fd->collapse; i++)
-	{
-	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
-				    OMP_CLAUSE__LOOPTEMP_);
-	  gcc_assert (innerc);
-	}
-      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
-				OMP_CLAUSE__LOOPTEMP_);
+      innerc = find_lastprivate_looptemp (fd, innerc);
       if (innerc)
 	{
 	  /* If needed (inner taskloop has lastprivate clause), propagate


	Jakub



More information about the Gcc-patches mailing list