This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[patch] Fix epilogue for double reduction vectorization


Hi,

When vectorizing double reductions:

sum = 0
for i
  {
    for j
      sum +=...
  }
use sum

scalar extraction should be done on the exit from the outer loop:

vsum = 0
for vi
  {
    for vj
      vsum += ...
  }
scalar_sum = reduce_vector_to_scalar (vsum)
use scalar_sum

and not inside the outer loop, as is done now (the current code is correct,
but not optimal). This patch fixes this problem.

Bootstrapped and tested on x86_64-suse-linux.
Committed.

Ira

ChangeLog:

	* tree-vect-loop.c (vect_create_epilog_for_reduction): Switch
	to outer loop when creating reduction epilogue for double reduction,
	and switch back to the inner loop when updating the phi nodes.
	Update uses of outer loop exit phi nodes in double reduction (instead
	of uses of reduction).

Index: tree-vect-loop.c
===================================================================
--- tree-vect-loop.c    (revision 162620)
+++ tree-vect-loop.c    (working copy)
@@ -3101,8 +3101,8 @@ vect_create_epilog_for_reduction (VEC (t
   tree vec_initial_def = NULL;
   tree reduction_op, expr, def;
   tree orig_name, scalar_result;
-  imm_use_iterator imm_iter;
-  use_operand_p use_p;
+  imm_use_iterator imm_iter, phi_imm_iter;
+  use_operand_p use_p, phi_use_p;
   bool extract_scalar_result = false;
   gimple use_stmt, orig_stmt, reduction_phi = NULL;
   bool nested_in_vect_loop = false;
@@ -3264,6 +3264,14 @@ vect_create_epilog_for_reduction (VEC (t
         }
     }

+  /* The epilogue is created for the outer-loop, i.e., for the loop being
+     vectorized.  */
+  if (double_reduc)
+    {
+      loop = outer_loop;
+      exit_bb = single_exit (loop)->dest;
+    }
+
   exit_gsi = gsi_after_labels (exit_bb);

   /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
@@ -3519,6 +3527,9 @@ vect_create_epilog_for_reduction (VEC (t

 vect_finalize_reduction:

+  if (double_reduc)
+    loop = loop->inner;
+
   /* 2.5 Adjust the final result by the initial value of the reduction
         variable. (When such adjustment is not needed, then
         'adjustment_def' is zero).  For example, if code is PLUS we create:
@@ -3738,7 +3749,45 @@ vect_finalize_reduction:
                     }
                 }
             }
+        }

+      VEC_free (gimple, heap, phis);
+      if (nested_in_vect_loop)
+        {
+          if (double_reduc)
+            loop = outer_loop;
+          else
+            continue;
+        }
+
+      phis = VEC_alloc (gimple, heap, 3);
+      /* Find the loop-closed-use at the loop exit of the original scalar
+         result. (The reduction result is expected to have two immediate uses -
+         one at the latch block, and one at the loop exit). For double
+         reductions we are looking for exit phis of the outer loop.  */
+      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
+        {
+          if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
+            VEC_safe_push (gimple, heap, phis, USE_STMT (use_p));
+          else
+            {
+              if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
+                {
+                  tree phi_res = PHI_RESULT (USE_STMT (use_p));
+
+                  FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
+                    {
+                      if (!flow_bb_inside_loop_p (loop,
+                                             gimple_bb (USE_STMT (phi_use_p))))
+                        VEC_safe_push (gimple, heap, phis,
+                                       USE_STMT (phi_use_p));
+                    }
+                }
+            }
+        }
+
+      for (i = 0; VEC_iterate (gimple, phis, i, exit_phi); i++)
+        {
           /* Replace the uses:  */
           orig_name = PHI_RESULT (exit_phi);
           scalar_result = VEC_index (tree, scalar_results, k);



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]