[PATCH][GRAPHITE] Fix PR82563

Richard Biener rguenther@suse.de
Tue Oct 17 13:17:00 GMT 2017


PR82573 shows the ugly part of an earlier fix, that we now split the
entry edge of SCOPs during analysis phase to get a GBB for the entry
edge PHI copies.  That invalidates loop-closed SSA in some cases like
the PR.  So the following patch gets rid of that "fake" GBB by explicitely
emitting SESE entry edge copies (sources are all parameters).

This seems to remove quite some "spurious" optimized loop nests from SPEC.
I do see spurious schedule differences detected while the AST generator
still generates the same code, like for gcc.dg/graphite/interchange-1.c.
One of the cases "fixed" with this patch is

[scheduler] original schedule:
domain: "[P_2806, P_364] -> { S_294[] : -2147483648 <= P_2806 <= 
2147483647 and -9223372036854775808 <= P_364 <= 9223372036854775807; 
S_651[] : -2147483648 <= P_2806 <= 2147483647 and -9223372036854775808 <= 
P_364 <= 9223372036854775807; S_210[i33] : -2147483648 <= P_2806 <= 
2147483647 and -9223372036854775808 <= P_364 <= 9223372036854775807 and 0 
<= i33 <= 2147483645 and 4294967296*floor((-1 + P_2806)/4294967296) < 
P_2806 - i33; S_211[i34] : -2147483648 <= P_2806 <= 2147483647 and 
-9223372036854775808 <= P_364 <= 9223372036854775807 and 0 <= i34 <= 
2147483645 and 4294967296*floor((-1 + P_2806)/4294967296) < P_2806 - i34; 
S_687[] : -2147483648 <= P_2806 <= 2147483647 and -9223372036854775808 <= 
P_364 <= 9223372036854775807 }"
child:
  sequence:
  - filter: "[P_2806, P_364] -> { S_687[] }"
  - filter: "[P_2806, P_364] -> { S_210[i33] }"
    child:
      schedule: "[P_2806, P_364] -> L_33[{ S_210[i33] -> [(i33)] }]"
  - filter: "[P_2806, P_364] -> { S_294[] }"
  - filter: "[P_2806, P_364] -> { S_651[] }"
  - filter: "[P_2806, P_364] -> { S_211[i34] }"
    child:
      schedule: "[P_2806, P_364] -> L_34[{ S_211[i34] -> [(i34)] }]"

[scheduler] isl transformed schedule:
domain: "[P_2806, P_364] -> { S_294[] : -2147483648 <= P_2806 <= 
2147483647 and -9223372036854775808 <= P_364 <= 9223372036854775807; 
S_651[] : -2147483648 <= P_2806 <= 2147483647 and -9223372036854775808 <= 
P_364 <= 9223372036854775807; S_210[i33] : -2147483648 <= P_2806 <= 
2147483647 and -9223372036854775808 <= P_364 <= 9223372036854775807 and 0 
<= i33 <= 2147483645 and 4294967296*floor((-1 + P_2806)/4294967296) < 
P_2806 - i33; S_211[i34] : -2147483648 <= P_2806 <= 2147483647 and 
-9223372036854775808 <= P_364 <= 9223372036854775807 and 0 <= i34 <= 
2147483645 and 4294967296*floor((-1 + P_2806)/4294967296) < P_2806 - i34; 
S_687[] : -2147483648 <= P_2806 <= 2147483647 and -9223372036854775808 <= 
P_364 <= 9223372036854775807 }"
child:
  sequence:
  - filter: "[P_2806, P_364] -> { S_210[i33]; S_687[] }"
    child:
      schedule: "[P_2806, P_364] -> [{ S_210[i33] -> [(i33)]; S_687[] -> 
[(0)] }]"
      permutable: 1
      child:
        sequence:
        - filter: "[P_2806, P_364] -> { S_687[] }"
        - filter: "[P_2806, P_364] -> { S_210[i33] }"
  - filter: "[P_2806, P_364] -> { S_294[] }"
  - filter: "[P_2806, P_364] -> { S_651[] }"
  - filter: "[P_2806, P_364] -> { S_211[i34] }"
    child:
      schedule: "[P_2806, P_364] -> [{ S_211[i34] -> [(i34)] }]"
      permutable: 1
      coincident: [ 1 ]

[scheduler] original ast:
{
  S_687();
  for (int c0 = 0; c0 < P_2806; c0 += 1)
    S_210(c0);
  S_294();
  S_651();
  for (int c0 = 0; c0 < P_2806; c0 += 1)
    S_211(c0);
}

[scheduler] AST generated by isl:
{
  S_687();
  for (int c0 = 0; c0 < P_2806; c0 += 1)
    S_210(c0);
  S_294();
  S_651();
  for (int c0 = 0; c0 < P_2806; c0 += 1)
    S_211(c0);
}

where with the patch S_687 () is no longer there (and S_210 depending
on it via RAW).

Overall the number of "optimized" loop nests in SPEC CPU 2006 drops
from 348 to 279.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2017-10-17  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/82563
	* graphite-isl-ast-to-gimple.c (generate_entry_out_of_ssa_copies):
	New function.
	(graphite_regenerate_ast_isl): Call it.
	* graphite-scop-detection.c (build_scops): Remove entry edge split.

	* gcc.dg/graphite/pr82563.c: New testcase.

Index: gcc/graphite-isl-ast-to-gimple.c
===================================================================
--- gcc/graphite-isl-ast-to-gimple.c	(revision 253807)
+++ gcc/graphite-isl-ast-to-gimple.c	(working copy)
@@ -1501,6 +1501,35 @@ copy_internal_parameters (sese_info_p re
     }
 }
 
+/* Generate out-of-SSA copies for the entry edge FALSE_ENTRY/TRUE_ENTRY
+   in REGION.  */
+
+static void
+generate_entry_out_of_ssa_copies (edge false_entry,
+				  edge true_entry,
+				  sese_info_p region)
+{
+  gimple_stmt_iterator gsi_tgt = gsi_start_bb (true_entry->dest);
+  for (gphi_iterator psi = gsi_start_phis (false_entry->dest);
+       !gsi_end_p (psi); gsi_next (&psi))
+    {
+      gphi *phi = psi.phi ();
+      tree res = gimple_phi_result (phi);
+      if (virtual_operand_p (res))
+	continue;
+      /* When there's no out-of-SSA var registered do not bother
+         to create one.  */
+      vec <tree> *renames = region->rename_map->get (res);
+      if (! renames || renames->is_empty ())
+	continue;
+      tree new_phi_def = (*renames)[0];
+      gassign *ass = gimple_build_assign (new_phi_def,
+					  PHI_ARG_DEF_FROM_EDGE (phi,
+								 false_entry));
+      gsi_insert_after (&gsi_tgt, ass, GSI_NEW_STMT);
+    }
+}
+
 /* GIMPLE Loop Generator: generates loops in GIMPLE form for the given SCOP.
    Return true if code generation succeeded.  */
 
@@ -1548,6 +1577,9 @@ graphite_regenerate_ast_isl (scop_p scop
   t.translate_isl_ast (context_loop, root_node, e, ip);
   if (! t.codegen_error_p ())
     {
+      generate_entry_out_of_ssa_copies (if_region->false_region->region.entry,
+					if_region->true_region->region.entry,
+					region);
       sese_insert_phis_for_liveouts (region,
 				     if_region->region->region.exit->src,
 				     if_region->false_region->region.exit,
Index: gcc/graphite-scop-detection.c
===================================================================
--- gcc/graphite-scop-detection.c	(revision 253807)
+++ gcc/graphite-scop-detection.c	(working copy)
@@ -1708,10 +1708,6 @@ build_scops (vec<scop_p> *scops)
   sese_l *s;
   FOR_EACH_VEC_ELT (scops_l, i, s)
     {
-      /* For our out-of-SSA we need a block on s->entry, similar to how
-         we include the LCSSA block in the region.  */
-      s->entry = single_pred_edge (split_edge (s->entry));
-
       scop_p scop = new_scop (s->entry, s->exit);
 
       /* Record all basic blocks and their conditions in REGION.  */
Index: gcc/testsuite/gcc.dg/graphite/pr82563.c
===================================================================
--- gcc/testsuite/gcc.dg/graphite/pr82563.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/graphite/pr82563.c	(working copy)
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -floop-nest-optimize" } */
+
+int tj, cw, xf;
+
+void
+zp (int *ei)
+{
+  for (;;)
+    {
+      int hd = 0;
+
+      if (cw != 0 && xf != 0)
+	{
+	  for (hd = 0; hd < 3; ++hd)
+	    cw = (tj != 0) ? 0 : *ei;
+	  for (;;)
+	    ;
+	}
+
+      while (tj != 0)
+	tj = (__UINTPTR_TYPE__)&hd;
+    }
+}



More information about the Gcc-patches mailing list