This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix PR58453


The following "fixes" the bogus loop distribution that breaks
434.zeusmp by applying the cost model for -ftree-loop-distribute-patterns
as well (with -O3 -ftree-loop-distribution the bug doesn't reproduce).
This only papers over the real issue, but it is part of the real
fix I am developing (a missing feature that I have also put off for
too long).  Since it unbreaks 434.zeusmp, I'll apply this beforehand.
The real fix, while complete, still needs some TLC and compile-time work.

Bootstrap and regtest are running on x86_64-unknown-linux-gnu; I'll
also double-check SPEC 2k6 before committing this.

Thanks,
Richard.

2013-09-20  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/58453
	* tree-loop-distribution.c (distribute_loop): Apply the cost
	model for -ftree-loop-distribute-patterns, too.

	* gcc.dg/tree-ssa/ldist-23.c: New testcase.

Index: gcc/tree-loop-distribution.c
===================================================================
*** gcc/tree-loop-distribution.c	(revision 202774)
--- gcc/tree-loop-distribution.c	(working copy)
*************** distribute_loop (struct loop *loop, vec<
*** 1514,1531 ****
        any_builtin |= partition_builtin_p (partition);
      }
  
    /* If we are only distributing patterns fuse all partitions that
!      were not properly classified as builtins.  Else fuse partitions
!      with similar memory accesses.  */
    if (!flag_tree_loop_distribution)
      {
        partition_t into;
-       /* If we did not detect any builtin simply bail out.  */
-       if (!any_builtin)
- 	{
- 	  nbp = 0;
- 	  goto ldist_done;
- 	}
        /* Only fuse adjacent non-builtin partitions, see PR53616.
           ???  Use dependence information to improve partition ordering.  */
        i = 0;
--- 1514,1564 ----
        any_builtin |= partition_builtin_p (partition);
      }
  
+   /* If we did not detect any builtin and are not asked to apply
+      regular loop distribution, simply bail out.  */
+   if (!flag_tree_loop_distribution
+       && !any_builtin)
+     {
+       nbp = 0;
+       goto ldist_done;
+     }
+ 
+   /* Apply our simple cost model - fuse partitions with similar
+      memory accesses.  */
+   partition_t into;
+   for (i = 0; partitions.iterate (i, &into); ++i)
+     {
+       if (partition_builtin_p (into))
+ 	continue;
+       for (int j = i + 1;
+ 	   partitions.iterate (j, &partition); ++j)
+ 	{
+ 	  if (!partition_builtin_p (partition)
+ 	      && similar_memory_accesses (rdg, into, partition))
+ 	    {
+ 	      if (dump_file && (dump_flags & TDF_DETAILS))
+ 		{
+ 		  fprintf (dump_file, "fusing partitions\n");
+ 		  dump_bitmap (dump_file, into->stmts);
+ 		  dump_bitmap (dump_file, partition->stmts);
+ 		  fprintf (dump_file, "because they have similar "
+ 			   "memory accesses\n");
+ 		}
+ 	      bitmap_ior_into (into->stmts, partition->stmts);
+ 	      if (partition->kind == PKIND_REDUCTION)
+ 		into->kind = PKIND_REDUCTION;
+ 	      partitions.ordered_remove (j);
+ 	      partition_free (partition);
+ 	      j--;
+ 	    }
+ 	}
+     }
+ 
    /* If we are only distributing patterns fuse all partitions that
!      were not properly classified as builtins.  */
    if (!flag_tree_loop_distribution)
      {
        partition_t into;
        /* Only fuse adjacent non-builtin partitions, see PR53616.
           ???  Use dependence information to improve partition ordering.  */
        i = 0;
*************** distribute_loop (struct loop *loop, vec<
*** 1549,1586 ****
  	}
        while ((unsigned) i < partitions.length ());
      }
-   else
-     {
-       partition_t into;
-       int j;
-       for (i = 0; partitions.iterate (i, &into); ++i)
- 	{
- 	  if (partition_builtin_p (into))
- 	    continue;
- 	  for (j = i + 1;
- 	       partitions.iterate (j, &partition); ++j)
- 	    {
- 	      if (!partition_builtin_p (partition)
- 		  && similar_memory_accesses (rdg, into, partition))
- 		{
- 		  if (dump_file && (dump_flags & TDF_DETAILS))
- 		    {
- 		      fprintf (dump_file, "fusing partitions\n");
- 		      dump_bitmap (dump_file, into->stmts);
- 		      dump_bitmap (dump_file, partition->stmts);
- 		      fprintf (dump_file, "because they have similar "
- 			       "memory accesses\n");
- 		    }
- 		  bitmap_ior_into (into->stmts, partition->stmts);
- 		  if (partition->kind == PKIND_REDUCTION)
- 		    into->kind = PKIND_REDUCTION;
- 		  partitions.ordered_remove (j);
- 		  partition_free (partition);
- 		  j--;
- 		}
- 	    }
- 	}
-     }
  
    /* Fuse all reduction partitions into the last.  */
    if (partitions.length () > 1)
--- 1582,1587 ----
Index: gcc/testsuite/gcc.dg/tree-ssa/ldist-23.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/ldist-23.c	(revision 0)
--- gcc/testsuite/gcc.dg/tree-ssa/ldist-23.c	(working copy)
***************
*** 0 ****
--- 1,34 ----
+ /* { dg-do run } */
+ /* { dg-options "-O3 -fdump-tree-ldist-details" } */
+ 
+ extern void abort (void);
+ 
+ int a[128], b[128], c[128], d[128];
+ 
+ void __attribute__((noinline,noclone))
+ foo (void)
+ {
+   int i;
+   for (i = 0; i < 128; ++i)
+     {
+       a[i] = a[i] + 1;
+       b[i] = d[i];
+       c[i] = a[i] / d[i];
+     }
+ }
+ int main()
+ {
+   int i;
+   for (i = 0; i < 128; ++i)
+     a[i] = i;
+   for (i = 0; i < 128; ++i)
+     d[i] = 1;
+   foo ();
+   if (c[0] != 1)
+     abort ();
+   return 0;
+ }
+ 
+ /* { dg-final { scan-tree-dump "split to 2 loops" "ldist" } } */
+ /* { dg-final { scan-tree-dump "generated memcpy" "ldist" } } */
+ /* { dg-final { cleanup-tree-dump "ldist" } } */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]