[PATCH] More loop distribution TLC

Richard Biener rguenther@suse.de
Wed Oct 2 12:58:00 GMT 2013


I split out some TLC to loop distribution from a patch I'll post
shortly.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2013-10-02  Richard Biener  <rguenther@suse.de>

	* tree-loop-distribution.c: Include tree-vectorizer.h for
	find_loop_location.
	(enum partition_kind): Remove PKIND_REDUCTION.
	(struct partition_s): Remove has_writes member, add reduction_p
	member.
	(partition_alloc): Adjust.
	(partition_builtin_p): Likewise.
	(partition_has_writes): Remove.
	(partition_reduction_p): New function.
	(partition_merge_into): Likewise.
	(generate_code_for_partition): Commonize builtin partition
	handling tail.
	(rdg_cannot_recompute_vertex_p): Remove.
	(already_processed_vertex_p): Likewise.
	(rdg_flag_vertex): Do not set has_writes.
	(classify_partition): Adjust.
	(rdg_build_partitions): Do not set has_writes, treat all
	partitions as useful.
	(distribute_loop): Record number of library calls generated.
	Adjust.
	(tree_loop_distribution): Report number of loops and library
	calls generated as opt-info.

	* gcc.dg/tree-ssa/ldist-11.c: Adjust.
	* gcc.dg/tree-ssa/ldist-17.c: Likewise.
	* gcc.dg/tree-ssa/ldist-23.c: Likewise.
	* gcc.dg/tree-ssa/ldist-pr45948.c: Likewise.
	* gfortran.dg/ldist-pr45199.f: Likewise.

Index: gcc/tree-loop-distribution.c
===================================================================
*** gcc/tree-loop-distribution.c.orig	2013-10-02 14:42:48.000000000 +0200
--- gcc/tree-loop-distribution.c	2013-10-02 14:49:05.775900940 +0200
*************** along with GCC; see the file COPYING3.
*** 51,56 ****
--- 51,57 ----
  #include "tree-scalar-evolution.h"
  #include "tree-pass.h"
  #include "gimple-pretty-print.h"
+ #include "tree-vectorizer.h"
  
  
  /* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
*************** build_rdg (vec<loop_p> loop_nest, contro
*** 557,570 ****
  
  
  enum partition_kind {
!     PKIND_NORMAL, PKIND_REDUCTION, PKIND_MEMSET, PKIND_MEMCPY
  };
  
  typedef struct partition_s
  {
    bitmap stmts;
    bitmap loops;
!   bool has_writes;
    enum partition_kind kind;
    /* data-references a kind != PKIND_NORMAL partition is about.  */
    data_reference_p main_dr;
--- 558,571 ----
  
  
  enum partition_kind {
!     PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
  };
  
  typedef struct partition_s
  {
    bitmap stmts;
    bitmap loops;
!   bool reduction_p;
    enum partition_kind kind;
    /* data-references a kind != PKIND_NORMAL partition is about.  */
    data_reference_p main_dr;
*************** partition_alloc (bitmap stmts, bitmap lo
*** 581,587 ****
    partition_t partition = XCNEW (struct partition_s);
    partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
    partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
!   partition->has_writes = false;
    partition->kind = PKIND_NORMAL;
    return partition;
  }
--- 582,588 ----
    partition_t partition = XCNEW (struct partition_s);
    partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
    partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
!   partition->reduction_p = false;
    partition->kind = PKIND_NORMAL;
    return partition;
  }
*************** partition_free (partition_t partition)
*** 601,617 ****
  static bool
  partition_builtin_p (partition_t partition)
  {
!   return partition->kind > PKIND_REDUCTION;
  }
  
! /* Returns true if the partition has an writes.  */
  
  static bool
! partition_has_writes (partition_t partition)
  {
!   return partition->has_writes;
  }
  
  /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
     the LOOP.  */
  
--- 602,630 ----
  static bool
  partition_builtin_p (partition_t partition)
  {
!   return partition->kind != PKIND_NORMAL;
  }
  
! /* Returns true if the partition contains a reduction.  */
  
  static bool
! partition_reduction_p (partition_t partition)
  {
!   return partition->reduction_p;
  }
  
+ /* Merge PARTITION into the partition DEST.  */
+ 
+ static void
+ partition_merge_into (partition_t dest, partition_t partition)
+ {
+   dest->kind = PKIND_NORMAL;
+   bitmap_ior_into (dest->stmts, partition->stmts);
+   if (partition_reduction_p (partition))
+     dest->reduction_p = true;
+ }
+ 
+ 
  /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
     the LOOP.  */
  
*************** generate_code_for_partition (struct loop
*** 998,1055 ****
  {
    switch (partition->kind)
      {
      case PKIND_MEMSET:
        generate_memset_builtin (loop, partition);
-       /* If this is the last partition for which we generate code, we have
- 	 to destroy the loop.  */
-       if (!copy_p)
- 	destroy_loop (loop);
        break;
  
      case PKIND_MEMCPY:
        generate_memcpy_builtin (loop, partition);
-       /* If this is the last partition for which we generate code, we have
- 	 to destroy the loop.  */
-       if (!copy_p)
- 	destroy_loop (loop);
-       break;
- 
-     case PKIND_REDUCTION:
-       /* Reductions all have to be in the last partition.  */
-       gcc_assert (!copy_p);
-     case PKIND_NORMAL:
-       generate_loops_for_partition (loop, partition, copy_p);
        break;
  
      default:
        gcc_unreachable ();
      }
- }
- 
- 
- /* Returns true if the node V of RDG cannot be recomputed.  */
- 
- static bool
- rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
- {
-   if (RDG_MEM_WRITE_STMT (rdg, v))
-     return true;
  
!   return false;
! }
! 
! /* Returns true when the vertex V has already been generated in the
!    current partition (V is in PROCESSED), or when V belongs to another
!    partition and cannot be recomputed (V is not in REMAINING_STMTS).  */
! 
! static inline bool
! already_processed_vertex_p (bitmap processed, int v)
! {
!   return bitmap_bit_p (processed, v);
  }
  
- static void rdg_flag_vertex_and_dependent (struct graph *, int, partition_t,
- 					   bitmap);
  
  /* Flag V from RDG as part of PARTITION, and also flag its loop number
     in LOOPS.  */
--- 1011,1041 ----
  {
    switch (partition->kind)
      {
+     case PKIND_NORMAL:
+       /* Reductions all have to be in the last partition.  */
+       gcc_assert (!partition_reduction_p (partition)
+ 		  || !copy_p);
+       generate_loops_for_partition (loop, partition, copy_p);
+       return;
+ 
      case PKIND_MEMSET:
        generate_memset_builtin (loop, partition);
        break;
  
      case PKIND_MEMCPY:
        generate_memcpy_builtin (loop, partition);
        break;
  
      default:
        gcc_unreachable ();
      }
  
!   /* Common tail for partitions we turn into a call.  If this was the last
!      partition for which we generate code, we have to destroy the loop.  */
!   if (!copy_p)
!     destroy_loop (loop);
  }
  
  
  /* Flag V from RDG as part of PARTITION, and also flag its loop number
     in LOOPS.  */
*************** rdg_flag_vertex (struct graph *rdg, int
*** 1064,1072 ****
  
    loop = loop_containing_stmt (RDG_STMT (rdg, v));
    bitmap_set_bit (partition->loops, loop->num);
- 
-   if (rdg_cannot_recompute_vertex_p (rdg, v))
-     partition->has_writes = true;
  }
  
  /* Flag in the bitmap PARTITION the vertex V and all its predecessors.
--- 1050,1055 ----
*************** classify_partition (loop_p loop, struct
*** 1127,1141 ****
        if (gimple_has_volatile_ops (stmt))
  	volatiles_p = true;
  
!       /* If the stmt has uses outside of the loop fail.
! 	 ???  If the stmt is generated in another partition that
! 	 is not created as builtin we can ignore this.  */
        if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
  	{
! 	  if (dump_file && (dump_flags & TDF_DETAILS))
! 	    fprintf (dump_file, "not generating builtin, partition has "
! 		     "scalar uses outside of the loop\n");
! 	  partition->kind = PKIND_REDUCTION;
  	  return;
  	}
      }
--- 1110,1119 ----
        if (gimple_has_volatile_ops (stmt))
  	volatiles_p = true;
  
!       /* If the stmt has uses outside of the loop mark it as reduction.  */
        if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
  	{
! 	  partition->reduction_p = true;
  	  return;
  	}
      }
*************** rdg_build_partitions (struct graph *rdg,
*** 1356,1376 ****
  
        np = build_rdg_partition_for_vertex (rdg, v);
        bitmap_ior_into (partition->stmts, np->stmts);
-       partition->has_writes = partition_has_writes (np);
        bitmap_ior_into (processed, np->stmts);
        partition_free (np);
  
!       if (partition_has_writes (partition))
  	{
! 	  if (dump_file && (dump_flags & TDF_DETAILS))
! 	    {
! 	      fprintf (dump_file, "ldist useful partition:\n");
! 	      dump_bitmap (dump_file, partition->stmts);
! 	    }
! 
! 	  partitions->safe_push (partition);
! 	  partition = partition_alloc (NULL, NULL);
  	}
      }
  
    /* All vertices should have been assigned to at least one partition now,
--- 1334,1350 ----
  
        np = build_rdg_partition_for_vertex (rdg, v);
        bitmap_ior_into (partition->stmts, np->stmts);
        bitmap_ior_into (processed, np->stmts);
        partition_free (np);
  
!       if (dump_file && (dump_flags & TDF_DETAILS))
  	{
! 	  fprintf (dump_file, "ldist useful partition:\n");
! 	  dump_bitmap (dump_file, partition->stmts);
  	}
+ 
+       partitions->safe_push (partition);
+       partition = partition_alloc (NULL, NULL);
      }
  
    /* All vertices should have been assigned to at least one partition now,
*************** partition_contains_all_rw (struct graph
*** 1480,1486 ****
  
  static int
  distribute_loop (struct loop *loop, vec<gimple> stmts,
! 		 control_dependences *cd)
  {
    struct graph *rdg;
    vec<loop_p> loop_nest;
--- 1454,1460 ----
  
  static int
  distribute_loop (struct loop *loop, vec<gimple> stmts,
! 		 control_dependences *cd, int *nb_calls)
  {
    struct graph *rdg;
    vec<loop_p> loop_nest;
*************** distribute_loop (struct loop *loop, vec<
*** 1489,1494 ****
--- 1463,1469 ----
    bool any_builtin;
    int i, nbp;
  
+   *nb_calls = 0;
    loop_nest.create (3);
    if (!find_loop_nest (loop, &loop_nest))
      {
*************** distribute_loop (struct loop *loop, vec<
*** 1551,1559 ****
  		  fprintf (dump_file, "because they have similar "
  			   "memory accesses\n");
  		}
! 	      bitmap_ior_into (into->stmts, partition->stmts);
! 	      if (partition->kind == PKIND_REDUCTION)
! 		into->kind = PKIND_REDUCTION;
  	      partitions.ordered_remove (j);
  	      partition_free (partition);
  	      j--;
--- 1526,1532 ----
  		  fprintf (dump_file, "because they have similar "
  			   "memory accesses\n");
  		}
! 	      partition_merge_into (into, partition);
  	      partitions.ordered_remove (j);
  	      partition_free (partition);
  	      j--;
*************** distribute_loop (struct loop *loop, vec<
*** 1577,1585 ****
  	  for (++i; partitions.iterate (i, &partition); ++i)
  	    if (!partition_builtin_p (partition))
  	      {
! 		bitmap_ior_into (into->stmts, partition->stmts);
! 		if (partition->kind == PKIND_REDUCTION)
! 		  into->kind = PKIND_REDUCTION;
  		partitions.ordered_remove (i);
  		partition_free (partition);
  		i--;
--- 1550,1556 ----
  	  for (++i; partitions.iterate (i, &partition); ++i)
  	    if (!partition_builtin_p (partition))
  	      {
! 		partition_merge_into (into, partition);
  		partitions.ordered_remove (i);
  		partition_free (partition);
  		i--;
*************** distribute_loop (struct loop *loop, vec<
*** 1597,1603 ****
        for (i = partitions.length () - 2; i >= 0; --i)
  	{
  	  partition_t what = partitions[i];
! 	  if (what->kind == PKIND_REDUCTION)
  	    {
  	      if (dump_file && (dump_flags & TDF_DETAILS))
  		{
--- 1568,1574 ----
        for (i = partitions.length () - 2; i >= 0; --i)
  	{
  	  partition_t what = partitions[i];
! 	  if (partition_reduction_p (what))
  	    {
  	      if (dump_file && (dump_flags & TDF_DETAILS))
  		{
*************** distribute_loop (struct loop *loop, vec<
*** 1606,1613 ****
  		  dump_bitmap (dump_file, what->stmts);
  		  fprintf (dump_file, "because the latter has reductions\n");
  		}
! 	      bitmap_ior_into (into->stmts, what->stmts);
! 	      into->kind = PKIND_REDUCTION;
  	      partitions.ordered_remove (i);
  	      partition_free (what);
  	    }
--- 1577,1583 ----
  		  dump_bitmap (dump_file, what->stmts);
  		  fprintf (dump_file, "because the latter has reductions\n");
  		}
! 	      partition_merge_into (into, what);
  	      partitions.ordered_remove (i);
  	      partition_free (what);
  	    }
*************** distribute_loop (struct loop *loop, vec<
*** 1627,1633 ****
      dump_rdg_partitions (dump_file, partitions);
  
    FOR_EACH_VEC_ELT (partitions, i, partition)
!     generate_code_for_partition (loop, partition, i < nbp - 1);
  
   ldist_done:
  
--- 1597,1607 ----
      dump_rdg_partitions (dump_file, partitions);
  
    FOR_EACH_VEC_ELT (partitions, i, partition)
!     {
!       if (partition_builtin_p (partition))
! 	(*nb_calls)++;
!       generate_code_for_partition (loop, partition, i < nbp - 1);
!     }
  
   ldist_done:
  
*************** distribute_loop (struct loop *loop, vec<
*** 1637,1643 ****
  
    free_rdg (rdg);
    loop_nest.release ();
!   return nbp;
  }
  
  /* Distribute all loops in the current function.  */
--- 1611,1617 ----
  
    free_rdg (rdg);
    loop_nest.release ();
!   return nbp - *nb_calls;
  }
  
  /* Distribute all loops in the current function.  */
*************** tree_loop_distribution (void)
*** 1667,1673 ****
        vec<gimple> work_list = vNULL;
        basic_block *bbs;
        int num = loop->num;
-       int nb_generated_loops = 0;
        unsigned int i;
  
        /* If the loop doesn't have a single exit we will fail anyway,
--- 1641,1646 ----
*************** tree_loop_distribution (void)
*** 1722,1727 ****
--- 1695,1703 ----
  out:
        free (bbs);
  
+       int nb_generated_loops = 0;
+       int nb_generated_calls = 0;
+       location_t loc = find_loop_location (loop);
        if (work_list.length () > 0)
  	{
  	  if (!cd)
*************** out:
*** 1731,1750 ****
  	      cd = new control_dependences (create_edge_list ());
  	      free_dominance_info (CDI_POST_DOMINATORS);
  	    }
! 	  nb_generated_loops = distribute_loop (loop, work_list, cd);
  	}
  
!       if (nb_generated_loops > 0)
! 	changed = true;
! 
!       if (dump_file && (dump_flags & TDF_DETAILS))
  	{
! 	  if (nb_generated_loops > 1)
! 	    fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
! 		     num, nb_generated_loops);
! 	  else
! 	    fprintf (dump_file, "Loop %d is the same.\n", num);
  	}
  
        work_list.release ();
      }
--- 1707,1726 ----
  	      cd = new control_dependences (create_edge_list ());
  	      free_dominance_info (CDI_POST_DOMINATORS);
  	    }
! 	  nb_generated_loops = distribute_loop (loop, work_list, cd,
! 						&nb_generated_calls);
  	}
  
!       if (nb_generated_loops + nb_generated_calls > 0)
  	{
! 	  changed = true;
! 	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
! 			   loc, "Loop %d distributed: split to %d loops "
! 			   "and %d library calls.\n",
! 			   num, nb_generated_loops, nb_generated_calls);
  	}
+       else if (dump_file && (dump_flags & TDF_DETAILS))
+ 	fprintf (dump_file, "Loop %d is the same.\n", num);
  
        work_list.release ();
      }
Index: gcc/testsuite/gcc.dg/tree-ssa/ldist-11.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/ldist-11.c.orig	2013-10-02 14:42:48.000000000 +0200
--- gcc/testsuite/gcc.dg/tree-ssa/ldist-11.c	2013-10-02 14:42:56.874651935 +0200
*************** void foo (int * __restrict__ ia,
*** 28,33 ****
    */
  }
  
! /* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */
  /* { dg-final { scan-tree-dump-times "generated memset zero" 1 "ldist" } } */
  /* { dg-final { cleanup-tree-dump "ldist" } } */
--- 28,33 ----
    */
  }
  
! /* { dg-final { scan-tree-dump-times "distributed: split to 1 loops and 1 library calls" 1 "ldist" } } */
  /* { dg-final { scan-tree-dump-times "generated memset zero" 1 "ldist" } } */
  /* { dg-final { cleanup-tree-dump "ldist" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/ldist-17.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/ldist-17.c.orig	2013-10-02 14:42:48.000000000 +0200
--- gcc/testsuite/gcc.dg/tree-ssa/ldist-17.c	2013-10-02 14:42:56.874651935 +0200
*************** mad_synth_mute (struct mad_synth *synth)
*** 45,50 ****
    return;
  }
  
! /* { dg-final { scan-tree-dump "distributed: split to 4" "ldist" } } */
  /* { dg-final { scan-tree-dump-times "generated memset zero" 4 "ldist" } } */
  /* { dg-final { cleanup-tree-dump "ldist" } } */
--- 45,50 ----
    return;
  }
  
! /* { dg-final { scan-tree-dump "distributed: split to 0 loops and 4 library calls" "ldist" } } */
  /* { dg-final { scan-tree-dump-times "generated memset zero" 4 "ldist" } } */
  /* { dg-final { cleanup-tree-dump "ldist" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/ldist-23.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/ldist-23.c.orig	2013-10-02 14:42:48.000000000 +0200
--- gcc/testsuite/gcc.dg/tree-ssa/ldist-23.c	2013-10-02 14:42:56.875651946 +0200
*************** int main()
*** 29,34 ****
    return 0;
  }
  
! /* { dg-final { scan-tree-dump "split to 2 loops" "ldist" } } */
  /* { dg-final { scan-tree-dump "generated memcpy" "ldist" } } */
  /* { dg-final { cleanup-tree-dump "ldist" } } */
--- 29,34 ----
    return 0;
  }
  
! /* { dg-final { scan-tree-dump "split to 1 loops and 1 library call" "ldist" } } */
  /* { dg-final { scan-tree-dump "generated memcpy" "ldist" } } */
  /* { dg-final { cleanup-tree-dump "ldist" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/ldist-pr45948.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/ldist-pr45948.c.orig	2013-10-02 14:42:48.000000000 +0200
--- gcc/testsuite/gcc.dg/tree-ssa/ldist-pr45948.c	2013-10-02 14:42:56.875651946 +0200
*************** foo (int i, int n)
*** 18,23 ****
  
  /* We should apply loop distribution and generate 2 memset (0).  */
  
! /* { dg-final { scan-tree-dump "distributed: split to 2" "ldist" } } */
  /* { dg-final { scan-tree-dump-times "generated memset zero" 2 "ldist" } } */
  /* { dg-final { cleanup-tree-dump "ldist" } } */
--- 18,23 ----
  
  /* We should apply loop distribution and generate 2 memset (0).  */
  
! /* { dg-final { scan-tree-dump "distributed: split to 0 loops and 2 library calls" "ldist" } } */
  /* { dg-final { scan-tree-dump-times "generated memset zero" 2 "ldist" } } */
  /* { dg-final { cleanup-tree-dump "ldist" } } */
Index: gcc/testsuite/gfortran.dg/ldist-pr45199.f
===================================================================
*** gcc/testsuite/gfortran.dg/ldist-pr45199.f.orig	2012-01-30 16:44:51.000000000 +0100
--- gcc/testsuite/gfortran.dg/ldist-pr45199.f	2013-10-02 14:43:54.388314258 +0200
***************
*** 22,27 ****
  
  ! GCC should apply memset zero loop distribution and it should not ICE.
  
! ! { dg-final { scan-tree-dump "distributed: split to 9 loops" "ldist" } }
  ! { dg-final { scan-tree-dump-times "generated memset zero" 9 "ldist" } }
  ! { dg-final { cleanup-tree-dump "ldist" } }
--- 22,27 ----
  
  ! GCC should apply memset zero loop distribution and it should not ICE.
  
! ! { dg-final { scan-tree-dump "distributed: split to 0 loops and 9 library calls" "ldist" } }
  ! { dg-final { scan-tree-dump-times "generated memset zero" 9 "ldist" } }
  ! { dg-final { cleanup-tree-dump "ldist" } }



More information about the Gcc-patches mailing list