This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH][3/n] loop distribution TLC


This re-organizes builtin detection and generation properly into
an analysis and transform phase (noting that when we fail late
we will generate wrong code at the moment - eventually non-fatal,
but at least it will have duplicate work in the left-over loop).

With this in place adding more kinds of builtins to detect should
not be any rocket science anymore.

Bootstrap and regtest pending on x86_64-unknown-linux-gnu.

Richard.

2012-05-31  Richard Guenther  <rguenther@suse.de>

	* tree-loop-distribution.c (enum partition_kind): New enum.
	(struct partition_s): Add kind and main_stmt members.
	(partition_alloc): Initialize kind to PKIND_NORMAL.
	(partition_builtin_p): New function.
	(copy_loop_before): Remove failure path and assert instead.
	(generate_loops_for_partition): Likewise.
	(generate_memset_zero): Fold into ...
	(generate_memset_builtin): ... this.
	(classify_partition): New function with code from
	can_generate_builtin and generate_builtin.
	(generate_builtin): Remove.
	(can_generate_builtin): Likewise.
	(fuse_partitions_with_similar_memory_accesses): Call
	partition_builtin_p instead of can_generate_builtin.
	(rdg_build_partitions): Do not call
	fuse_partitions_with_similar_memory_accesses here...
	(ldist_gen): ... but here after classifying all partitions.
	Remove failure path of generate_code_for_partition.
	(generate_code_for_partition): Generate code according
	to partition classification.

Index: trunk/gcc/tree-loop-distribution.c
===================================================================
*** trunk.orig/gcc/tree-loop-distribution.c	2012-05-31 15:58:12.000000000 +0200
--- trunk/gcc/tree-loop-distribution.c	2012-05-31 16:26:17.255459534 +0200
*************** along with GCC; see the file COPYING3.
*** 52,60 ****
--- 52,65 ----
  #include "tree-scalar-evolution.h"
  #include "tree-pass.h"
  
+ enum partition_kind { PKIND_NORMAL, PKIND_MEMSET };
+ 
  typedef struct partition_s
  {
    bitmap stmts;
+   enum partition_kind kind;
+   /* Main statement a kind != PKIND_NORMAL partition is about.  */
+   gimple main_stmt;
  } *partition_t;
  
  DEF_VEC_P (partition_t);
*************** partition_alloc (bitmap stmts)
*** 67,72 ****
--- 72,78 ----
  {
    partition_t partition = XCNEW (struct partition_s);
    partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
+   partition->kind = PKIND_NORMAL;
    return partition;
  }
  
*************** partition_free (partition_t partition)
*** 79,84 ****
--- 85,97 ----
    free (partition);
  }
  
+ /* Returns true if the partition can be generated as a builtin.  */
+ 
+ static bool
+ partition_builtin_p (partition_t partition)
+ {
+   return partition->kind != PKIND_NORMAL;
+ }
  
  /* If bit I is not set, it means that this node represents an
     operation that has already been performed, and that should not be
*************** copy_loop_before (struct loop *loop)
*** 183,198 ****
    struct loop *res;
    edge preheader = loop_preheader_edge (loop);
  
-   if (!single_exit (loop))
-     return NULL;
- 
    initialize_original_copy_tables ();
    res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
    free_original_copy_tables ();
  
-   if (!res)
-     return NULL;
- 
    update_phis_for_loop_copy (loop, res);
    rename_variables_in_loop (res);
  
--- 196,206 ----
    struct loop *res;
    edge preheader = loop_preheader_edge (loop);
  
    initialize_original_copy_tables ();
    res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
+   gcc_assert (res != NULL);
    free_original_copy_tables ();
  
    update_phis_for_loop_copy (loop, res);
    rename_variables_in_loop (res);
  
*************** create_bb_after_loop (struct loop *loop)
*** 216,225 ****
     copied when COPY_P is true.  All the statements not flagged in the
     PARTITION bitmap are removed from the loop or from its copy.  The
     statements are indexed in sequence inside a basic block, and the
!    basic blocks of a loop are taken in dom order.  Returns true when
!    the code gen succeeded. */
  
! static bool
  generate_loops_for_partition (struct loop *loop, partition_t partition,
  			      bool copy_p)
  {
--- 224,232 ----
     copied when COPY_P is true.  All the statements not flagged in the
     PARTITION bitmap are removed from the loop or from its copy.  The
     statements are indexed in sequence inside a basic block, and the
!    basic blocks of a loop are taken in dom order.  */
  
! static void
  generate_loops_for_partition (struct loop *loop, partition_t partition,
  			      bool copy_p)
  {
*************** generate_loops_for_partition (struct loo
*** 230,242 ****
    if (copy_p)
      {
        loop = copy_loop_before (loop);
        create_preheader (loop, CP_SIMPLE_PREHEADERS);
        create_bb_after_loop (loop);
      }
  
-   if (loop == NULL)
-     return false;
- 
    /* Remove stmts not in the PARTITION bitmap.  The order in which we
       visit the phi nodes and the statements is exactly as in
       stmts_from_loop.  */
--- 237,247 ----
    if (copy_p)
      {
        loop = copy_loop_before (loop);
+       gcc_assert (loop != NULL);
        create_preheader (loop, CP_SIMPLE_PREHEADERS);
        create_bb_after_loop (loop);
      }
  
    /* Remove stmts not in the PARTITION bitmap.  The order in which we
       visit the phi nodes and the statements is exactly as in
       stmts_from_loop.  */
*************** generate_loops_for_partition (struct loo
*** 293,299 ****
      }
  
    free (bbs);
-   return true;
  }
  
  /* Build the size argument for a memset call.  */
--- 298,303 ----
*************** build_size_arg_loc (location_t loc, tree
*** 313,331 ****
    return x;
  }
  
! /* Generate a call to memset.  Return true when the operation succeeded.  */
  
  static void
! generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
! 		      gimple_stmt_iterator bsi)
  {
!   tree addr_base, nb_bytes;
!   bool res = false;
    gimple_seq stmt_list = NULL, stmts;
-   gimple fn_call;
-   tree mem, fn;
    struct data_reference *dr = XCNEW (struct data_reference);
!   location_t loc = gimple_location (stmt);
  
    DR_STMT (dr) = stmt;
    DR_REF (dr) = op0;
--- 317,345 ----
    return x;
  }
  
! /* Generate a call to memset for PARTITION in LOOP.  */
  
  static void
! generate_memset_builtin (struct loop *loop, partition_t partition)
  {
!   gimple_stmt_iterator gsi;
!   gimple stmt, fn_call;
!   tree op0, nb_iter, mem, fn, addr_base, nb_bytes;
    gimple_seq stmt_list = NULL, stmts;
    struct data_reference *dr = XCNEW (struct data_reference);
!   location_t loc;
!   bool res;
! 
!   stmt = partition->main_stmt;
!   loc = gimple_location (stmt);
!   op0 = gimple_assign_lhs (stmt);
!   if (gimple_bb (stmt) == loop->latch)
!     nb_iter = number_of_latch_executions (loop);
!   else
!     nb_iter = number_of_exit_cond_executions (loop);
! 
!   /* The new statements will be placed before LOOP.  */
!   gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
  
    DR_STMT (dr) = stmt;
    DR_REF (dr) = op0;
*************** generate_memset_zero (gimple stmt, tree
*** 353,359 ****
    fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
    fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes);
    gimple_seq_add_stmt (&stmt_list, fn_call);
!   gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING);
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      fprintf (dump_file, "generated memset zero\n");
--- 367,373 ----
    fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
    fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes);
    gimple_seq_add_stmt (&stmt_list, fn_call);
!   gsi_insert_seq_after (&gsi, stmt_list, GSI_CONTINUE_LINKING);
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      fprintf (dump_file, "generated memset zero\n");
*************** generate_memset_zero (gimple stmt, tree
*** 361,473 ****
    free_data_ref (dr);
  }
  
! /* Tries to generate a builtin function for the instructions of LOOP
!    pointed to by the bits set in PARTITION.  Returns true when the
!    operation succeeded.  */
  
! static bool
! generate_builtin (struct loop *loop, partition_t partition, bool copy_p)
  {
!   bool res = false;
!   unsigned i, x = 0;
    basic_block *bbs;
!   gimple write = NULL;
!   gimple_stmt_iterator bsi;
!   tree nb_iter = number_of_exit_cond_executions (loop);
! 
!   if (!nb_iter || nb_iter == chrec_dont_know)
!     return false;
  
    bbs = get_loop_body_in_dom_order (loop);
  
!   for (i = 0; i < loop->num_nodes; i++)
!     {
!       basic_block bb = bbs[i];
! 
!       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
! 	x++;
! 
!       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
! 	{
! 	  gimple stmt = gsi_stmt (bsi);
! 
! 	  if (gimple_code (stmt) == GIMPLE_LABEL
! 	      || is_gimple_debug (stmt))
! 	    continue;
! 
! 	  if (!bitmap_bit_p (partition->stmts, x++))
! 	    continue;
! 
! 	  /* If the stmt has uses outside of the loop fail.
! 	     ???  If the stmt is generated in another partition that
! 	     is not created as builtin we can ignore this.  */
! 	  if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
! 	    {
! 	      if (dump_file && (dump_flags & TDF_DETAILS))
! 		fprintf (dump_file, "not generating builtin, partition has "
! 			 "scalar uses outside of the loop\n");
! 	      goto end;
! 	    }
! 
! 	  if (is_gimple_assign (stmt)
! 	      && !is_gimple_reg (gimple_assign_lhs (stmt)))
! 	    {
! 	      /* Don't generate the builtins when there are more than
! 		 one memory write.  */
! 	      if (write != NULL)
! 		goto end;
! 
! 	      write = stmt;
! 	      if (bb == loop->latch)
! 		nb_iter = number_of_latch_executions (loop);
! 	    }
! 	}
!     }
! 
!   if (!stmt_with_adjacent_zero_store_dr_p (write))
!     goto end;
! 
!   /* The new statements will be placed before LOOP.  */
!   bsi = gsi_last_bb (loop_preheader_edge (loop)->src);
!   generate_memset_zero (write, gimple_assign_lhs (write), nb_iter, bsi);
!   res = true;
! 
!   /* If this is the last partition for which we generate code, we have
!      to destroy the loop.  */
!   if (!copy_p)
!     {
!       unsigned nbbs = loop->num_nodes;
!       edge exit = single_exit (loop);
!       basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
!       redirect_edge_pred (exit, src);
!       exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
!       exit->flags |= EDGE_FALLTHRU;
!       cancel_loop_tree (loop);
!       rescan_loop_exit (exit, false, true);
! 
!       for (i = 0; i < nbbs; i++)
! 	delete_basic_block (bbs[i]);
  
!       set_immediate_dominator (CDI_DOMINATORS, dest,
! 			       recompute_dominator (CDI_DOMINATORS, dest));
!     }
! 
!  end:
    free (bbs);
!   return res;
  }
  
! /* Generates code for PARTITION.  For simple loops, this function can
!    generate a built-in.  */
  
! static bool
  generate_code_for_partition (struct loop *loop, partition_t partition,
  			     bool copy_p)
  {
!   if (generate_builtin (loop, partition, copy_p))
!     return true;
  
!   return generate_loops_for_partition (loop, partition, copy_p);
  }
  
  
--- 375,430 ----
    free_data_ref (dr);
  }
  
! /* Remove and destroy the loop LOOP.  */
  
! static void
! destroy_loop (struct loop *loop)
  {
!   unsigned nbbs = loop->num_nodes;
!   edge exit = single_exit (loop);
!   basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
    basic_block *bbs;
!   unsigned i;
  
    bbs = get_loop_body_in_dom_order (loop);
  
!   redirect_edge_pred (exit, src);
!   exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
!   exit->flags |= EDGE_FALLTHRU;
!   cancel_loop_tree (loop);
!   rescan_loop_exit (exit, false, true);
  
!   for (i = 0; i < nbbs; i++)
!     delete_basic_block (bbs[i]);
    free (bbs);
! 
!   set_immediate_dominator (CDI_DOMINATORS, dest,
! 			   recompute_dominator (CDI_DOMINATORS, dest));
  }
  
! /* Generates code for PARTITION.  */
  
! static void
  generate_code_for_partition (struct loop *loop, partition_t partition,
  			     bool copy_p)
  {
!   switch (partition->kind)
!     {
!     case PKIND_MEMSET:
!       generate_memset_builtin (loop, partition);
!       /* If this is the last partition for which we generate code, we have
! 	 to destroy the loop.  */
!       if (!copy_p)
! 	destroy_loop (loop);
!       break;
! 
!     case PKIND_NORMAL:
!       generate_loops_for_partition (loop, partition, copy_p);
!       break;
  
!     default:
!       gcc_unreachable ();
!     }
  }
  
  
*************** rdg_build_components (struct graph *rdg,
*** 828,859 ****
    BITMAP_FREE (saved_components);
  }
  
! /* Returns true when it is possible to generate a builtin pattern for
!    the PARTITION of RDG.  For the moment we detect only the memset
!    zero pattern.  */
  
! static bool
! can_generate_builtin (struct graph *rdg, partition_t partition)
  {
-   unsigned i;
    bitmap_iterator bi;
!   int nb_reads = 0;
!   int nb_writes = 0;
!   int stores_zero = 0;
  
    EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
!     if (RDG_MEM_READS_STMT (rdg, i))
!       nb_reads++;
!     else if (RDG_MEM_WRITE_STMT (rdg, i))
!       {
! 	gimple stmt = RDG_STMT (rdg, i);
! 	nb_writes++;
! 	if (!gimple_has_volatile_ops (stmt)
! 	    && stmt_with_adjacent_zero_store_dr_p (stmt))
! 	  stores_zero++;
!       }
  
!   return stores_zero == 1 && nb_writes == 1 && nb_reads == 0;
  }
  
  /* Returns true when PARTITION1 and PARTITION2 have similar memory
--- 785,855 ----
    BITMAP_FREE (saved_components);
  }
  
! /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
!    For the moment we detect only the memset zero pattern.  */
  
! static void
! classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
  {
    bitmap_iterator bi;
!   unsigned i;
!   tree nb_iter;
! 
!   partition->kind = PKIND_NORMAL;
!   partition->main_stmt = NULL;
! 
!   /* Perform general partition disqualification for builtins.  */
!   nb_iter = number_of_exit_cond_executions (loop);
!   if (!nb_iter || nb_iter == chrec_dont_know)
!     return;
  
    EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
!     {
!       gimple stmt = RDG_STMT (rdg, i);
! 
!       if (gimple_has_volatile_ops (stmt))
! 	return;
  
!       /* If the stmt has uses outside of the loop fail.
! 	 ???  If the stmt is generated in another partition that
! 	 is not created as builtin we can ignore this.  */
!       if (gimple_code (stmt) != GIMPLE_PHI
! 	  && stmt_has_scalar_dependences_outside_loop (loop, stmt))
! 	{
! 	  if (dump_file && (dump_flags & TDF_DETAILS))
! 	    fprintf (dump_file, "not generating builtin, partition has "
! 		     "scalar uses outside of the loop\n");
! 	  return;
! 	}
!     }
! 
!   /* Detect memset.  */
!   EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
!     {
!       gimple stmt = RDG_STMT (rdg, i);
! 
!       if (gimple_code (stmt) == GIMPLE_PHI)
! 	continue;
! 
!       /* Any scalar stmts are ok.  */
!       if (!gimple_vuse (stmt))
! 	continue;
! 
!       /* Exactly one store.  */
!       if (gimple_assign_single_p (stmt)
! 	  && !is_gimple_reg (gimple_assign_lhs (stmt)))
! 	{
! 	  if (partition->main_stmt != NULL)
! 	    return;
! 	  partition->main_stmt = stmt;
! 	}
!       else
! 	return;
!     }
! 
!   if (partition->main_stmt != NULL
!       && stmt_with_adjacent_zero_store_dr_p (partition->main_stmt))
!     partition->kind = PKIND_MEMSET;
  }
  
  /* Returns true when PARTITION1 and PARTITION2 have similar memory
*************** fuse_partitions_with_similar_memory_acce
*** 891,900 ****
    partition_t partition1, partition2;
  
    FOR_EACH_VEC_ELT (partition_t, *partitions, p1, partition1)
!     if (!can_generate_builtin (rdg, partition1))
        FOR_EACH_VEC_ELT (partition_t, *partitions, p2, partition2)
  	if (p1 != p2
! 	    && !can_generate_builtin (rdg, partition2)
  	    && similar_memory_accesses (rdg, partition1, partition2))
  	  {
  	    bitmap_ior_into (partition1->stmts, partition2->stmts);
--- 887,896 ----
    partition_t partition1, partition2;
  
    FOR_EACH_VEC_ELT (partition_t, *partitions, p1, partition1)
!     if (!partition_builtin_p (partition1))
        FOR_EACH_VEC_ELT (partition_t, *partitions, p2, partition2)
  	if (p1 != p2
! 	    && !partition_builtin_p (partition2)
  	    && similar_memory_accesses (rdg, partition1, partition2))
  	  {
  	    bitmap_ior_into (partition1->stmts, partition2->stmts);
*************** rdg_build_partitions (struct graph *rdg,
*** 971,978 ****
      VEC_safe_push (partition_t, heap, *partitions, partition);
    else
      partition_free (partition);
- 
-   fuse_partitions_with_similar_memory_accesses (rdg, partitions);
  }
  
  /* Dump to FILE the PARTITIONS.  */
--- 967,972 ----
*************** ldist_gen (struct loop *loop, struct gra
*** 1101,1111 ****
  			processed);
    BITMAP_FREE (processed);
  
    nbp = VEC_length (partition_t, partitions);
    if (nbp == 0
        || (nbp == 1
! 	  && !can_generate_builtin (rdg,
! 				    VEC_index (partition_t, partitions, 0)))
        || (nbp > 1
  	  && partition_contains_all_rw (rdg, partitions)))
      goto ldist_done;
--- 1095,1109 ----
  			processed);
    BITMAP_FREE (processed);
  
+   FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
+     classify_partition (loop, rdg, partition);
+ 
+   fuse_partitions_with_similar_memory_accesses (rdg, &partitions);
+ 
    nbp = VEC_length (partition_t, partitions);
    if (nbp == 0
        || (nbp == 1
! 	  && !partition_builtin_p (VEC_index (partition_t, partitions, 0)))
        || (nbp > 1
  	  && partition_contains_all_rw (rdg, partitions)))
      goto ldist_done;
*************** ldist_gen (struct loop *loop, struct gra
*** 1114,1121 ****
      dump_rdg_partitions (dump_file, partitions);
  
    FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
!     if (!generate_code_for_partition (loop, partition, i < nbp - 1))
!       goto ldist_done;
  
    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    mark_sym_for_renaming (gimple_vop (cfun));
--- 1112,1118 ----
      dump_rdg_partitions (dump_file, partitions);
  
    FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
!     generate_code_for_partition (loop, partition, i < nbp - 1);
  
    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    mark_sym_for_renaming (gimple_vop (cfun));


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]