This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH][parloops] Modify parloops to allow code generation on SESE regions


Hi,

This patch modifies parloops to make it possible to generate code not
only for loops, but also for SESE regions. It changes the interface of
the functions to take ENTRY and EXIT edges of a SESE region of the CFG
instead of a loop for all but the top-level function, gen_parallel_loop.

This modification is necessary for the automatic streamization pass
(subsequent patches).

Trunk plus patch have been bootstrapped and tested on amd64-linux.

ChangeLog:

2008-04-22   Antoniu Pop  <antoniu.pop@gmail.com>
             Sebastian Pop  <sebastian.pop@amd.com>

	* tree-parloops.c (take_address_of, eliminate_local_variables_1,
	eliminate_local_variables_stmt, eliminate_local_variables,
	separate_decls_in_loop_name, separate_decls_in_loop_stmt,
	separate_decls_in_loop, gen_parallel_loop): Make them work on a region
	of code delimited by two edges in the CFG.
	(separate_decls_in_loop_name): Renamed separate_decls_in_region_name.
	(separate_decls_in_loop_stmt): Renamed separate_decls_in_region_stmt.
	(separate_decls_in_loop): Renamed separate_decls_in_region.  Isolate
	the case of parallelisation of reductions.

	* basic_block.h (recalculate_dominance_info): Declared.
	* dominance.c (recalculate_dominance_info): New.
	* tree-flow.h (gather_blocks_in_sese_region_exclusive,
	bb_inside_sese_region_p, expr_invariant_in_region_p): Declared.
	* tree-ssa-loop-ivopts.c (expr_invariant_in_region_p): New.
	* tree-cfg.c (gather_blocks_in_sese_region_exclusive,
	bb_inside_sese_region_p): New.


---------- Forwarded message ----------
From:  <apop@gcc12.fsffrance.org>
Date: Wed, Apr 23, 2008 at 4:15 PM
Subject: [regtest] Results for 2008_04_23_08_30_05_parloops_sese.diff
on x86_64-unknown-linux-gnu
To: antoniu.pop@gmail.com


Checker: (2008_04_23_14_15_33): (cat /home/apop/results/testing/patched/report
 there are no regressions with your patch.
 Checker: (2008_04_23_14_15_33): tac)
 Checker: (2008_04_23_14_15_33): FAILs with patched version:
 Checker: (2008_04_23_14_15_33): (cat /home/apop/results/testing/patched/failed
 gcc.sum gcc.dg/vect/vect-vfa-slp.c
 libstdc++.sum 22_locale/time_get/get_date/wchar_t/4.cc
 Checker: (2008_04_23_14_15_33): tac)
 Checker: (2008_04_23_14_15_33): FAILs with pristine version:
 Checker: (2008_04_23_14_15_33): (cat /home/apop/results//trunk/134583/failed
 gcc.sum gcc.dg/vect/vect-vfa-slp.c
 libstdc++.sum 22_locale/time_get/get_date/wchar_t/4.cc
 Checker: (2008_04_23_14_15_33): tac)
 Checker: (2008_04_23_14_15_33): The files used for the validation of
your patch are stored in
/home/apop/results/patched/2008_04_23_14_15_33 on the tester machine.
email:antoniu.pop@gmail.com
branch:trunk
revision:HEAD
configure:
make:
check:

Index: gcc/dominance.c
===================================================================
--- gcc/dominance.c	(revision 134558)
+++ gcc/dominance.c	(working copy)
@@ -693,6 +693,15 @@ free_dominance_info (enum cdi_direction 
   dom_computed[dir_index] = DOM_NONE;
 }
 
+/* Force the re-evaluation of the dominance information for the
+   direction DIR.  */
+void
+recalculate_dominance_info (enum cdi_direction dir)
+{
+  free_dominance_info (dir);
+  calculate_dominance_info (dir);
+}
+
 /* Return the immediate dominator of basic block BB.  */
 basic_block
 get_immediate_dominator (enum cdi_direction dir, basic_block bb)
Index: gcc/tree-ssa-loop-ivopts.c
===================================================================
--- gcc/tree-ssa-loop-ivopts.c	(revision 134558)
+++ gcc/tree-ssa-loop-ivopts.c	(working copy)
@@ -1258,6 +1258,41 @@ find_interesting_uses_cond (struct ivopt
   record_use (data, cond_p, civ, stmt, USE_COMPARE);
 }
 
+/* Returns true if expression EXPR is not defined between ENTRY and
+   EXIT, i.e. if all its operands are defined outside of the region.  */
+
+bool
+expr_invariant_in_region_p (edge entry, edge exit, tree expr)
+{
+  basic_block entry_bb = entry->dest;
+  basic_block exit_bb = exit->src;
+  basic_block def_bb;
+  unsigned i, len;
+
+  if (is_gimple_min_invariant (expr))
+    return true;
+
+  if (TREE_CODE (expr) == SSA_NAME)
+    {
+      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
+      if (def_bb
+	  && bb_inside_sese_region_p (entry_bb, exit_bb, def_bb))
+	return false;
+
+      return true;
+    }
+
+  if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
+    return false;
+
+  len = TREE_OPERAND_LENGTH (expr);
+  for (i = 0; i < len; i++)
+    if (!expr_invariant_in_region_p (entry, exit, TREE_OPERAND (expr, i)))
+      return false;
+
+  return true;
+}
+
 /* Returns true if expression EXPR is obviously invariant in LOOP,
    i.e. if all its operands are defined outside of the LOOP.  LOOP
    should not be the function body.  */
Index: gcc/tree-parloops.c
===================================================================
--- gcc/tree-parloops.c	(revision 134558)
+++ gcc/tree-parloops.c	(working copy)
@@ -455,18 +455,17 @@ loop_has_blocks_with_irreducible_flag (s
 }
 
 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
-   The assignment statement is placed before LOOP.  DECL_ADDRESS maps decls
+   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
    to their addresses that can be reused.  The address of OBJ is known to
    be invariant in the whole function.  */
 
 static tree
-take_address_of (tree obj, tree type, struct loop *loop, htab_t decl_address)
+take_address_of (tree obj, tree type, edge entry, htab_t decl_address)
 {
   int uid;
   void **dslot;
   struct int_tree_map ielt, *nielt;
   tree *var_p, name, bvar, stmt, addr;
-  edge entry = loop_preheader_edge (loop);
 
   /* Since the address of OBJ is invariant, the trees may be shared.
      Avoid rewriting unrelated parts of the code.  */
@@ -570,15 +569,16 @@ initialize_reductions (void **slot, void
 
 struct elv_data
 {
-  struct loop *loop;
+  edge entry;
   htab_t decl_address;
   bool changed;
 };
 
-/* Eliminates references to local variables in *TP out of LOOP.  DECL_ADDRESS
-   contains addresses of the references that had their address taken already.
-   If the expression is changed, CHANGED is set to true.  Callback for
-   walk_tree.  */
+/* Eliminates references to local variables in *TP out of the single
+   entry single exit region starting at DTA->ENTRY.
+   DECL_ADDRESS contains addresses of the references that had their
+   address taken already.  If the expression is changed, CHANGED is
+   set to true.  Callback for walk_tree.  */
 
 static tree
 eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
@@ -595,7 +595,7 @@ eliminate_local_variables_1 (tree *tp, i
 
       type = TREE_TYPE (t);
       addr_type = build_pointer_type (type);
-      addr = take_address_of (t, addr_type, dta->loop, dta->decl_address);
+      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address);
       *tp = build1 (INDIRECT_REF, TREE_TYPE (*tp), addr);
 
       dta->changed = true;
@@ -625,7 +625,7 @@ eliminate_local_variables_1 (tree *tp, i
 	return NULL_TREE;
 
       addr_type = TREE_TYPE (t);
-      addr = take_address_of (obj, addr_type, dta->loop, dta->decl_address);
+      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address);
       *tp = addr;
 
       dta->changed = true;
@@ -638,17 +638,18 @@ eliminate_local_variables_1 (tree *tp, i
   return NULL_TREE;
 }
 
-/* Moves the references to local variables in STMT from LOOP.  DECL_ADDRESS
-   contains addresses for the references for that we have already taken
-   them.  */
+/* Moves the references to local variables in STMT out of the single
+   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
+   addresses of the references that had their address taken
+   already.  */
 
 static void
-eliminate_local_variables_stmt (struct loop *loop, tree stmt,
+eliminate_local_variables_stmt (edge entry, tree stmt,
 				htab_t decl_address)
 {
   struct elv_data dta;
 
-  dta.loop = loop;
+  dta.entry = entry;
   dta.decl_address = decl_address;
   dta.changed = false;
 
@@ -658,31 +659,39 @@ eliminate_local_variables_stmt (struct l
     update_stmt (stmt);
 }
 
-/* Eliminates the references to local variables from LOOP.  
+/* Eliminates the references to local variables from the single entry
+   single exit region between the ENTRY and EXIT edges.
+  
    This includes:
    1) Taking address of a local variable -- these are moved out of the 
-   loop (and temporary variable is created to hold the address if 
+   region (and temporary variable is created to hold the address if 
    necessary).
+
    2) Dereferencing a local variable -- these are replaced with indirect
    references.  */
 
 static void
-eliminate_local_variables (struct loop *loop)
+eliminate_local_variables (edge entry, edge exit)
 {
-  basic_block bb, *body = get_loop_body (loop);
+  basic_block bb;
+  VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
   unsigned i;
   block_stmt_iterator bsi;
   htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
 				     free);
+  basic_block entry_bb = entry->src;
+  basic_block exit_bb = exit->dest;
 
-  /* Find and rename the ssa names defined outside of loop.  */
-  for (i = 0; i < loop->num_nodes; i++)
-    {
-      bb = body[i];
-
-      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
-	eliminate_local_variables_stmt (loop, bsi_stmt (bsi), decl_address);
-    }
+  recalculate_dominance_info (CDI_POST_DOMINATORS);
+  gcc_assert (dominated_by_p (CDI_DOMINATORS, exit_bb, entry_bb)
+	      && dominated_by_p (CDI_POST_DOMINATORS, entry_bb, exit_bb));
+
+  gather_blocks_in_sese_region_exclusive (entry_bb, exit_bb, &body);
+
+  for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
+    for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
+      eliminate_local_variables_stmt (entry, bsi_stmt (bsi),
+				      decl_address);
 
   htab_delete (decl_address);
 }
@@ -695,9 +704,9 @@ eliminate_local_variables (struct loop *
    duplicated, storing the copies in DECL_COPIES.  */
 
 static tree
-separate_decls_in_loop_name (tree name,
-			     htab_t name_copies, htab_t decl_copies,
-			     bool copy_name_p)
+separate_decls_in_region_name (tree name,
+			       htab_t name_copies, htab_t decl_copies,
+			       bool copy_name_p)
 {
   tree copy, var, var_copy;
   unsigned idx, uid, nuid;
@@ -762,15 +771,16 @@ separate_decls_in_loop_name (tree name,
   return copy;
 }
 
-/* Finds the ssa names used in STMT that are defined outside of LOOP and
-   replaces such ssa names with their duplicates.  The duplicates are stored to
-   NAME_COPIES.  Base decls of all ssa names used in STMT
-   (including those defined in LOOP) are replaced with the new temporary
-   variables; the replacement decls are stored in DECL_COPIES.  */
+/* Finds the ssa names used in STMT that are defined outside the
+   region between ENTRY and EXIT and replaces such ssa names with
+   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
+   decls of all ssa names used in STMT (including those defined in
+   LOOP) are replaced with the new temporary variables; the
+   replacement decls are stored in DECL_COPIES.  */
 
 static void
-separate_decls_in_loop_stmt (struct loop *loop, tree stmt,
-			     htab_t name_copies, htab_t decl_copies)
+separate_decls_in_region_stmt (edge entry, edge exit, tree stmt,
+			       htab_t name_copies, htab_t decl_copies)
 {
   use_operand_p use;
   def_operand_p def;
@@ -784,8 +794,8 @@ separate_decls_in_loop_stmt (struct loop
   {
     name = DEF_FROM_PTR (def);
     gcc_assert (TREE_CODE (name) == SSA_NAME);
-    copy = separate_decls_in_loop_name (name, name_copies, decl_copies,
-					false);
+    copy = separate_decls_in_region_name (name, name_copies, decl_copies,
+					  false);
     gcc_assert (copy == name);
   }
 
@@ -795,9 +805,9 @@ separate_decls_in_loop_stmt (struct loop
     if (TREE_CODE (name) != SSA_NAME)
       continue;
 
-    copy_name_p = expr_invariant_in_loop_p (loop, name);
-    copy = separate_decls_in_loop_name (name, name_copies, decl_copies,
-					copy_name_p);
+    copy_name_p = expr_invariant_in_region_p (entry, exit, name);
+    copy = separate_decls_in_region_name (name, name_copies, decl_copies,
+					  copy_name_p);
     SET_USE (use, copy);
   }
 }
@@ -1118,36 +1128,46 @@ create_loads_and_stores_for_name (void *
    in LOOP.  */
 
 static void
-separate_decls_in_loop (struct loop *loop, htab_t reduction_list, 
-			tree * arg_struct, tree * new_arg_struct, 
-			struct clsn_data *ld_st_data)
+separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
+			  tree *arg_struct, tree *new_arg_struct, 
+			  struct clsn_data *ld_st_data)
 
 {
-  basic_block bb1 = split_edge (loop_preheader_edge (loop));
+  basic_block bb1 = split_edge (entry);
   basic_block bb0 = single_pred (bb1);
   htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
 				    name_to_copy_elt_eq, free);
   htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
 				    free);
-  basic_block bb, *body = get_loop_body (loop);
   unsigned i;
   tree phi, type, type_name, nvar;
   block_stmt_iterator bsi;
   struct clsn_data clsn_data;
+  VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
+  basic_block bb;
+  basic_block entry_bb = bb1;
+  basic_block exit_bb = exit->dest;
 
-  /* Find and rename the ssa names defined outside of loop.  */
-  for (i = 0; i < loop->num_nodes; i++)
-    {
-      bb = body[i];
+  entry = single_succ_edge(entry_bb);
+
+  recalculate_dominance_info (CDI_POST_DOMINATORS);
+  gcc_assert (dominated_by_p (CDI_DOMINATORS, exit_bb, entry_bb)
+	      && dominated_by_p (CDI_POST_DOMINATORS, entry_bb, exit_bb));
 
+  gather_blocks_in_sese_region_exclusive (entry_bb, exit_bb, &body);
+
+  for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
+    {
       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
-	separate_decls_in_loop_stmt (loop, phi, name_copies, decl_copies);
+	separate_decls_in_region_stmt (entry, exit, phi, name_copies,
+				       decl_copies);
 
       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
-	separate_decls_in_loop_stmt (loop, bsi_stmt (bsi), name_copies,
-				     decl_copies);
+	separate_decls_in_region_stmt (entry, exit, bsi_stmt (bsi),
+				       name_copies, decl_copies);
     }
-  free (body);
+
+  VEC_free (basic_block, heap, body);
 
   if (htab_elements (name_copies) == 0)
     {
@@ -1156,7 +1176,7 @@ separate_decls_in_loop (struct loop *loo
       *arg_struct = NULL;
       *new_arg_struct = NULL;
     }
-  else
+  else if (reduction_list)
     {
       /* Create the type for the structure to store the ssa names to.  */
       type = lang_hooks.types.make_type (RECORD_TYPE);
@@ -1195,11 +1215,37 @@ separate_decls_in_loop (struct loop *loo
 	  htab_traverse (reduction_list, create_stores_for_reduction,
                         ld_st_data); 
 	  clsn_data.load = make_ssa_name (nvar, NULL_TREE);
-	  clsn_data.load_bb = single_dom_exit (loop)->dest;
+	  clsn_data.load_bb = exit->dest;
 	  clsn_data.store = ld_st_data->store;
 	  create_final_loads_for_reduction (reduction_list, &clsn_data);
 	}
     }
+  else
+    {
+      /* Create the type for the structure to store the ssa names to.  */
+      type = lang_hooks.types.make_type (RECORD_TYPE);
+      type_name = build_decl (TYPE_DECL, create_tmp_var_name (".paral_data"),
+			      type);
+      TYPE_NAME (type) = type_name;
+
+      htab_traverse (name_copies, add_field_for_name, type);
+      layout_type (type);
+ 
+      /* Create the loads and stores.  */
+      *arg_struct = create_tmp_var (type, ".paral_data_store");
+      add_referenced_var (*arg_struct);
+      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
+      add_referenced_var (nvar);
+      *new_arg_struct = make_ssa_name (nvar, NULL_TREE);
+
+      ld_st_data->store = *arg_struct;
+      ld_st_data->load = *new_arg_struct;
+      ld_st_data->store_bb = bb0;
+      ld_st_data->load_bb = bb1;
+
+      htab_traverse (name_copies, create_loads_and_stores_for_name,
+		     ld_st_data);
+    }
 
   htab_delete (decl_copies);
   htab_delete (name_copies);
@@ -1605,6 +1651,7 @@ gen_parallel_loop (struct loop *loop, ht
   tree many_iterations_cond, type, nit;
   tree stmts, arg_struct, new_arg_struct;
   basic_block parallel_head;
+  edge entry, exit;
   struct clsn_data clsn_data;
   unsigned prob;
 
@@ -1698,6 +1745,7 @@ gen_parallel_loop (struct loop *loop, ht
 
   /* Base all the induction variables in LOOP on a single control one.  */
   canonicalize_loop_ivs (loop, reduction_list, nit);
+  update_ssa (TODO_update_ssa);
 
   /* Ensure that the exit condition is the first statement in the loop.  */
   transform_to_exit_first_loop (loop, reduction_list, nit);
@@ -1709,11 +1757,15 @@ gen_parallel_loop (struct loop *loop, ht
     htab_traverse (reduction_list, initialize_reductions, loop);
 
   /* Eliminate the references to local variables from the loop.  */
-  eliminate_local_variables (loop);
+  gcc_assert (single_exit (loop));
+  entry = loop_preheader_edge (loop);
+  exit = single_dom_exit (loop);
 
+  eliminate_local_variables (entry, exit);
   /* In the old loop, move all variables non-local to the loop to a structure
      and back, and create separate decls for the variables used in loop.  */
-  separate_decls_in_loop (loop, reduction_list, &arg_struct, &new_arg_struct, &clsn_data);
+  separate_decls_in_region (entry, exit, reduction_list, &arg_struct, 
+			    &new_arg_struct, &clsn_data);
 
   /* Create the parallel constructs.  */
   parallel_head = create_parallel_loop (loop, create_loop_fn (), arg_struct,
Index: gcc/tree-flow.h
===================================================================
--- gcc/tree-flow.h	(revision 134558)
+++ gcc/tree-flow.h	(working copy)
@@ -771,6 +771,10 @@ extern bool tree_duplicate_sese_region (
 					basic_block *);
 extern bool tree_duplicate_sese_tail (edge, edge, basic_block *, unsigned,
 				      basic_block *);
+extern void gather_blocks_in_sese_region_exclusive (basic_block, basic_block,
+						    VEC(basic_block,heap) **);
+extern bool bb_inside_sese_region_p (basic_block, basic_block, 
+				     const_basic_block);
 extern void add_phi_args_after_copy_bb (basic_block);
 extern void add_phi_args_after_copy (basic_block *, unsigned, edge);
 extern bool tree_purge_dead_abnormal_call_edges (basic_block);
@@ -1146,6 +1150,7 @@ extern void tree_check_data_deps (void);
 
 /* In tree-ssa-loop-ivopts.c  */
 bool expr_invariant_in_loop_p (struct loop *, tree);
+bool expr_invariant_in_region_p (edge, edge, tree);
 bool multiplier_allowed_in_address_p (HOST_WIDE_INT, enum machine_mode);
 unsigned multiply_by_cost (HOST_WIDE_INT, enum machine_mode);
 
Index: gcc/basic-block.h
===================================================================
--- gcc/basic-block.h	(revision 134558)
+++ gcc/basic-block.h	(working copy)
@@ -905,6 +905,7 @@ extern void set_dom_info_availability (e
 extern bool dom_info_available_p (enum cdi_direction);
 extern void calculate_dominance_info (enum cdi_direction);
 extern void free_dominance_info (enum cdi_direction);
+extern void recalculate_dominance_info (enum cdi_direction);
 extern basic_block nearest_common_dominator (enum cdi_direction,
 					     basic_block, basic_block);
 extern basic_block nearest_common_dominator_for_set (enum cdi_direction,
Index: gcc/tree-cfg.c
===================================================================
--- gcc/tree-cfg.c	(revision 134558)
+++ gcc/tree-cfg.c	(working copy)
@@ -5527,6 +5527,59 @@ gather_blocks_in_sese_region (basic_bloc
     }
 }
 
+/* Add all the blocks dominated by ENTRY to the array BBS_P.  Stop
+   adding blocks when the dominator traversal reaches EXIT.  This
+   function silently assumes that ENTRY strictly dominates EXIT.  This
+   function differs from gather_blocks_in_sese_region in that it will
+   not include either ENTRY or EXIT in the results.  */
+
+void
+gather_blocks_in_sese_region_exclusive (basic_block entry, basic_block exit,
+					VEC(basic_block,heap) **bbs_p)
+{
+  basic_block son;
+
+  for (son = first_dom_son (CDI_DOMINATORS, entry);
+       son;
+       son = next_dom_son (CDI_DOMINATORS, son))
+    {
+      if (son != exit) 
+	{
+	  VEC_safe_push (basic_block, heap, *bbs_p, son);
+	  gather_blocks_in_sese_region_exclusive (son, exit, bbs_p);
+	}
+    }
+}
+
+/* Return nonzero if basic block BB belongs to the SESE region between
+   ENTRY and EXIT.  */
+bool
+bb_inside_sese_region_p (basic_block entry, basic_block exit, 
+			 const_basic_block bb)
+{
+  basic_block son;
+
+  if (bb == ENTRY_BLOCK_PTR || bb == EXIT_BLOCK_PTR)
+    return false;
+
+  if (bb == entry)
+    return true;
+
+  for (son = first_dom_son (CDI_DOMINATORS, entry);
+       son;
+       son = next_dom_son (CDI_DOMINATORS, son))
+    {
+      if (bb == son)
+	return true;
+
+      if (son != exit
+	  && bb_inside_sese_region_p (son, exit, bb))
+	return true;
+    }
+  return false;
+}
+
+
 /* Replaces *TP with a duplicate (belonging to function TO_CONTEXT).
    The duplicates are recorded in VARS_MAP.  */
 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]