PR78972, 80283: Extend TER with scheduling


If you look at certain testcases like the one for PR78972, you'll find that the code generated by TER is maximally pessimal in terms of register pressure: we can generate a large number of intermediate results early, and defer all the statements that consume them.
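As a hand-written illustration of the shape of the problem (this is a
sketch, not the actual PR78972 testcase): in a block where many values
are produced early and everything that consumes them is sunk towards
the end, all of the intermediates are live at the same time.

  /* Sketch only: each ti has a single use at the bottom of the
     block, so every ti is still live at the final expression.  */
  extern int compute (int);

  int
  f (void)
  {
    int t0 = compute (0);
    int t1 = compute (1);
    int t2 = compute (2);
    /* ... many more of these ... */
    int t9 = compute (9);

    return t0 + t1 + t2 + t9;  /* plus all the omitted terms */
  }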

Another observation is that TER doesn't actually buy us anything for a large subset of the values it finds: only a handful of places in the expand phase make use of the information. In cases where we know we aren't going to use it, we could move expressions freely without doing TER-style substitution.

This patch uses the information collected by TER about the movability of statements and performs a mini scheduling pass with the aim of reducing register pressure. The heuristic is fairly simple: a statement that consumes more values than it produces is preferred. This could be tuned further, but it already produces pretty good results: for the PR78972 testcase, the stack size is reduced from 2448 bytes to 288, and for PR80283, the stack frame of 496 bytes vanishes entirely with the pass enabled.
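For intuition, here is the scheduling loop distilled to a
GCC-independent sketch; all the names are illustrative, and the real
version in the patch below additionally keeps debug statements last
and breaks ties by priority, cost and original order.

  #include <algorithm>
  #include <vector>

  struct node
  {
    int n_dying;             /* values whose last use is here */
    int n_unresolved;        /* dependencies not yet emitted */
    std::vector<int> succs;  /* statements depending on this one */
  };

  /* Greedy list scheduling: a statement becomes ready once all its
     dependencies are emitted; among ready statements, prefer the one
     that kills the most values.  */
  std::vector<int>
  schedule_sketch (std::vector<node> &nodes)
  {
    std::vector<int> ready, order;
    for (int i = 0; i < (int) nodes.size (); i++)
      if (nodes[i].n_unresolved == 0)
        ready.push_back (i);

    while (!ready.empty ())
      {
        auto best = std::max_element (ready.begin (), ready.end (),
                                      [&] (int a, int b)
                                      { return nodes[a].n_dying
                                               < nodes[b].n_dying; });
        int n = *best;
        ready.erase (best);
        order.push_back (n);
        for (int s : nodes[n].succs)
          if (--nodes[s].n_unresolved == 0)
            ready.push_back (s);
      }
    return order;
  }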

In terms of benchmarks, I've run SPEC a few times, and the last set of results showed not much of a change. Getting reproducible results has been tricky, but all the runs I made were within a 0%-1% improvement.

In this patch, the changed behaviour is gated behind a -fschedule-ter option which is off by default; with that default, it bootstraps and tests without regressions. The compiler also bootstraps with the option enabled; in that case there are some optimization issues. I'll address some of them with two followup patches; the remaining failures are:
 * a handful of guality/PR43077.c failures
   Debug insn generation is somewhat changed, and the peephole2 pass
   drops one of them on the floor.
 * three target/i386/bmi-* tests fail. These expect the combiner to
   build certain instruction patterns, and that turns out to be a
   little fragile. It would be nice to be able to use match.pd to
   produce target-specific patterns during expand.

Thoughts? Ok to apply?


Bernd

	PR middle-end/80283
	PR tree-optimization/78972
	* cfgexpand.c (should_forward_stmt_p, decide_schedule_stmt,
	resolve_deps, rank_ready_insns, add_dep, set_bb_uids, set_prios,
	floating_point_op_p, simple_schedule, expand_one_gimple_stmt):
	New static functions.
	(struct gimple_dep, struct gimple_sched_state): New structs.
	(sched_map): New static variable.
	(expand_gimple_basic_block): Use expand_one_gimple_state, and
	use the results from simple_schedule if flag_schedule_ter.
	* common.opt (fschedule-ter): New option.
	* toplev.c (process_options): Set flag_tree_ter if flag_schedule_ter.
	* doc/invoke.texi (Optimization Options): Add -fschedule-ter.
	(-fschedule-ter): Document.

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 66af699..e1b4898 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -5337,6 +5337,345 @@ expand_debug_locations (void)
   flag_strict_aliasing = save_strict_alias;
 }
 
+/* Determine whether TER decided that a statement should be forwarded.  */
+
+static bool
+should_forward_stmt_p (gimple *stmt)
+{
+  if (!SA.values)
+    return false;
+
+  def_operand_p def_p;
+  def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
+  if (def_p == NULL)
+    return false;
+
+  /* Forward this stmt if it is in the list of
+     replaceable expressions.  */
+  if (bitmap_bit_p (SA.values,
+		    SSA_NAME_VERSION (DEF_FROM_PTR (def_p))))
+    return true;
+
+  return false;
+}
+
+/* Decide whether STMT should be scheduled, or left as a TER replacement.
+   Clear it as a replacement if we decide to schedule it.  */
+
+static bool
+decide_schedule_stmt (gimple *stmt)
+{
+  if (!SA.values)
+    return false;
+
+  if (is_gimple_debug (stmt))
+    return true;
+
+  def_operand_p def_p;
+  def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
+
+  if (def_p == NULL)
+    return false;
+
+  tree def = DEF_FROM_PTR (def_p);
+  use_operand_p use_p;
+  gimple *use_stmt;
+  if (!single_imm_use (def, &use_p, &use_stmt))
+    return false;
+
+  /* There are only a few cases where expansion can actually make use of
+     replaceable SSA names.  Since TER is pessimal for scheduling in
+     general, we try to limit what we do to cases where we know it is
+     beneficial, and schedule insns for the other cases.  */
+  tree_code def_c = ERROR_MARK;
+  if (gimple_code (stmt) == GIMPLE_ASSIGN)
+    def_c = gimple_assign_rhs_code (stmt);
+
+  if (gimple_code (use_stmt) == GIMPLE_ASSIGN)
+    {
+      tree_code c = gimple_assign_rhs_code (use_stmt);
+      if (TREE_CODE_CLASS (c) != tcc_comparison
+	  && c != FMA_EXPR
+	  && c != SSA_NAME
+	  && c != MEM_REF
+	  && c != TARGET_MEM_REF
+	  && def_c != VIEW_CONVERT_EXPR)
+	{
+	  if (bitmap_clear_bit (SA.values, SSA_NAME_VERSION (def)))
+	    return true;
+	}
+    }
+  return false;
+}
+
+struct gimple_dep;
+
+/* Record state for a single statement during simple_schedule.  */
+struct gimple_sched_state
+{
+  gimple *stmt;
+  gimple_dep *deps, *back_deps;
+  /* The priority, computed by set_prios.  */
+  int pri;
+  /* A rough estimate of the number of registers dying in this stmt.
+     Statements with a high number should be preferred to limit register
+     pressure.  */
+  int n_dying;
+  /* Number of unresolved dependencies.  An insn is put on
+     the ready list when this reaches zero.  */
+  int n_unresolved;
+  /* Number of unresolved forward dependencies, i.e. statements that
+     depend on this one.  Used when calculating priorities.  */
+  int n_prio_deps;
+  /* A guess for the cost of the operation.  */
+  unsigned cost;
+};
+
+/* Describe a dependency between two statements.  */
+struct gimple_dep
+{
+  /* Linked lists chaining them for each of the two statements
+     involved.  */
+  struct gimple_dep *next_to, *next_from;
+  /* The statements involved in the dependency.  */
+  gimple_sched_state *from_insn, *to_insn;
+};
+
+/* An array that holds scheduling information about each of the
+   statements of a basic block to be expanded.  */
+static gimple_sched_state *sched_map;
+
+/* TO_RESOLVE has just been scheduled.  Mark the dependency on it as
+   resolved in every statement that depends on it, and push statements
+   that become ready onto READY_LIST.  */
+
+static void
+resolve_deps (auto_vec<gimple_sched_state *> *ready_list,
+	      gimple_sched_state *to_resolve)
+{
+  gimple_dep *next = to_resolve->deps;
+  while (next)
+    {
+      gimple_dep *d = next;
+      gimple_sched_state *resolved = d->from_insn;
+      gcc_assert (resolved->n_unresolved > 0);
+      if (--resolved->n_unresolved == 0)
+	ready_list->safe_push (d->from_insn);
+      next = d->next_to;
+      free (d);
+    }
+}
+
+/* Compare elements PVA and PVB to sort the ready list.  Called
+   through qsort.  */
+
+static int
+rank_ready_insns (const void *pva, const void *pvb)
+{
+  const gimple_sched_state *a = *(const gimple_sched_state *const *)pva;
+  const gimple_sched_state *b = *(const gimple_sched_state *const *)pvb;
+
+  if (is_gimple_debug (a->stmt) != is_gimple_debug (b->stmt))
+    return is_gimple_debug (a->stmt) - is_gimple_debug (b->stmt);
+  if (a->n_dying != b->n_dying)
+    return a->n_dying - b->n_dying;
+  if (a->pri != b->pri)
+    return a->pri - b->pri;
+  if (a->cost != b->cost)
+    return a->cost - b->cost;
+  return gimple_uid (a->stmt) - gimple_uid (b->stmt);
+}
+
+/* Record that statement FROM depends on TO.  */
+
+static void
+add_dep (gimple_sched_state *from, gimple_sched_state *to)
+{
+  gimple_dep *d = (gimple_dep *)xmalloc (sizeof (struct gimple_dep));
+  d->from_insn = from;
+  d->to_insn = to;
+  d->next_to = to->deps;
+  to->deps = d;
+  d->next_from = from->back_deps;
+  from->back_deps = d;
+  from->n_unresolved++;
+  to->n_prio_deps++;
+}
+
+/* Call gimple_set_uid for all stmts starting at GSI.  */
+
+static int
+set_bb_uids (gimple_stmt_iterator gsi)
+{
+  int n = 0;
+
+  for (; !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gimple *stmt = gsi_stmt (gsi);
+      gimple_set_uid (stmt, n++);
+    }
+  return n;
+}
+
+/* Set priorities for the stmts we want to schedule, of which there
+   are N.  We do this by estimating the length of the chain depending
+   on each statement, and estimating its effect on register
+   pressure.  */
+
+static void
+set_prios (int n)
+{
+  auto_vec<gimple_sched_state *> worklist;
+  for (int i = 0; i < n; i++)
+    {
+      gimple_sched_state *ss = sched_map + i;
+      if (ss->n_prio_deps == 0)
+	{
+	  gcc_assert (ss->deps == NULL);
+	  worklist.safe_push (ss);
+	}
+    }
+  while (!worklist.is_empty ())
+    {
+      gimple_sched_state *this_ss = worklist.pop ();
+      int min_prio = 0;
+      for (gimple_dep *d = this_ss->deps; d; d = d->next_to)
+	{
+	  gimple_sched_state *dep_ss = d->from_insn;
+	  if (!is_gimple_debug (dep_ss->stmt) && dep_ss->pri > min_prio)
+	    min_prio = dep_ss->pri;
+	}
+      this_ss->pri = min_prio + 1;
+      for (gimple_dep *d = this_ss->back_deps; d; d = d->next_from)
+	{
+	  if (--d->to_insn->n_prio_deps == 0)
+	    worklist.safe_push (d->to_insn);
+	}
+    }
+}
+
+/* See if STMT is a floating point operation for scheduling purposes.
+   Not all dependencies are explicit for these and we must treat them
+   with a bit more care.  */
+
+static bool
+floating_point_op_p (gimple *stmt)
+{
+  if (gimple_code (stmt) != GIMPLE_ASSIGN)
+    return false;
+  tree lhs = gimple_assign_lhs (stmt);
+  return FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (lhs)));
+}
+
+/* Before emitting the STMTS for a basic block, of which there are N
+   and which start at START, try to find a better schedule for them to
+   somewhat reduce register pressure and record it in SCHEDULE.  We
+   use the fact that TER has produced a set of operations that can be
+   moved relative to each other; anything not in that set we leave in
+   its original order.  */
+
+static void
+simple_schedule (gimple_seq stmts, gimple_stmt_iterator start,
+		 int n, auto_vec<gimple *> &schedule)
+{
+  gimple *last_insn = NULL;
+
+  gimple_stmt_iterator gsi = gsi_last (stmts);
+  if (!gsi_end_p (gsi)
+      && gimple_code (gsi_stmt (gsi)) != GIMPLE_ASSIGN
+      && gimple_code (gsi_stmt (gsi)) != GIMPLE_DEBUG)
+    {
+      last_insn = gsi_stmt (gsi);
+      n--;
+    }
+
+  auto_vec<gimple_sched_state *> ready_list;
+
+  gimple_sched_state *last_unreordered = NULL;
+
+  /* Create dependencies.  */
+  int i = 0;
+  for (gsi = start; !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      unsigned cost;
+      gimple *stmt = gsi_stmt (gsi);
+
+      if (stmt == last_insn)
+	break;
+
+      gimple_sched_state *ss = sched_map + i;
+      /* We consider statements for scheduling when TER has marked them as
+	 replaceable.  */
+      bool can_reorder = decide_schedule_stmt (stmt);
+      if (!can_reorder)
+	{
+	  if (last_unreordered)
+	    add_dep (ss, last_unreordered);
+	  last_unreordered = ss;
+	}
+      else if (last_unreordered != NULL
+	       && (gimple_vuse (stmt)
+		   || is_gimple_debug (stmt)
+		   || floating_point_op_p (stmt)))
+	add_dep (ss, last_unreordered);
+
+      cost = estimate_num_insns (stmt, &eni_size_weights);
+      ss->cost = cost;
+      ss->stmt = stmt;
+
+      ssa_op_iter iter;
+      use_operand_p use_p;
+      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
+	{
+	  tree use = USE_FROM_PTR (use_p);
+	  gimple *def_stmt;
+	  if (TREE_CODE (use) != SSA_NAME)
+	    continue;
+	  if (SA.values && bitmap_bit_p (SA.values, SSA_NAME_VERSION (use)))
+	    ss->n_dying++;
+
+	  def_stmt = SSA_NAME_DEF_STMT (use);
+	  if (!def_stmt
+	      || gimple_code (def_stmt) == GIMPLE_PHI
+	      || gimple_bb (def_stmt) != gimple_bb (stmt))
+	    continue;
+	  unsigned uid = gimple_uid (def_stmt);
+
+	  gimple_sched_state *def_ss = sched_map + uid;
+	  add_dep (ss, def_ss);
+	}
+      if (gimple_code (stmt) == GIMPLE_ASSIGN)
+	{
+	  tree lhs = gimple_assign_lhs (stmt);
+	  if (TREE_CODE (lhs) == SSA_NAME
+	      && SA.values && bitmap_bit_p (SA.values, SSA_NAME_VERSION (lhs)))
+	    ss->n_dying--;
+	}
+      i++;
+    }
+  gcc_assert (i == n);
+
+  set_prios (n);
+
+  for (int i = 0; i < n; i++)
+    {
+      gimple_sched_state *ss = sched_map + i;
+      if (ss->n_unresolved == 0)
+	ready_list.safe_push (ss);
+    }
+
+  int n_scheduled = 0;
+  while (n_scheduled < n)
+    {
+      gcc_assert (!ready_list.is_empty ());
+      ready_list.qsort (rank_ready_insns);
+      gimple_sched_state *next = ready_list.pop ();
+      if (dump_file)
+	fprintf (dump_file, "selecting insn with prio %d\n", next->pri);
+
+      resolve_deps (&ready_list, next);
+      schedule.safe_push (next->stmt);
+      n_scheduled++;
+    }
+  if (last_insn)
+    schedule.safe_push (last_insn);
+}
+
 /* Performs swapping operands of commutative operations to expand
    the expensive one first.  */
 
@@ -5416,6 +5755,239 @@ reorder_operands (basic_block bb)
   XDELETE (lattice);
 }
 
+/* Subroutine of expand_gimple_basic_block; expands one of the block's
+   statements.  The block is passed in PBB and could be modified by
+   this function if a new one needs to be started.  STMT is the
+   statement to be expanded.  A pointer to the last RTL insn generated
+   is returned through PLAST.  Tail calls are disabled if
+   DISABLE_TAIL_CALLS is true.  Return true if we should stop
+   expanding.  */
+
+static bool
+expand_one_gimple_stmt (basic_block *pbb, gimple *stmt,
+			rtx_insn **plast, bool disable_tail_calls)
+{
+  basic_block bb = *pbb;
+
+  /* If this statement is a non-debug one, and we generate debug
+     insns, then this one might be the last real use of a TERed
+     SSA_NAME, but where there are still some debug uses further
+     down.  Expanding the current SSA name in such further debug
+     uses by their RHS might lead to wrong debug info, as coalescing
+     might make the operands of such RHS be placed into the same
+     pseudo as something else.  Like so:
+     a_1 = a_0 + 1;   // Assume a_1 is TERed and a_0 is dead
+     use(a_1);
+     a_2 = ...
+     #DEBUG ... => a_1
+     As a_0 and a_2 don't overlap in lifetime, assume they are coalesced.
+     If we now would expand a_1 by its RHS (a_0 + 1) in the debug use,
+     the write to a_2 would actually have clobbered the place which
+     formerly held a_0.
+
+     So, instead of that, we recognize the situation, and generate
+     debug temporaries at the last real use of TERed SSA names:
+     a_1 = a_0 + 1;
+     #DEBUG #D1 => a_1
+     use(a_1);
+     a_2 = ...
+     #DEBUG ... => #D1
+  */
+  if (MAY_HAVE_DEBUG_INSNS
+      && SA.values
+      && !is_gimple_debug (stmt))
+    {
+      ssa_op_iter iter;
+      tree op;
+      gimple *def;
+
+      location_t sloc = curr_insn_location ();
+
+      /* Look for SSA names that have their last use here (TERed
+	 names always have only one real use).  */
+      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
+	if ((def = get_gimple_for_ssa_name (op)))
+	  {
+	    imm_use_iterator imm_iter;
+	    use_operand_p use_p;
+	    bool have_debug_uses = false;
+
+	    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, op)
+	      {
+		if (gimple_debug_bind_p (USE_STMT (use_p)))
+		  {
+		    have_debug_uses = true;
+		    break;
+		  }
+	      }
+
+	    if (have_debug_uses)
+	      {
+		/* OP is a TERed SSA name, with DEF its defining
+		   statement, and where OP is used in further debug
+		   instructions.  Generate a debug temporary, and
+		   replace all uses of OP in debug insns with that
+		   temporary.  */
+		gimple *debugstmt;
+		tree value = gimple_assign_rhs_to_tree (def);
+		tree vexpr = make_node (DEBUG_EXPR_DECL);
+		rtx val;
+		machine_mode mode;
+
+		set_curr_insn_location (gimple_location (def));
+
+		DECL_ARTIFICIAL (vexpr) = 1;
+		TREE_TYPE (vexpr) = TREE_TYPE (value);
+		if (DECL_P (value))
+		  mode = DECL_MODE (value);
+		else
+		  mode = TYPE_MODE (TREE_TYPE (value));
+		SET_DECL_MODE (vexpr, mode);
+
+		val = gen_rtx_VAR_LOCATION
+		  (mode, vexpr, (rtx)value, VAR_INIT_STATUS_INITIALIZED);
+
+		emit_debug_insn (val);
+
+		FOR_EACH_IMM_USE_STMT (debugstmt, imm_iter, op)
+		  {
+		    if (!gimple_debug_bind_p (debugstmt))
+		      continue;
+
+		    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
+		      SET_USE (use_p, vexpr);
+
+		    update_stmt (debugstmt);
+		  }
+	      }
+	  }
+      set_curr_insn_location (sloc);
+    }
+
+  currently_expanding_gimple_stmt = stmt;
+
+  /* Expand this statement, then evaluate the resulting RTL and
+     fixup the CFG accordingly.  */
+  if (gimple_code (stmt) == GIMPLE_COND)
+    {
+      basic_block new_bb = expand_gimple_cond (bb, as_a <gcond *> (stmt));
+      if (new_bb)
+	{
+	  *pbb = new_bb;
+	  return true;
+	}
+    }
+  else if (gimple_debug_bind_p (stmt))
+    {
+      tree var = gimple_debug_bind_get_var (stmt);
+      tree value;
+      rtx val;
+      machine_mode mode;
+
+      if (TREE_CODE (var) != DEBUG_EXPR_DECL
+	  && TREE_CODE (var) != LABEL_DECL
+	  && !target_for_debug_bind (var))
+	goto delink_debug_stmt;
+
+      if (gimple_debug_bind_has_value_p (stmt))
+	value = gimple_debug_bind_get_value (stmt);
+      else
+	value = NULL_TREE;
+
+      *plast = get_last_insn ();
+
+      set_curr_insn_location (gimple_location (stmt));
+
+      if (DECL_P (var))
+	mode = DECL_MODE (var);
+      else
+	mode = TYPE_MODE (TREE_TYPE (var));
+
+      val = gen_rtx_VAR_LOCATION
+	(mode, var, (rtx)value, VAR_INIT_STATUS_INITIALIZED);
+
+      emit_debug_insn (val);
+
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	{
+	  /* We can't dump the insn with a TREE where an RTX
+	     is expected.  */
+	  PAT_VAR_LOCATION_LOC (val) = const0_rtx;
+	  maybe_dump_rtl_for_gimple_stmt (stmt, *plast);
+	  PAT_VAR_LOCATION_LOC (val) = (rtx)value;
+	}
+
+    delink_debug_stmt:
+      /* In order not to generate too many debug temporaries,
+	 we delink all uses of debug statements we already expanded.
+	 Therefore debug statements between definition and real
+	 use of TERed SSA names will continue to use the SSA name,
+	 and not be replaced with debug temps.  */
+      delink_stmt_imm_use (stmt);
+    }
+  else if (gimple_debug_source_bind_p (stmt))
+    {
+      location_t sloc = curr_insn_location ();
+      tree var = gimple_debug_source_bind_get_var (stmt);
+      tree value = gimple_debug_source_bind_get_value (stmt);
+      rtx val;
+      machine_mode mode;
+
+      *plast = get_last_insn ();
+
+      set_curr_insn_location (gimple_location (stmt));
+
+      mode = DECL_MODE (var);
+
+      val = gen_rtx_VAR_LOCATION (mode, var, (rtx)value,
+				  VAR_INIT_STATUS_UNINITIALIZED);
+
+      emit_debug_insn (val);
+
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	{
+	  /* We can't dump the insn with a TREE where an RTX
+	     is expected.  */
+	  PAT_VAR_LOCATION_LOC (val) = const0_rtx;
+	  maybe_dump_rtl_for_gimple_stmt (stmt, *plast);
+	  PAT_VAR_LOCATION_LOC (val) = (rtx)value;
+	}
+
+      set_curr_insn_location (sloc);
+    }
+  else
+    {
+      gcall *call_stmt = dyn_cast <gcall *> (stmt);
+      if (call_stmt
+	  && gimple_call_tail_p (call_stmt)
+	  && disable_tail_calls)
+	gimple_call_set_tail (call_stmt, false);
+
+      if (call_stmt && gimple_call_tail_p (call_stmt))
+	{
+	  bool can_fallthru;
+	  basic_block new_bb;
+	  new_bb = expand_gimple_tailcall (bb, call_stmt, &can_fallthru);
+	  if (new_bb)
+	    {
+	      if (can_fallthru)
+		*pbb = new_bb;
+	      else
+		return true;
+	    }
+	}
+      else
+	{
+	  if (should_forward_stmt_p (stmt))
+	    return false;
+
+	  *plast = expand_gimple_stmt (stmt);
+	  maybe_dump_rtl_for_gimple_stmt (stmt, *plast);
+	}
+    }
+  return false;
+}
+
 /* Expand basic block BB from GIMPLE trees to RTL.  */
 
 static basic_block
@@ -5502,249 +6074,62 @@ expand_gimple_basic_block (basic_block bb, bool disable_tail_calls)
 
   NOTE_BASIC_BLOCK (note) = bb;
 
-  for (; !gsi_end_p (gsi); gsi_next (&gsi))
-    {
-      basic_block new_bb;
-
-      stmt = gsi_stmt (gsi);
-
-      /* If this statement is a non-debug one, and we generate debug
-	 insns, then this one might be the last real use of a TERed
-	 SSA_NAME, but where there are still some debug uses further
-	 down.  Expanding the current SSA name in such further debug
-	 uses by their RHS might lead to wrong debug info, as coalescing
-	 might make the operands of such RHS be placed into the same
-	 pseudo as something else.  Like so:
-	   a_1 = a_0 + 1;   // Assume a_1 is TERed and a_0 is dead
-	   use(a_1);
-	   a_2 = ...
-           #DEBUG ... => a_1
-	 As a_0 and a_2 don't overlap in lifetime, assume they are coalesced.
-	 If we now would expand a_1 by it's RHS (a_0 + 1) in the debug use,
-	 the write to a_2 would actually have clobbered the place which
-	 formerly held a_0.
-
-	 So, instead of that, we recognize the situation, and generate
-	 debug temporaries at the last real use of TERed SSA names:
-	   a_1 = a_0 + 1;
-           #DEBUG #D1 => a_1
-	   use(a_1);
-	   a_2 = ...
-           #DEBUG ... => #D1
-	 */
-      if (MAY_HAVE_DEBUG_INSNS
-	  && SA.values
-	  && !is_gimple_debug (stmt))
-	{
-	  ssa_op_iter iter;
-	  tree op;
-	  gimple *def;
-
-	  location_t sloc = curr_insn_location ();
-
-	  /* Look for SSA names that have their last use here (TERed
-	     names always have only one real use).  */
-	  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
-	    if ((def = get_gimple_for_ssa_name (op)))
-	      {
-		imm_use_iterator imm_iter;
-		use_operand_p use_p;
-		bool have_debug_uses = false;
-
-		FOR_EACH_IMM_USE_FAST (use_p, imm_iter, op)
-		  {
-		    if (gimple_debug_bind_p (USE_STMT (use_p)))
-		      {
-			have_debug_uses = true;
-			break;
-		      }
-		  }
-
-		if (have_debug_uses)
-		  {
-		    /* OP is a TERed SSA name, with DEF its defining
-		       statement, and where OP is used in further debug
-		       instructions.  Generate a debug temporary, and
-		       replace all uses of OP in debug insns with that
-		       temporary.  */
-		    gimple *debugstmt;
-		    tree value = gimple_assign_rhs_to_tree (def);
-		    tree vexpr = make_node (DEBUG_EXPR_DECL);
-		    rtx val;
-		    machine_mode mode;
-
-		    set_curr_insn_location (gimple_location (def));
-
-		    DECL_ARTIFICIAL (vexpr) = 1;
-		    TREE_TYPE (vexpr) = TREE_TYPE (value);
-		    if (DECL_P (value))
-		      mode = DECL_MODE (value);
-		    else
-		      mode = TYPE_MODE (TREE_TYPE (value));
-		    SET_DECL_MODE (vexpr, mode);
-
-		    val = gen_rtx_VAR_LOCATION
-			(mode, vexpr, (rtx)value, VAR_INIT_STATUS_INITIALIZED);
-
-		    emit_debug_insn (val);
-
-		    FOR_EACH_IMM_USE_STMT (debugstmt, imm_iter, op)
-		      {
-			if (!gimple_debug_bind_p (debugstmt))
-			  continue;
-
-			FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
-			  SET_USE (use_p, vexpr);
-
-			update_stmt (debugstmt);
-		      }
-		  }
-	      }
-	  set_curr_insn_location (sloc);
-	}
+  int n = set_bb_uids (gsi);
+  bool expanding_debug_bind_p = false;
+  location_t saved_loc;
 
-      currently_expanding_gimple_stmt = stmt;
+  auto_vec<gimple *> schedule;
+  if (n > 0 && flag_schedule_ter)
+    {
+      sched_map = XCNEWVEC (gimple_sched_state, n);
+      simple_schedule (stmts, gsi, n, schedule);
 
-      /* Expand this statement, then evaluate the resulting RTL and
-	 fixup the CFG accordingly.  */
-      if (gimple_code (stmt) == GIMPLE_COND)
-	{
-	  new_bb = expand_gimple_cond (bb, as_a <gcond *> (stmt));
-	  if (new_bb)
-	    return new_bb;
-	}
-      else if (gimple_debug_bind_p (stmt))
+      int i;
+      FOR_EACH_VEC_ELT (schedule, i, stmt)
 	{
-	  location_t sloc = curr_insn_location ();
-	  gimple_stmt_iterator nsi = gsi;
-
-	  for (;;)
+	  if (gimple_debug_bind_p (stmt))
 	    {
-	      tree var = gimple_debug_bind_get_var (stmt);
-	      tree value;
-	      rtx val;
-	      machine_mode mode;
-
-	      if (TREE_CODE (var) != DEBUG_EXPR_DECL
-		  && TREE_CODE (var) != LABEL_DECL
-		  && !target_for_debug_bind (var))
-		goto delink_debug_stmt;
-
-	      if (gimple_debug_bind_has_value_p (stmt))
-		value = gimple_debug_bind_get_value (stmt);
-	      else
-		value = NULL_TREE;
-
-	      last = get_last_insn ();
-
-	      set_curr_insn_location (gimple_location (stmt));
-
-	      if (DECL_P (var))
-		mode = DECL_MODE (var);
-	      else
-		mode = TYPE_MODE (TREE_TYPE (var));
+	      if (!expanding_debug_bind_p)
+		saved_loc = curr_insn_location ();
 
-	      val = gen_rtx_VAR_LOCATION
-		(mode, var, (rtx)value, VAR_INIT_STATUS_INITIALIZED);
-
-	      emit_debug_insn (val);
-
-	      if (dump_file && (dump_flags & TDF_DETAILS))
-		{
-		  /* We can't dump the insn with a TREE where an RTX
-		     is expected.  */
-		  PAT_VAR_LOCATION_LOC (val) = const0_rtx;
-		  maybe_dump_rtl_for_gimple_stmt (stmt, last);
-		  PAT_VAR_LOCATION_LOC (val) = (rtx)value;
-		}
-
-	    delink_debug_stmt:
-	      /* In order not to generate too many debug temporaries,
-	         we delink all uses of debug statements we already expanded.
-		 Therefore debug statements between definition and real
-		 use of TERed SSA names will continue to use the SSA name,
-		 and not be replaced with debug temps.  */
-	      delink_stmt_imm_use (stmt);
-
-	      gsi = nsi;
-	      gsi_next (&nsi);
-	      if (gsi_end_p (nsi))
-		break;
-	      stmt = gsi_stmt (nsi);
-	      if (!gimple_debug_bind_p (stmt))
-		break;
+	      expanding_debug_bind_p = true;
 	    }
-
-	  set_curr_insn_location (sloc);
-	}
-      else if (gimple_debug_source_bind_p (stmt))
-	{
-	  location_t sloc = curr_insn_location ();
-	  tree var = gimple_debug_source_bind_get_var (stmt);
-	  tree value = gimple_debug_source_bind_get_value (stmt);
-	  rtx val;
-	  machine_mode mode;
-
-	  last = get_last_insn ();
-
-	  set_curr_insn_location (gimple_location (stmt));
-
-	  mode = DECL_MODE (var);
-
-	  val = gen_rtx_VAR_LOCATION (mode, var, (rtx)value,
-				      VAR_INIT_STATUS_UNINITIALIZED);
-
-	  emit_debug_insn (val);
-
-	  if (dump_file && (dump_flags & TDF_DETAILS))
+	  else if (expanding_debug_bind_p)
 	    {
-	      /* We can't dump the insn with a TREE where an RTX
-		 is expected.  */
-	      PAT_VAR_LOCATION_LOC (val) = const0_rtx;
-	      maybe_dump_rtl_for_gimple_stmt (stmt, last);
-	      PAT_VAR_LOCATION_LOC (val) = (rtx)value;
+	      expanding_debug_bind_p = false;
+	      set_curr_insn_location (saved_loc);
 	    }
-
-	  set_curr_insn_location (sloc);
+	  if (expand_one_gimple_stmt (&bb, stmt, &last, disable_tail_calls))
+	    return bb;
 	}
-      else
-	{
-	  gcall *call_stmt = dyn_cast <gcall *> (stmt);
-	  if (call_stmt
-	      && gimple_call_tail_p (call_stmt)
-	      && disable_tail_calls)
-	    gimple_call_set_tail (call_stmt, false);
+      XDELETE (sched_map);
+      sched_map = NULL;
+    }
+  else
+    for (; !gsi_end_p (gsi); gsi_next (&gsi))
+      {
+	stmt = gsi_stmt (gsi);
+	if (gimple_debug_bind_p (stmt))
+	  {
+	    if (!expanding_debug_bind_p)
+	      saved_loc = curr_insn_location ();
 
-	  if (call_stmt && gimple_call_tail_p (call_stmt))
-	    {
-	      bool can_fallthru;
-	      new_bb = expand_gimple_tailcall (bb, call_stmt, &can_fallthru);
-	      if (new_bb)
-		{
-		  if (can_fallthru)
-		    bb = new_bb;
-		  else
-		    return new_bb;
-		}
-	    }
-	  else
-	    {
-	      def_operand_p def_p;
-	      def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
+	    expanding_debug_bind_p = true;
+	  }
+	else if (expanding_debug_bind_p)
+	  {
+	    expanding_debug_bind_p = false;
+	    set_curr_insn_location (saved_loc);
+	  }
+	if (expand_one_gimple_stmt (&bb, stmt, &last, disable_tail_calls))
+	  return bb;
+      }
 
-	      if (def_p != NULL)
-		{
-		  /* Ignore this stmt if it is in the list of
-		     replaceable expressions.  */
-		  if (SA.values
-		      && bitmap_bit_p (SA.values,
-				       SSA_NAME_VERSION (DEF_FROM_PTR (def_p))))
-		    continue;
-		}
-	      last = expand_gimple_stmt (stmt);
-	      maybe_dump_rtl_for_gimple_stmt (stmt, last);
-	    }
-	}
+
+  if (expanding_debug_bind_p)
+    {
+      expanding_debug_bind_p = false;
+      set_curr_insn_location (saved_loc);
     }
 
   currently_expanding_gimple_stmt = NULL;
diff --git a/gcc/common.opt b/gcc/common.opt
index a5c3aea..348e793 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2565,6 +2565,11 @@ ftree-ter
 Common Report Var(flag_tree_ter) Optimization
 Replace temporary expressions in the SSA->normal pass.
 
+fschedule-ter
+Common Report Var(flag_schedule_ter) Optimization
+Replace temporary expressions in the SSA->normal pass and perform
+scheduling on them.
+
 ftree-lrs
 Common Report Var(flag_tree_live_range_split) Optimization
 Perform live range splitting during the SSA->normal pass.
diff --git a/gcc/toplev.c b/gcc/toplev.c
index f1384fc..9c6a217 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -1341,6 +1341,9 @@ process_options (void)
   if (flag_value_profile_transformations)
     flag_profile_values = 1;
 
+  if (flag_schedule_ter)
+    flag_tree_ter = 1;
+
   /* Warn about options that are not supported on this machine.  */
 #ifndef INSN_SCHEDULING
   if (flag_schedule_insns || flag_schedule_insns_after_reload)
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 86e17fb..88df8d2 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -410,8 +410,8 @@ Objective-C and Objective-C++ Dialects}.
 -fsched-group-heuristic  -fsched-critical-path-heuristic @gol
 -fsched-spec-insn-heuristic  -fsched-rank-heuristic @gol
 -fsched-last-insn-heuristic  -fsched-dep-count-heuristic @gol
--fschedule-fusion @gol
--fschedule-insns  -fschedule-insns2  -fsection-anchors @gol
+-fschedule-fusion  -fschedule-insns  -fschedule-insns2  -fschedule-ter @gol
+-fsection-anchors @gol
 -fselective-scheduling  -fselective-scheduling2 @gol
 -fsel-sched-pipelining  -fsel-sched-pipelining-outer-loops @gol
 -fsemantic-interposition  -fshrink-wrap  -fshrink-wrap-separate @gol
@@ -8393,6 +8393,12 @@ defining expression.  This results in non-GIMPLE code, but gives the expanders
 much more complex trees to work on resulting in better RTL generation.  This is
 enabled by default at @option{-O} and higher.
 
+@item -fschedule-ter
+@opindex fschedule-ter
+Like @option{-ftree-ter}, but adds a register-pressure-sensitive scheduling
+phase rather than just placing every single-use definition next to its use,
+with the aim of producing better initial RTL.
+
 @item -ftree-slsr
 @opindex ftree-slsr
 Perform straight-line strength reduction on trees.  This recognizes related
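
For reference, a minimal invocation exercising the new flag once the
patch is applied (the input file name is just a placeholder); the
scheduling decisions are visible in the expand dump:

  gcc -O2 -fschedule-ter -fdump-rtl-expand pr78972.c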
