This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Committed] reg-stack.c reorganization (part 3)


This is the next installment of my reorganization/clean-up of
reg-stack.c.  My apologies that yet again there should be no
functional change with this patch, but I assure you that I intend
to implement two additional classes of optimization, now that
reg-stack has been restructured to support them.

Now that reg-stack is a two phase algorithm, with the definition
of the stack permutations in the first phase independent of the
insertion of compensation code in the second phase, there is
room to improve/specialize each bit (or more precisely remove
some inefficiency).

The first observation is that once split, the first phase is
effectively basic block oriented (where each BB needs to be
visited to process its stack from beginning to end), and the
second phase is edge oriented (where every edge in the BB
needs to visited and compensation code inserted if necessary).
With that in mind, the original traversal implementation that we
still use for reaching every basic block, still attempts to
traverse unnecessary edges such as "DFS back edges" and forward
edges that are known to have reached their destination by an
alternate route.  Given that propagate_stack is a no-op if the
edge's destination has already been visited, we can avoid these
loops/traversals.

One benefit is that by doing so, propagate_edge is now only ever
called when the destination hasn't previously been visited,
simplifying its semantics (it now always propagates a register stack
across an edge).  Likewise, for each basic block we visit, we now
only have to select/choose a best incoming edge to propagate across
when the stack at the the start of this block hasn't been defined.

Two further changes hint at things to come.  Given that selection
of best incoming edge is conditionalized by this change, I took the
opportunity to split the frequency/criticality comparison code into
its own subroutine better_edge.  Secondly, (and the only slight of
hand in this patch), is that when selecting the best edge we'd
previously restrict our search/candidates to non-back-edges.
In the  change below we now consider any edge from a previously
visited (i.e. laid out) block.  At the moment the current traversal
algorithm ensures the semantics of these two tests are identical as
we're guaranteed to visit all predecessors except those from backedges
(which are guaranteed to be unvisited) before processing a block.
At some point in the future :), we may potentially visit blocks at
the end of loops before their latches or decide to process a block
before all its predecessors have been visited.


The following patch has been tested on i686-pc-linux-gnu with a
full "make bootstrap", all default languages, and regression tested
with a top-level "make -k check" with no new failures.

Committed to mainline CVS.  There should be no change in generated
code but some minor compile-time improvements.



2005-05-29  Roger Sayle  <roger@eyesopen.com>

	* reg-stack.c (propagate_stack): Always copy the source stack to
	the destination.  This routine is now only called when this is safe.
	(better_edge): New function split out from convert_regs_1 to
	determine which of two edges is better to propagate across.
	(convert_regs_1):  We need only search for a best edge if the
	stack layout hasn't been defined yet.  Use better_edge to help
	find beste.  No longer traverse unnecessary edges.


Index: reg-stack.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/reg-stack.c,v
retrieving revision 1.180
diff -c -3 -p -r1.180 reg-stack.c
*** reg-stack.c	27 May 2005 22:57:39 -0000	1.180
--- reg-stack.c	29 May 2005 05:02:05 -0000
*************** convert_regs_exit (void)
*** 2578,2605 ****
      }
  }

! /* If the stack of the target block hasn't been processed yet,
!    copy the stack info from the source block.  */

  static void
  propagate_stack (edge e)
  {
!   basic_block dest = e->dest;
!   stack dest_stack = &BLOCK_INFO (dest)->stack_in;
!
!   if (dest_stack->top == -2)
!     {
!       basic_block src = e->src;
!       stack src_stack = &BLOCK_INFO (src)->stack_out;
!       int reg;

!       /* Preserve the order of the original stack, but check whether
! 	 any pops are needed.  */
!       dest_stack->top = -1;
!       for (reg = 0; reg <= src_stack->top; ++reg)
! 	if (TEST_HARD_REG_BIT (dest_stack->reg_set, src_stack->reg[reg]))
! 	  dest_stack->reg[++dest_stack->top] = src_stack->reg[reg];
!     }
  }


--- 2578,2599 ----
      }
  }

! /* Copy the stack info from the end of edge E's source block to the
!    start of E's destination block.  */

  static void
  propagate_stack (edge e)
  {
!   stack src_stack = &BLOCK_INFO (e->src)->stack_out;
!   stack dest_stack = &BLOCK_INFO (e->dest)->stack_in;
!   int reg;

!   /* Preserve the order of the original stack, but check whether
!      any pops are needed.  */
!   dest_stack->top = -1;
!   for (reg = 0; reg <= src_stack->top; ++reg)
!     if (TEST_HARD_REG_BIT (dest_stack->reg_set, src_stack->reg[reg]))
!       dest_stack->reg[++dest_stack->top] = src_stack->reg[reg];
  }


*************** compensate_edges (FILE *file)
*** 2738,2743 ****
--- 2732,2768 ----
    return inserted;
  }

+ /* Select the better of two edges E1 and E2 to use to determine the
+    stack layout for their shared destination basic block.  This is
+    typically the more frequently executed.  The edge E1 may be NULL
+    (in which case E2 is returned), but E2 is always non-NULL.  */
+
+ static edge
+ better_edge (edge e1, edge e2)
+ {
+   if (!e1)
+     return e2;
+
+   if (EDGE_FREQUENCY (e1) > EDGE_FREQUENCY (e2))
+     return e1;
+   if (EDGE_FREQUENCY (e1) < EDGE_FREQUENCY (e2))
+     return e2;
+
+   if (e1->count > e2->count)
+     return e1;
+   if (e1->count < e2->count)
+     return e2;
+
+   /* Prefer critical edges to minimize inserting compensation code on
+      critical edges.  */
+
+   if (EDGE_CRITICAL_P (e1) != EDGE_CRITICAL_P (e2))
+     return EDGE_CRITICAL_P (e1) ? e1 : e2;
+
+   /* Avoid non-deterministic behaviour.  */
+   return (e1->src->index < e2->src->index) ? e1 : e2;
+ }
+
  /* Convert stack register references in one block.  */

  static void
*************** convert_regs_1 (FILE *file, basic_block
*** 2747,2806 ****
    block_info bi = BLOCK_INFO (block);
    int reg;
    rtx insn, next;
-   edge e, beste = NULL;
    bool control_flow_insn_deleted = false;
-   edge_iterator ei;

    any_malformed_asm = false;

!   /* Find the edge we will copy stack from.  It should be the most frequent
!      one as it will get cheapest after compensation code is generated,
!      if multiple such exists, take one with largest count, prefer critical
!      one (as splitting critical edges is more expensive), or one with lowest
!      index, to avoid random changes with different orders of the edges.  */
!   FOR_EACH_EDGE (e, ei, block->preds)
!     {
!       if (e->flags & EDGE_DFS_BACK)
! 	;
!       else if (! beste)
! 	beste = e;
!       else if (EDGE_FREQUENCY (beste) < EDGE_FREQUENCY (e))
! 	beste = e;
!       else if (EDGE_FREQUENCY (beste) > EDGE_FREQUENCY (e))
! 	;
!       else if (beste->count < e->count)
! 	beste = e;
!       else if (beste->count > e->count)
! 	;
!       else if ((EDGE_CRITICAL_P (e) != 0)
! 	       != (EDGE_CRITICAL_P (beste) != 0))
! 	{
! 	  if (EDGE_CRITICAL_P (e))
! 	    beste = e;
! 	}
!       else if (e->src->index < beste->src->index)
! 	beste = e;
!     }
!
!   /* Initialize stack at block entry.  */
    if (bi->stack_in.top == -2)
      {
        if (beste)
  	propagate_stack (beste);
        else
  	{
  	  /* No predecessors.  Create an arbitrary input stack.  */
- 	  int reg;
-
  	  bi->stack_in.top = -1;
  	  for (reg = LAST_STACK_REG; reg >= FIRST_STACK_REG; --reg)
  	    if (TEST_HARD_REG_BIT (bi->stack_in.reg_set, reg))
  	      bi->stack_in.reg[++bi->stack_in.top] = reg;
  	}
      }
-   else
-     /* Entry blocks do have stack already initialized.  */
-     beste = NULL;

    current_block = block;

--- 2772,2804 ----
    block_info bi = BLOCK_INFO (block);
    int reg;
    rtx insn, next;
    bool control_flow_insn_deleted = false;

    any_malformed_asm = false;

!   /* Choose an initial stack layout, if one hasn't already been chosen.  */
    if (bi->stack_in.top == -2)
      {
+       edge e, beste = NULL;
+       edge_iterator ei;
+
+       /* Select the best incoming edge (typically the most frequent) to
+ 	 use as a template for this basic block.  */
+       FOR_EACH_EDGE (e, ei, block->preds)
+ 	if (BLOCK_INFO (e->src)->done)
+ 	  beste = better_edge (beste, e);
+
        if (beste)
  	propagate_stack (beste);
        else
  	{
  	  /* No predecessors.  Create an arbitrary input stack.  */
  	  bi->stack_in.top = -1;
  	  for (reg = LAST_STACK_REG; reg >= FIRST_STACK_REG; --reg)
  	    if (TEST_HARD_REG_BIT (bi->stack_in.reg_set, reg))
  	      bi->stack_in.reg[++bi->stack_in.top] = reg;
  	}
      }

    current_block = block;

*************** convert_regs_1 (FILE *file, basic_block
*** 2901,2928 ****
    gcc_assert (any_malformed_asm);
   win:
    bi->stack_out = regstack;
-
-   /* Compensate the back edges, as those wasn't visited yet.  */
-   FOR_EACH_EDGE (e, ei, block->succs)
-     {
-       if (e->flags & EDGE_DFS_BACK
- 	  || (e->dest == EXIT_BLOCK_PTR))
- 	{
- 	  gcc_assert (BLOCK_INFO (e->dest)->done
- 		      || e->dest == block);
- 	  propagate_stack (e);
- 	}
-     }
-
-   FOR_EACH_EDGE (e, ei, block->preds)
-     {
-       if (e != beste && !(e->flags & EDGE_DFS_BACK)
- 	  && e->src != ENTRY_BLOCK_PTR)
- 	{
- 	  gcc_assert (BLOCK_INFO (e->src)->done);
- 	  propagate_stack (e);
- 	}
-     }
  }

  /* Convert registers in all blocks reachable from BLOCK.  */
--- 2899,2904 ----


Roger
--


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]