This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Committed] Fix minor performance regression in ifcvt.c


The following patch fixes a performance/size regression on mainline
(relative to gcc 3.2 and earlier) that I came across whilst browsing
the assembly language output for SPEC's 179.art.

Given code such as

double foo(int i)
{
  if (i)
    return 0.0;
  ...
}

the IF-CASE-1 transformation in RTL if-conversion triggers, noticing
that the load of 0.0 is dead on the other branch and cheaper than a
unconditional jump [which it can eliminate by simplifying the CFG].

The problem/oversight is that x87 stack registers have a "hidden"
cost, which is the additional instruction required to pop dead
values off of the stack.  Hence for the code above we transform

	...
	jcond L1
	fldz
	jmp L2
L1:	...

into

	...
	fldz
	jncond L2
	fstp %st(0)
L1:	...


Clearly, the current code in if-cvt.c isn't taking into account this
additional instruction when assessing the cost of speculatively executing
a basic block.  This can lead to a code size and/or performance
pessimization.  Indeed compiling 179.art with just -O2 on x86 shows
that mainline currently performs this move, but that gcc 3.2, the Intel
compiler and mainline with the patch below decide that hoisting the
floating point load before the conditional isn't worth it.  I've also
confirmed that this patch improves CSiBE with -Os by a few bytes.

This change, of adding COSTS_N_INSNS(1) to the rtx_cost of setting
a stack register, doesn't prohibit us from hoisting floating point
loads, it just requires the benefits of the move to be higher.


Whilst fixing this problem, I also decided to improve the performance
of if-conversion's use of total_bb_rtx_cost.  The current idiom is that
this function scans the entire basic block adding up the rtx_cost for
each instruction before returning the total (or "infinite") value.
For the vast majority of cases, we can avoid scanning the entire basic
block by passing in the upper bound that we're interested in, allowing
this routine to return early once that threshold has been exceeded.
Typically, we're only able to speculatively execute a basic block of
one or two simple instructions, but previously this routine would
carefully attempt to "cost" large basic blocks of thousands of insns.



The following patch has been tested on i686-pc-linux-gnu with a full
"make bootstrap", all default languages, and regression tested with a
top-level "make -k check" with no new failures.

Committed to mainline CVS.



2004-12-18  Roger Sayle  <roger@eyesopen.com>

	* ifcvt.c (total_bb_rtx_cost): Rename function to cheap_bb_rtx_cost_p.
	Take a max_cost argument to avoid scanning large blocks, by returning
	a Boolean instead of a total.  Include the cost of a "fstp %st(0)"
	instruction required to pop dead values off of a register stack.


Index: ifcvt.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/ifcvt.c,v
retrieving revision 1.173
diff -c -3 -p -r1.173 ifcvt.c
*** ifcvt.c	25 Nov 2004 09:30:03 -0000	1.173
--- ifcvt.c	17 Dec 2004 22:57:05 -0000
*************** static bool life_data_ok;
*** 86,92 ****

  /* Forward references.  */
  static int count_bb_insns (basic_block);
! static int total_bb_rtx_cost (basic_block);
  static rtx first_active_insn (basic_block);
  static rtx last_active_insn (basic_block, int);
  static basic_block block_fallthru (basic_block);
--- 86,92 ----

  /* Forward references.  */
  static int count_bb_insns (basic_block);
! static bool cheap_bb_rtx_cost_p (basic_block, int);
  static rtx first_active_insn (basic_block);
  static rtx last_active_insn (basic_block, int);
  static basic_block block_fallthru (basic_block);
*************** count_bb_insns (basic_block bb)
*** 162,173 ****
    return count;
  }

! /* Count the total insn_rtx_cost of non-jump active insns in BB.
!    This function returns -1, if the cost of any instruction could
!    not be estimated.  */

! static int
! total_bb_rtx_cost (basic_block bb)
  {
    int count = 0;
    rtx insn = BB_HEAD (bb);
--- 162,173 ----
    return count;
  }

! /* Determine whether the total insn_rtx_cost on non-jump insns in
!    basic block BB is less than MAX_COST.  This function returns
!    false if the cost of any instruction could not be estimated.  */

! static bool
! cheap_bb_rtx_cost_p (basic_block bb, int max_cost)
  {
    int count = 0;
    rtx insn = BB_HEAD (bb);
*************** total_bb_rtx_cost (basic_block bb)
*** 178,195 ****
  	{
  	  int cost = insn_rtx_cost (PATTERN (insn));
  	  if (cost == 0)
! 	    return -1;
  	  count += cost;
  	}
        else if (CALL_P (insn))
! 	return -1;

        if (insn == BB_END (bb))
  	break;
        insn = NEXT_INSN (insn);
      }

!   return count;
  }

  /* Return the first non-jump active insn in the basic block.  */
--- 178,211 ----
  	{
  	  int cost = insn_rtx_cost (PATTERN (insn));
  	  if (cost == 0)
! 	    return false;
!
! 	  /* If this instruction is the load or set of a "stack" register,
! 	     such as a floating point register on x87, then the cost of
! 	     speculatively executing this instruction needs to include
! 	     the additional cost of popping this register off of the
! 	     register stack.  */
! #ifdef STACK_REGS
! 	  {
! 	    rtx set = single_set (insn);
! 	    if (set && STACK_REG_P (SET_DEST (set)))
! 	      cost += COSTS_N_INSNS (1);
! 	  }
! #endif
!
  	  count += cost;
+ 	  if (count >= max_cost)
+ 	    return false;
  	}
        else if (CALL_P (insn))
! 	return false;

        if (insn == BB_END (bb))
  	break;
        insn = NEXT_INSN (insn);
      }

!   return true;
  }

  /* Return the first non-jump active insn in the basic block.  */
*************** find_if_case_1 (basic_block test_bb, edg
*** 2853,2859 ****
  {
    basic_block then_bb = then_edge->dest;
    basic_block else_bb = else_edge->dest, new_bb;
!   int then_bb_index, bb_cost;

    /* If we are partitioning hot/cold basic blocks, we don't want to
       mess up unconditional or indirect jumps that cross between hot
--- 2869,2875 ----
  {
    basic_block then_bb = then_edge->dest;
    basic_block else_bb = else_edge->dest, new_bb;
!   int then_bb_index;

    /* If we are partitioning hot/cold basic blocks, we don't want to
       mess up unconditional or indirect jumps that cross between hot
*************** find_if_case_1 (basic_block test_bb, edg
*** 2896,2903 ****
  	     test_bb->index, then_bb->index);

    /* THEN is small.  */
!   bb_cost = total_bb_rtx_cost (then_bb);
!   if (bb_cost < 0 || bb_cost >= COSTS_N_INSNS (BRANCH_COST))
      return FALSE;

    /* Registers set are dead, or are predicable.  */
--- 2912,2918 ----
  	     test_bb->index, then_bb->index);

    /* THEN is small.  */
!   if (! cheap_bb_rtx_cost_p (then_bb, COSTS_N_INSNS (BRANCH_COST)))
      return FALSE;

    /* Registers set are dead, or are predicable.  */
*************** find_if_case_2 (basic_block test_bb, edg
*** 2944,2950 ****
    basic_block then_bb = then_edge->dest;
    basic_block else_bb = else_edge->dest;
    edge else_succ;
-   int bb_cost;
    rtx note;

    /* If we are partitioning hot/cold basic blocks, we don't want to
--- 2959,2964 ----
*************** find_if_case_2 (basic_block test_bb, edg
*** 3001,3008 ****
  	     test_bb->index, else_bb->index);

    /* ELSE is small.  */
!   bb_cost = total_bb_rtx_cost (else_bb);
!   if (bb_cost < 0 || bb_cost >= COSTS_N_INSNS (BRANCH_COST))
      return FALSE;

    /* Registers set are dead, or are predicable.  */
--- 3015,3021 ----
  	     test_bb->index, else_bb->index);

    /* ELSE is small.  */
!   if (! cheap_bb_rtx_cost_p (else_bb, COSTS_N_INSNS (BRANCH_COST)))
      return FALSE;

    /* Registers set are dead, or are predicable.  */


Roger
--
Roger Sayle,                         E-mail: roger@eyesopen.com
OpenEye Scientific Software,         WWW: http://www.eyesopen.com/
Suite 1107, 3600 Cerrillos Road,     Tel: (+1) 505-473-7385
Santa Fe, New Mexico, 87507.         Fax: (+1) 505-473-0833


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]