This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Pass info about hotness of instruction to RTL expanders


Hi,
this patch adds a way to use information about hot basic blocks during
instruction expansion.  This is done by introducing a variable,
maybe_hot_insn_p, that is set by cfgexpand according to the currently
expanded block.  Afterwards it is reset to true, so all other expansion
done by later RTL optimizers is done as if optimizing for speed (that is
conservative).

The patch also makes some trivial use on the i386 side, disabling code
expanding string functions (except for memcpy/memset, which I am going to
address in a separate patch); that is a canonical example of how I imagine
it being used.  It is not as easy as mechanically converting all
optimize_size tests to maybe_hot_insn_p (we probably can invent an
optimize_insn_for_speed () macro that expands to
(maybe_hot_insn_p && !optimize_size)), since some of the code is written
with the expectation that insn patterns will behave consistently, but it is
fairly straightforward in most cases.

I am not quite sure about the interface, but this one is about the least
painful of the ones I can think of.
I think it is not a very good idea to pass the information about hotness
through all the expansion machinery; alternatively I can hide the variable
in cfun->emit, which holds other global insn emitting info, and/or hide the
variable behind a function predicate so it is slightly more abstracted,
and/or make gen_* functions accept this as an argument and update all
GEN_FCN calls to pass it around, so it can be less easily misused when the
machine description is entered via other interfaces that don't care about
hotness.

Once we settle down the interface, I would like to cover insn
splitting/outputting too; that is slightly more tricky because a few
backends kill the CFG too early.  Either this can be done by flagging cold
instructions somehow before expansion, or by simply adding CFG/non-CFG
paths.

:ADDPATCH middle-end:
Bootstrapped/regtested i686-linux, OK?

Honza

2006-10-26  Jan Hubicka  <jh@suse.cz>
	* expr.h (maybe_hot_insn_p): Declare.
	* cfgexpand.c (maybe_hot_insn_p): New global var.
	(expand_gimple_basic_block): Use it.
	* i386.c (standard_80387_constant_p, ix86_expand_clear,
	ix86_expand_vector_move_misalign, ix86_expand_branch,
	ix86_split_long_move, ix86_expand_strlen): Optimize for size
	when expanding cold function.
Index: expr.h
===================================================================
*** expr.h	(revision 115583)
--- expr.h	(working copy)
*************** extern void emit_indirect_jump (rtx);
*** 296,301 ****
--- 296,306 ----
  /* Generate a conditional trap instruction.  */
  extern rtx gen_cond_trap (enum rtx_code, rtx, rtx, rtx);
  
+ /* Set during CFG expansion when the current basic block may
+    be hot.  */
+ 
+ extern bool maybe_hot_insn_p;
+ 
  #include "insn-config.h"
  
  #ifdef HAVE_conditional_move
Index: cfgexpand.c
===================================================================
*** cfgexpand.c	(revision 115583)
--- cfgexpand.c	(working copy)
*************** Boston, MA 02110-1301, USA.  */
*** 40,45 ****
--- 40,50 ----
  #include "debug.h"
  #include "params.h"
  
+ /* Set during CFG expansion when the current basic block may
+    be hot.  */
+ 
+ bool maybe_hot_insn_p = true;
+ 
  /* Verify that there is exactly single jump instruction since last and attach
     REG_BR_PROB note specifying probability.
     ??? We really ought to pass the probability down to RTL expanders and let it
*************** expand_gimple_basic_block (basic_block b
*** 1290,1295 ****
--- 1295,1302 ----
  	       bb->index);
      }
  
+   maybe_hot_insn_p = maybe_hot_bb_p (bb);
+ 
    init_rtl_bb_info (bb);
    bb->flags |= BB_RTL;
  
*************** expand_gimple_basic_block (basic_block b
*** 1345,1351 ****
  	{
  	  new_bb = expand_gimple_cond_expr (bb, stmt);
  	  if (new_bb)
! 	    return new_bb;
  	}
        else
  	{
--- 1352,1361 ----
  	{
  	  new_bb = expand_gimple_cond_expr (bb, stmt);
  	  if (new_bb)
! 	    {
! 	      maybe_hot_insn_p = true;
! 	      return new_bb;
! 	    }
  	}
        else
  	{
*************** expand_gimple_basic_block (basic_block b
*** 1359,1365 ****
  		  if (can_fallthru)
  		    bb = new_bb;
  		  else
! 		    return new_bb;
  		}
  	    }
  	  else
--- 1369,1378 ----
  		  if (can_fallthru)
  		    bb = new_bb;
  		  else
! 		    {
! 		      maybe_hot_insn_p = true;
! 		      return new_bb;
! 		    }
  		}
  	    }
  	  else
*************** expand_gimple_basic_block (basic_block b
*** 1384,1389 ****
--- 1397,1403 ----
  
    update_bb_for_insn (bb);
  
+   maybe_hot_insn_p = true;
    return bb;
  }
  
Index: config/i386/i386.c
===================================================================
*** config/i386/i386.c	(revision 115583)
--- config/i386/i386.c	(working copy)
*************** standard_80387_constant_p (rtx x)
*** 4548,4554 ****
    /* For XFmode constants, try to find a special 80387 instruction when
       optimizing for size or on those CPUs that benefit from them.  */
    if (GET_MODE (x) == XFmode
!       && (optimize_size || x86_ext_80387_constants & TUNEMASK))
      {
        REAL_VALUE_TYPE r;
        int i;
--- 4634,4641 ----
    /* For XFmode constants, try to find a special 80387 instruction when
       optimizing for size or on those CPUs that benefit from them.  */
    if (GET_MODE (x) == XFmode
!       && (optimize_size || !maybe_hot_insn_p
! 	  || (x86_ext_80387_constants & TUNEMASK)))
      {
        REAL_VALUE_TYPE r;
        int i;
*************** ix86_expand_clear (rtx dest)
*** 8843,8849 ****
    tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
  
    /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
!   if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
      {
        rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
        tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
--- 8930,8937 ----
    tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
  
    /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
!   if (reload_completed
!       && (!TARGET_USE_MOV0 || optimize_size || !maybe_hot_insn_p))
      {
        rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
        tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
*************** ix86_expand_vector_move_misalign (enum m
*** 9015,9021 ****
    if (MEM_P (op1))
      {
        /* If we're optimizing for size, movups is the smallest.  */
!       if (optimize_size)
  	{
  	  op0 = gen_lowpart (V4SFmode, op0);
  	  op1 = gen_lowpart (V4SFmode, op1);
--- 9103,9109 ----
    if (MEM_P (op1))
      {
        /* If we're optimizing for size, movups is the smallest.  */
!       if (optimize_size || !maybe_hot_insn_p)
  	{
  	  op0 = gen_lowpart (V4SFmode, op0);
  	  op1 = gen_lowpart (V4SFmode, op1);
*************** ix86_expand_vector_move_misalign (enum m
*** 9081,9087 ****
    else if (MEM_P (op0))
      {
        /* If we're optimizing for size, movups is the smallest.  */
!       if (optimize_size)
  	{
  	  op0 = gen_lowpart (V4SFmode, op0);
  	  op1 = gen_lowpart (V4SFmode, op1);
--- 9169,9175 ----
    else if (MEM_P (op0))
      {
        /* If we're optimizing for size, movups is the smallest.  */
!       if (optimize_size || !maybe_hot_insn_p)
  	{
  	  op0 = gen_lowpart (V4SFmode, op0);
  	  op1 = gen_lowpart (V4SFmode, op1);
*************** ix86_expand_branch (enum rtx_code code, 
*** 10406,10412 ****
  	   optimizing for size.  */
  
  	if ((code == EQ || code == NE)
! 	    && (!optimize_size
  	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
  	  {
  	    rtx xor0, xor1;
--- 10494,10500 ----
  	   optimizing for size.  */
  
  	if ((code == EQ || code == NE)
! 	    && ((!optimize_size && maybe_hot_insn_p)
  	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
  	  {
  	    rtx xor0, xor1;
*************** ix86_expand_ashl_const (rtx operand, int
*** 12148,12154 ****
  		  ? gen_addsi3
  		  : gen_adddi3) (operand, operand, operand));
      }
!   else if (!optimize_size
  	   && count * ix86_cost->add <= ix86_cost->shift_const)
      {
        int i;
--- 12236,12242 ----
  		  ? gen_addsi3
  		  : gen_adddi3) (operand, operand, operand));
      }
!   else if (!optimize_size && maybe_hot_insn_p
  	   && count * ix86_cost->add <= ix86_cost->shift_const)
      {
        int i;
*************** ix86_expand_strlen (rtx out, rtx src, rt
*** 13063,13069 ****
  
    if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
        && !TARGET_INLINE_ALL_STRINGOPS
!       && !optimize_size
        && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
      return 0;
  
--- 13824,13830 ----
  
    if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
        && !TARGET_INLINE_ALL_STRINGOPS
!       && !optimize_size && maybe_hot_insn_p
        && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
      return 0;
  
*************** ix86_expand_strlen (rtx out, rtx src, rt
*** 13071,13077 ****
    scratch1 = gen_reg_rtx (Pmode);
  
    if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
!       && !optimize_size)
      {
        /* Well it seems that some optimizer does not combine a call like
           foo(strlen(bar), strlen(bar));
--- 13832,13838 ----
    scratch1 = gen_reg_rtx (Pmode);
  
    if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
!       && !optimize_size && maybe_hot_insn_p)
      {
        /* Well it seems that some optimizer does not combine a call like
           foo(strlen(bar), strlen(bar));


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]