Profile predicates housekeeping

Jan Hubicka hubicka@ucw.cz
Mon May 13 12:32:00 GMT 2013


Hi,
this patch makes i386.c use the correct predicates for hot/cold decisions. It also makes
mode-switching set the correct RTL profile when emitting the initialization code.

Honza

Bootstrapped/regtested x86_64-linux, committed.

	* mode-switching.c (optimize_mode_switching): Set correct RTL profile.
	* config/i386/i386.c (ix86_compute_frame_layout,
	ix86_expand_epilogue, emit_i387_cw_initialization, ix86_expand_vector_move_misalign,
	ix86_fp_comparison_strategy, ix86_local_alignment): Fix use of size/speed predicates.
Index: mode-switching.c
===================================================================
*** mode-switching.c	(revision 198821)
--- mode-switching.c	(working copy)
*************** optimize_mode_switching (void)
*** 667,676 ****
--- 667,678 ----
  
  	      REG_SET_TO_HARD_REG_SET (live_at_edge, df_get_live_out (src_bb));
  
+ 	      rtl_profile_for_edge (eg);
  	      start_sequence ();
  	      EMIT_MODE_SET (entity_map[j], mode, live_at_edge);
  	      mode_set = get_insns ();
  	      end_sequence ();
+ 	      default_rtl_profile ();
  
  	      /* Do not bother to insert empty sequence.  */
  	      if (mode_set == NULL_RTX)
*************** optimize_mode_switching (void)
*** 713,718 ****
--- 715,721 ----
  		{
  		  rtx mode_set;
  
+ 		  rtl_profile_for_bb (bb);
  		  start_sequence ();
  		  EMIT_MODE_SET (entity_map[j], ptr->mode, ptr->regs_live);
  		  mode_set = get_insns ();
*************** optimize_mode_switching (void)
*** 727,732 ****
--- 730,737 ----
  		      else
  			emit_insn_before (mode_set, ptr->insn_ptr);
  		    }
+ 
+ 		  default_rtl_profile ();
  		}
  
  	      free (ptr);
Index: config/i386/i386.c
===================================================================
*** config/i386/i386.c	(revision 198821)
--- config/i386/i386.c	(working copy)
*************** ix86_compute_frame_layout (struct ix86_f
*** 9003,9009 ****
       Recompute the value as needed.  Do not recompute when amount of registers
       didn't change as reload does multiple calls to the function and does not
       expect the decision to change within single iteration.  */
!   else if (!optimize_function_for_size_p (cfun)
             && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
      {
        int count = frame->nregs;
--- 9003,9009 ----
       Recompute the value as needed.  Do not recompute when amount of registers
       didn't change as reload does multiple calls to the function and does not
       expect the decision to change within single iteration.  */
!   else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR)
             && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
      {
        int count = frame->nregs;
*************** ix86_expand_epilogue (int style)
*** 11071,11077 ****
        /* Leave results in shorter dependency chains on CPUs that are
  	 able to grok it fast.  */
        else if (TARGET_USE_LEAVE
! 	       || optimize_function_for_size_p (cfun)
  	       || !cfun->machine->use_fast_prologue_epilogue)
  	ix86_emit_leave ();
        else
--- 11071,11077 ----
        /* Leave results in shorter dependency chains on CPUs that are
  	 able to grok it fast.  */
        else if (TARGET_USE_LEAVE
! 	       || optimize_bb_for_size_p (EXIT_BLOCK_PTR)
  	       || !cfun->machine->use_fast_prologue_epilogue)
  	ix86_emit_leave ();
        else
*************** emit_i387_cw_initialization (int mode)
*** 15668,15674 ****
    emit_move_insn (reg, copy_rtx (stored_mode));
  
    if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
!       || optimize_function_for_size_p (cfun))
      {
        switch (mode)
  	{
--- 15668,15674 ----
    emit_move_insn (reg, copy_rtx (stored_mode));
  
    if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
!       || optimize_insn_for_size_p ())
      {
        switch (mode)
  	{
*************** ix86_expand_vector_move_misalign (enum m
*** 16426,16432 ****
  	  if (TARGET_AVX
  	      || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
  	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
! 	      || optimize_function_for_size_p (cfun))
  	    {
  	      /* We will eventually emit movups based on insn attributes.  */
  	      emit_insn (gen_sse2_loadupd (op0, op1));
--- 16426,16432 ----
  	  if (TARGET_AVX
  	      || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
  	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
! 	      || optimize_insn_for_size_p ())
  	    {
  	      /* We will eventually emit movups based on insn attributes.  */
  	      emit_insn (gen_sse2_loadupd (op0, op1));
*************** ix86_expand_vector_move_misalign (enum m
*** 16463,16469 ****
  	  if (TARGET_AVX
  	      || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
  	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
! 	      || optimize_function_for_size_p (cfun))
  	    {
  	      op0 = gen_lowpart (V4SFmode, op0);
  	      op1 = gen_lowpart (V4SFmode, op1);
--- 16463,16469 ----
  	  if (TARGET_AVX
  	      || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
  	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
! 	      || optimize_insn_for_size_p ())
  	    {
  	      op0 = gen_lowpart (V4SFmode, op0);
  	      op1 = gen_lowpart (V4SFmode, op1);
*************** ix86_expand_vector_move_misalign (enum m
*** 16499,16505 ****
  	  if (TARGET_AVX
  	      || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
  	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
! 	      || optimize_function_for_size_p (cfun))
  	    /* We will eventually emit movups based on insn attributes.  */
  	    emit_insn (gen_sse2_storeupd (op0, op1));
  	  else
--- 16499,16505 ----
  	  if (TARGET_AVX
  	      || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
  	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
! 	      || optimize_insn_for_size_p ())
  	    /* We will eventually emit movups based on insn attributes.  */
  	    emit_insn (gen_sse2_storeupd (op0, op1));
  	  else
*************** ix86_expand_vector_move_misalign (enum m
*** 16518,16524 ****
  	  if (TARGET_AVX
  	      || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
  	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
! 	      || optimize_function_for_size_p (cfun))
  	    {
  	      op0 = gen_lowpart (V4SFmode, op0);
  	      emit_insn (gen_sse_storeups (op0, op1));
--- 16518,16524 ----
  	  if (TARGET_AVX
  	      || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
  	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
! 	      || optimize_insn_for_size_p ())
  	    {
  	      op0 = gen_lowpart (V4SFmode, op0);
  	      emit_insn (gen_sse_storeups (op0, op1));
*************** ix86_fp_comparison_strategy (enum rtx_co
*** 18764,18770 ****
    if (TARGET_CMOVE)
      return IX86_FPCMP_COMI;
  
!   if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
      return IX86_FPCMP_SAHF;
  
    return IX86_FPCMP_ARITH;
--- 18764,18770 ----
    if (TARGET_CMOVE)
      return IX86_FPCMP_COMI;
  
!   if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
      return IX86_FPCMP_SAHF;
  
    return IX86_FPCMP_ARITH;
*************** ix86_local_alignment (tree exp, enum mac
*** 25193,25199 ****
       other unit can not rely on the alignment.
  
       Exclude va_list type.  It is the common case of local array where
!      we can not benefit from the alignment.  */
    if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
        && TARGET_SSE)
      {
--- 25193,25201 ----
       other unit can not rely on the alignment.
  
       Exclude va_list type.  It is the common case of local array where
!      we can not benefit from the alignment.  
! 
!      TODO: Probably one should optimize for size only when var is not escaping.  */
    if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
        && TARGET_SSE)
      {



More information about the Gcc-patches mailing list