This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

pentium4 branch prediction hints support


Hi,
this patch adds basic support for the Pentium4 branch prediction hints feature.
The chip recognizes the existing prefixes 'cs' and 'ds' as 'not taken' versus
'taken' hints.  These may be used to override the default static prediction
heuristics, which predict forward branches as not taken and backward as taken.

The patch takes care to avoid emitting unneeded hints.

Currently the code seems to bring notable hits with profile feedback and to be
hit/miss without it, suggesting that our branch prediction algorithm is broken.
In fact I have patches for this sitting in the queue, so I will update them and
send them shortly.

Honza

So čen  2 18:38:07 CEST 2001  Jan Hubicka  <jh@suse.cz>

	* i386.c (x86_branch_hints): New global variable.
	(print_operand): Support outputting of branch prediction hints.
	* i386.md (conditional jump patterns): Add branch prediction hints
	to the template.
	* i386.h (x86_branch_hints): Declare.
	(TARGET_BRANCH_PREDICTION_HINTS): New macro.
	(PRINT_OPERAND_PUNCT_VALID_P): Accept '+'.
	* final.c (final_forward_branch_p): New function.

*** i386.c.old	Sat Jun  2 16:49:38 2001
--- i386.c	Sat Jun  2 18:35:54 2001
*************** const int x86_use_bit_test = m_386;
*** 290,295 ****
--- 290,296 ----
  const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
  const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
  const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
+ const int x86_branch_hints = m_PENT4;
  const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
  const int x86_partial_reg_stall = m_PPRO;
  const int x86_use_loop = m_K6;
*************** print_operand (file, x, code)
*** 4069,4075 ****
--- 4067,4105 ----
  	case 'f':
  	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
  	  return;
+ 	case '+':
+ 	  {
+ 	    rtx x;
  
+ 	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
+ 	      return;
+ 	    
+ 	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
+ 	    if (x)
+ 	      {
+ 		int pred_val = INTVAL (XEXP (x, 0));
+ 
+ 		if (pred_val < REG_BR_PROB_BASE * 45 / 100
+ 		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
+ 		  {
+ 		    int taken = pred_val > REG_BR_PROB_BASE / 2;
+ 		    int cputaken = final_forward_branch_p (current_output_insn) == 0;
+ 
+ 		    /* Emit hints only in the case default branch prediction
+ 		       heruistics would fail.  */
+ 		    if (taken != cputaken)
+ 		      {
+ 			/* We use 3e (DS) prefix for taken branches and
+ 			   2e (CS) prefix for not taken branches.  */
+ 			if (taken)
+ 			  fputs (ASSEMBLER_DIALECT ?  "ds: " : "ds ; ", file);
+ 			else
+ 			  fputs (ASSEMBLER_DIALECT ?  "cs: " : "cs ; ", file);
+ 		      }
+ 		  }
+ 	      }
+ 	    return;
+ 	  }
  	default:
  	  {
  	    char str[50];
*** i386.md.old	Sat Jun  2 16:49:24 2001
--- i386.md	Sat Jun  2 16:52:32 2001
***************
*** 12462,12468 ****
  		      (label_ref (match_operand 0 "" ""))
  		      (pc)))]
    ""
!   "j%C1\\t%l0"
    [(set_attr "type" "ibr")
     (set (attr "prefix_0f")
  	   (if_then_else (and (ge (minus (match_dup 0) (pc))
--- 12462,12468 ----
  		      (label_ref (match_operand 0 "" ""))
  		      (pc)))]
    ""
!   "%+j%C1\\t%l0"
    [(set_attr "type" "ibr")
     (set (attr "prefix_0f")
  	   (if_then_else (and (ge (minus (match_dup 0) (pc))
***************
*** 12479,12485 ****
  		      (pc)
  		      (label_ref (match_operand 0 "" ""))))]
    ""
!   "j%c1\\t%l0"
    [(set_attr "type" "ibr")
     (set (attr "prefix_0f")
  	   (if_then_else (and (ge (minus (match_dup 0) (pc))
--- 12479,12485 ----
  		      (pc)
  		      (label_ref (match_operand 0 "" ""))))]
    ""
!   "%+j%c1\\t%l0"
    [(set_attr "type" "ibr")
     (set (attr "prefix_0f")
  	   (if_then_else (and (ge (minus (match_dup 0) (pc))
***************
*** 12822,12830 ****
    if (which_alternative != 0)
      return \"#\";
    if (get_attr_length (insn) == 2)
!     return \"loop\\t%l0\";
    else
!     return \"dec{l}\\t%1\;jne\\t%l0\";
  }"
    [(set_attr "ppro_uops" "many")
     (set (attr "type")
--- 12822,12830 ----
    if (which_alternative != 0)
      return \"#\";
    if (get_attr_length (insn) == 2)
!     return \"%+loop\\t%l0\";
    else
!     return \"dec{l}\\t%1\;%+jne\\t%l0\";
  }"
    [(set_attr "ppro_uops" "many")
     (set (attr "type")
*** ../../final.c.old	Sat Jun  2 16:38:22 2001
--- ../../final.c	Sat Jun  2 16:42:52 2001
*************** leaf_function_p ()
*** 4112,4117 ****
--- 4112,4136 ----
    return 1;
  }
  
+ /* Return 1 if INSN's INSN_SHUID is less than that of its JUMP_LABEL.
+    Uses insn_shuid array, so it works only in the final pass.  May be used by
+    output templates to add branch prediction hints; aborts if ids are missing.
+  */
+ int
+ final_forward_branch_p (insn)
+      rtx insn;
+ {
+   int insn_id, label_id;
+   if (!uid_shuid)
+     abort ();
+   insn_id = INSN_SHUID (insn);
+   label_id = INSN_SHUID (JUMP_LABEL (insn));
+   /* We've hit an insn that does not have id information available.  */
+   if (!insn_id || !label_id)
+     abort ();
+   return insn_id < label_id;
+ }
+ 
  /* On some machines, a function with no call insns
     can run faster if it doesn't create its own register window.
     When output, the leaf function should use only the "output"
*** i386.h.old	Sat Jun  2 16:56:19 2001
--- i386.h	Sat Jun  2 18:36:03 2001
*************** extern int target_flags;
*** 194,200 ****
  #define CPUMASK (1 << ix86_cpu)
  extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and;
  extern const int x86_use_bit_test, x86_cmove, x86_deep_branch;
! extern const int x86_unroll_strlen;
  extern const int x86_double_with_add, x86_partial_reg_stall, x86_movx;
  extern const int x86_use_loop, x86_use_fiop, x86_use_mov0;
  extern const int x86_use_cltd, x86_read_modify_write;
--- 194,200 ----
  #define CPUMASK (1 << ix86_cpu)
  extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and;
  extern const int x86_use_bit_test, x86_cmove, x86_deep_branch;
! extern const int x86_branch_hints, x86_unroll_strlen;
  extern const int x86_double_with_add, x86_partial_reg_stall, x86_movx;
  extern const int x86_use_loop, x86_use_fiop, x86_use_mov0;
  extern const int x86_use_cltd, x86_read_modify_write;
*************** extern const int x86_partial_reg_depende
*** 214,219 ****
--- 214,220 ----
     safe to enable all CMOVE instructions.  */
  #define TARGET_CMOVE ((x86_cmove & (1 << ix86_arch)) || TARGET_SSE)
  #define TARGET_DEEP_BRANCH_PREDICTION (x86_deep_branch & CPUMASK)
+ #define TARGET_BRANCH_PREDICTION_HINTS (x86_branch_hints & CPUMASK)
  #define TARGET_DOUBLE_WITH_ADD (x86_double_with_add & CPUMASK)
  #define TARGET_USE_SAHF ((x86_use_sahf & CPUMASK) && !TARGET_64BIT)
  #define TARGET_MOVX (x86_movx & CPUMASK)
*************** do { long l;						\
*** 2912,2918 ****
     print_operand function.  */
  
  #define PRINT_OPERAND_PUNCT_VALID_P(CODE)				\
!   ((CODE) == '*')
  
  /* Print the name of a register based on its machine mode and number.
     If CODE is 'w', pretend the mode is HImode.
--- 2912,2918 ----
     print_operand function.  */
  
  #define PRINT_OPERAND_PUNCT_VALID_P(CODE)				\
!   ((CODE) == '*' || (CODE) == '+')
  
  /* Print the name of a register based on its machine mode and number.
     If CODE is 'w', pretend the mode is HImode.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]