This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

fix x86 setcc partial reg stall


> http://gcc.gnu.org/ml/gcc/2001-09/msg00217.html

The bit to fix the 

	seta	%al
	andl	$1, %eax

partial register stall is the change to the test splitter, which now
looks for and narrows paradoxical subregs.  But that still leaves us with

	seta	%al
	testb	$1, %al
	je	Lfoo

which is just plain stupid.  Fixing that is a bit more involved.

The issue there is that the "seta" is actually

	(ge:QI (reg:CCFPU 17) (const_int 0))

i.e. a floating-point comparison, and in general it is not
safe to assume too much about CCmode operations.  So the code in
simplify_comparison gives up before figuring out that

	(eq (ge:QI (reg:CCFPU 17) (const_int 0))
	    (const_int 0))

can be simplified to

	(unlt (reg:CCFPU 17) (const_int 0))

For that, I created two new splitters that combine can use to
merge the three instructions.  At the same time I had to correct
an oversight in combine that would prevent a split against a jump
insn from matching at all.

Which, for the test case at hand, leaves us with 

        fnstsw  %ax
        sahf
-       seta    %al
-       andl    $1, %eax
-       je      .L12
+       jbe     .L12

which is most definitely better.

Bootstrap is still underway.


r~


	* combine.c (try_combine): Handle a SEQUENCE of one insn.

	* config/i386/i386.md (test splitter): Narrow tests vs paradoxical
	subregs.
	(jcc splitters): Add two splitters to simplify compound compares
	that simplify_comparison can't handle.

Index: combine.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/combine.c,v
retrieving revision 1.236
diff -c -p -d -r1.236 combine.c
*** combine.c	2001/10/10 11:33:05	1.236
--- combine.c	2001/10/11 02:03:12
*************** try_combine (i3, i2, i1, new_direct_jump
*** 2132,2137 ****
--- 2132,2143 ----
  	    }
  	}
  
+       /* If we've split a jump pattern, we'll wind up with a sequence even
+ 	 with one instruction.  We can handle that below, so extract it.  */
+       if (m_split && GET_CODE (m_split) == SEQUENCE
+ 	  && XVECLEN (m_split, 0) == 1)
+ 	m_split = PATTERN (XVECEXP (m_split, 0, 0));
+ 
        if (m_split && GET_CODE (m_split) != SEQUENCE)
  	{
  	  insn_code_number = recog_for_combine (&m_split, i3, &new_i3_notes);
Index: config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.300
diff -c -p -d -r1.300 i386.md
*** i386.md	2001/10/08 22:38:45	1.300
--- i386.md	2001/10/11 02:03:13
***************
*** 8125,8131 ****
    HOST_WIDE_INT len = INTVAL (operands[1]);
    HOST_WIDE_INT pos = INTVAL (operands[2]);
    HOST_WIDE_INT mask;
!   enum machine_mode mode;
  
    mode = GET_MODE (operands[0]);
    if (GET_CODE (operands[0]) == MEM)
--- 8125,8131 ----
    HOST_WIDE_INT len = INTVAL (operands[1]);
    HOST_WIDE_INT pos = INTVAL (operands[2]);
    HOST_WIDE_INT mask;
!   enum machine_mode mode, submode;
  
    mode = GET_MODE (operands[0]);
    if (GET_CODE (operands[0]) == MEM)
***************
*** 8138,8143 ****
--- 8138,8152 ----
  	  operands[0] = adjust_address (operands[0], mode, 0);
  	}
      }
+   else if (GET_CODE (operands[0]) == SUBREG
+ 	   && (submode = GET_MODE (SUBREG_REG (operands[0])),
+ 	       GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode))
+ 	   && pos + len <= GET_MODE_BITSIZE (submode))
+     {
+       /* Narrow a paradoxical subreg to prevent partial register stalls.  */
+       mode = submode;
+       operands[0] = SUBREG_REG (operands[0]);
+     }
    else if (mode == HImode && pos + len <= 8)
      {
        /* Small HImode tests can be converted to QImode.  */
***************
*** 12744,12749 ****
--- 12753,12807 ----
  				  (const_int 124)))
  	     (const_int 0)
  	     (const_int 1)))])
+ 
+ ;; In general it is not safe to assume too much about CCmode registers,
+ ;; so simplify-rtx stops when it sees a second one.  Under certain 
+ ;; conditions this is safe on x86, so help combine not create
+ ;;
+ ;;	seta	%al
+ ;;	testb	%al, %al
+ ;;	je	Lfoo
+ 
+ (define_split 
+   [(set (pc)
+ 	(if_then_else (ne (match_operator 0 "ix86_comparison_operator"
+ 				      [(reg 17) (const_int 0)])
+ 			  (const_int 0))
+ 		      (label_ref (match_operand 1 "" ""))
+ 		      (pc)))]
+   ""
+   [(set (pc)
+ 	(if_then_else (match_dup 0)
+ 		      (label_ref (match_dup 1))
+ 		      (pc)))]
+ {
+   PUT_MODE (operands[0], VOIDmode);
+ })
+   
+ (define_split 
+   [(set (pc)
+ 	(if_then_else (eq (match_operator 0 "ix86_comparison_operator"
+ 				      [(reg 17) (const_int 0)])
+ 			  (const_int 0))
+ 		      (label_ref (match_operand 1 "" ""))
+ 		      (pc)))]
+   ""
+   [(set (pc)
+ 	(if_then_else (match_dup 0)
+ 		      (label_ref (match_dup 1))
+ 		      (pc)))]
+ {
+   rtx new_op0 = copy_rtx (operands[0]);
+   operands[0] = new_op0;
+   PUT_MODE (new_op0, VOIDmode);
+   PUT_CODE (new_op0, REVERSE_CONDITION (GET_CODE (new_op0),
+ 					GET_MODE (XEXP (new_op0, 0))));
+ 
+   /* Make sure that (a) the CCmode we have for the flags is strong
+      enough for the reversed compare or (b) we have a valid FP compare.  */
+   if (! ix86_comparison_operator (new_op0, VOIDmode))
+     FAIL;
+ })
  
  ;; Define combination compare-and-branch fp compare instructions to use
  ;; during early optimization.  Splitting the operation apart early makes


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]