This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

i386 conditional move tweaks


Hi,
I've noticed that we generate no conditional moves for QImode operands, no
setcc-based code for HImode, and suboptimal sequences for TImode.  The following
testcases fail on the BIB branch now:

/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -march=athlon" } */
/* { dg-final { scan-assembler "sar.*magic_namea" } } */
/* { dg-final { scan-assembler "sar.*magic_nameb" } } */
/* { dg-final { scan-assembler "sar.*magic_namec" } } */
/* { dg-final { scan-assembler "shr.*magic_named" } } */
/* { dg-final { scan-assembler "shr.*magic_namee" } } */
/* { dg-final { scan-assembler "shr.*magic_namef" } } */

/* Check code generation for several conditional moves that can be done with
   a single arithmetic instruction.  Every assignment in m selects between 0
   and -1 (or between 0 and 1) based only on the sign of the variable, and
   the dg-final directives look for a "sar" or "shr" mentioning it.  */

int magic_namea;
char magic_nameb;
short magic_namec;
int magic_named;
char magic_namee;
short magic_namef;

unsigned int gen;

/* Replace each magic_name* global with a value derived from its sign:
   the a/b/c variables become 0 or -1, the d/e/f variables become 0 or 1.
   Always returns 0; the return value carries no information.  */
int
m (void)
{
  magic_namec = magic_namec >= 0 ? 0 : -1;
  magic_namea = magic_namea >= 0 ? 0 : -1;
  magic_nameb = magic_nameb >= 0 ? 0 : -1;
  magic_named = magic_named >= 0 ? 0 : 1;
  magic_namee = magic_namee >= 0 ? 0 : 1;
  magic_namef = magic_namef >= 0 ? 0 : 1;
  return 0;
}


/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -march=athlon" } */
/* { dg-final { scan-assembler "sbb" } } */

/* This conditional move is done fastest using sbb: the comparison is
   unsigned and both arms are constants.  Returns 5 when A <= B and -5
   otherwise.  */
int
t (unsigned int a, unsigned int b)
{
  return a <= b ? 5 : -5;
}

/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -march=athlon" } */
/* { dg-final { scan-assembler "cmov" } } */

/* This conditional move is done fastest using cmov: the comparison is
   signed and both arms are constants.  Returns 5 when A <= B and -5
   otherwise.  */
int
t (int a, int b)
{
  return a <= b ? 5 : -5;
}

This is my attempt to improve the situation somewhat.
Bootstrapped and regression-tested on the BIB branch.  OK?

Fri Nov 22 01:33:23 CET 2002  Jan Hubicka  <jh@suse.cz>
	* i386.md (movhicc): Allow general operand.
	(movqicc): New expander.
	(movqicc_noc): New pattern.
	* i386.c (ix86_expand_carry_flag_compare): New function.
	(ix86_expand_int_movcc): Optimize harder using sbb; support more
	HImode conversions; support QImode conditional moves.
*** i386.md.old	Thu Nov 21 23:10:32 2002
--- i386.md	Fri Nov 22 01:04:57 2002
***************
*** 16317,16325 ****
  (define_expand "movhicc"
    [(set (match_operand:HI 0 "register_operand" "")
  	(if_then_else:HI (match_operand 1 "comparison_operator" "")
! 			 (match_operand:HI 2 "nonimmediate_operand" "")
! 			 (match_operand:HI 3 "nonimmediate_operand" "")))]
!   "TARGET_CMOVE && TARGET_HIMODE_MATH"
    "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;")
  
  (define_insn "*movhicc_noc"
--- 16367,16375 ----
  (define_expand "movhicc"
    [(set (match_operand:HI 0 "register_operand" "")
  	(if_then_else:HI (match_operand 1 "comparison_operator" "")
! 			 (match_operand:HI 2 "general_operand" "")
! 			 (match_operand:HI 3 "general_operand" "")))]
!   "TARGET_HIMODE_MATH"
    "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;")
  
  (define_insn "*movhicc_noc"
***************
*** 16336,16341 ****
--- 16386,16412 ----
    [(set_attr "type" "icmov")
     (set_attr "mode" "HI")])
  
+ (define_expand "movqicc"	; QImode conditional move expander
+   [(set (match_operand:QI 0 "register_operand" "")
+ 	(if_then_else:QI (match_operand 1 "comparison_operator" "")
+ 			 (match_operand:QI 2 "general_operand" "")
+ 			 (match_operand:QI 3 "general_operand" "")))]
+   "TARGET_QIMODE_MATH"		; enabled without requiring TARGET_CMOVE
+   "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;")	; FAIL when the expander returns 0
+ 
+ (define_insn "*movqicc_noc"	; QImode conditional move emitted as a cmov
+   [(set (match_operand:QI 0 "register_operand" "=r,r")
+ 	(if_then_else:QI (match_operator 1 "ix86_comparison_operator" 
+ 				[(reg 17) (const_int 0)])	; presumably the flags register -- confirm
+ 		      (match_operand:QI 2 "register_operand" "r,0")
+ 		      (match_operand:QI 3 "register_operand" "0,r")))]
+   "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL"	; needs cmov; not used with partial register stalls
+   "@
+    cmov%O2%C1\t{%2, %0|%0, %2}
+    cmov%O2%c1\t{%3, %0|%0, %3}"
+   [(set_attr "type" "icmov")
+    (set_attr "mode" "HI")])	; NOTE(review): "HI" on a QImode pattern -- confirm this is intended
+ 
  (define_expand "movsfcc"
    [(set (match_operand:SF 0 "register_operand" "")
  	(if_then_else:SF (match_operand 1 "comparison_operator" "")
*** i386.c.old	Thu Nov 21 23:10:28 2002
--- i386.c	Fri Nov 22 01:19:02 2002
*************** static void x86_output_mi_thunk PARAMS (
*** 816,821 ****
--- 816,822 ----
  					 HOST_WIDE_INT, tree));
  static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
  					     HOST_WIDE_INT, tree));
+ bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));
  
  struct ix86_address
  {
*************** ix86_expand_setcc (code, dest)
*** 9161,9166 ****
--- 9162,9245 ----
    return 1; /* DONE */
  }
  
+ /* Expand compare CODE of OP0 and OP1 so that it sets or clears the carry flag.
+    Return true and store the LTU/GEU comparison in *POP when successful.  */
+ bool
+ ix86_expand_carry_flag_compare (code, op0, op1, pop)
+      rtx op0, op1, *pop;
+      enum rtx_code code;
+ {
+   enum machine_mode mode =
+     GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
+ 
+   /* Do not handle DImode compares that go through special path.  Also we can't
+      deal with FP compares yet.  This is possible to add.   */
+   if ((mode == DImode && !TARGET_64BIT) || !INTEGRAL_MODE_P (mode))
+     return false;
+   switch (code)
+     {
+     case LTU:
+     case GEU:
+       break;
+ 
+     /* Convert a==0 into (unsigned)a<1.  */
+     case EQ:
+     case NE:
+       if (op1 != const0_rtx)
+ 	return false;
+       op1 = const1_rtx;
+       code = (code == EQ ? LTU : GEU);
+       break;
+ 
+     /* Convert a>b into b<a or a>=b+1.  */
+     case GTU:
+     case LEU:
+       if (GET_CODE (op1) == CONST_INT)
+ 	{
+ 	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
+ 	  /* Bail out on overflow.  We still can swap operands but that
+ 	     would force loading of the constant into register. */
+ 	  if (op1 == const0_rtx
+ 	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
+ 	    return false;
+ 	  code = (code == GTU ? GEU : LTU);
+ 	}
+       else
+ 	{
+ 	  rtx tmp = op1;
+ 	  op1 = op0;
+ 	  op0 = tmp;
+ 	  code = (code == GTU ? LTU : GEU);
+ 	}
+       break;
+ 
+     /* Convert a>=0 (and likewise a>-1) into (unsigned)a<0x80000000.  */
+     case LT:
+     case GE:
+       if (mode == DImode || op1 != const0_rtx)
+ 	return false;
+       op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
+       code = (code == LT ? GEU : LTU);
+       break;
+     case LE:
+     case GT:
+       if (mode == DImode || op1 != constm1_rtx)
+ 	return false;
+       op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
+       code = (code == LE ? GEU : LTU);
+       break;
+ 
+     default:
+       return false;
+     }
+   ix86_compare_op0 = op0;
+   ix86_compare_op1 = op1;
+   *pop = ix86_expand_compare (code, NULL, NULL);
+   if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
+     abort ();
+   return true;
+ }
+ 
  int
  ix86_expand_int_movcc (operands)
       rtx operands[];
*************** ix86_expand_int_movcc (operands)
*** 9169,9198 ****
    rtx compare_seq, compare_op;
    rtx second_test, bypass_test;
    enum machine_mode mode = GET_MODE (operands[0]);
! 
!   /* When the compare code is not LTU or GEU, we can not use sbbl case.
!      In case comparsion is done with immediate, we can convert it to LTU or
!      GEU by altering the integer.  */
! 
!   if ((code == LEU || code == GTU)
!       && GET_CODE (ix86_compare_op1) == CONST_INT
!       && mode != HImode
!       && INTVAL (ix86_compare_op1) != -1
!       /* For x86-64, the immediate field in the instruction is 32-bit
! 	 signed, so we can't increment a DImode value above 0x7fffffff.  */
!       && (!TARGET_64BIT
! 	  || GET_MODE (ix86_compare_op0) != DImode
! 	  || INTVAL (ix86_compare_op1) != 0x7fffffff)
!       && GET_CODE (operands[2]) == CONST_INT
!       && GET_CODE (operands[3]) == CONST_INT)
!     {
!       if (code == LEU)
! 	code = LTU;
!       else
! 	code = GEU;
!       ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
! 				       GET_MODE (ix86_compare_op0));
!     }
  
    start_sequence ();
    compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
--- 9248,9254 ----
    rtx compare_seq, compare_op;
    rtx second_test, bypass_test;
    enum machine_mode mode = GET_MODE (operands[0]);
!   bool sign_bit_compare_p = false;;
  
    start_sequence ();
    compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
*************** ix86_expand_int_movcc (operands)
*** 9201,9210 ****
  
    compare_code = GET_CODE (compare_op);
  
    /* Don't attempt mode expansion here -- if we had to expand 5 or 6
       HImode insns, we'd be swallowed in word prefix ops.  */
  
!   if (mode != HImode
        && (mode != DImode || TARGET_64BIT)
        && GET_CODE (operands[2]) == CONST_INT
        && GET_CODE (operands[3]) == CONST_INT)
--- 9257,9270 ----
  
    compare_code = GET_CODE (compare_op);
  
+   if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
+       || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
+     sign_bit_compare_p = true;
+ 
    /* Don't attempt mode expansion here -- if we had to expand 5 or 6
       HImode insns, we'd be swallowed in word prefix ops.  */
  
!   if ((mode != HImode || TARGET_FAST_PREFIX)
        && (mode != DImode || TARGET_64BIT)
        && GET_CODE (operands[2]) == CONST_INT
        && GET_CODE (operands[3]) == CONST_INT)
*************** ix86_expand_int_movcc (operands)
*** 9214,9245 ****
        HOST_WIDE_INT cf = INTVAL (operands[3]);
        HOST_WIDE_INT diff;
  
!       if ((compare_code == LTU || compare_code == GEU)
! 	  && !second_test && !bypass_test)
  	{
  	  /* Detect overlap between destination and compare sources.  */
  	  rtx tmp = out;
  
! 	  /* To simplify rest of code, restrict to the GEU case.  */
! 	  if (compare_code == LTU)
  	    {
! 	      int tmp = ct;
! 	      ct = cf;
! 	      cf = tmp;
! 	      compare_code = reverse_condition (compare_code);
! 	      code = reverse_condition (code);
! 	    }
! 	  diff = ct - cf;
  
! 	  if (reg_overlap_mentioned_p (out, ix86_compare_op0)
! 	      || reg_overlap_mentioned_p (out, ix86_compare_op1))
! 	    tmp = gen_reg_rtx (mode);
  
! 	  emit_insn (compare_seq);
! 	  if (mode == DImode)
! 	    emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
  	  else
! 	    emit_insn (gen_x86_movsicc_0_m1 (tmp));
  
  	  if (diff == 1)
  	    {
--- 9274,9326 ----
        HOST_WIDE_INT cf = INTVAL (operands[3]);
        HOST_WIDE_INT diff;
  
!       diff = ct - cf;
!       /*  Sign bit compares are better done using shifts than we do by using
!  	  sbb.  */
!       if (sign_bit_compare_p
! 	  || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
! 					     ix86_compare_op1, &compare_op))
  	{
  	  /* Detect overlap between destination and compare sources.  */
  	  rtx tmp = out;
  
!           if (!sign_bit_compare_p)
  	    {
! 	      compare_code = GET_CODE (compare_op);
! 
! 	      /* To simplify rest of code, restrict to the GEU case.  */
! 	      if (compare_code == LTU)
! 		{
! 		  int tmp = ct;
! 		  ct = cf;
! 		  cf = tmp;
! 		  compare_code = reverse_condition (compare_code);
! 		  code = reverse_condition (code);
! 		}
! 	      diff = ct - cf;
  
! 	      if (reg_overlap_mentioned_p (out, ix86_compare_op0)
! 		  || reg_overlap_mentioned_p (out, ix86_compare_op1))
! 		tmp = gen_reg_rtx (mode);
  
! 	      if (mode == DImode)
! 		emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
! 	      else
! 		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp)));
! 	    }
  	  else
! 	    {
! 	      if (code == GT || code == GE)
! 		code = reverse_condition (code);
! 	      else
! 		{
! 		  int tmp = ct;
! 		  ct = cf;
! 		  cf = tmp;
! 		}
! 	      tmp = emit_store_flag (tmp, code, ix86_compare_op0,
! 				     ix86_compare_op1, VOIDmode, 0, -1);
! 	    }
  
  	  if (diff == 1)
  	    {
*************** ix86_expand_int_movcc (operands)
*** 9319,9325 ****
  	  return 1; /* DONE */
  	}
  
-       diff = ct - cf;
        if (diff < 0)
  	{
  	  HOST_WIDE_INT tmp;
--- 9400,9405 ----
*************** ix86_expand_int_movcc (operands)
*** 9395,9400 ****
--- 9475,9481 ----
  	    }
  	}
  
+ 
        if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
  	   || diff == 3 || diff == 5 || diff == 9)
  	  && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf), 0)))
*************** ix86_expand_int_movcc (operands)
*** 9477,9483 ****
         * for space.
         */
  
!       if (!TARGET_CMOVE && BRANCH_COST >= 2)
  	{
  	  if (cf == 0)
  	    {
--- 9558,9565 ----
         * for space.
         */
  
!       if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
! 	  && BRANCH_COST >= 2)
  	{
  	  if (cf == 0)
  	    {
*************** ix86_expand_int_movcc (operands)
*** 9548,9554 ****
  	}
      }
  
!   if (!TARGET_CMOVE)
      {
        /* Try a few things more with specific constants and a variable.  */
  
--- 9630,9636 ----
  	}
      }
  
!   if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
      {
        /* Try a few things more with specific constants and a variable.  */
  


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]