This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

SSE based conditional moves


Hi
This patch finally adds full support for conditional moves based
on cmp instructions.
This is done by defining an fcmov placeholder and splitting the whole beast
after reload.  I guess we will not lose too many optimizations here, since I
don't see many possibilities for eliminating the logicals, except for the case
where one of the cmov arms is 0, which I am handling already.

Richard: What is the state of your considered thought concerning the reg-class
tweak to avoid those '#'?  It is getting deadly.

Honza
Tue Feb 27 14:13:03 CET 2001  Jan Hubicka  <jh@suse.cz>
	* i386.md (sse_mov?fcc*): New patterns and splitters.
	* i386.c (ix86_expand_fp_movcc): Work post-reload; recognize
	the SSE based conditional moves.

Index: i386.md
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.md,v
retrieving revision 1.220
diff -c -3 -p -r1.220 i386.md
*** i386.md	2001/02/25 13:33:59	1.220
--- i386.md	2001/02/27 12:56:00
***************
*** 12166,12171 ****
--- 12603,12812 ----
  	      (const_string "lea")))
     (set_attr "mode" "SI")])
  
+ ;; Placeholder for the conditional moves.  This one is split either into the
+ ;; SSE based move emulation or into the usual cmove sequence.  A slightly
+ ;; unfortunate fact is that the compares supported by the cmp??ss instructions
+ ;; are exactly the swapped forms of those supported by the cmove sequence.
+ 
+ ;; SFmode conditional-move placeholder, enabled by TARGET_SSE.
+ ;; Operand 1 is the comparison of operands 4 and 5; operand 2 is the value
+ ;; selected when it holds and operand 3 the value selected otherwise.
+ ;; Operand 6 is a scratch and the flags register (reg:CC 17) is clobbered.
+ ;; The insn condition rejects the case where both arms are in memory.
+ ;; The output template is "#", i.e. the insn has no assembler form of its
+ ;; own and relies on the splitters.
+ (define_insn "sse_movsfcc"
+   [(set (match_operand:SF 0 "register_operand" "=&x#rf,x#rf,?f#xr,?f#xr,?f#xr,?f#xr,?r#xf,?r#xf,?r#xf,?r#xf")
+ 	(if_then_else:SF (match_operator 1 "sse_comparison_operator"
+ 			[(match_operand:SF 4 "nonimmediate_operand" "0#fx,x#fx,f#x,f#x,xm#f,xm#f,f#x,f#x,xm#f,xm#f")
+ 			 (match_operand:SF 5 "nonimmediate_operand" "xm#f,xm#f,f#x,f#x,x#f,x#f,f#x,f#x,x#f,x#f")])
+ 		      (match_operand:SF 2 "nonimmediate_operand" "x#fr,0#fr,f#fx,0#fx,f#fx,0#fx,rm#rx,0#rx,rm#rx,0#rx")
+ 		      (match_operand:SF 3 "nonimmediate_operand" "x#fr,x#fr,0#fx,f#fx,0#fx,f#fx,0#fx,rm#rx,0#rx,rm#rx")))
+    (clobber (match_scratch:SF 6 "=2,&5,X,X,X,X,X,X,X,X"))
+    (clobber (reg:CC 17))]
+   "TARGET_SSE
+    && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)"
+   "#")
+ 
+ ;; DFmode conditional-move placeholder, enabled by TARGET_SSE2.
+ ;; Operand 1 is the comparison of operands 4 and 5; operand 2 is the value
+ ;; selected when it holds and operand 3 the value selected otherwise.
+ ;; Operand 6 is a scratch and the flags register (reg:CC 17) is clobbered.
+ ;; The insn condition rejects the case where both arms are in memory.
+ ;; The output template is "#", i.e. the insn has no assembler form of its
+ ;; own and relies on the splitters.
+ (define_insn "sse_movdfcc"
+   [(set (match_operand:DF 0 "register_operand" "=&x#rf,x#rf,?f#xr,?f#xr,?f#xr,?f#xr,?r#xf,?r#xf,?r#xf,?r#xf")
+ 	(if_then_else:DF (match_operator 1 "sse_comparison_operator"
+ 			[(match_operand:DF 4 "nonimmediate_operand" "0#fx,x#fx,f#x,f#x,xm#f,xm#f,f#x,f#x,xm#f,xm#f")
+ 			 (match_operand:DF 5 "nonimmediate_operand" "xm#f,xm#f,f#x,f#x,x#f,x#f,f#x,f#x,x#f,x#f")])
+ 		      (match_operand:DF 2 "nonimmediate_operand" "x#fr,0#fr,f#fx,0#fx,f#fx,0#fx,rm#rx,0#rx,rm#rx,0#rx")
+ 		      (match_operand:DF 3 "nonimmediate_operand" "x#fr,x#fr,0#fx,f#fx,0#fx,f#fx,0#fx,rm#rx,0#rx,rm#rx")))
+    (clobber (match_scratch:DF 6 "=2,&5,X,X,X,X,X,X,X,X"))
+    (clobber (reg:CC 17))]
+   "TARGET_SSE2
+    && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)"
+   "#")
+ 
+ ;; For non-sse moves just expand the usual cmove sequence.
+ ;; Applies after reload when the destination is not an SSE register but its
+ ;; mode satisfies VALID_SSE_REG_MODE.  The comparison is rebuilt with its two
+ ;; operands exchanged and its code passed through swap_condition, the
+ ;; exchanged operands are stored in ix86_compare_op0/ix86_compare_op1, and
+ ;; the actual expansion is delegated to ix86_expand_fp_movcc.
+ (define_split
+   [(set (match_operand 0 "register_operand" "")
+ 	(if_then_else (match_operator 1 "comparison_operator"
+ 			[(match_operand 4 "nonimmediate_operand" "")
+ 			 (match_operand 5 "register_operand" "")])
+ 		      (match_operand 2 "nonimmediate_operand" "")
+ 		      (match_operand 3 "nonimmediate_operand" "")))
+    (clobber (match_operand 6 "" ""))
+    (clobber (reg:CC 17))]
+   "!SSE_REG_P (operands[0]) && reload_completed
+    && VALID_SSE_REG_MODE (GET_MODE (operands[0]))"
+   [(const_int 0)]
+   "
+ {
+    ix86_compare_op0 = operands[5];
+    ix86_compare_op1 = operands[4];
+    operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
+ 				 VOIDmode, operands[5], operands[4]);
+    ix86_expand_fp_movcc (operands);
+    DONE;
+ }")
+ 
+ ;; Split SSE based conditional move into sequence:
+ ;; cmpCC op0, op4   -  set op0 to 0 or ffffffff depending on the comparison
+ ;; and   op2, op0   -  zero op2 if comparison was false
+ ;; nand  op0, op3   -  load op3 to op0 if comparison was false
+ ;; or	 op2, op0   -  get the non-zero one into the result.
+ ;; Applies after reload when the destination is an SSE register.  The three
+ ;; logical operations are done in TImode on subregs of the operands, and the
+ ;; mode of the comparison rtx is set to the mode of operand 0.
+ ;; NOTE(review): the sketch above names op0 as the compare destination, while
+ ;; the template writes the compare result to operand 4 and then reads operand
+ ;; 0 as the mask; these agree only when operands 0 and 4 are the same
+ ;; register -- confirm the remaining alternatives are handled correctly.
+ ;; NOTE(review): the pattern matches (clobber (match_dup 2)) although the
+ ;; sse_mov?fcc insns clobber a separate scratch operand 6, and operands 6/7
+ ;; are assigned only in the preparation code -- confirm this is intended.
+ (define_split
+   [(set (match_operand 0 "register_operand" "")
+ 	(if_then_else (match_operator 1 "sse_comparison_operator"
+ 			[(match_operand 4 "register_operand" "")
+ 			 (match_operand 5 "nonimmediate_operand" "")])
+ 		      (match_operand 2 "register_operand" "")
+ 		      (match_operand 3 "register_operand" "")))
+    (clobber (match_dup 2))
+    (clobber (reg:CC 17))]
+   "SSE_REG_P (operands[0]) && reload_completed"
+   [(set (match_dup 4) (match_op_dup 1 [(match_dup 4) (match_dup 5)]))
+    (set (subreg:TI (match_dup 2) 0) (and:TI (subreg:TI (match_dup 2) 0)
+ 					    (subreg:TI (match_dup 0) 0)))
+    (set (subreg:TI (match_dup 4) 0) (and:TI (not:TI (subreg:TI (match_dup 0) 0))
+ 					    (subreg:TI (match_dup 3) 0)))
+    (set (subreg:TI (match_dup 0) 0) (ior:TI (subreg:TI (match_dup 6) 0)
+ 					    (subreg:TI (match_dup 7) 0)))]
+   "
+ {
+   PUT_MODE (operands[1], GET_MODE (operands[0]));
+   if (rtx_equal_p (operands[0], operands[4]))
+     operands[6] = operands[4], operands[7] = operands[2];
+   else
+     operands[6] = operands[2], operands[7] = operands[0];
+ }")
+ 
+ ;; Special case of conditional move we can handle effectively.
+ ;; Do not bother with the integer/floating point case, since these are
+ ;; both considerably slower, unlike in the generic case.
+ ;; SFmode: select operand 2 when the SSE comparison of operands 4 and 5
+ ;; holds, and zero otherwise.  Operand 4 is tied to the destination.
+ ;; The destination is earlyclobber: the split sequence overwrites it with
+ ;; the compare mask before operand 2 is read, so operand 2 must not be
+ ;; allocated to the same register.
+ (define_insn "*sse_movsfcc_const0_1"
+   [(set (match_operand:SF 0 "register_operand" "=&x")
+ 	(if_then_else:SF (match_operator 1 "sse_comparison_operator"
+ 			[(match_operand:SF 4 "register_operand" "0")
+ 			 (match_operand:SF 5 "nonimmediate_operand" "xm")])
+ 		      (match_operand:SF 2 "register_operand" "x")
+ 		      (match_operand:SF 3 "const0_operand" "X")))]
+   "TARGET_SSE"
+   "#")
+ 
+ ;; SFmode: select zero when the SSE comparison of operands 4 and 5 holds,
+ ;; and operand 3 otherwise.  Operand 4 is tied to the destination.
+ ;; The zero arm (operand 2) takes "X" since it is a constant, and the
+ ;; register arm (operand 3) takes "x" so that it lands in an SSE register.
+ ;; The destination is earlyclobber: the split sequence overwrites it with
+ ;; the compare mask before operand 3 is read.
+ (define_insn "*sse_movsfcc_const0_2"
+   [(set (match_operand:SF 0 "register_operand" "=&x")
+ 	(if_then_else:SF (match_operator 1 "sse_comparison_operator"
+ 			[(match_operand:SF 4 "register_operand" "0")
+ 			 (match_operand:SF 5 "nonimmediate_operand" "xm")])
+ 		      (match_operand:SF 2 "const0_operand" "X")
+ 		      (match_operand:SF 3 "register_operand" "x")))]
+   "TARGET_SSE"
+   "#")
+ 
+ ;; SFmode: same selection as *sse_movsfcc_const0_1 (operand 2 or zero), but
+ ;; for comparisons accepted by fcmov_comparison_operator; here operand 5,
+ ;; not operand 4, is tied to the destination.
+ ;; The destination is earlyclobber: the split sequence overwrites it with
+ ;; the compare mask before operand 2 is read.
+ (define_insn "*sse_movsfcc_const0_3"
+   [(set (match_operand:SF 0 "register_operand" "=&x")
+ 	(if_then_else:SF (match_operator 1 "fcmov_comparison_operator"
+ 			[(match_operand:SF 4 "nonimmediate_operand" "xm")
+ 			 (match_operand:SF 5 "register_operand" "0")])
+ 		      (match_operand:SF 2 "register_operand" "x")
+ 		      (match_operand:SF 3 "const0_operand" "X")))]
+   "TARGET_SSE"
+   "#")
+ 
+ ;; SFmode: same selection as *sse_movsfcc_const0_2 (zero or operand 3), but
+ ;; for comparisons accepted by fcmov_comparison_operator; here operand 5,
+ ;; not operand 4, is tied to the destination.
+ ;; The zero arm (operand 2) takes "X" since it is a constant, and the
+ ;; register arm (operand 3) takes "x" so that it lands in an SSE register.
+ ;; The destination is earlyclobber: the split sequence overwrites it with
+ ;; the compare mask before operand 3 is read.
+ (define_insn "*sse_movsfcc_const0_4"
+   [(set (match_operand:SF 0 "register_operand" "=&x")
+ 	(if_then_else:SF (match_operator 1 "fcmov_comparison_operator"
+ 			[(match_operand:SF 4 "nonimmediate_operand" "xm")
+ 			 (match_operand:SF 5 "register_operand" "0")])
+ 		      (match_operand:SF 2 "const0_operand" "X")
+ 		      (match_operand:SF 3 "register_operand" "x")))]
+   "TARGET_SSE"
+   "#")
+ 
+ ;; DFmode: select operand 2 when the SSE comparison of operands 4 and 5
+ ;; holds, and zero otherwise.  Operand 4 is tied to the destination.
+ ;; All operands are DFmode, matching the pattern name and the TARGET_SSE2
+ ;; condition.
+ ;; The destination is earlyclobber: the split sequence overwrites it with
+ ;; the compare mask before operand 2 is read.
+ (define_insn "*sse_movdfcc_const0_1"
+   [(set (match_operand:DF 0 "register_operand" "=&x")
+ 	(if_then_else:DF (match_operator 1 "sse_comparison_operator"
+ 			[(match_operand:DF 4 "register_operand" "0")
+ 			 (match_operand:DF 5 "nonimmediate_operand" "xm")])
+ 		      (match_operand:DF 2 "register_operand" "x")
+ 		      (match_operand:DF 3 "const0_operand" "X")))]
+   "TARGET_SSE2"
+   "#")
+ 
+ ;; DFmode: select zero when the SSE comparison of operands 4 and 5 holds,
+ ;; and operand 3 otherwise.  Operand 4 is tied to the destination.
+ ;; All operands are DFmode, matching the pattern name and the TARGET_SSE2
+ ;; condition.  The zero arm (operand 2) takes "X" since it is a constant,
+ ;; and the register arm (operand 3) takes "x".
+ ;; The destination is earlyclobber: the split sequence overwrites it with
+ ;; the compare mask before operand 3 is read.
+ (define_insn "*sse_movdfcc_const0_2"
+   [(set (match_operand:DF 0 "register_operand" "=&x")
+ 	(if_then_else:DF (match_operator 1 "sse_comparison_operator"
+ 			[(match_operand:DF 4 "register_operand" "0")
+ 			 (match_operand:DF 5 "nonimmediate_operand" "xm")])
+ 		      (match_operand:DF 2 "const0_operand" "X")
+ 		      (match_operand:DF 3 "register_operand" "x")))]
+   "TARGET_SSE2"
+   "#")
+ 
+ ;; DFmode: select operand 2 when the comparison holds and zero otherwise,
+ ;; for comparisons accepted by fcmov_comparison_operator; operand 5 is tied
+ ;; to the destination.
+ ;; All operands are DFmode, matching the pattern name and the TARGET_SSE2
+ ;; condition.
+ ;; The destination is earlyclobber: the split sequence overwrites it with
+ ;; the compare mask before operand 2 is read.
+ (define_insn "*sse_movdfcc_const0_3"
+   [(set (match_operand:DF 0 "register_operand" "=&x")
+ 	(if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
+ 			[(match_operand:DF 4 "nonimmediate_operand" "xm")
+ 			 (match_operand:DF 5 "register_operand" "0")])
+ 		      (match_operand:DF 2 "register_operand" "x")
+ 		      (match_operand:DF 3 "const0_operand" "X")))]
+   "TARGET_SSE2"
+   "#")
+ 
+ ;; DFmode: select zero when the comparison holds and operand 3 otherwise,
+ ;; for comparisons accepted by fcmov_comparison_operator; operand 5 is tied
+ ;; to the destination.
+ ;; All operands are DFmode, matching the pattern name and the TARGET_SSE2
+ ;; condition.  The zero arm (operand 2) takes "X" since it is a constant,
+ ;; and the register arm (operand 3) takes "x".
+ ;; The destination is earlyclobber: the split sequence overwrites it with
+ ;; the compare mask before operand 3 is read.
+ (define_insn "*sse_movdfcc_const0_4"
+   [(set (match_operand:DF 0 "register_operand" "=&x")
+ 	(if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
+ 			[(match_operand:DF 4 "nonimmediate_operand" "xm")
+ 			 (match_operand:DF 5 "register_operand" "0")])
+ 		      (match_operand:DF 2 "const0_operand" "X")
+ 		      (match_operand:DF 3 "register_operand" "x")))]
+   "TARGET_SSE2"
+   "#")
+ 
+ ;; Split a conditional move with one zero arm, after reload, when the
+ ;; destination is an SSE register.  The compare leaves its result in
+ ;; operand 0, which is then ANDed (in TImode) with the non-zero arm; when
+ ;; operand 2 is the zero arm, the complement of the compare result is used
+ ;; instead.  If the comparison is not an sse_comparison_operator, its two
+ ;; operands are exchanged and its code is passed through swap_condition
+ ;; before the sequence is generated.
+ (define_split
+   [(set (match_operand 0 "register_operand" "")
+ 	(if_then_else (match_operator 1 "comparison_operator"
+ 			[(match_operand 4 "register_operand" "")
+ 			 (match_operand 5 "nonimmediate_operand" "")])
+ 		      (match_operand 2 "nonmemory_operand" "")
+ 		      (match_operand 3 "nonmemory_operand" "")))]
+   "SSE_REG_P (operands[0]) && reload_completed
+    && (const0_operand (operands[2], GET_MODE (operands[0]))
+        || const0_operand (operands[3], GET_MODE (operands[0])))"
+   [(set (match_dup 0) (match_op_dup 1 [(match_dup 0) (match_dup 5)]))
+    (set (subreg:TI (match_dup 0) 0) (and:TI (match_dup 6)
+ 					    (subreg:TI (match_dup 7) 0)))]
+   "
+ {
+   PUT_MODE (operands[1], GET_MODE (operands[0]));
+   if (!sse_comparison_operator (operands[1], VOIDmode))
+     {
+       rtx tmp = operands[5];
+       operands[5] = operands[4];
+       operands[4] = tmp;
+       PUT_CODE (operands[1], swap_condition (GET_CODE (operands[1])));
+     }
+   if (const0_operand (operands[2], GET_MODE (operands[0])))
+     {
+       operands[7] = operands[3];
+       operands[6] = gen_rtx_NOT (TImode, gen_rtx_SUBREG (TImode, operands[0],
+ 							 0));
+     }
+   else
+     {
+       operands[7] = operands[2];
+       operands[6] = gen_rtx_SUBREG (TImode, operands[0], 0);
+     }
+ }")
+ 
  (define_insn "allocate_stack_worker"
    [(unspec:SI [(match_operand:SI 0 "register_operand" "a")] 3)
     (set (reg:SI 7) (minus:SI (reg:SI 7) (match_dup 0)))
Index: i386.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.c,v
retrieving revision 1.224
diff -c -3 -p -r1.224 i386.c
*** i386.c	2001/02/25 16:35:48	1.224
--- i386.c	2001/02/27 13:11:04
*************** ix86_expand_fp_movcc (operands)
*** 5962,5967 ****
--- 6180,6271 ----
    rtx tmp;
    rtx compare_op, second_test, bypass_test;
  
+   /* For SF/DFmode conditional moves based on comparisons
+      in same mode, we may want to use SSE min/max instructions.  */
+   if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
+        || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
+       && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
+       /* We may be called from the post-reload splitter.  */
+       && (!REG_P (operands[0])
+ 	  || SSE_REG_P (operands[0])
+ 	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
+     {
+       rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
+       code = GET_CODE (operands[1]);
+ 
+       /* See if we have (cross) match between comparison operands and
+          conditional move operands.  */
+       if (rtx_equal_p (operands[2], op1))
+ 	{
+ 	  rtx tmp = op0;
+ 	  op0 = op1;
+ 	  op1 = tmp;
+ 	  code = reverse_condition_maybe_unordered (code);
+ 	}
+       if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
+ 	{
+ 	  /* Check for min operation.  */
+ 	  if (code == LT)
+ 	    {
+ 	       operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
+ 	       if (memory_operand (op0, VOIDmode))
+ 		 op0 = force_reg (GET_MODE (operands[0]), op0);
+ 	       if (GET_MODE (operands[0]) == SFmode)
+ 		 emit_insn (gen_minsf3 (operands[0], op0, op1));
+ 	       else
+ 		 emit_insn (gen_mindf3 (operands[0], op0, op1));
+ 	       return 1;
+ 	    }
+ 	  /* Check for max operation.  */
+ 	  if (code == GT)
+ 	    {
+ 	       operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
+ 	       if (memory_operand (op0, VOIDmode))
+ 		 op0 = force_reg (GET_MODE (operands[0]), op0);
+ 	       if (GET_MODE (operands[0]) == SFmode)
+ 		 emit_insn (gen_maxsf3 (operands[0], op0, op1));
+ 	       else
+ 		 emit_insn (gen_maxdf3 (operands[0], op0, op1));
+ 	       return 1;
+ 	    }
+ 	}
+       /* Manage condition to be sse_comparison_operator.  In case we are
+ 	 in non-ieee mode, try to canonicalize the destination operand
+ 	 to be first in the comparison - this helps reload to avoid extra
+ 	 moves.  */
+       if (!sse_comparison_operator (operands[1], VOIDmode)
+ 	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
+ 	{
+ 	  rtx tmp = ix86_compare_op0;
+ 	  ix86_compare_op0 = ix86_compare_op1;
+ 	  ix86_compare_op1 = tmp;
+ 	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
+ 					VOIDmode, ix86_compare_op0,
+ 					ix86_compare_op1);
+ 	}
+       /* Similarly try to make the result the first (then) arm of the
+ 	 conditional move.  The two arms are exchanged, so the condition
+ 	 is reversed to keep selecting the same value.  */
+       if (rtx_equal_p (operands[0], operands[3]))
+ 	{
+ 	  rtx tmp = operands[2];
+ 	  operands[2] = operands[3];
+ 	  /* Complete the swap: the old then-arm becomes the else-arm.  */
+ 	  operands[3] = tmp;
+ 	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
+ 					  (GET_CODE (operands[1])),
+ 					VOIDmode, ix86_compare_op0,
+ 					ix86_compare_op1);
+ 	}
+       if (GET_MODE (operands[0]) == SFmode)
+ 	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
+ 				    operands[2], operands[3],
+ 				    ix86_compare_op0, ix86_compare_op1));
+       else
+ 	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
+ 				    operands[2], operands[3],
+ 				    ix86_compare_op0, ix86_compare_op1));
+       return 1;
+     }
+ 
    /* The floating point conditional move instructions don't directly
       support conditions resulting from a signed integer comparison.  */
  


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]