This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[4.0] fix target/17990


Here's the more aggressive version for 4.0.

All of the neg and abs patterns are rewritten and unified.

We can now handle negdf/absdf in integer registers in 64-bit mode.
No extra register is needed if we use the btcq/btrq instructions.
I measured

	movabsq	$0x8000000000000000, %rdx
	xorq	%rdx, %rax
vs
	rolq	$1, %rax
	xorb	$1, %al
	rorq	$1, %rax
vs
	btcq	$63, %rax

On my Athlon64 system, all three options are within measurement
error of one another.  Diego tested for me on Nocona and found that,
with xorq as the baseline, btcq is about 4 times slower and the rotates
are about 7.4 times slower.  So there are some peepholes available
for Nocona to split the bt instructions if there's a free register.

Tested on i686-linux and x86_64-linux.  I built povray and ran
the benchmark image on a p4, which exercised -mfpmath=sse.


r~


        PR target/17990
        * config/i386/i386.c (x86_use_bt): New.
        (ix86_expand_unary_operator): Use MEM_P.
        (ix86_expand_fp_absneg_operator): New.
        * config/i386/i386.h (x86_use_bt): Declare.
        (TARGET_USE_BT): New.
        * config/i386/i386-protos.h: Update.
        * config/i386/i386.md (negsf2): Use ix86_expand_fp_absneg_operator.
        (negdf2, negxf2, abssf2, absdf2, absxf2): Likewise.
        (negsf2_memory, negsf2_ifs, negsf2_if, negdf2_memory, negdf2_ifs,
        negdf2_ifs_rex64, negdf2_if, negdf2_if_rex64, negxf2_if,
        abssf2_memory, abssf2_ifs, abssf2_if, absdf2_memory, absdf2_ifs,
        absdf2_ifs_rex64, absdf2_if, absxf2_if): Remove.
        (absnegsf2_mixed, absnegsf2_sse, absnegsf2_i387, absnegdf2_mixed,
        absnegdf2_sse, absnegdf2_i387, absnegxf2_i387): New.  Merge all
        neg and abs splitters.  Handle DFmode in general regs in 64-bit mode.
        (negextendsfdf2, absextendsfdf2): Disable for non-mixed sse math.
        (btsq, btrq, btcq): New.  Add peepholes as well.
        (movv4sf_internal splitter): Postpone until after reload.
        (movv2di_internal splitter): Likewise.
        * config/i386/predicates.md (const_0_to_63_operand): New.
        (absneg_operator): New.

Index: config/i386/i386-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386-protos.h,v
retrieving revision 1.121
diff -c -p -d -r1.121 i386-protos.h
*** config/i386/i386-protos.h	8 Dec 2004 06:50:55 -0000	1.121
--- config/i386/i386-protos.h	14 Dec 2004 22:32:24 -0000
*************** extern void ix86_expand_binary_operator 
*** 130,135 ****
--- 130,137 ----
  extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
  extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
  					rtx[]);
+ extern void ix86_expand_fp_absneg_operator (enum rtx_code, enum machine_mode,
+ 					    rtx[]);
  extern int ix86_unary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
  extern int ix86_match_ccmode (rtx, enum machine_mode);
  extern rtx ix86_expand_compare (enum rtx_code, rtx *, rtx *);
Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.752
diff -c -p -d -r1.752 i386.c
*** config/i386/i386.c	13 Dec 2004 01:11:41 -0000	1.752
--- config/i386/i386.c	14 Dec 2004 22:32:26 -0000
*************** const int x86_ext_80387_constants = m_K6
*** 579,584 ****
--- 579,585 ----
     the 16 byte window.  */
  const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
  const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
+ const int x86_use_bt = m_ATHLON_K8;
  
  /* In case the average insn count for single function invocation is
     lower than this constant, emit fast (but longer) prologue and
*************** ix86_expand_unary_operator (enum rtx_cod
*** 7636,7642 ****
    /* If the destination is memory, and we do not have matching source
       operands, do things in registers.  */
    matching_memory = 0;
!   if (GET_CODE (dst) == MEM)
      {
        if (rtx_equal_p (dst, src))
  	matching_memory = 1;
--- 7637,7643 ----
    /* If the destination is memory, and we do not have matching source
       operands, do things in registers.  */
    matching_memory = 0;
!   if (MEM_P (dst))
      {
        if (rtx_equal_p (dst, src))
  	matching_memory = 1;
*************** ix86_expand_unary_operator (enum rtx_cod
*** 7645,7654 ****
      }
  
    /* When source operand is memory, destination must match.  */
!   if (!matching_memory && GET_CODE (src) == MEM)
      src = force_reg (mode, src);
  
!   /* If optimizing, copy to regs to improve CSE */
    if (optimize && ! no_new_pseudos)
      {
        if (GET_CODE (dst) == MEM)
--- 7646,7655 ----
      }
  
    /* When source operand is memory, destination must match.  */
!   if (MEM_P (src) && !matching_memory)
      src = force_reg (mode, src);
  
!   /* If optimizing, copy to regs to improve CSE.  */
    if (optimize && ! no_new_pseudos)
      {
        if (GET_CODE (dst) == MEM)
*************** ix86_unary_operator_ok (enum rtx_code co
*** 7695,7700 ****
--- 7696,7786 ----
    return TRUE;
  }
  
+ /* Generate code for floating point ABS or NEG.  */
+ 
+ void
+ ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
+ 				rtx operands[])
+ {
+   rtx mask, set, use, clob, dst, src;
+   bool matching_memory;
+   bool use_sse = false;
+ 
+   if (TARGET_SSE_MATH)
+     {
+       if (mode == SFmode)
+ 	use_sse = true;
+       else if (mode == DFmode && TARGET_SSE2)
+ 	use_sse = true;
+     }
+ 
+   /* NEG and ABS performed with SSE use bitwise mask operations.
+      Create the appropriate mask now.  */
+   if (use_sse)
+     {
+       HOST_WIDE_INT hi, lo;
+       int shift = 63;
+ 
+       /* Find the sign bit, sign extended to 2*HWI.  */
+       if (mode == SFmode)
+         lo = 0x80000000, hi = lo < 0;
+       else if (HOST_BITS_PER_WIDE_INT >= 64)
+         lo = (HOST_WIDE_INT)1 << shift, hi = -1;
+       else
+         lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
+ 
+       /* If we're looking for the absolute value, then we want
+ 	 the complement.  */
+       if (code == ABS)
+         lo = ~lo, hi = ~hi;
+ 
+       /* Force this value into the low part of a fp vector constant.  */
+       mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
+       mask = gen_lowpart (mode, mask);
+       if (mode == SFmode)
+         mask = gen_rtx_CONST_VECTOR (V4SFmode,
+ 				     gen_rtvec (4, mask, CONST0_RTX (SFmode),
+ 						CONST0_RTX (SFmode),
+ 						CONST0_RTX (SFmode)));
+       else
+         mask = gen_rtx_CONST_VECTOR (V2DFmode,
+ 				     gen_rtvec (2, mask, CONST0_RTX (DFmode)));
+       mask = force_reg (GET_MODE (mask), mask);
+     }
+   else
+     {
+       /* When not using SSE, we don't use the mask, but prefer to keep the
+ 	 same general form of the insn pattern to reduce duplication when
+ 	 it comes time to split.  */
+       mask = const0_rtx;
+     }
+ 
+   dst = operands[0];
+   src = operands[1];
+ 
+   /* If the destination is memory, and we don't have matching source
+      operands, do things in registers.  */
+   matching_memory = false;
+   if (MEM_P (dst))
+     {
+       if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
+ 	matching_memory = true;
+       else
+ 	dst = gen_reg_rtx (mode);
+     }
+   if (MEM_P (src) && !matching_memory)
+     src = force_reg (mode, src);
+ 
+   set = gen_rtx_fmt_e (code, mode, src);
+   set = gen_rtx_SET (VOIDmode, dst, set);
+   use = gen_rtx_USE (VOIDmode, mask);
+   clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+   emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
+ 
+   if (dst != operands[0])
+     emit_move_insn (operands[0], dst);
+ }
+ 
  /* Return TRUE or FALSE depending on whether the first SET in INSN
     has source and destination with matching CC modes, and that the
     CC mode is at least as constrained as REQ_MODE.  */
Index: config/i386/i386.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.h,v
retrieving revision 1.406
diff -c -p -d -r1.406 i386.h
*** config/i386/i386.h	13 Dec 2004 10:27:43 -0000	1.406
--- config/i386/i386.h	14 Dec 2004 22:32:27 -0000
*************** extern const int x86_sse_partial_reg_dep
*** 247,252 ****
--- 247,253 ----
  extern const int x86_sse_typeless_stores, x86_sse_load0_by_pxor;
  extern const int x86_use_ffreep, x86_sse_partial_regs_for_cvtsd2ss;
  extern const int x86_inter_unit_moves, x86_schedule;
+ extern const int x86_use_bt;
  extern int x86_prefetch_sse;
  
  #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)
*************** extern int x86_prefetch_sse;
*** 302,307 ****
--- 303,309 ----
  #define TARGET_INTER_UNIT_MOVES (x86_inter_unit_moves & TUNEMASK)
  #define TARGET_FOUR_JUMP_LIMIT (x86_four_jump_limit & TUNEMASK)
  #define TARGET_SCHEDULE (x86_schedule & TUNEMASK)
+ #define TARGET_USE_BT (x86_use_bt & TUNEMASK)
  
  #define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE)
  
Index: config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.573
diff -c -p -d -r1.573 i386.md
*** config/i386/i386.md	13 Dec 2004 10:27:43 -0000	1.573
--- config/i386/i386.md	14 Dec 2004 22:32:32 -0000
***************
*** 9560,9909 ****
  ;; Changing of sign for FP values is doable using integer unit too.
  
  (define_expand "negsf2"
!   [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "")
! 		   (neg:SF (match_operand:SF 1 "nonimmediate_operand" "")))
! 	      (clobber (reg:CC FLAGS_REG))])]
!   "TARGET_80387"
!   "if (TARGET_SSE)
!      {
!        /* In case operand is in memory,  we will not use SSE.  */
!        if (memory_operand (operands[0], VOIDmode)
! 	   && rtx_equal_p (operands[0], operands[1]))
! 	 emit_insn (gen_negsf2_memory (operands[0], operands[1]));
!        else
! 	{
! 	  /* Using SSE is tricky, since we need bitwise negation of -0
! 	     in register.  */
! 	  rtx reg = gen_reg_rtx (SFmode);
! 	  rtx dest = operands[0];
! 	  rtx imm = gen_lowpart (SFmode, gen_int_mode (0x80000000, SImode));
  
! 	  operands[1] = force_reg (SFmode, operands[1]);
! 	  operands[0] = force_reg (SFmode, operands[0]);
! 	  reg = force_reg (V4SFmode,
! 			   gen_rtx_CONST_VECTOR (V4SFmode,
! 			     gen_rtvec (4, imm, CONST0_RTX (SFmode),
! 					CONST0_RTX (SFmode),
! 					CONST0_RTX (SFmode))));
! 	  emit_insn (gen_negsf2_ifs (operands[0], operands[1], reg));
! 	  if (dest != operands[0])
! 	    emit_move_insn (dest, operands[0]);
! 	}
!        DONE;
!      }
!    ix86_expand_unary_operator (NEG, SFmode, operands); DONE;")
  
! (define_insn "negsf2_memory"
!   [(set (match_operand:SF 0 "memory_operand" "=m")
! 	(neg:SF (match_operand:SF 1 "memory_operand" "0")))
     (clobber (reg:CC FLAGS_REG))]
!   "ix86_unary_operator_ok (NEG, SFmode, operands)"
    "#")
  
! (define_insn "negsf2_ifs"
!   [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf")
! 	(neg:SF (match_operand:SF 1 "nonimmediate_operand" "0,x#fr,0,0")))
!    (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,xm*r,xm*r"))
     (clobber (reg:CC FLAGS_REG))]
!   "TARGET_SSE
!    && (reload_in_progress || reload_completed
!        || (register_operand (operands[0], VOIDmode)
! 	   && register_operand (operands[1], VOIDmode)))"
    "#")
  
! (define_split
!   [(set (match_operand:SF 0 "memory_operand" "")
! 	(neg:SF (match_operand:SF 1 "memory_operand" "")))
!    (use (match_operand:SF 2 "" ""))
!    (clobber (reg:CC FLAGS_REG))]
!   ""
!   [(parallel [(set (match_dup 0)
! 		   (neg:SF (match_dup 1)))
! 	      (clobber (reg:CC FLAGS_REG))])])
! 
! (define_split
!   [(set (match_operand:SF 0 "register_operand" "")
! 	(neg:SF (match_operand:SF 1 "register_operand" "")))
!    (use (match_operand:V4SF 2 "" ""))
!    (clobber (reg:CC FLAGS_REG))]
!   "reload_completed && !SSE_REG_P (operands[0])"
!   [(parallel [(set (match_dup 0)
! 		   (neg:SF (match_dup 1)))
! 	      (clobber (reg:CC FLAGS_REG))])])
! 
! (define_split
!   [(set (match_operand:SF 0 "register_operand" "")
! 	(neg:SF (match_operand:SF 1 "register_operand" "")))
!    (use (match_operand:V4SF 2 "nonimmediate_operand" ""))
!    (clobber (reg:CC FLAGS_REG))]
!   "reload_completed && SSE_REG_P (operands[0])"
!   [(set (match_dup 0)
! 	(xor:V4SF (match_dup 1)
! 		  (match_dup 2)))]
! {
!   operands[0] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
!   operands[1] = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
!   if (operands_match_p (operands[0], operands[2]))
!     {
!       rtx tmp;
!       tmp = operands[1];
!       operands[1] = operands[2];
!       operands[2] = tmp;
!     }
! })
! 
! 
! ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
! ;; because of secondary memory needed to reload from class FLOAT_INT_REGS
! ;; to itself.
! (define_insn "*negsf2_if"
    [(set (match_operand:SF 0 "nonimmediate_operand" "=f#r,rm#f")
! 	(neg:SF (match_operand:SF 1 "nonimmediate_operand" "0,0")))
     (clobber (reg:CC FLAGS_REG))]
!   "TARGET_80387 && !TARGET_SSE
!    && ix86_unary_operator_ok (NEG, SFmode, operands)"
    "#")
  
- (define_split
-   [(set (match_operand:SF 0 "fp_register_operand" "")
- 	(neg:SF (match_operand:SF 1 "register_operand" "")))
-    (clobber (reg:CC FLAGS_REG))]
-   "TARGET_80387 && reload_completed"
-   [(set (match_dup 0)
- 	(neg:SF (match_dup 1)))]
-   "")
- 
- (define_split
-   [(set (match_operand:SF 0 "register_and_not_fp_reg_operand" "")
- 	(neg:SF (match_operand:SF 1 "register_operand" "")))
-    (clobber (reg:CC FLAGS_REG))]
-   "TARGET_80387 && reload_completed"
-   [(parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 1)))
- 	      (clobber (reg:CC FLAGS_REG))])]
-   "operands[1] = gen_int_mode (0x80000000, SImode);
-    operands[0] = gen_lowpart (SImode, operands[0]);")
- 
- (define_split
-   [(set (match_operand 0 "memory_operand" "")
- 	(neg (match_operand 1 "memory_operand" "")))
-    (clobber (reg:CC FLAGS_REG))]
-   "TARGET_80387 && reload_completed && FLOAT_MODE_P (GET_MODE (operands[0]))"
-   [(parallel [(set (match_dup 0) (xor:QI (match_dup 0) (match_dup 1)))
- 	      (clobber (reg:CC FLAGS_REG))])]
- {
-   int size = GET_MODE_SIZE (GET_MODE (operands[1]));
- 
-   if (GET_MODE (operands[1]) == XFmode)
-     size = 10;
-   operands[0] = adjust_address (operands[0], QImode, size - 1);
-   operands[1] = gen_int_mode (0x80, QImode);
- })
- 
  (define_expand "negdf2"
!   [(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "")
! 		   (neg:DF (match_operand:DF 1 "nonimmediate_operand" "")))
! 	      (clobber (reg:CC FLAGS_REG))])]
!   "TARGET_80387"
!   "if (TARGET_SSE2)
!      {
!        /* In case operand is in memory,  we will not use SSE.  */
!        if (memory_operand (operands[0], VOIDmode)
! 	   && rtx_equal_p (operands[0], operands[1]))
! 	 emit_insn (gen_negdf2_memory (operands[0], operands[1]));
!        else
! 	{
! 	  /* Using SSE is tricky, since we need bitwise negation of -0
! 	     in register.  */
! 	  rtx reg;
! #if HOST_BITS_PER_WIDE_INT >= 64
! 	  rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode);
! #else
! 	  rtx imm = immed_double_const (0, 0x80000000, DImode);
! #endif
! 	  rtx dest = operands[0];
  
! 	  operands[1] = force_reg (DFmode, operands[1]);
! 	  operands[0] = force_reg (DFmode, operands[0]);
! 	  imm = gen_lowpart (DFmode, imm);
! 	  reg = force_reg (V2DFmode,
! 			   gen_rtx_CONST_VECTOR (V2DFmode,
! 			     gen_rtvec (2, imm, CONST0_RTX (DFmode))));
! 	  emit_insn (gen_negdf2_ifs (operands[0], operands[1], reg));
! 	  if (dest != operands[0])
! 	    emit_move_insn (dest, operands[0]);
! 	}
!        DONE;
!      }
!    ix86_expand_unary_operator (NEG, DFmode, operands); DONE;")
  
! (define_insn "negdf2_memory"
!   [(set (match_operand:DF 0 "memory_operand" "=m")
! 	(neg:DF (match_operand:DF 1 "memory_operand" "0")))
     (clobber (reg:CC FLAGS_REG))]
!   "ix86_unary_operator_ok (NEG, DFmode, operands)"
    "#")
  
! (define_insn "negdf2_ifs"
!   [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,f#Yr,rm#Yf")
! 	(neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0,0")))
!    (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Ym*r,Ym*r"))
     (clobber (reg:CC FLAGS_REG))]
!   "!TARGET_64BIT && TARGET_SSE2
!    && (reload_in_progress || reload_completed
!        || (register_operand (operands[0], VOIDmode)
! 	   && register_operand (operands[1], VOIDmode)))"
    "#")
  
! (define_insn "*negdf2_ifs_rex64"
!   [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#f,Y#f,fm#Y")
! 	(neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0")))
!    (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Ym*r"))
     (clobber (reg:CC FLAGS_REG))]
!   "TARGET_64BIT && TARGET_SSE2
!    && (reload_in_progress || reload_completed
!        || (register_operand (operands[0], VOIDmode)
! 	   && register_operand (operands[1], VOIDmode)))"
    "#")
  
! (define_split
!   [(set (match_operand:DF 0 "memory_operand" "")
! 	(neg:DF (match_operand:DF 1 "memory_operand" "")))
!    (use (match_operand:V2DF 2 "" ""))
!    (clobber (reg:CC FLAGS_REG))]
!   ""
!   [(parallel [(set (match_dup 0)
! 		   (neg:DF (match_dup 1)))
! 	      (clobber (reg:CC FLAGS_REG))])])
  
! (define_split
!   [(set (match_operand:DF 0 "register_operand" "")
! 	(neg:DF (match_operand:DF 1 "register_operand" "")))
!    (use (match_operand:V2DF 2 "" ""))
     (clobber (reg:CC FLAGS_REG))]
!   "reload_completed && !SSE_REG_P (operands[0])
!    && (!TARGET_64BIT || FP_REG_P (operands[0]))"
!   [(parallel [(set (match_dup 0)
! 		   (neg:DF (match_dup 1)))
! 	      (clobber (reg:CC FLAGS_REG))])])
  
  (define_split
!   [(set (match_operand:DF 0 "register_operand" "")
! 	(neg:DF (match_operand:DF 1 "register_operand" "")))
!    (use (match_operand:V2DF 2 "" ""))
     (clobber (reg:CC FLAGS_REG))]
!   "TARGET_64BIT && reload_completed && GENERAL_REG_P (operands[0])"
!   [(parallel [(set (match_dup 0)
! 		   (xor:DI (match_dup 1) (match_dup 2)))
! 	      (clobber (reg:CC FLAGS_REG))])]
!    "operands[0] = gen_lowpart (DImode, operands[0]);
!     operands[1] = gen_lowpart (DImode, operands[1]);
!     operands[2] = gen_lowpart (DImode, operands[2]);")
  
  (define_split
!   [(set (match_operand:DF 0 "register_operand" "")
! 	(neg:DF (match_operand:DF 1 "register_operand" "")))
!    (use (match_operand:V2DF 2 "nonimmediate_operand" ""))
     (clobber (reg:CC FLAGS_REG))]
    "reload_completed && SSE_REG_P (operands[0])"
!   [(set (match_dup 0)
! 	(xor:V2DF (match_dup 1)
! 		  (match_dup 2)))]
  {
!   operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
!   operands[1] = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
!   /* Avoid possible reformatting on the operands.  */
!   if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
!     emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0]));
    if (operands_match_p (operands[0], operands[2]))
      {
-       rtx tmp;
        tmp = operands[1];
        operands[1] = operands[2];
        operands[2] = tmp;
      }
  })
  
- ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
- ;; because of secondary memory needed to reload from class FLOAT_INT_REGS
- ;; to itself.
- (define_insn "*negdf2_if"
-   [(set (match_operand:DF 0 "nonimmediate_operand" "=f#r,rm#f")
- 	(neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,0")))
-    (clobber (reg:CC FLAGS_REG))]
-   "!TARGET_64BIT && TARGET_80387
-    && ix86_unary_operator_ok (NEG, DFmode, operands)"
-   "#")
- 
- ;; FIXME: We should to allow integer registers here.  Problem is that
- ;; we need another scratch register to get constant from.
- ;; Forcing constant to mem if no register available in peep2 should be
- ;; safe even for PIC mode, because of RIP relative addressing.
- (define_insn "*negdf2_if_rex64"
-   [(set (match_operand:DF 0 "nonimmediate_operand" "=f,mf")
- 	(neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,0")))
-    (clobber (reg:CC FLAGS_REG))]
-   "TARGET_64BIT && TARGET_80387
-    && ix86_unary_operator_ok (NEG, DFmode, operands)"
-   "#")
- 
  (define_split
!   [(set (match_operand:DF 0 "fp_register_operand" "")
! 	(neg:DF (match_operand:DF 1 "register_operand" "")))
     (clobber (reg:CC FLAGS_REG))]
!   "TARGET_80387 && reload_completed"
!   [(set (match_dup 0)
! 	(neg:DF (match_dup 1)))]
!   "")
  
  (define_split
!   [(set (match_operand:DF 0 "register_and_not_fp_reg_operand" "")
! 	(neg:DF (match_operand:DF 1 "register_operand" "")))
     (clobber (reg:CC FLAGS_REG))]
!   "!TARGET_64BIT && TARGET_80387 && reload_completed"
!   [(parallel [(set (match_dup 3) (xor:SI (match_dup 3) (match_dup 4)))
! 	      (clobber (reg:CC FLAGS_REG))])]
!   "operands[4] = gen_int_mode (0x80000000, SImode);
!    split_di (operands+0, 1, operands+2, operands+3);")
! 
! (define_expand "negxf2"
!   [(parallel [(set (match_operand:XF 0 "nonimmediate_operand" "")
! 		   (neg:XF (match_operand:XF 1 "nonimmediate_operand" "")))
  	      (clobber (reg:CC FLAGS_REG))])]
!   "TARGET_80387"
!   "ix86_expand_unary_operator (NEG, XFmode, operands); DONE;")
  
! ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
! ;; because of secondary memory needed to reload from class FLOAT_INT_REGS
! ;; to itself.
! (define_insn "*negxf2_if"
!   [(set (match_operand:XF 0 "nonimmediate_operand" "=f#r,rm#f")
! 	(neg:XF (match_operand:XF 1 "nonimmediate_operand" "0,0")))
!    (clobber (reg:CC FLAGS_REG))]
!   "TARGET_80387
!    && ix86_unary_operator_ok (NEG, XFmode, operands)"
!   "#")
  
  (define_split
!   [(set (match_operand:XF 0 "fp_register_operand" "")
! 	(neg:XF (match_operand:XF 1 "register_operand" "")))
     (clobber (reg:CC FLAGS_REG))]
!   "TARGET_80387 && reload_completed"
!   [(set (match_dup 0)
! 	(neg:XF (match_dup 1)))]
!   "")
  
  (define_split
!   [(set (match_operand:XF 0 "register_and_not_fp_reg_operand" "")
! 	(neg:XF (match_operand:XF 1 "register_operand" "")))
     (clobber (reg:CC FLAGS_REG))]
!   "TARGET_80387 && reload_completed"
!   [(parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 1)))
  	      (clobber (reg:CC FLAGS_REG))])]
!   "operands[1] = GEN_INT (0x8000);
!    operands[0] = gen_rtx_REG (SImode,
! 			      true_regnum (operands[0]) + (TARGET_64BIT ? 1 : 2));")
  
! ;; Conditionalize these after reload. If they matches before reload, we 
  ;; lose the clobber and ability to use integer instructions.
  
  (define_insn "*negsf2_1"
--- 9560,9824 ----
  ;; Changing of sign for FP values is doable using integer unit too.
  
  (define_expand "negsf2"
!   [(set (match_operand:SF 0 "nonimmediate_operand" "")
! 	(neg:SF (match_operand:SF 1 "nonimmediate_operand" "")))]
!   "TARGET_80387 || TARGET_SSE_MATH"
!   "ix86_expand_fp_absneg_operator (NEG, SFmode, operands); DONE;")
  
! (define_expand "abssf2"
!   [(set (match_operand:SF 0 "nonimmediate_operand" "")
! 	(abs:SF (match_operand:SF 1 "nonimmediate_operand" "")))]
!   "TARGET_80387 || TARGET_SSE_MATH"
!   "ix86_expand_fp_absneg_operator (ABS, SFmode, operands); DONE;")
  
! (define_insn "*absnegsf2_mixed"
!   [(set (match_operand:SF 0 "nonimmediate_operand"    "=x#fr,x#fr,f#xr,rm#xf")
! 	(match_operator:SF 3 "absneg_operator"
! 	  [(match_operand:SF 1 "nonimmediate_operand" "0    ,x#fr,0   ,0")]))
!    (use (match_operand:V4SF 2 "nonimmediate_operand"  "xm   ,0   ,X   ,X"))
     (clobber (reg:CC FLAGS_REG))]
!   "TARGET_SSE_MATH && TARGET_MIX_SSE_I387
!    && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)"
    "#")
  
! (define_insn "*absnegsf2_sse"
!   [(set (match_operand:SF 0 "nonimmediate_operand"    "=x#r,x#r,rm#x")
! 	(match_operator:SF 3 "absneg_operator"
! 	  [(match_operand:SF 1 "nonimmediate_operand" "0   ,x#r,0")]))
!    (use (match_operand:V4SF 2 "nonimmediate_operand"  "xm  ,0  ,X"))
     (clobber (reg:CC FLAGS_REG))]
!   "TARGET_SSE_MATH
!    && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)"
    "#")
  
! (define_insn "*absnegsf2_i387"
    [(set (match_operand:SF 0 "nonimmediate_operand" "=f#r,rm#f")
! 	(match_operator:SF 3 "absneg_operator"
! 	  [(match_operand:SF 1 "nonimmediate_operand" "0,0")]))
!    (use (match_operand 2 "" ""))
     (clobber (reg:CC FLAGS_REG))]
!   "TARGET_80387 && !TARGET_SSE_MATH
!    && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)"
    "#")
  
  (define_expand "negdf2"
!   [(set (match_operand:DF 0 "nonimmediate_operand" "")
! 	(neg:DF (match_operand:DF 1 "nonimmediate_operand" "")))]
!   "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
!   "ix86_expand_fp_absneg_operator (NEG, DFmode, operands); DONE;")
  
! (define_expand "absdf2"
!   [(set (match_operand:DF 0 "nonimmediate_operand" "")
! 	(abs:DF (match_operand:DF 1 "nonimmediate_operand" "")))]
!   "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
!   "ix86_expand_fp_absneg_operator (ABS, DFmode, operands); DONE;")
  
! (define_insn "*absnegdf2_mixed"
!   [(set (match_operand:DF 0 "nonimmediate_operand"    "=Y#fr,Y#fr,f#Yr,rm#Yf")
! 	(match_operator:DF 3 "absneg_operator"
! 	  [(match_operand:DF 1 "nonimmediate_operand" "0    ,Y#fr,0   ,0")]))
!    (use (match_operand:V2DF 2 "nonimmediate_operand"  "Ym   ,0   ,X   ,X"))
     (clobber (reg:CC FLAGS_REG))]
!   "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387
!    && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)"
    "#")
  
! (define_insn "*absnegdf2_sse"
!   [(set (match_operand:DF 0 "nonimmediate_operand"    "=Y#r,Y#r,rm#Y")
! 	(match_operator:DF 3 "absneg_operator"
! 	  [(match_operand:DF 1 "nonimmediate_operand" "0   ,Y#r,0")]))
!    (use (match_operand:V2DF 2 "nonimmediate_operand"  "Ym  ,0  ,X"))
     (clobber (reg:CC FLAGS_REG))]
!   "TARGET_SSE2 && TARGET_SSE_MATH
!    && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)"
    "#")
  
! (define_insn "*absnegdf2_i387"
!   [(set (match_operand:DF 0 "nonimmediate_operand" "=f#r,rm#f")
! 	(match_operator:DF 3 "absneg_operator"
! 	  [(match_operand:DF 1 "nonimmediate_operand" "0,0")]))
!    (use (match_operand 2 "" ""))
     (clobber (reg:CC FLAGS_REG))]
!   "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)
!    && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)"
    "#")
  
! (define_expand "negxf2"
!   [(set (match_operand:XF 0 "nonimmediate_operand" "")
! 	(neg:XF (match_operand:XF 1 "nonimmediate_operand" "")))]
!   "TARGET_80387"
!   "ix86_expand_fp_absneg_operator (NEG, XFmode, operands); DONE;")
  
! (define_expand "absxf2"
!   [(set (match_operand:XF 0 "nonimmediate_operand" "")
! 	(neg:XF (match_operand:XF 1 "nonimmediate_operand" "")))]
!   "TARGET_80387"
!   "ix86_expand_fp_absneg_operator (ABS, XFmode, operands); DONE;")
! 
! (define_insn "*absnegxf2_i387"
!   [(set (match_operand:XF 0 "nonimmediate_operand" "=f#r,rm#f")
! 	(match_operator:XF 3 "absneg_operator"
! 	  [(match_operand:XF 1 "nonimmediate_operand" "0,0")]))
!    (use (match_operand 2 "" ""))
     (clobber (reg:CC FLAGS_REG))]
!   "TARGET_80387
!    && ix86_unary_operator_ok (GET_CODE (operands[3]), XFmode, operands)"
!   "#")
! 
! ;; Splitters for fp abs and neg.
  
  (define_split
!   [(set (match_operand 0 "fp_register_operand" "")
! 	(match_operator 1 "absneg_operator" [(match_dup 0)]))
!    (use (match_operand 2 "" ""))
     (clobber (reg:CC FLAGS_REG))]
!   "reload_completed"
!   [(set (match_dup 0) (match_op_dup 1 [(match_dup 0)]))])
  
  (define_split
!   [(set (match_operand 0 "register_operand" "")
! 	(match_operator 3 "absneg_operator"
! 	  [(match_operand 1 "register_operand" "")]))
!    (use (match_operand 2 "nonimmediate_operand" ""))
     (clobber (reg:CC FLAGS_REG))]
    "reload_completed && SSE_REG_P (operands[0])"
!   [(set (match_dup 0) (match_dup 3))]
  {
!   enum machine_mode mode = GET_MODE (operands[0]);
!   enum machine_mode vmode = GET_MODE (operands[2]);
!   rtx tmp;
!   
!   operands[0] = simplify_gen_subreg (vmode, operands[0], mode, 0);
!   operands[1] = simplify_gen_subreg (vmode, operands[1], mode, 0);
    if (operands_match_p (operands[0], operands[2]))
      {
        tmp = operands[1];
        operands[1] = operands[2];
        operands[2] = tmp;
      }
+   if (GET_CODE (operands[3]) == ABS)
+     tmp = gen_rtx_AND (vmode, operands[1], operands[2]);
+   else
+     tmp = gen_rtx_XOR (vmode, operands[1], operands[2]);
+   operands[3] = tmp;
  })
  
  (define_split
!   [(set (match_operand:SF 0 "register_operand" "")
! 	(match_operator:SF 1 "absneg_operator" [(match_dup 0)]))
!    (use (match_operand:V4SF 2 "" ""))
     (clobber (reg:CC FLAGS_REG))]
!   "reload_completed"
!   [(parallel [(set (match_dup 0) (match_dup 1))
! 	      (clobber (reg:CC FLAGS_REG))])]
! { 
!   rtx tmp;
!   operands[0] = gen_lowpart (SImode, operands[0]);
!   if (GET_CODE (operands[1]) == ABS)
!     {
!       tmp = gen_int_mode (0x7fffffff, SImode);
!       tmp = gen_rtx_AND (SImode, operands[0], tmp);
!     }
!   else
!     {
!       tmp = gen_int_mode (0x80000000, SImode);
!       tmp = gen_rtx_XOR (SImode, operands[0], tmp);
!     }
!   operands[1] = tmp;
! })
  
  (define_split
!   [(set (match_operand:DF 0 "register_operand" "")
! 	(match_operator:DF 1 "absneg_operator" [(match_dup 0)]))
!    (use (match_operand 2 "" ""))
     (clobber (reg:CC FLAGS_REG))]
!   "reload_completed"
!   [(parallel [(set (match_dup 0) (match_dup 1))
  	      (clobber (reg:CC FLAGS_REG))])]
! {
!   rtx tmp;
!   if (TARGET_64BIT)
!     {
!       tmp = gen_lowpart (DImode, operands[0]);
!       tmp = gen_rtx_ZERO_EXTRACT (DImode, tmp, const1_rtx, GEN_INT (63));
!       operands[0] = tmp;
  
!       if (GET_CODE (operands[1]) == ABS)
! 	tmp = const0_rtx;
!       else
! 	tmp = gen_rtx_NOT (DImode, tmp);
!     }
!   else
!     {
!       operands[0] = gen_highpart (SImode, operands[0]);
!       if (GET_CODE (operands[1]) == ABS)
! 	{
! 	  tmp = gen_int_mode (0x7fffffff, SImode);
! 	  tmp = gen_rtx_AND (SImode, operands[0], tmp);
! 	}
!       else
! 	{
! 	  tmp = gen_int_mode (0x80000000, SImode);
! 	  tmp = gen_rtx_XOR (SImode, operands[0], tmp);
! 	}
!     }
!   operands[1] = tmp;
! })
  
  (define_split
!   [(set (match_operand:XF 0 "register_operand" "")
! 	(match_operator:XF 1 "absneg_operator" [(match_dup 0)]))
!    (use (match_operand 2 "" ""))
     (clobber (reg:CC FLAGS_REG))]
!   "reload_completed"
!   [(parallel [(set (match_dup 0) (match_dup 1))
! 	      (clobber (reg:CC FLAGS_REG))])]
! {
!   rtx tmp;
!   operands[0] = gen_rtx_REG (SImode,
! 			     true_regnum (operands[0])
! 			     + (TARGET_64BIT ? 1 : 2));
!   if (GET_CODE (operands[1]) == ABS)
!     {
!       tmp = GEN_INT (0x7fff);
!       tmp = gen_rtx_AND (SImode, operands[0], tmp);
!     }
!   else
!     {
!       tmp = GEN_INT (0x8000);
!       tmp = gen_rtx_XOR (SImode, operands[0], tmp);
!     }
!   operands[1] = tmp;
! })
  
  (define_split
!   [(set (match_operand 0 "memory_operand" "")
! 	(match_operator 1 "absneg_operator" [(match_dup 0)]))
!    (use (match_operand 2 "" ""))
     (clobber (reg:CC FLAGS_REG))]
!   "reload_completed"
!   [(parallel [(set (match_dup 0) (match_dup 1))
  	      (clobber (reg:CC FLAGS_REG))])]
! {  /* After reload, rewrite an in-place abs/neg on memory as a QImode AND/XOR of a single byte.  */
!   enum machine_mode mode = GET_MODE (operands[0]);
!   int size = mode == XFmode ? 10 : GET_MODE_SIZE (mode);  /* XFmode is counted as 10 bytes here rather than GET_MODE_SIZE.  */
!   rtx tmp;
  
!   operands[0] = adjust_address (operands[0], QImode, size - 1);  /* Byte at offset size-1; presumably the one containing the sign bit.  */
!   if (GET_CODE (operands[1]) == ABS)
!     {
!       tmp = gen_int_mode (0x7f, QImode);  /* abs: the AND clears bit 7 of that byte.  */
!       tmp = gen_rtx_AND (QImode, operands[0], tmp);
!     }
!   else
!     {
!       tmp = gen_int_mode (0x80, QImode);  /* neg: the XOR toggles bit 7 of that byte.  */
!       tmp = gen_rtx_XOR (QImode, operands[0], tmp);
!     }
!   operands[1] = tmp;  /* Emitted as (match_dup 1) in the replacement pattern.  */
! })
! 
! ;; Conditionalize these after reload. If they match before reload, we 
  ;; lose the clobber and ability to use integer instructions.
  
  (define_insn "*negsf2_1"
***************
*** 9922,9943 ****
    [(set_attr "type" "fsgn")
     (set_attr "mode" "DF")])
  
! (define_insn "*negextendsfdf2"
!   [(set (match_operand:DF 0 "register_operand" "=f")
! 	(neg:DF (float_extend:DF
! 		  (match_operand:SF 1 "register_operand" "0"))))]
!   "TARGET_80387"
    "fchs"
    [(set_attr "type" "fsgn")
     (set_attr "mode" "DF")])
  
! (define_insn "*negxf2_1"
    [(set (match_operand:XF 0 "register_operand" "=f")
! 	(neg:XF (match_operand:XF 1 "register_operand" "0")))]
    "TARGET_80387 && reload_completed"
    "fchs"
    [(set_attr "type" "fsgn")
!    (set_attr "mode" "XF")])
  
  (define_insn "*negextenddfxf2"
    [(set (match_operand:XF 0 "register_operand" "=f")
--- 9837,9882 ----
    [(set_attr "type" "fsgn")
     (set_attr "mode" "DF")])
  
! (define_insn "*negxf2_1"
!   [(set (match_operand:XF 0 "register_operand" "=f")
! 	(neg:XF (match_operand:XF 1 "register_operand" "0")))]
!   "TARGET_80387 && reload_completed"
    "fchs"
    [(set_attr "type" "fsgn")
+    (set_attr "mode" "XF")])
+ 
+ (define_insn "*abssf2_1"
+   [(set (match_operand:SF 0 "register_operand" "=f")
+ 	(abs:SF (match_operand:SF 1 "register_operand" "0")))]
+   "TARGET_80387 && reload_completed"
+   "fabs"
+   [(set_attr "type" "fsgn")
+    (set_attr "mode" "SF")])
+ 
+ (define_insn "*absdf2_1"
+   [(set (match_operand:DF 0 "register_operand" "=f")
+ 	(abs:DF (match_operand:DF 1 "register_operand" "0")))]
+   "TARGET_80387 && reload_completed"
+   "fabs"
+   [(set_attr "type" "fsgn")
     (set_attr "mode" "DF")])
  
! (define_insn "*absxf2_1"  ; post-reload fabs on an XFmode operand in an "f"-constraint register
    [(set (match_operand:XF 0 "register_operand" "=f")
! 	(abs:XF (match_operand:XF 1 "register_operand" "0")))]
    "TARGET_80387 && reload_completed"
+   "fabs"
+   [(set_attr "type" "fsgn")
+    (set_attr "mode" "XF")])  ; XF to match the operand mode, as *negxf2_1 does
+ 
+ (define_insn "*negextendsfdf2"
+   [(set (match_operand:DF 0 "register_operand" "=f")
+ 	(neg:DF (float_extend:DF
+ 		  (match_operand:SF 1 "register_operand" "0"))))]
+   "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)"
    "fchs"
    [(set_attr "type" "fsgn")
!    (set_attr "mode" "DF")])
  
  (define_insn "*negextenddfxf2"
    [(set (match_operand:XF 0 "register_operand" "=f")
***************
*** 9956,10325 ****
    "fchs"
    [(set_attr "type" "fsgn")
     (set_attr "mode" "XF")])
- 
- ;; Absolute value instructions
- 
- (define_expand "abssf2"
-   [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "")
- 		   (neg:SF (match_operand:SF 1 "nonimmediate_operand" "")))
- 	      (clobber (reg:CC FLAGS_REG))])]
-   "TARGET_80387"
-   "if (TARGET_SSE)
-      {
-        /* In case operand is in memory,  we will not use SSE.  */
-        if (memory_operand (operands[0], VOIDmode)
- 	   && rtx_equal_p (operands[0], operands[1]))
- 	 emit_insn (gen_abssf2_memory (operands[0], operands[1]));
-        else
- 	{
- 	  /* Using SSE is tricky, since we need bitwise negation of -0
- 	     in register.  */
- 	  rtx reg = gen_reg_rtx (V4SFmode);
- 	  rtx dest = operands[0];
- 	  rtx imm;
- 
- 	  operands[1] = force_reg (SFmode, operands[1]);
- 	  operands[0] = force_reg (SFmode, operands[0]);
- 	  imm = gen_lowpart (SFmode, gen_int_mode(~0x80000000, SImode));
- 	  reg = force_reg (V4SFmode,
- 			   gen_rtx_CONST_VECTOR (V4SFmode,
- 			   gen_rtvec (4, imm, CONST0_RTX (SFmode),
- 				      CONST0_RTX (SFmode),
- 				      CONST0_RTX (SFmode))));
- 	  emit_insn (gen_abssf2_ifs (operands[0], operands[1], reg));
- 	  if (dest != operands[0])
- 	    emit_move_insn (dest, operands[0]);
- 	}
-        DONE;
-      }
-    ix86_expand_unary_operator (ABS, SFmode, operands); DONE;")
- 
- (define_insn "abssf2_memory"
-   [(set (match_operand:SF 0 "memory_operand" "=m")
- 	(abs:SF (match_operand:SF 1 "memory_operand" "0")))
-    (clobber (reg:CC FLAGS_REG))]
-   "ix86_unary_operator_ok (ABS, SFmode, operands)"
-   "#")
- 
- (define_insn "abssf2_ifs"
-   [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf")
- 	(abs:SF (match_operand:SF 1 "nonimmediate_operand" "0,x#fr,0,0")))
-    (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,xm*r,xm*r"))
-    (clobber (reg:CC FLAGS_REG))]
-   "TARGET_SSE
-    && (reload_in_progress || reload_completed
-        || (register_operand (operands[0], VOIDmode)
- 	    && register_operand (operands[1], VOIDmode)))"
-   "#")
- 
- (define_split
-   [(set (match_operand:SF 0 "memory_operand" "")
- 	(abs:SF (match_operand:SF 1 "memory_operand" "")))
-    (use (match_operand:V4SF 2 "" ""))
-    (clobber (reg:CC FLAGS_REG))]
-   ""
-   [(parallel [(set (match_dup 0)
- 		   (abs:SF (match_dup 1)))
- 	      (clobber (reg:CC FLAGS_REG))])])
- 
- (define_split
-   [(set (match_operand:SF 0 "register_operand" "")
- 	(abs:SF (match_operand:SF 1 "register_operand" "")))
-    (use (match_operand:V4SF 2 "" ""))
-    (clobber (reg:CC FLAGS_REG))]
-   "reload_completed && !SSE_REG_P (operands[0])"
-   [(parallel [(set (match_dup 0)
- 		   (abs:SF (match_dup 1)))
- 	      (clobber (reg:CC FLAGS_REG))])])
- 
- (define_split
-   [(set (match_operand:SF 0 "register_operand" "")
- 	(abs:SF (match_operand:SF 1 "register_operand" "")))
-    (use (match_operand:V4SF 2 "nonimmediate_operand" ""))
-    (clobber (reg:CC FLAGS_REG))]
-   "reload_completed && SSE_REG_P (operands[0])"
-   [(set (match_dup 0)
- 	(and:V4SF (match_dup 1)
- 		  (match_dup 2)))]
- {
-   operands[0] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
-   operands[1] = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
-   if (operands_match_p (operands[0], operands[2]))
-     {
-       rtx tmp;
-       tmp = operands[1];
-       operands[1] = operands[2];
-       operands[2] = tmp;
-     }
- })
- 
- ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
- ;; because of secondary memory needed to reload from class FLOAT_INT_REGS
- ;; to itself.
- (define_insn "*abssf2_if"
-   [(set (match_operand:SF 0 "nonimmediate_operand" "=f#r,rm#f")
- 	(abs:SF (match_operand:SF 1 "nonimmediate_operand" "0,0")))
-    (clobber (reg:CC FLAGS_REG))]
-   "TARGET_80387 && ix86_unary_operator_ok (ABS, SFmode, operands) && !TARGET_SSE"
-   "#")
- 
- (define_split
-   [(set (match_operand:SF 0 "fp_register_operand" "")
- 	(abs:SF (match_operand:SF 1 "register_operand" "")))
-    (clobber (reg:CC FLAGS_REG))]
-   "TARGET_80387 && reload_completed"
-   [(set (match_dup 0)
- 	(abs:SF (match_dup 1)))]
-   "")
- 
- (define_split
-   [(set (match_operand:SF 0 "register_and_not_fp_reg_operand" "")
- 	(abs:SF (match_operand:SF 1 "register_operand" "")))
-    (clobber (reg:CC FLAGS_REG))]
-   "TARGET_80387 && reload_completed"
-   [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 1)))
- 	      (clobber (reg:CC FLAGS_REG))])]
-   "operands[1] = gen_int_mode (~0x80000000, SImode);
-    operands[0] = gen_lowpart (SImode, operands[0]);")
- 
- (define_split
-   [(set (match_operand 0 "memory_operand" "")
- 	(abs (match_operand 1 "memory_operand" "")))
-    (clobber (reg:CC FLAGS_REG))]
-   "TARGET_80387 && reload_completed && FLOAT_MODE_P (GET_MODE (operands[0]))"
-   [(parallel [(set (match_dup 0) (and:QI (match_dup 0) (match_dup 1)))
- 	      (clobber (reg:CC FLAGS_REG))])]
- {
-   int size = GET_MODE_SIZE (GET_MODE (operands[1]));
- 
-   if (GET_MODE (operands[1]) == XFmode)
-     size = 10;
-   operands[0] = adjust_address (operands[0], QImode, size - 1);
-   operands[1] = gen_int_mode (~0x80, QImode);
- })
- 
- (define_expand "absdf2"
-   [(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "")
- 		   (neg:DF (match_operand:DF 1 "nonimmediate_operand" "")))
- 	      (clobber (reg:CC FLAGS_REG))])]
-   "TARGET_80387"
-   "if (TARGET_SSE2)
-      {
-        /* In case operand is in memory,  we will not use SSE.  */
-        if (memory_operand (operands[0], VOIDmode)
- 	   && rtx_equal_p (operands[0], operands[1]))
- 	 emit_insn (gen_absdf2_memory (operands[0], operands[1]));
-        else
- 	{
- 	  /* Using SSE is tricky, since we need bitwise negation of -0
- 	     in register.  */
- 	  rtx reg = gen_reg_rtx (V2DFmode);
- #if HOST_BITS_PER_WIDE_INT >= 64
- 	  rtx imm = gen_int_mode (~(((HOST_WIDE_INT)1) << 63), DImode);
- #else
- 	  rtx imm = immed_double_const (~0, ~0x80000000, DImode);
- #endif
- 	  rtx dest = operands[0];
- 
- 	  operands[1] = force_reg (DFmode, operands[1]);
- 	  operands[0] = force_reg (DFmode, operands[0]);
- 
- 	  /* Produce LONG_DOUBLE with the proper immediate argument.  */
- 	  imm = gen_lowpart (DFmode, imm);
- 	  reg = force_reg (V2DFmode,
- 			   gen_rtx_CONST_VECTOR (V2DFmode,
- 			   gen_rtvec (2, imm, CONST0_RTX (DFmode))));
- 	  emit_insn (gen_absdf2_ifs (operands[0], operands[1], reg));
- 	  if (dest != operands[0])
- 	    emit_move_insn (dest, operands[0]);
- 	}
-        DONE;
-      }
-    ix86_expand_unary_operator (ABS, DFmode, operands); DONE;")
- 
- (define_insn "absdf2_memory"
-   [(set (match_operand:DF 0 "memory_operand" "=m")
- 	(abs:DF (match_operand:DF 1 "memory_operand" "0")))
-    (clobber (reg:CC FLAGS_REG))]
-   "ix86_unary_operator_ok (ABS, DFmode, operands)"
-   "#")
- 
- (define_insn "absdf2_ifs"
-   [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr,mr#Yf")
- 	(abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0,0")))
-    (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Ym*r,Ym*r"))
-    (clobber (reg:CC FLAGS_REG))]
-   "!TARGET_64BIT && TARGET_SSE2
-    && (reload_in_progress || reload_completed
-        || (register_operand (operands[0], VOIDmode)
- 	   && register_operand (operands[1], VOIDmode)))"
-   "#")
- 
- (define_insn "*absdf2_ifs_rex64"
-   [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr")
- 	(abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0")))
-    (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Ym*r"))
-    (clobber (reg:CC FLAGS_REG))]
-   "TARGET_64BIT && TARGET_SSE2
-    && (reload_in_progress || reload_completed
-        || (register_operand (operands[0], VOIDmode)
- 	   && register_operand (operands[1], VOIDmode)))"
-   "#")
- 
- (define_split
-   [(set (match_operand:DF 0 "memory_operand" "")
- 	(abs:DF (match_operand:DF 1 "memory_operand" "")))
-    (use (match_operand:V2DF 2 "" ""))
-    (clobber (reg:CC FLAGS_REG))]
-   ""
-   [(parallel [(set (match_dup 0)
- 		   (abs:DF (match_dup 1)))
- 	      (clobber (reg:CC FLAGS_REG))])])
- 
- (define_split
-   [(set (match_operand:DF 0 "register_operand" "")
- 	(abs:DF (match_operand:DF 1 "register_operand" "")))
-    (use (match_operand:V2DF 2 "" ""))
-    (clobber (reg:CC FLAGS_REG))]
-   "reload_completed && !SSE_REG_P (operands[0])"
-   [(parallel [(set (match_dup 0)
- 		   (abs:DF (match_dup 1)))
- 	      (clobber (reg:CC FLAGS_REG))])])
- 
- (define_split
-   [(set (match_operand:DF 0 "register_operand" "")
- 	(abs:DF (match_operand:DF 1 "register_operand" "")))
-    (use (match_operand:V2DF 2 "nonimmediate_operand" ""))
-    (clobber (reg:CC FLAGS_REG))]
-   "reload_completed && SSE_REG_P (operands[0])"
-   [(set (match_dup 0)
- 	(and:V2DF (match_dup 1)
- 		  (match_dup 2)))]
- {
-   operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
-   operands[1] = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
-   /* Avoid possible reformatting on the operands.  */
-   if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
-     emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0]));
-   if (operands_match_p (operands[0], operands[2]))
-     {
-       rtx tmp;
-       tmp = operands[1];
-       operands[1] = operands[2];
-       operands[2] = tmp;
-     }
- })
- 
- 
- ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
- ;; because of secondary memory needed to reload from class FLOAT_INT_REGS
- ;; to itself.
- (define_insn "*absdf2_if"
-   [(set (match_operand:DF 0 "nonimmediate_operand" "=f#r,rm#f")
- 	(abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,0")))
-    (clobber (reg:CC FLAGS_REG))]
-   "!TARGET_64BIT && TARGET_80387
-    && ix86_unary_operator_ok (ABS, DFmode, operands)"
-   "#")
- 
- ;; FIXME: We should to allow integer registers here.  Problem is that
- ;; we need another scratch register to get constant from.
- ;; Forcing constant to mem if no register available in peep2 should be
- ;; safe even for PIC mode, because of RIP relative addressing.
- (define_insn "*absdf2_if_rex64"
-   [(set (match_operand:DF 0 "nonimmediate_operand" "=f,mf")
- 	(abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,0")))
-    (clobber (reg:CC FLAGS_REG))]
-   "TARGET_64BIT && TARGET_80387
-    && ix86_unary_operator_ok (ABS, DFmode, operands)"
-   "#")
- 
- (define_split
-   [(set (match_operand:DF 0 "fp_register_operand" "")
- 	(abs:DF (match_operand:DF 1 "register_operand" "")))
-    (clobber (reg:CC FLAGS_REG))]
-   "TARGET_80387 && reload_completed"
-   [(set (match_dup 0)
- 	(abs:DF (match_dup 1)))]
-   "")
- 
- (define_split
-   [(set (match_operand:DF 0 "register_and_not_fp_reg_operand" "")
- 	(abs:DF (match_operand:DF 1 "register_operand" "")))
-    (clobber (reg:CC FLAGS_REG))]
-   "!TARGET_64BIT && TARGET_80387 && reload_completed"
-   [(parallel [(set (match_dup 3) (and:SI (match_dup 3) (match_dup 4)))
- 	      (clobber (reg:CC FLAGS_REG))])]
-   "operands[4] = gen_int_mode (~0x80000000, SImode);
-    split_di (operands+0, 1, operands+2, operands+3);")
- 
- (define_expand "absxf2"
-   [(parallel [(set (match_operand:XF 0 "nonimmediate_operand" "")
- 		   (neg:XF (match_operand:XF 1 "nonimmediate_operand" "")))
- 	      (clobber (reg:CC FLAGS_REG))])]
-   "TARGET_80387"
-   "ix86_expand_unary_operator (ABS, XFmode, operands); DONE;")
- 
- ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
- ;; because of secondary memory needed to reload from class FLOAT_INT_REGS
- ;; to itself.
- (define_insn "*absxf2_if"
-   [(set (match_operand:XF 0 "nonimmediate_operand" "=f#r,rm#f")
- 	(abs:XF (match_operand:XF 1 "nonimmediate_operand" "0,0")))
-    (clobber (reg:CC FLAGS_REG))]
-   "TARGET_80387
-    && ix86_unary_operator_ok (ABS, XFmode, operands)"
-   "#")
- 
- (define_split
-   [(set (match_operand:XF 0 "fp_register_operand" "")
- 	(abs:XF (match_operand:XF 1 "register_operand" "")))
-    (clobber (reg:CC FLAGS_REG))]
-   "TARGET_80387 && reload_completed"
-   [(set (match_dup 0)
- 	(abs:XF (match_dup 1)))]
-   "")
- 
- (define_split
-   [(set (match_operand:XF 0 "register_and_not_fp_reg_operand" "")
- 	(abs:XF (match_operand:XF 1 "register_operand" "")))
-    (clobber (reg:CC FLAGS_REG))]
-   "TARGET_80387 && reload_completed"
-   [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 1)))
- 	      (clobber (reg:CC FLAGS_REG))])]
-   "operands[1] = GEN_INT (~0x8000);
-    operands[0] = gen_rtx_REG (SImode,
- 			      true_regnum (operands[0]) + (TARGET_64BIT ? 1 : 2));")
- 
- (define_insn "*abssf2_1"
-   [(set (match_operand:SF 0 "register_operand" "=f")
- 	(abs:SF (match_operand:SF 1 "register_operand" "0")))]
-   "TARGET_80387 && reload_completed"
-   "fabs"
-   [(set_attr "type" "fsgn")
-    (set_attr "mode" "SF")])
- 
- (define_insn "*absdf2_1"
-   [(set (match_operand:DF 0 "register_operand" "=f")
- 	(abs:DF (match_operand:DF 1 "register_operand" "0")))]
-   "TARGET_80387 && reload_completed"
-   "fabs"
-   [(set_attr "type" "fsgn")
-    (set_attr "mode" "DF")])
  
  (define_insn "*absextendsfdf2"
    [(set (match_operand:DF 0 "register_operand" "=f")
  	(abs:DF (float_extend:DF
  		  (match_operand:SF 1 "register_operand" "0"))))]
!   "TARGET_80387"
!   "fabs"
!   [(set_attr "type" "fsgn")
!    (set_attr "mode" "DF")])
! 
! (define_insn "*absxf2_1"
!   [(set (match_operand:XF 0 "register_operand" "=f")
! 	(abs:XF (match_operand:XF 1 "register_operand" "0")))]
!   "TARGET_80387 && reload_completed"
    "fabs"
    [(set_attr "type" "fsgn")
     (set_attr "mode" "DF")])
--- 9895,9906 ----
    "fchs"
    [(set_attr "type" "fsgn")
     (set_attr "mode" "XF")])
  
  (define_insn "*absextendsfdf2"
    [(set (match_operand:DF 0 "register_operand" "=f")
  	(abs:DF (float_extend:DF
  		  (match_operand:SF 1 "register_operand" "0"))))]
!   "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)"
    "fabs"
    [(set_attr "type" "fsgn")
     (set_attr "mode" "DF")])
***************
*** 12625,12630 ****
--- 12206,12351 ----
  })
  
  ;; %%% bts, btr, btc, bt.
+ ;; In general these instructions are *slow* when applied to memory,
+ ;; since they enforce atomic operation.  When applied to registers,
+ ;; it depends on the cpu implementation.  They're never faster than
+ ;; the corresponding and/ior/xor operations, so with 32-bit there's
+ ;; no point.  But in 64-bit, we can't hold the relevant immediates
+ ;; within the instruction itself, so operating on bits in the high
+ ;; 32-bits of a register becomes easier.
+ ;;
+ ;; These are slow on Nocona, but fast on Athlon64.  We do require the use
+ ;; of btrq and btcq for corner cases of post-reload expansion of absdf and
+ ;; negdf respectively, so they can never be disabled entirely.
+ 
+ (define_insn "*btsq"  ; set a single bit of a register to 1; operand 1 is the bit index (0..63)
+   [(set (zero_extract:DI (match_operand 0 "register_operand" "+r")
+ 			 (const_int 1)
+ 			 (match_operand 1 "const_0_to_63_operand" ""))
+ 	(const_int 1))
+    (clobber (reg:CC FLAGS_REG))]
+   "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"  ; gated on TARGET_USE_BT before reload, always allowed after it
+   "bts{q} %1,%0"
+   [(set_attr "type" "alu1")])
+ 
+ (define_insn "*btrq"  ; clear a single bit of a register to 0; operand 1 is the bit index (0..63)
+   [(set (zero_extract:DI (match_operand 0 "register_operand" "+r")
+ 			 (const_int 1)
+ 			 (match_operand 1 "const_0_to_63_operand" ""))
+ 	(const_int 0))
+    (clobber (reg:CC FLAGS_REG))]
+   "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"  ; gated on TARGET_USE_BT before reload, always allowed after it
+   "btr{q} %1,%0"
+   [(set_attr "type" "alu1")])
+ 
+ (define_insn "*btcq"  ; complement a single bit of a register; operand 1 is the bit index (0..63)
+   [(set (zero_extract:DI (match_operand 0 "register_operand" "+r")
+ 			 (const_int 1)
+ 			 (match_operand 1 "const_0_to_63_operand" ""))
+ 	(not:DI (zero_extract:DI (match_dup 0) (const_int 1) (match_dup 1))))  ; new bit value = NOT of the same bit
+    (clobber (reg:CC FLAGS_REG))]
+   "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"  ; gated on TARGET_USE_BT before reload, always allowed after it
+   "btc{q} %1,%0"
+   [(set_attr "type" "alu1")])
+ 
+ ;; Allow Nocona to avoid these instructions if a register is available.
+ 
+ (define_peephole2
+   [(match_scratch:DI 2 "r")
+    (parallel [(set (zero_extract:DI
+ 		     (match_operand 0 "register_operand" "")
+ 		     (const_int 1)
+ 		     (match_operand 1 "const_0_to_63_operand" ""))
+ 		   (const_int 1))
+ 	      (clobber (reg:CC FLAGS_REG))])]
+   "TARGET_64BIT && !TARGET_USE_BT"
+   [(const_int 0)]
+ {  /* With a scratch register available, replace the single-bit set by an IOR with the mask 1<<i.  */
+   HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo;  /* i: bit index; lo/hi: the two halves handed to immed_double_const.  */
+   rtx op1;
+ 
+   if (HOST_BITS_PER_WIDE_INT >= 64)
+     lo = (HOST_WIDE_INT)1 << i, hi = 0;
+   else if (i < HOST_BITS_PER_WIDE_INT)
+     lo = (HOST_WIDE_INT)1 << i, hi = 0;
+   else
+     lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT);  /* Narrow host: the bit lives in the high half.  */
+ 
+   op1 = immed_double_const (lo, hi, DImode);  /* DImode constant with only bit i set.  */
+   if (i >= 31)  /* Presumably 1<<31 and above cannot be encoded as an instruction immediate, so go through the scratch.  */
+     {
+       emit_move_insn (operands[2], op1);
+       op1 = operands[2];
+     }
+ 
+   emit_insn (gen_iordi3 (operands[0], operands[0], op1));  /* operands[0] |= mask.  */
+   DONE;
+ })
+ 
+ (define_peephole2
+   [(match_scratch:DI 2 "r")
+    (parallel [(set (zero_extract:DI
+ 		     (match_operand 0 "register_operand" "")
+ 		     (const_int 1)
+ 		     (match_operand 1 "const_0_to_63_operand" ""))
+ 		   (const_int 0))
+ 	      (clobber (reg:CC FLAGS_REG))])]
+   "TARGET_64BIT && !TARGET_USE_BT"
+   [(const_int 0)]
+ {  /* With a scratch register available, replace the single-bit clear by an AND with the mask ~(1<<i).  */
+   HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo;  /* i: bit index; lo/hi: halves of 1<<i, inverted below.  */
+   rtx op1;
+ 
+   if (HOST_BITS_PER_WIDE_INT >= 64)
+     lo = (HOST_WIDE_INT)1 << i, hi = 0;
+   else if (i < HOST_BITS_PER_WIDE_INT)
+     lo = (HOST_WIDE_INT)1 << i, hi = 0;
+   else
+     lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT);  /* Narrow host: the bit lives in the high half.  */
+ 
+   op1 = immed_double_const (~lo, ~hi, DImode);  /* DImode constant with every bit set except bit i.  */
+   if (i >= 31)  /* ~(1<<31) is 0xffffffff7fffffff, not a sign-extended 32-bit immediate, so bit 31 needs the scratch too.  */
+     {
+       emit_move_insn (operands[2], op1);
+       op1 = operands[2];
+     }
+ 
+   emit_insn (gen_anddi3 (operands[0], operands[0], op1));  /* operands[0] &= mask.  */
+   DONE;
+ })
+ 
+ (define_peephole2
+   [(match_scratch:DI 2 "r")
+    (parallel [(set (zero_extract:DI
+ 		     (match_operand 0 "register_operand" "")
+ 		     (const_int 1)
+ 		     (match_operand 1 "const_0_to_63_operand" ""))
+ 	      (not:DI (zero_extract:DI
+ 			(match_dup 0) (const_int 1) (match_dup 1))))
+ 	      (clobber (reg:CC FLAGS_REG))])]
+   "TARGET_64BIT && !TARGET_USE_BT"
+   [(const_int 0)]
+ {  /* With a scratch register available, replace the single-bit complement by an XOR with the mask 1<<i.  */
+   HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo;  /* i: bit index; lo/hi: the two halves handed to immed_double_const.  */
+   rtx op1;
+ 
+   if (HOST_BITS_PER_WIDE_INT >= 64)
+     lo = (HOST_WIDE_INT)1 << i, hi = 0;
+   else if (i < HOST_BITS_PER_WIDE_INT)
+     lo = (HOST_WIDE_INT)1 << i, hi = 0;
+   else
+     lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT);  /* Narrow host: the bit lives in the high half.  */
+ 
+   op1 = immed_double_const (lo, hi, DImode);  /* DImode constant with only bit i set.  */
+   if (i >= 31)  /* Presumably 1<<31 and above cannot be encoded as an instruction immediate, so go through the scratch.  */
+     {
+       emit_move_insn (operands[2], op1);
+       op1 = operands[2];
+     }
+ 
+   emit_insn (gen_xordi3 (operands[0], operands[0], op1));  /* operands[0] ^= mask.  */
+   DONE;
+ })
  
  ;; Store-flag instructions.
  
***************
*** 19995,20001 ****
  (define_split
    [(set (match_operand:V4SF 0 "register_operand" "")
  	(match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
!   "TARGET_SSE"
    [(set (match_dup 0)
  	(vec_merge:V4SF
  	 (vec_duplicate:V4SF (match_dup 1))
--- 19716,19722 ----
  (define_split
    [(set (match_operand:V4SF 0 "register_operand" "")
  	(match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
!   "TARGET_SSE && reload_completed"
    [(set (match_dup 0)
  	(vec_merge:V4SF
  	 (vec_duplicate:V4SF (match_dup 1))
***************
*** 20089,20095 ****
  (define_split
    [(set (match_operand:V2DF 0 "register_operand" "")
  	(match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
!   "TARGET_SSE2"
    [(set (match_dup 0)
  	(vec_merge:V2DF
  	 (vec_duplicate:V2DF (match_dup 1))
--- 19810,19816 ----
  (define_split
    [(set (match_operand:V2DF 0 "register_operand" "")
  	(match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
!   "TARGET_SSE2 && reload_completed"
    [(set (match_dup 0)
  	(vec_merge:V2DF
  	 (vec_duplicate:V2DF (match_dup 1))
Index: config/i386/predicates.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/predicates.md,v
retrieving revision 1.8
diff -c -p -d -r1.8 predicates.md
*** config/i386/predicates.md	13 Dec 2004 10:27:46 -0000	1.8
--- config/i386/predicates.md	14 Dec 2004 22:32:32 -0000
***************
*** 536,541 ****
--- 536,546 ----
    (and (match_code "const_int")
         (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 15")))
  
+ ;; Match 0 to 63.
+ (define_predicate "const_0_to_63_operand"
+   (and (match_code "const_int")
+        (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 63")))
+ 
  ;; Match 0 to 255.
  (define_predicate "const_0_to_255_operand"
    (and (match_code "const_int")
***************
*** 835,837 ****
--- 840,845 ----
  
  (define_predicate "compare_operator"
    (match_code "compare"))
+ 
+ (define_predicate "absneg_operator"  ; true for an rtx whose code is ABS or NEG
+   (match_code "abs,neg"))


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]