This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
SSE abs/neg patterns
- To: rth at cygnus dot com, gcc-patches at gcc dot gnu dot org, patches at x86-64 dot org
- Subject: SSE abs/neg patterns
- From: Jan Hubicka <jh at suse dot cz>
- Date: Sat, 3 Mar 2001 19:58:51 +0100
Hi,
This is an attempt to fill the last two gaps in SSE code generation - neg and abs.
SSE has no direct instructions for these beasts and I see two ways to implement
them - using logical operations on the sign bit or using arithmetic.
This patch employs the second approach, since the first requires creating
a negative zero value in a register and I don't know of any reasonable way
to do so except for loading it from memory.
The neg is implemented as "0-x", while abs as "max (x, 0-x)".
Both have problems with zeros. The signedness of "0-0" is undefined,
so the sign doesn't actually have to be reversed.
I am not sure whether this is a problem or not - at least currently gcc compiles
"-0.0" as "0.0" and does a few optimizations with similar problems.
If this is a problem, I would love to see these versions enabled at least under
-ffast-math.
Honza
Sat Mar 3 19:52:47 CET 2001 Jan Hubicka <jh@suse.cz>
* i386.md (negsf, negdf, abssf, absdf): Enable for non TARGET_80387 too.
(neg?f2_if, abs?f2_if): Add SSE; new splitter.
(neg?f2_if_sse_only, abs?f2_if_sse_only): New.
Index: i386.md
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.md,v
retrieving revision 1.227
diff -c -3 -p -r1.227 i386.md
*** i386.md 2001/02/28 18:34:35 1.227
--- i386.md 2001/03/03 18:50:45
***************
*** 7139,7155 ****
[(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "")
(neg:SF (match_operand:SF 1 "nonimmediate_operand" "")))
(clobber (reg:CC 17))])]
! "TARGET_80387"
"ix86_expand_unary_operator (NEG, SFmode, operands); DONE;")
;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
;; because of secondary memory needed to reload from class FLOAT_INT_REGS
;; to itself.
(define_insn "*negsf2_if"
! [(set (match_operand:SF 0 "nonimmediate_operand" "=f#r,rm#f")
! (neg:SF (match_operand:SF 1 "nonimmediate_operand" "0,0")))
(clobber (reg:CC 17))]
! "TARGET_80387 && ix86_unary_operator_ok (NEG, SFmode, operands)"
"#")
(define_split
--- 6925,6949 ----
[(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "")
(neg:SF (match_operand:SF 1 "nonimmediate_operand" "")))
(clobber (reg:CC 17))])]
! ""
"ix86_expand_unary_operator (NEG, SFmode, operands); DONE;")
;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
;; because of secondary memory needed to reload from class FLOAT_INT_REGS
;; to itself.
(define_insn "*negsf2_if"
! [(set (match_operand:SF 0 "nonimmediate_operand" "=&x#fr,f#rx,rm#fx")
! (neg:SF (match_operand:SF 1 "nonimmediate_operand" "xm#fr,0,0")))
! (clobber (reg:CC 17))]
! "TARGET_80387
! && (!TARGET_SSE2 || ix86_unary_operator_ok (NEG, SFmode, operands))"
! "#")
!
! (define_insn "*negsf2_if_sse_only"
! [(set (match_operand:SF 0 "nonimmediate_operand" "=&x#r,rm#x")
! (neg:SF (match_operand:SF 1 "nonimmediate_operand" "xm#r,0")))
(clobber (reg:CC 17))]
! "(!TARGET_SSE2 || ix86_unary_operator_ok (NEG, SFmode, operands))"
"#")
(define_split
***************
*** 7161,7168 ****
--- 6955,6974 ----
(neg:SF (match_dup 1)))]
"")
+ ;; We split negation as 0-x. We could also use xor with negative zero, which
+ ;; may be cheaper on some implementations. Pentium 4 prefers 0-x.
+ ;; The unfortunate fact is that we do have an earlyclobber on the destination.
(define_split
[(set (match_operand:SF 0 "register_operand" "")
+ (neg:SF (match_operand:SF 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC 17))]
+ "TARGET_SSE && SSE_REGNO_P (REGNO (operands[0])) && reload_completed"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 0) (minus:SF (match_dup 0) (match_dup 1)))]
+ "operands[2] = CONST0_RTX (SFmode);")
+
+ (define_split
+ [(set (match_operand:SF 0 "register_operand" "")
(neg:SF (match_operand:SF 1 "register_operand" "")))
(clobber (reg:CC 17))]
"TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))"
***************
*** 7194,7212 ****
[(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "")
(neg:DF (match_operand:DF 1 "nonimmediate_operand" "")))
(clobber (reg:CC 17))])]
! "TARGET_80387"
"ix86_expand_unary_operator (NEG, DFmode, operands); DONE;")
;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
;; because of secondary memory needed to reload from class FLOAT_INT_REGS
;; to itself.
(define_insn "*negdf2_if"
! [(set (match_operand:DF 0 "nonimmediate_operand" "=f#r,rm#f")
! (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,0")))
(clobber (reg:CC 17))]
! "TARGET_80387 && ix86_unary_operator_ok (NEG, DFmode, operands)"
"#")
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(neg:DF (match_operand:DF 1 "register_operand" "")))
--- 7000,7026 ----
[(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "")
(neg:DF (match_operand:DF 1 "nonimmediate_operand" "")))
(clobber (reg:CC 17))])]
! ""
"ix86_expand_unary_operator (NEG, DFmode, operands); DONE;")
;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
;; because of secondary memory needed to reload from class FLOAT_INT_REGS
;; to itself.
(define_insn "*negdf2_if"
! [(set (match_operand:DF 0 "nonimmediate_operand" "=&Y#fr,f#Yr,rm#Yf")
! (neg:DF (match_operand:DF 1 "nonimmediate_operand" "mY#fr,0,0")))
(clobber (reg:CC 17))]
! "TARGET_80387
! && (!TARGET_SSE2 || ix86_unary_operator_ok (NEG, DFmode, operands))"
"#")
+ (define_insn "*negdf2_if_sse_only"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=&Y#r,rm#Y")
+ (neg:DF (match_operand:DF 1 "nonimmediate_operand" "Ym#r,0")))
+ (clobber (reg:CC 17))]
+ "(!TARGET_SSE2 || ix86_unary_operator_ok (NEG, DFmode, operands))"
+ "#")
+
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(neg:DF (match_operand:DF 1 "register_operand" "")))
***************
*** 7216,7221 ****
--- 7030,7047 ----
(neg:DF (match_dup 1)))]
"")
+ ;; We split negation as 0-x. We could also use xor with negative zero, which
+ ;; may be cheaper on some implementations. Pentium 4 prefers 0-x.
+ ;; The unfortunate fact is that we do have an earlyclobber on the destination.
+ (define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (neg:DF (match_operand:DF 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC 17))]
+ "TARGET_SSE2 && SSE_REGNO_P (REGNO (operands[0])) && reload_completed"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 0) (minus:DF (match_dup 0) (match_dup 1)))]
+ "operands[2] = CONST0_RTX (DFmode);")
+
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(neg:DF (match_operand:DF 1 "register_operand" "")))
***************
*** 7393,7411 ****
[(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "")
(neg:SF (match_operand:SF 1 "nonimmediate_operand" "")))
(clobber (reg:CC 17))])]
! "TARGET_80387"
"ix86_expand_unary_operator (ABS, SFmode, operands); DONE;")
;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
;; because of secondary memory needed to reload from class FLOAT_INT_REGS
;; to itself.
(define_insn "*abssf2_if"
! [(set (match_operand:SF 0 "nonimmediate_operand" "=f#r,rm#f")
! (abs:SF (match_operand:SF 1 "nonimmediate_operand" "0,0")))
(clobber (reg:CC 17))]
! "TARGET_80387 && ix86_unary_operator_ok (ABS, SFmode, operands)"
"#")
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(abs:SF (match_operand:SF 1 "register_operand" "")))
--- 7219,7245 ----
[(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "")
(neg:SF (match_operand:SF 1 "nonimmediate_operand" "")))
(clobber (reg:CC 17))])]
! ""
"ix86_expand_unary_operator (ABS, SFmode, operands); DONE;")
;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
;; because of secondary memory needed to reload from class FLOAT_INT_REGS
;; to itself.
(define_insn "*abssf2_if"
! [(set (match_operand:SF 0 "nonimmediate_operand" "=&x#fr,f#rx,rm#fx")
! (abs:SF (match_operand:SF 1 "nonimmediate_operand" "xm#fr,0,0")))
(clobber (reg:CC 17))]
! "TARGET_80387
! && (!TARGET_SSE || ix86_unary_operator_ok (NEG, SFmode, operands))"
"#")
+ (define_insn "*abssf2_if_sse_only"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=&x#fr,rm#fx")
+ (abs:SF (match_operand:SF 1 "nonimmediate_operand" "xm#fr,0")))
+ (clobber (reg:CC 17))]
+ "(!TARGET_SSE || ix86_unary_operator_ok (NEG, SFmode, operands))"
+ "#")
+
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(abs:SF (match_operand:SF 1 "register_operand" "")))
***************
*** 7415,7420 ****
--- 7249,7267 ----
(abs:SF (match_dup 1)))]
"")
+ ;; Compute abs(x) as max (x, 0-x). Compared to and-not-ing with negative zero,
+ ;; this has the advantage of not requiring a constant to be built.
+ (define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (abs:SF (match_operand:SF 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC 17))]
+ "TARGET_SSE && SSE_REGNO_P (REGNO (operands[0])) && reload_completed"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 0) (minus:SF (match_dup 0) (match_dup 1)))
+ (set (match_dup 0) (if_then_else:SF (gt (match_dup 0) (match_dup 1))
+ (match_dup 0) (match_dup 1)))]
+ "operands[2] = CONST0_RTX (SFmode);")
+
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(abs:SF (match_operand:SF 1 "register_operand" "")))
***************
*** 7448,7464 ****
[(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "")
(neg:DF (match_operand:DF 1 "nonimmediate_operand" "")))
(clobber (reg:CC 17))])]
! "TARGET_80387"
"ix86_expand_unary_operator (ABS, DFmode, operands); DONE;")
;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
;; because of secondary memory needed to reload from class FLOAT_INT_REGS
;; to itself.
(define_insn "*absdf2_if"
! [(set (match_operand:DF 0 "nonimmediate_operand" "=f#r,rm#f")
! (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,0")))
(clobber (reg:CC 17))]
! "TARGET_80387 && ix86_unary_operator_ok (ABS, DFmode, operands)"
"#")
(define_split
--- 7295,7319 ----
[(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "")
(neg:DF (match_operand:DF 1 "nonimmediate_operand" "")))
(clobber (reg:CC 17))])]
! ""
"ix86_expand_unary_operator (ABS, DFmode, operands); DONE;")
;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
;; because of secondary memory needed to reload from class FLOAT_INT_REGS
;; to itself.
(define_insn "*absdf2_if"
! [(set (match_operand:DF 0 "nonimmediate_operand" "=&x#fr,f#rx,rm#fx")
! (abs:DF (match_operand:DF 1 "nonimmediate_operand" "xm#rf,0,0")))
! (clobber (reg:CC 17))]
! "TARGET_80387
! && (!TARGET_SSE2 || ix86_unary_operator_ok (NEG, SFmode, operands))"
! "#")
!
! (define_insn "*abssf2_if_sse_only"
! [(set (match_operand:DF 0 "nonimmediate_operand" "=&x#fr,rm#fx")
! (abs:DF (match_operand:DF 1 "nonimmediate_operand" "xm#fr,0")))
(clobber (reg:CC 17))]
! "(!TARGET_SSE2 || ix86_unary_operator_ok (NEG, SFmode, operands))"
"#")
(define_split
***************
*** 7470,7477 ****
--- 7325,7345 ----
(abs:DF (match_dup 1)))]
"")
+ ;; Compute abs(x) as max (x, 0-x). Compared to and-not-ing with negative zero,
+ ;; this has the advantage of not requiring a constant to be built.
(define_split
[(set (match_operand:DF 0 "register_operand" "")
+ (abs:DF (match_operand:DF 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC 17))]
+ "TARGET_SSE2 && SSE_REGNO_P (REGNO (operands[0])) && reload_completed"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 0) (minus:DF (match_dup 0) (match_dup 1)))
+ (set (match_dup 0) (if_then_else:DF (gt (match_dup 0) (match_dup 1))
+ (match_dup 0) (match_dup 1)))]
+ "operands[2] = CONST0_RTX (DFmode);")
+
+ (define_split
+ [(set (match_operand:DF 0 "register_operand" "")
(abs:DF (match_operand:DF 1 "register_operand" "")))
(clobber (reg:CC 17))]
"TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))"