This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: SSE fix 4
> On Mon, Oct 14, 2002 at 05:22:59PM +0200, Jan Hubicka wrote:
> > (define_split
> > [(set (match_operand:SF 0 "register_operand" "")
> > + (abs:SF (match_dup 0)))
> > + (use (match_operand:V4SF 1 "nonmemory_operand" ""))
> > + (clobber (reg:CC 17))]
> > + "reload_completed && SSE_REG_P (operands[0])"
> > + [(set (subreg:TI (match_dup 0) 0)
> > + (and:TI (subreg:TI (match_dup 0) 0)
> > + (subreg:TI (match_dup 1) 0)))]
> > + {
> > + operands[0] = gen_rtx_SUBREG (V4SFmode, operands[0], 0);
>
> Produces nested subregs. Not Ok.
I caught that already.
This version builds SPEC without the nested subregs.
Mon Oct 14 18:41:50 CEST 2002 Jan Hubicka <jh@suse.cz>
* i386.md (abssf, absdf): Use vector operands for SSE.
(abssf2_ifs, absdf2_ifs, absdf2_ifs_rex64 and splitters): Update for
vector operand.
Index: i386.md
===================================================================
RCS file: /cvsroot/gcc/egcs/gcc/config/i386/i386.md,v
retrieving revision 1.380.4.8
diff -c -3 -p -r1.380.4.8 i386.md
*** i386.md 14 Oct 2002 02:42:25 -0000 1.380.4.8
--- i386.md 14 Oct 2002 16:54:11 -0000
***************
*** 9996,10009 ****
{
/* Using SSE is tricky, since we need bitwise negation of -0
in register. */
! rtx reg = gen_reg_rtx (SFmode);
rtx dest = operands[0];
operands[1] = force_reg (SFmode, operands[1]);
operands[0] = force_reg (SFmode, operands[0]);
emit_move_insn (reg,
! gen_lowpart (SFmode,
! gen_int_mode (0x80000000, SImode)));
emit_insn (gen_abssf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
--- 9996,10012 ----
{
/* Using SSE is tricky, since we need bitwise negation of -0
in register. */
! rtx reg = gen_reg_rtx (V4SFmode);
rtx dest = operands[0];
+ rtx imm;
operands[1] = force_reg (SFmode, operands[1]);
operands[0] = force_reg (SFmode, operands[0]);
+ imm = gen_lowpart (SFmode, gen_int_mode(~0x80000000, SImode));
emit_move_insn (reg,
! gen_rtx_CONST_VECTOR (V4SFmode,
! gen_rtvec (4, imm, CONST0_RTX (SFmode),
! CONST0_RTX (SFmode), CONST0_RTX (SFmode))));
emit_insn (gen_abssf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
***************
*** 10020,10028 ****
"#")
(define_insn "abssf2_ifs"
! [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,f#xr,rm#xf")
! (abs:SF (match_operand:SF 1 "nonimmediate_operand" "x,0,0")))
! (use (match_operand:SF 2 "nonmemory_operand" "*0#x,*g#x,*g#x"))
(clobber (reg:CC 17))]
"TARGET_SSE
&& (reload_in_progress || reload_completed
--- 10023,10031 ----
"#")
(define_insn "abssf2_ifs"
! [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf")
! (abs:SF (match_operand:SF 1 "nonimmediate_operand" "0,x,0,0")))
! (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,x*rm,x*rm"))
(clobber (reg:CC 17))]
"TARGET_SSE
&& (reload_in_progress || reload_completed
***************
*** 10033,10039 ****
(define_split
[(set (match_operand:SF 0 "memory_operand" "")
(abs:SF (match_operand:SF 1 "memory_operand" "")))
! (use (match_operand:SF 2 "" ""))
(clobber (reg:CC 17))]
""
[(parallel [(set (match_dup 0)
--- 10036,10042 ----
(define_split
[(set (match_operand:SF 0 "memory_operand" "")
(abs:SF (match_operand:SF 1 "memory_operand" "")))
! (use (match_operand:V4SF 2 "" ""))
(clobber (reg:CC 17))]
""
[(parallel [(set (match_dup 0)
***************
*** 10043,10049 ****
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(abs:SF (match_operand:SF 1 "register_operand" "")))
! (use (match_operand:SF 2 "" ""))
(clobber (reg:CC 17))]
"reload_completed && !SSE_REG_P (operands[0])"
[(parallel [(set (match_dup 0)
--- 10046,10052 ----
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(abs:SF (match_operand:SF 1 "register_operand" "")))
! (use (match_operand:V4SF 2 "" ""))
(clobber (reg:CC 17))]
"reload_completed && !SSE_REG_P (operands[0])"
[(parallel [(set (match_dup 0)
***************
*** 10052,10064 ****
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(abs:SF (match_operand:SF 1 "register_operand" "")))
! (use (match_operand:SF 2 "register_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
! (and:TI (not:TI (subreg:TI (match_dup 2) 0))
! (subreg:TI (match_dup 1) 0)))])
;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
;; because of secondary memory needed to reload from class FLOAT_INT_REGS
--- 10055,10088 ----
(define_split
[(set (match_operand:SF 0 "register_operand" "")
+ (abs:SF (match_dup 0)))
+ (use (match_operand:V4SF 1 "nonmemory_operand" ""))
+ (clobber (reg:CC 17))]
+ "reload_completed && SSE_REG_P (operands[0])"
+ [(set (subreg:TI (match_dup 0) 0)
+ (and:TI (subreg:TI (match_dup 0) 0)
+ (match_dup 1)))]
+ {
+ operands[0] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+ operands[1] = simplify_gen_subreg (TImode, operands[1], V4SFmode, 0);
+ })
+
+ (define_split
+ [(set (match_operand:SF 0 "register_operand" "")
(abs:SF (match_operand:SF 1 "register_operand" "")))
! (use (match_operand:V4SF 2 "register_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
! (and:TI (subreg:TI (match_dup 0) 0)
! (match_dup 1)))]
! {
! /* Operand2 should match operand0, as the opposite case is handled above. */
! if (REGNO (operands[2]) != REGNO (operands[0]))
! abort ();
! operands[0] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
! operands[1] = simplify_gen_subreg (TImode, operands[1], V4SFmode, 0);
! })
;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
;; because of secondary memory needed to reload from class FLOAT_INT_REGS
***************
*** 10121,10137 ****
{
/* Using SSE is tricky, since we need bitwise negation of -0
in register. */
! rtx reg = gen_reg_rtx (DFmode);
#if HOST_BITS_PER_WIDE_INT >= 64
! rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode);
#else
! rtx imm = immed_double_const (0, 0x80000000, DImode);
#endif
rtx dest = operands[0];
operands[1] = force_reg (DFmode, operands[1]);
operands[0] = force_reg (DFmode, operands[0]);
! emit_move_insn (reg, gen_lowpart (DFmode, imm));
emit_insn (gen_absdf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
--- 10145,10166 ----
{
/* Using SSE is tricky, since we need bitwise negation of -0
in register. */
! rtx reg = gen_reg_rtx (V2DFmode);
#if HOST_BITS_PER_WIDE_INT >= 64
! rtx imm = gen_int_mode (~(((HOST_WIDE_INT)1) << 63), DImode);
#else
! rtx imm = immed_double_const (~0, ~0x80000000, DImode);
#endif
rtx dest = operands[0];
operands[1] = force_reg (DFmode, operands[1]);
operands[0] = force_reg (DFmode, operands[0]);
!
! /* Produce LONG_DOUBLE with the proper immediate argument. */
! imm = gen_lowpart (DFmode, imm);
! emit_move_insn (reg,
! gen_rtx_CONST_VECTOR (V2DFmode,
! gen_rtvec (2, imm, CONST0_RTX (DFmode))));
emit_insn (gen_absdf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
***************
*** 10148,10156 ****
"#")
(define_insn "absdf2_ifs"
! [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,mf#Yr,mr#Yf")
! (abs:DF (match_operand:DF 1 "nonimmediate_operand" "Y,0,0")))
! (use (match_operand:DF 2 "nonmemory_operand" "*0#Y,*g#Y,*g#Y"))
(clobber (reg:CC 17))]
"!TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
--- 10177,10185 ----
"#")
(define_insn "absdf2_ifs"
! [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr,mr#Yf")
! (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y,0,0")))
! (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Y*rm,Y*rm"))
(clobber (reg:CC 17))]
"!TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
***************
*** 10159,10167 ****
"#")
(define_insn "*absdf2_ifs_rex64"
! [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,mf#Yr")
! (abs:DF (match_operand:DF 1 "nonimmediate_operand" "Y,0")))
! (use (match_operand:DF 2 "nonmemory_operand" "*0#Y,*g#Y"))
(clobber (reg:CC 17))]
"TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
--- 10188,10196 ----
"#")
(define_insn "*absdf2_ifs_rex64"
! [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr")
! (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y,0")))
! (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,*0,*Y*rm"))
(clobber (reg:CC 17))]
"TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
***************
*** 10172,10178 ****
(define_split
[(set (match_operand:DF 0 "memory_operand" "")
(abs:DF (match_operand:DF 1 "memory_operand" "")))
! (use (match_operand:DF 2 "" ""))
(clobber (reg:CC 17))]
""
[(parallel [(set (match_dup 0)
--- 10201,10207 ----
(define_split
[(set (match_operand:DF 0 "memory_operand" "")
(abs:DF (match_operand:DF 1 "memory_operand" "")))
! (use (match_operand:V2DF 2 "" ""))
(clobber (reg:CC 17))]
""
[(parallel [(set (match_dup 0)
***************
*** 10182,10188 ****
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(abs:DF (match_operand:DF 1 "register_operand" "")))
! (use (match_operand:DF 2 "" ""))
(clobber (reg:CC 17))]
"reload_completed && !SSE_REG_P (operands[0])"
[(parallel [(set (match_dup 0)
--- 10211,10217 ----
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(abs:DF (match_operand:DF 1 "register_operand" "")))
! (use (match_operand:V2DF 2 "" ""))
(clobber (reg:CC 17))]
"reload_completed && !SSE_REG_P (operands[0])"
[(parallel [(set (match_dup 0)
***************
*** 10191,10212 ****
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(abs:DF (match_operand:DF 1 "register_operand" "")))
! (use (match_operand:DF 2 "register_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
! (and:TI (not:TI (subreg:TI (match_dup 2) 0))
! (subreg:TI (match_dup 1) 0)))]
{
/* Avoid possible reformating on the operands. */
if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
! {
! rtx op = gen_rtx_SUBREG (V2DFmode, operands[1], 0);
! emit_insn (gen_sse2_unpcklpd (op, op, op));
! op = gen_rtx_SUBREG (V2DFmode, operands[2], 0);
! emit_insn (gen_sse2_unpcklpd (op, op, op));
! }
})
--- 10220,10258 ----
(define_split
[(set (match_operand:DF 0 "register_operand" "")
+ (abs:DF (match_dup:DF 0)))
+ (use (match_operand:V2DF 1 "nonimmediate_operand" ""))
+ (clobber (reg:CC 17))]
+ "reload_completed && SSE_REG_P (operands[0])"
+ [(set (subreg:TI (match_dup 0) 0)
+ (and:TI (subreg:TI (match_dup 0) 0)
+ (match_dup 1)))]
+ {
+ operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
+ operands[1] = simplify_gen_subreg (TImode, operands[1], V2DFmode, 0);
+ /* Avoid possible reformating on the operands. */
+ if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
+ emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0]));
+ })
+
+ (define_split
+ [(set (match_operand:DF 0 "register_operand" "")
(abs:DF (match_operand:DF 1 "register_operand" "")))
! (use (match_operand:V2DF 2 "register_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
! (and:TI (subreg:TI (match_dup 0) 0)
! (match_dup 1)))]
{
+ operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
+ operands[1] = simplify_gen_subreg (TImode, operands[1], V2DFmode, 0);
+ /* Operand2 should match operand0, as the opposite case is handled above. */
+ if (REGNO (operands[2]) != REGNO (operands[0]))
+ abort ();
/* Avoid possible reformating on the operands. */
if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
! emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0]));
})