This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[3.4-BIB] -march=pentium4 -msse2 fixes
- From: Jan Hubicka <jh at suse dot cz>
- To: gcc-patches at gcc dot gnu dot org
- Date: Sun, 17 Nov 2002 14:05:00 +0100
- Subject: [3.4-BIB] -march=pentium4 -msse2 fixes
Hi,
Diego's tester ran into a new problem - I made the abs/neg SSE patterns accept
memory operands, and that is a bug. The operand is not DFmode but
V2DFmode, so it must be aligned, and we can't ensure that. The attached patch
fixes the constraints to disallow memory operands, fixes a few inconsistencies
(I've verified by hand that they now generate proper preferences), and I also
added simplify_gen_subreg to the splitters to avoid unnecessary subregs in the
insn chain now that the logical patterns are redesigned.
I've regtested and bootstrapped the patch on the BIB branch and installed it as
obvious.
Honza
Sun Nov 17 13:49:46 CET 2002 Jan Hubicka <jh@suse.cz>
* i386.md (negsf2_ifs, negdf2_ifs, negdf2_ifs_rex64, abssf2_ifs,
absdf2_ifs, absdf2_ifs_rex64): Fix constraints.
(neg?f2_ifs, abs?f2_ifs splitters): Refuse memory operand; do not
generate unnecessary subregs.
*** i386.md Sat Nov 16 16:51:16 2002
--- /aux/hubicka/bib/egcs/gcc/config/i386/i386.md Sun Nov 17 12:42:48 2002
***************
*** 9559,9566 ****
(define_insn "negsf2_ifs"
[(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf")
! (neg:SF (match_operand:SF 1 "nonimmediate_operand" "0,xm#fr,0,0")))
! (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,*x*rm,*x*rm"))
(clobber (reg:CC 17))]
"TARGET_SSE
&& (reload_in_progress || reload_completed
--- 9559,9566 ----
(define_insn "negsf2_ifs"
[(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf")
! (neg:SF (match_operand:SF 1 "nonimmediate_operand" "0,x#fr,0,0")))
! (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,xm*r,xm*r"))
(clobber (reg:CC 17))]
"TARGET_SSE
&& (reload_in_progress || reload_completed
***************
*** 9590,9603 ****
(define_split
[(set (match_operand:SF 0 "register_operand" "")
! (neg:SF (match_operand:SF 1 "nonimmediate_operand" "")))
(use (match_operand:V4SF 2 "nonimmediate_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
! (xor:TI (subreg:TI (match_dup 1) 0)
! (subreg:TI (match_dup 2) 0)))]
{
if (operands_match_p (operands[0], operands[2]))
{
rtx tmp;
--- 9590,9605 ----
(define_split
[(set (match_operand:SF 0 "register_operand" "")
! (neg:SF (match_operand:SF 1 "register_operand" "")))
(use (match_operand:V4SF 2 "nonimmediate_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
! (xor:TI (match_dup 1)
! (match_dup 2)))]
{
+ operands[1] = simplify_gen_subreg (TImode, operands[1], SFmode, 0);
+ operands[2] = simplify_gen_subreg (TImode, operands[2], V4SFmode, 0);
if (operands_match_p (operands[0], operands[2]))
{
rtx tmp;
***************
*** 9701,9708 ****
(define_insn "negdf2_ifs"
[(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,f#Yr,rm#Yf")
! (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Ym#fr,0,0")))
! (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,*Y*rm,*Y*rm"))
(clobber (reg:CC 17))]
"!TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
--- 9703,9710 ----
(define_insn "negdf2_ifs"
[(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,f#Yr,rm#Yf")
! (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0,0")))
! (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Ym*r,Ym*r"))
(clobber (reg:CC 17))]
"!TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
***************
*** 9713,9719 ****
(define_insn "*negdf2_ifs_rex64"
[(set (match_operand:DF 0 "nonimmediate_operand" "=Y#f,Y#f,fm#Y")
(neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0")))
! (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,*Y*rm"))
(clobber (reg:CC 17))]
"TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
--- 9715,9721 ----
(define_insn "*negdf2_ifs_rex64"
[(set (match_operand:DF 0 "nonimmediate_operand" "=Y#f,Y#f,fm#Y")
(neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0")))
! (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Ym*r"))
(clobber (reg:CC 17))]
"TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
***************
*** 9757,9771 ****
(define_split
[(set (match_operand:DF 0 "register_operand" "")
! (neg:DF (match_operand:DF 1 "nonimmediate_operand" "")))
(use (match_operand:V2DF 2 "nonimmediate_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
! (xor:TI (subreg:TI (match_dup 1) 0)
! (subreg:TI (match_dup 2) 0)))]
{
operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
/* Avoid possible reformating on the operands. */
if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0]));
--- 9759,9775 ----
(define_split
[(set (match_operand:DF 0 "register_operand" "")
! (neg:DF (match_operand:DF 1 "register_operand" "")))
(use (match_operand:V2DF 2 "nonimmediate_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
! (xor:TI (match_dup 1)
! (match_dup 2)))]
{
operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
+ operands[1] = simplify_gen_subreg (TImode, operands[1], DFmode, 0);
+ operands[2] = simplify_gen_subreg (TImode, operands[2], V2DFmode, 0);
/* Avoid possible reformating on the operands. */
if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0]));
***************
*** 10030,10042 ****
(define_insn "abssf2_ifs"
[(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf")
! (abs:SF (match_operand:SF 1 "nonimmediate_operand" "0,x,0,0")))
! (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,x*rm,x*rm"))
(clobber (reg:CC 17))]
"TARGET_SSE
&& (reload_in_progress || reload_completed
|| (register_operand (operands[0], VOIDmode)
! && register_operand (operands[1], VOIDmode)))"
"#")
(define_split
--- 10034,10046 ----
(define_insn "abssf2_ifs"
[(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf")
! (abs:SF (match_operand:SF 1 "nonimmediate_operand" "0,x#fr,0,0")))
! (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,xm*r,xm*r"))
(clobber (reg:CC 17))]
"TARGET_SSE
&& (reload_in_progress || reload_completed
|| (register_operand (operands[0], VOIDmode)
! && register_operand (operands[1], VOIDmode)))"
"#")
(define_split
***************
*** 10061,10074 ****
(define_split
[(set (match_operand:SF 0 "register_operand" "")
! (abs:SF (match_operand:SF 1 "nonimmediate_operand" "")))
(use (match_operand:V4SF 2 "nonimmediate_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
! (and:TI (subreg:TI (match_dup 1) 0)
! (subreg:TI (match_dup 2) 0)))]
{
if (operands_match_p (operands[0], operands[2]))
{
rtx tmp;
--- 10065,10080 ----
(define_split
[(set (match_operand:SF 0 "register_operand" "")
! (abs:SF (match_operand:SF 1 "register_operand" "")))
(use (match_operand:V4SF 2 "nonimmediate_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
! (and:TI (match_dup 1)
! (match_dup 2)))]
{
+ operands[1] = simplify_gen_subreg (TImode, operands[1], SFmode, 0);
+ operands[2] = simplify_gen_subreg (TImode, operands[2], V4SFmode, 0);
if (operands_match_p (operands[0], operands[2]))
{
rtx tmp;
***************
*** 10172,10179 ****
(define_insn "absdf2_ifs"
[(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr,mr#Yf")
! (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y,0,0")))
! (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Y*rm,Y*rm"))
(clobber (reg:CC 17))]
"!TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
--- 10178,10185 ----
(define_insn "absdf2_ifs"
[(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr,mr#Yf")
! (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0,0")))
! (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Ym*r,Ym*r"))
(clobber (reg:CC 17))]
"!TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
***************
*** 10183,10190 ****
(define_insn "*absdf2_ifs_rex64"
[(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr")
! (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Ym,0")))
! (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,*0,*Y*rm"))
(clobber (reg:CC 17))]
"TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
--- 10189,10196 ----
(define_insn "*absdf2_ifs_rex64"
[(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr")
! (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0")))
! (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Ym*r"))
(clobber (reg:CC 17))]
"TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
***************
*** 10214,10228 ****
(define_split
[(set (match_operand:DF 0 "register_operand" "")
! (abs:DF (match_operand:DF 1 "nonimmediate_operand" "")))
(use (match_operand:V2DF 2 "nonimmediate_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
! (and:TI (subreg:TI (match_dup 1) 0)
! (subreg:TI (match_dup 2) 0)))]
{
operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
/* Avoid possible reformating on the operands. */
if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0]));
--- 10220,10236 ----
(define_split
[(set (match_operand:DF 0 "register_operand" "")
! (abs:DF (match_operand:DF 1 "register_operand" "")))
(use (match_operand:V2DF 2 "nonimmediate_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
! (and:TI (match_dup 1)
! (match_dup 2)))]
{
operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
+ operands[1] = simplify_gen_subreg (TImode, operands[1], DFmode, 0);
+ operands[2] = simplify_gen_subreg (TImode, operands[2], V2DFmode, 0);
/* Avoid possible reformating on the operands. */
if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0]));
***************
*** 17089,17095 ****
|| const0_operand (operands[3], GET_MODE (operands[0])))"
[(set (match_dup 0) (match_op_dup 1 [(match_dup 0) (match_dup 5)]))
(set (subreg:TI (match_dup 0) 0) (and:TI (match_dup 6)
! (subreg:TI (match_dup 7) 0)))]
{
if (TARGET_SSE_PARTIAL_REGS && !optimize_size
&& GET_MODE (operands[2]) == DFmode)
--- 17097,17103 ----
|| const0_operand (operands[3], GET_MODE (operands[0])))"
[(set (match_dup 0) (match_op_dup 1 [(match_dup 0) (match_dup 5)]))
(set (subreg:TI (match_dup 0) 0) (and:TI (match_dup 6)
! (match_dup 7)))]
{
if (TARGET_SSE_PARTIAL_REGS && !optimize_size
&& GET_MODE (operands[2]) == DFmode)
***************
*** 17124,17129 ****
--- 17132,17139 ----
operands[7] = operands[2];
operands[6] = gen_rtx_SUBREG (TImode, operands[0], 0);
}
+ operands[7] = simplify_gen_subreg (TImode, operands[7],
+ GET_MODE (operands[7]), 0);
})
(define_expand "allocate_stack_worker"