This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[3.4-BIB] improve neg SSE generation
- From: Jan Hubicka <jh at suse dot cz>
- To: gcc-patches at gcc dot gnu dot org, rth at cygnus dot com
- Date: Sun, 27 Oct 2002 23:41:52 +0100
- Subject: [3.4-BIB] improve neg SSE generation
Hi,
this patch changes the neg patterns to work like the abs patterns I reorganized
some time ago. It also adds a splitter to optimize a movps vector load of a
constant whose fields are all 0 except the first into a movss load.
Honza
Mon Oct 28 01:51:45 CET 2002 Jan Hubicka <jh@suse.cz>
* i386.md (negsf, negdf): Reorganize to use vector modes
for SSE variants.
(abssf, absdf): Use force_reg.
(movv4sf, movv2df): New splitters.
	* i386.h (PREDICATE_CODES): Add zero_extended_scalar_load_operand.
	* i386.c (zero_extended_scalar_load_operand): New.
*** i386.md.old Sun Oct 27 23:00:14 2002
--- i386.md Mon Oct 28 00:53:03 2002
***************
*** 9532,9543 ****
in register. */
rtx reg = gen_reg_rtx (SFmode);
rtx dest = operands[0];
operands[1] = force_reg (SFmode, operands[1]);
operands[0] = force_reg (SFmode, operands[0]);
! emit_move_insn (reg,
! gen_lowpart (SFmode,
! gen_int_mode (0x80000000, SImode)));
emit_insn (gen_negsf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
--- 9532,9546 ----
in register. */
rtx reg = gen_reg_rtx (SFmode);
rtx dest = operands[0];
+ rtx imm = gen_lowpart (SFmode, gen_int_mode (0x80000000, SImode));
operands[1] = force_reg (SFmode, operands[1]);
operands[0] = force_reg (SFmode, operands[0]);
! reg = force_reg (V4SFmode,
! gen_rtx_CONST_VECTOR (V4SFmode,
! gen_rtvec (4, imm, CONST0_RTX (SFmode),
! CONST0_RTX (SFmode),
! CONST0_RTX (SFmode))));
emit_insn (gen_negsf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
***************
*** 9555,9562 ****
(define_insn "negsf2_ifs"
[(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf")
! (neg:SF (match_operand:SF 1 "nonimmediate_operand" "0,x#fr,0,0")))
! (use (match_operand:SF 2 "nonmemory_operand" "x,0#x,*g#x,*g#x"))
(clobber (reg:CC 17))]
"TARGET_SSE
&& (reload_in_progress || reload_completed
--- 9558,9565 ----
(define_insn "negsf2_ifs"
[(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf")
! (neg:SF (match_operand:SF 1 "nonimmediate_operand" "0,xm#fr,0,0")))
! (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,*x*rm,*x*rm"))
(clobber (reg:CC 17))]
"TARGET_SSE
&& (reload_in_progress || reload_completed
***************
*** 9577,9583 ****
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(neg:SF (match_operand:SF 1 "register_operand" "")))
! (use (match_operand:SF 2 "" ""))
(clobber (reg:CC 17))]
"reload_completed && !SSE_REG_P (operands[0])"
[(parallel [(set (match_dup 0)
--- 9580,9586 ----
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(neg:SF (match_operand:SF 1 "register_operand" "")))
! (use (match_operand:V4SF 2 "" ""))
(clobber (reg:CC 17))]
"reload_completed && !SSE_REG_P (operands[0])"
[(parallel [(set (match_dup 0)
***************
*** 9587,9593 ****
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(neg:SF (match_operand:SF 1 "register_operand" "")))
! (use (match_operand:SF 2 "register_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
--- 9590,9596 ----
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(neg:SF (match_operand:SF 1 "register_operand" "")))
! (use (match_operand:V4SF 2 "nonimmediate_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
***************
*** 9666,9672 ****
{
/* Using SSE is tricky, since we need bitwise negation of -0
in register. */
! rtx reg = gen_reg_rtx (DFmode);
#if HOST_BITS_PER_WIDE_INT >= 64
rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode);
#else
--- 9669,9675 ----
{
/* Using SSE is tricky, since we need bitwise negation of -0
in register. */
! rtx reg;
#if HOST_BITS_PER_WIDE_INT >= 64
rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode);
#else
***************
*** 9676,9682 ****
operands[1] = force_reg (DFmode, operands[1]);
operands[0] = force_reg (DFmode, operands[0]);
! emit_move_insn (reg, gen_lowpart (DFmode, imm));
emit_insn (gen_negdf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
--- 9679,9687 ----
operands[1] = force_reg (DFmode, operands[1]);
operands[0] = force_reg (DFmode, operands[0]);
! reg = force_reg (V2DFmode,
! gen_rtx_CONST_VECTOR (V2DFmode,
! gen_rtvec (2, imm, CONST0_RTX (DFmode))));
emit_insn (gen_negdf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
***************
*** 9694,9701 ****
(define_insn "negdf2_ifs"
[(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,f#Yr,rm#Yf")
! (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0,0")))
! (use (match_operand:DF 2 "nonmemory_operand" "Y,0,*g#Y,*g#Y"))
(clobber (reg:CC 17))]
"!TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
--- 9699,9706 ----
(define_insn "negdf2_ifs"
[(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,f#Yr,rm#Yf")
! (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Ym#fr,0,0")))
! (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,*Y*rm,*Y*rm"))
(clobber (reg:CC 17))]
"!TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
***************
*** 9706,9712 ****
(define_insn "*negdf2_ifs_rex64"
[(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,fm#Yr,r#Yf")
(neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0,0")))
! (use (match_operand:DF 2 "general_operand" "Y,0,*g#Yr,*rm"))
(clobber (reg:CC 17))]
"TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
--- 9711,9717 ----
(define_insn "*negdf2_ifs_rex64"
[(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,fm#Yr,r#Yf")
(neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0,0")))
! (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,*Y*rm,*Y*rm"))
(clobber (reg:CC 17))]
"TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
***************
*** 9717,9723 ****
(define_split
[(set (match_operand:DF 0 "memory_operand" "")
(neg:DF (match_operand:DF 1 "memory_operand" "")))
! (use (match_operand:DF 2 "" ""))
(clobber (reg:CC 17))]
""
[(parallel [(set (match_dup 0)
--- 9722,9728 ----
(define_split
[(set (match_operand:DF 0 "memory_operand" "")
(neg:DF (match_operand:DF 1 "memory_operand" "")))
! (use (match_operand:V2DF 2 "" ""))
(clobber (reg:CC 17))]
""
[(parallel [(set (match_dup 0)
***************
*** 9727,9733 ****
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(neg:DF (match_operand:DF 1 "register_operand" "")))
! (use (match_operand:DF 2 "" ""))
(clobber (reg:CC 17))]
"reload_completed && !SSE_REG_P (operands[0])
&& (!TARGET_64BIT || FP_REG_P (operands[0]))"
--- 9732,9738 ----
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(neg:DF (match_operand:DF 1 "register_operand" "")))
! (use (match_operand:V2DF 2 "" ""))
(clobber (reg:CC 17))]
"reload_completed && !SSE_REG_P (operands[0])
&& (!TARGET_64BIT || FP_REG_P (operands[0]))"
***************
*** 9738,9744 ****
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(neg:DF (match_operand:DF 1 "register_operand" "")))
! (use (match_operand:DF 2 "" ""))
(clobber (reg:CC 17))]
"TARGET_64BIT && reload_completed && GENERAL_REG_P (operands[0])"
[(parallel [(set (match_dup 0)
--- 9743,9749 ----
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(neg:DF (match_operand:DF 1 "register_operand" "")))
! (use (match_operand:V2DF 2 "" ""))
(clobber (reg:CC 17))]
"TARGET_64BIT && reload_completed && GENERAL_REG_P (operands[0])"
[(parallel [(set (match_dup 0)
***************
*** 9750,9771 ****
(define_split
[(set (match_operand:DF 0 "register_operand" "")
! (neg:DF (match_operand:DF 1 "register_operand" "")))
! (use (match_operand:DF 2 "register_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
(xor:TI (subreg:TI (match_dup 1) 0)
(subreg:TI (match_dup 2) 0)))]
{
/* Avoid possible reformating on the operands. */
if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
! {
! rtx op = gen_rtx_SUBREG (V2DFmode, operands[1], 0);
! emit_insn (gen_sse2_unpcklpd (op, op, op));
! op = gen_rtx_SUBREG (V2DFmode, operands[2], 0);
! emit_insn (gen_sse2_unpcklpd (op, op, op));
! }
if (operands_match_p (operands[0], operands[2]))
{
rtx tmp;
--- 9755,9772 ----
(define_split
[(set (match_operand:DF 0 "register_operand" "")
! (neg:DF (match_operand:DF 1 "nonimmediate_operand" "")))
! (use (match_operand:V2DF 2 "nonimmediate_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
(xor:TI (subreg:TI (match_dup 1) 0)
(subreg:TI (match_dup 2) 0)))]
{
+ operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
/* Avoid possible reformating on the operands. */
if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
! emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0]));
if (operands_match_p (operands[0], operands[2]))
{
rtx tmp;
***************
*** 10005,10014 ****
operands[1] = force_reg (SFmode, operands[1]);
operands[0] = force_reg (SFmode, operands[0]);
imm = gen_lowpart (SFmode, gen_int_mode(~0x80000000, SImode));
! emit_move_insn (reg,
! gen_rtx_CONST_VECTOR (V4SFmode,
! gen_rtvec (4, imm, CONST0_RTX (SFmode),
! CONST0_RTX (SFmode), CONST0_RTX (SFmode))));
emit_insn (gen_abssf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
--- 10006,10016 ----
operands[1] = force_reg (SFmode, operands[1]);
operands[0] = force_reg (SFmode, operands[0]);
imm = gen_lowpart (SFmode, gen_int_mode(~0x80000000, SImode));
! reg = force_reg (V4SFmode,
! gen_rtx_CONST_VECTOR (V4SFmode,
! gen_rtvec (4, imm, CONST0_RTX (SFmode),
! CONST0_RTX (SFmode),
! CONST0_RTX (SFmode))));
emit_insn (gen_abssf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
***************
*** 10158,10166 ****
/* Produce LONG_DOUBLE with the proper immediate argument. */
imm = gen_lowpart (DFmode, imm);
! emit_move_insn (reg,
! gen_rtx_CONST_VECTOR (V2DFmode,
! gen_rtvec (2, imm, CONST0_RTX (DFmode))));
emit_insn (gen_absdf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
--- 10160,10168 ----
/* Produce LONG_DOUBLE with the proper immediate argument. */
imm = gen_lowpart (DFmode, imm);
! reg = force_reg (V2DFmode,
! gen_rtx_CONST_VECTOR (V2DFmode,
! gen_rtvec (2, imm, CONST0_RTX (DFmode))));
emit_insn (gen_absdf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
***************
*** 10189,10195 ****
(define_insn "*absdf2_ifs_rex64"
[(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr")
! (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y,0")))
(use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,*0,*Y*rm"))
(clobber (reg:CC 17))]
"TARGET_64BIT && TARGET_SSE2
--- 10191,10197 ----
(define_insn "*absdf2_ifs_rex64"
[(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr")
! (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Ym,0")))
(use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,*0,*Y*rm"))
(clobber (reg:CC 17))]
"TARGET_64BIT && TARGET_SSE2
***************
*** 18168,18173 ****
--- 18170,18185 ----
[(set_attr "type" "ssemov")
(set_attr "mode" "V4SF")])
+ (define_split
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
+ "TARGET_SSE"
+ [(set (match_dup 0)
+ (vec_merge:V4SF
+ (match_dup 1)
+ (vec_duplicate:V4SF (float:SF (const_int 0)))
+ (const_int 1)))])
+
(define_insn "movv4si_internal"
[(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
(match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))]
***************
*** 18205,18210 ****
--- 18217,18232 ----
[(set_attr "type" "ssemov")
(set_attr "mode" "V4SF")])
+ (define_split
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
+ "TARGET_SSE2"
+ [(set (match_dup 0)
+ (vec_merge:V2DF
+ (match_dup 1)
+ (vec_duplicate:V2DF (float:DF (const_int 0)))
+ (const_int 1)))])
+
(define_insn "movv8qi_internal"
[(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
(match_operand:V8QI 1 "nonimmediate_operand" "ym,y"))]
*** i386.h.old Mon Oct 28 00:09:05 2002
--- i386.h Mon Oct 28 00:09:31 2002
*************** do { \
*** 3248,3253 ****
--- 3248,3254 ----
{"register_and_not_any_fp_reg_operand", {REG}}, \
{"fp_register_operand", {REG}}, \
{"register_and_not_fp_reg_operand", {REG}}, \
+ {"zero_extended_scalar_load_operand", {MEM}}, \
/* A list of predicates that do special things with modes, and so
should not elicit warnings for VOIDmode match_operand. */
*** i386.c.old Sun Oct 27 17:35:58 2002
--- i386.c Mon Oct 28 00:19:59 2002
*************** non_q_regs_operand (op, mode)
*** 3484,3489 ****
--- 3485,3515 ----
return NON_QI_REG_P (op);
}
+ int
+ zero_extended_scalar_load_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+ {
+ unsigned n_elts;
+ if (GET_CODE (op) != MEM)
+ return 0;
+ op = maybe_get_pool_constant (op);
+ if (!op)
+ return 0;
+ if (GET_CODE (op) != CONST_VECTOR)
+ return 0;
+ n_elts =
+ (GET_MODE_SIZE (GET_MODE (op)) /
+ GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
+ for (n_elts--; n_elts > 0; n_elts--)
+ {
+ rtx elt = CONST_VECTOR_ELT (op, n_elts);
+ if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
+ return 0;
+ }
+ return 1;
+ }
+
/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
insns. */
int