[PATCH, i386]: Remove moves using pextr insn
Uros Bizjak
ubizjak@gmail.com
Tue May 23 20:11:00 GMT 2017
Hello!
Implementing SImode and DImode moves with the pextr insn brings no
benefit, since on targets without efficient inter-unit moves the latency
of pextr is slightly worse than that of movd.
2017-05-23 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.md (*movdi_internal): Remove SSE4
alternative 18 (?r, *v). Update insn attributes.
(*movsi_internal): Remove SSE4 alternative 13 (?r, *v).
Update insn attributes.
(*zero_extendsidi2): Remove SSE4 alternative (?r, *x).
Update insn attributes.
* config/i386/sse.md (*vec_extract<ssevecmodelower>_0): Remove SSE4
alternative 1 (r, v). Remove isa attribute.
* config/i386/i386.c (dimode_scalar_chain::make_vector_copies):
Always move value through stack for !TARGET_INTER_UNIT_MOVES_TO_VEC
targets.
(dimode_scalar_chain::convert_reg): Likewise for
!TARGET_INTER_UNIT_MOVES_FROM_VEC targets.
Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
Committed to mainline; will be committed to the gcc-7 branch in a couple of days.
Uros.
-------------- next part --------------
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 248369)
+++ config/i386/i386.c (working copy)
@@ -3881,6 +3881,15 @@ dimode_scalar_chain::make_vector_copies (unsigned
emit_insn (gen_zero_extendsidi2 (vreg, tmp));
}
+ else if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
+ {
+ rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
+ emit_move_insn (adjust_address (tmp, SImode, 0),
+ gen_rtx_SUBREG (SImode, reg, 0));
+ emit_move_insn (adjust_address (tmp, SImode, 4),
+ gen_rtx_SUBREG (SImode, reg, 4));
+ emit_move_insn (vreg, tmp);
+ }
else if (TARGET_SSE4_1)
{
emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
@@ -3891,7 +3900,7 @@ dimode_scalar_chain::make_vector_copies (unsigned
gen_rtx_SUBREG (SImode, reg, 4),
GEN_INT (2)));
}
- else if (TARGET_INTER_UNIT_MOVES_TO_VEC)
+ else
{
rtx tmp = gen_reg_rtx (DImode);
emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
@@ -3905,15 +3914,6 @@ dimode_scalar_chain::make_vector_copies (unsigned
gen_rtx_SUBREG (V4SImode, vreg, 0),
gen_rtx_SUBREG (V4SImode, tmp, 0)));
}
- else
- {
- rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
- emit_move_insn (adjust_address (tmp, SImode, 0),
- gen_rtx_SUBREG (SImode, reg, 0));
- emit_move_insn (adjust_address (tmp, SImode, 4),
- gen_rtx_SUBREG (SImode, reg, 4));
- emit_move_insn (vreg, tmp);
- }
rtx_insn *seq = get_insns ();
end_sequence ();
rtx_insn *insn = DF_REF_INSN (ref);
@@ -3987,8 +3987,17 @@ dimode_scalar_chain::convert_reg (unsigned regno)
if (scalar_copy)
{
start_sequence ();
- if (TARGET_SSE4_1)
+ if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
{
+ rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
+ emit_move_insn (tmp, reg);
+ emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
+ adjust_address (tmp, SImode, 0));
+ emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
+ adjust_address (tmp, SImode, 4));
+ }
+ else if (TARGET_SSE4_1)
+ {
rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
emit_insn
(gen_rtx_SET
@@ -4003,7 +4012,7 @@ dimode_scalar_chain::convert_reg (unsigned regno)
gen_rtx_VEC_SELECT (SImode,
gen_rtx_SUBREG (V4SImode, reg, 0), tmp)));
}
- else if (TARGET_INTER_UNIT_MOVES_FROM_VEC)
+ else
{
rtx vcopy = gen_reg_rtx (V2DImode);
emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0));
@@ -4014,15 +4023,6 @@ dimode_scalar_chain::convert_reg (unsigned regno)
emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
gen_rtx_SUBREG (SImode, vcopy, 0));
}
- else
- {
- rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
- emit_move_insn (tmp, reg);
- emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
- adjust_address (tmp, SImode, 0));
- emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
- adjust_address (tmp, SImode, 4));
- }
rtx_insn *seq = get_insns ();
end_sequence ();
emit_conversion_insns (seq, insn);
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 248369)
+++ config/i386/i386.md (working copy)
@@ -2147,9 +2147,9 @@
(define_insn "*movdi_internal"
[(set (match_operand:DI 0 "nonimmediate_operand"
- "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,m,?r ,?r,?*Yi,?*Ym,?*Yi,*k,*k ,*r,*m")
+ "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,m,?r ,?*Yi,?*Ym,?*Yi,*k,*k ,*r,*m")
(match_operand:DI 1 "general_operand"
- "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,v,*Yj,*v,r ,*Yj ,*Yn ,*r,*km,*k,*k"))]
+ "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,v,*Yj,r ,*Yj ,*Yn ,*r,*km,*k,*k"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
@@ -2171,9 +2171,6 @@
return "movq\t{%1, %0|%0, %1}";
case TYPE_SSELOG1:
- if (GENERAL_REG_P (operands[0]))
- return "%vpextrq\t{$0, %1, %0|%0, %1, 0}";
-
return standard_sse_constant_opcode (insn, operands[1]);
case TYPE_SSEMOV:
@@ -2227,10 +2224,8 @@
[(set (attr "isa")
(cond [(eq_attr "alternative" "0,1")
(const_string "nox64")
- (eq_attr "alternative" "2,3,4,5,10,11,17,19,22,24")
+ (eq_attr "alternative" "2,3,4,5,10,11,17,18,21,23")
(const_string "x64")
- (eq_attr "alternative" "18")
- (const_string "x64_sse4")
]
(const_string "*")))
(set (attr "type")
@@ -2240,13 +2235,13 @@
(const_string "mmx")
(eq_attr "alternative" "7,8,9,10,11")
(const_string "mmxmov")
- (eq_attr "alternative" "12,18")
+ (eq_attr "alternative" "12")
(const_string "sselog1")
- (eq_attr "alternative" "13,14,15,16,17,19")
+ (eq_attr "alternative" "13,14,15,16,17,18")
(const_string "ssemov")
- (eq_attr "alternative" "20,21")
+ (eq_attr "alternative" "19,20")
(const_string "ssecvt")
- (eq_attr "alternative" "22,23,24,25")
+ (eq_attr "alternative" "21,22,23,24")
(const_string "mskmov")
(and (match_operand 0 "register_operand")
(match_operand 1 "pic_32bit_operand"))
@@ -2256,23 +2251,18 @@
(set (attr "modrm")
(if_then_else
(and (eq_attr "alternative" "4") (eq_attr "type" "imov"))
- (const_string "0")
- (const_string "*")))
+ (const_string "0")
+ (const_string "*")))
(set (attr "length_immediate")
- (cond [(and (eq_attr "alternative" "4") (eq_attr "type" "imov"))
- (const_string "8")
- (eq_attr "alternative" "18")
- (const_string "1")
- ]
- (const_string "*")))
+ (if_then_else
+ (and (eq_attr "alternative" "4") (eq_attr "type" "imov"))
+ (const_string "8")
+ (const_string "*")))
(set (attr "prefix_rex")
- (if_then_else (eq_attr "alternative" "10,11,17,18,19")
+ (if_then_else
+ (eq_attr "alternative" "10,11,17,18")
(const_string "1")
(const_string "*")))
- (set (attr "prefix_extra")
- (if_then_else (eq_attr "alternative" "18")
- (const_string "1")
- (const_string "*")))
(set (attr "prefix")
(if_then_else (eq_attr "type" "sselog1,ssemov")
(const_string "maybe_vex")
@@ -2301,8 +2291,6 @@
(and (eq_attr "alternative" "14,15,16")
(not (match_test "TARGET_SSE2")))
(const_string "V2SF")
- (eq_attr "alternative" "18")
- (const_string "TI")
]
(const_string "DI")))
(set (attr "enabled")
@@ -2328,17 +2316,14 @@
(define_insn "*movsi_internal"
[(set (match_operand:SI 0 "nonimmediate_operand"
- "=r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?r,?*Yi,*k,*k ,*rm")
+ "=r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?*Yi,*k,*k ,*rm")
(match_operand:SI 1 "general_operand"
- "g ,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,*Yj,*v,r ,*r,*km,*k"))]
+ "g ,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,*Yj,r ,*r,*km,*k"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
{
case TYPE_SSELOG1:
- if (GENERAL_REG_P (operands[0]))
- return "%vpextrd\t{$0, %1, %0|%0, %1, 0}";
-
return standard_sse_constant_opcode (insn, operands[1]);
case TYPE_MSKMOV:
@@ -2394,20 +2379,16 @@
gcc_unreachable ();
}
}
- [(set (attr "isa")
- (if_then_else (eq_attr "alternative" "13")
- (const_string "sse4")
- (const_string "*")))
- (set (attr "type")
+ [(set (attr "type")
(cond [(eq_attr "alternative" "2")
(const_string "mmx")
(eq_attr "alternative" "3,4,5,6,7")
(const_string "mmxmov")
- (eq_attr "alternative" "8,13")
+ (eq_attr "alternative" "8")
(const_string "sselog1")
- (eq_attr "alternative" "9,10,11,12,14")
+ (eq_attr "alternative" "9,10,11,12,13")
(const_string "ssemov")
- (eq_attr "alternative" "15,16,17")
+ (eq_attr "alternative" "14,15,16")
(const_string "mskmov")
(and (match_operand 0 "register_operand")
(match_operand 1 "pic_32bit_operand"))
@@ -2414,14 +2395,6 @@
(const_string "lea")
]
(const_string "imov")))
- (set (attr "length_immediate")
- (if_then_else (eq_attr "alternative" "13")
- (const_string "1")
- (const_string "*")))
- (set (attr "prefix_extra")
- (if_then_else (eq_attr "alternative" "13")
- (const_string "1")
- (const_string "*")))
(set (attr "prefix")
(if_then_else (eq_attr "type" "sselog1,ssemov")
(const_string "maybe_vex")
@@ -2450,8 +2423,6 @@
(and (eq_attr "alternative" "10,11")
(not (match_test "TARGET_SSE2")))
(const_string "SF")
- (eq_attr "alternative" "13")
- (const_string "TI")
]
(const_string "SI")))])
@@ -3762,10 +3733,10 @@
(define_insn "*zero_extendsidi2"
[(set (match_operand:DI 0 "nonimmediate_operand"
- "=r,?r,?o,r ,o,?*Ym,?!*y,?r ,?r,?*Yi,*x,*x,*v,*r")
+ "=r,?r,?o,r ,o,?*Ym,?!*y,?r ,?*Yi,*x,*x,*v,*r")
(zero_extend:DI
(match_operand:SI 1 "x86_64_zext_operand"
- "0 ,rm,r ,rmWz,0,r ,m ,*Yj,*x,r ,m ,*x,*v,*k")))]
+ "0 ,rm,r ,rmWz,0,r ,m ,*Yj,r ,m ,*x,*v,*k")))]
""
{
switch (get_attr_type (insn))
@@ -3782,9 +3753,6 @@
case TYPE_MMXMOV:
return "movd\t{%1, %0|%0, %1}";
- case TYPE_SSELOG1:
- return "%vpextrd\t{$0, %1, %k0|%k0, %1, 0}";
-
case TYPE_SSEMOV:
if (SSE_REG_P (operands[0]) && SSE_REG_P (operands[1]))
{
@@ -3812,15 +3780,13 @@
(const_string "nox64")
(eq_attr "alternative" "3,7")
(const_string "x64")
- (eq_attr "alternative" "8")
- (const_string "x64_sse4")
+ (eq_attr "alternative" "9")
+ (const_string "sse2")
(eq_attr "alternative" "10")
- (const_string "sse2")
+ (const_string "sse4")
(eq_attr "alternative" "11")
- (const_string "sse4")
+ (const_string "avx512f")
(eq_attr "alternative" "12")
- (const_string "avx512f")
- (eq_attr "alternative" "13")
(const_string "x64_avx512bw")
]
(const_string "*")))
@@ -3829,24 +3795,18 @@
(const_string "multi")
(eq_attr "alternative" "5,6")
(const_string "mmxmov")
- (eq_attr "alternative" "7,9,10,11,12")
+ (eq_attr "alternative" "7,8,9,10,11")
(const_string "ssemov")
- (eq_attr "alternative" "8")
- (const_string "sselog1")
- (eq_attr "alternative" "13")
+ (eq_attr "alternative" "12")
(const_string "mskmov")
]
(const_string "imovx")))
(set (attr "prefix_extra")
- (if_then_else (eq_attr "alternative" "8,11,12")
+ (if_then_else (eq_attr "alternative" "10,11")
(const_string "1")
(const_string "*")))
- (set (attr "length_immediate")
- (if_then_else (eq_attr "alternative" "8")
- (const_string "1")
- (const_string "*")))
(set (attr "prefix")
- (if_then_else (eq_attr "type" "ssemov,sselog1")
+ (if_then_else (eq_attr "type" "ssemov")
(const_string "maybe_vex")
(const_string "orig")))
(set (attr "prefix_0f")
@@ -3856,7 +3816,7 @@
(set (attr "mode")
(cond [(eq_attr "alternative" "5,6")
(const_string "DI")
- (eq_attr "alternative" "7,8,9,11,12")
+ (eq_attr "alternative" "7,8,10,11")
(const_string "TI")
]
(const_string "SI")))])
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md (revision 248369)
+++ config/i386/sse.md (working copy)
@@ -13529,13 +13529,12 @@
"#")
(define_insn "*vec_extract<ssevecmodelower>_0"
- [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,v ,m")
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,v ,m")
(vec_select:SWI48
- (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,v,vm,v")
+ (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,vm,v")
(parallel [(const_int 0)])))]
"TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
- "#"
- [(set_attr "isa" "*,sse4,*,*")])
+ "#")
(define_insn "*vec_extractv2di_0_sse"
[(set (match_operand:DI 0 "nonimmediate_operand" "=v,m")
More information about the Gcc-patches
mailing list