PATCH: Don't check Y2/TARGET_SSE2 on V16QI/V8HI/V2DF/V2DI
H.J. Lu
hjl.tools@gmail.com
Tue May 20 20:36:00 GMT 2008
Some x86 testcases fail with -msse on SSE2 vector modes, and I don't
want to spend more time on it, so I am withdrawing this patch.
H.J.
On Tue, May 20, 2008 at 6:51 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Mon, May 19, 2008 at 10:44:11PM -0700, H.J. Lu wrote:
>> i386.h has
>>
>> #define VALID_SSE2_REG_MODE(MODE) \
>> ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \
>> || (MODE) == V2DImode || (MODE) == DFmode)
>>
>> #define VALID_SSE_REG_MODE(MODE) \
>> ((MODE) == TImode || (MODE) == V4SFmode || (MODE) == V4SImode \
>> || (MODE) == SFmode || (MODE) == TFmode)
>>
>> That is, V16QImode, V8HImode, V2DFmode and V2DImode are only available
>> for SSE2 or above. However, there are many things like:
>>
>> (define_insn "*vec_concatv2df"
>> [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
>> (vec_concat:V2DF
>> (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
>> (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
>> "TARGET_SSE"
>> "@
>> unpcklpd\t{%2, %0|%0, %2}
>> movhpd\t{%2, %0|%0, %2}
>> movsd\t{%1, %0|%0, %1}
>> movlhps\t{%2, %0|%0, %2}
>> movhps\t{%2, %0|%0, %2}"
>> [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
>> (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
>>
>> (define_insn "*vec_dupv2di"
>> [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
>> (vec_duplicate:V2DI
>> (match_operand:DI 1 "register_operand" " 0 ,0")))]
>> "TARGET_SSE"
>> "@
>> punpcklqdq\t%0, %0
>> movlhps\t%0, %0"
>> [(set_attr "type" "sselog1,ssemov")
>> (set_attr "mode" "TI,V4SF")])
>>
>> if (TARGET_SSE2 && mode == V2DFmode)
>> ...
>>
>> case V8HImode:
>> if (TARGET_SSE2)
>>
>>
>> Do we really need to check TARGET_SSE2 on modes which are enabled
>> only for SSE2? If they are just oversights, I will try to fix them.
>>
>
> Since V16QI/V8HI/V2DF/V2DI are only enabled for SSE2, I don't see
> why we need to check if SSE2 is available before using SSE2 on them.
> Did I miss something? I am testing this on Linux/Intel64. OK for
> trunk if it passes?
>
> Thanks.
>
>
> H.J.
> ----
> 2008-05-20 H.J. Lu <hongjiu.lu@intel.com>
>
> * config/i386/i386.c (ix86_expand_vector_move_misalign): No
> need to check TARGET_SSE2 for V2DFmode.
> (ix86_expand_vector_init_duplicate): No need to check
> TARGET_SSE2 for V8HImode nor V16QImode.
> (ix86_expand_vector_init_one_nonzero): Always use vector set
> for V8HImode.
> (ix86_expand_vector_set): Always use vector merge for V8HImode.
> (ix86_expand_vector_extract): Always use vector extract for
> V8HImode.
>
> * config/i386/i386.h (SSE_VEC_MODE_P): New.
>
> * config/i386/sse.md (mov<mode>): Replace TARGET_SSE with
> SSE_VEC_MODE_P (<MODE>mode).
> (*mov<mode>_internal): Likewise.
> (push<mode>1): Likewise.
> (movmisalign<mode>): Likewise.
> (*vec_concatv2df): Change Y2 to x. Replace TARGET_SSE with
> TARGET_SSE2. Remove SSE alternatives.
> (*vec_dupv2di): Likewise.
> (vec_concatv2di): Likewise.
> (*vec_concatv2di_rex64_sse): Likewise.
> (*vec_concatv2di_rex64_sse4_1): Remove SSE alternatives.
> (vec_setv2di): Replace TARGET_SSE with TARGET_SSE2.
> (vec_extractv2di): Likewise.
> (vec_initv2di): Likewise.
> (vec_setv8hi): Likewise.
> (vec_extractv8hi): Likewise.
> (vec_initv8hi): Likewise.
> (vec_setv16qi): Likewise.
> (vec_extractv16qi): Likewise.
> (vec_initv16qi): Likewise.
>
> Index: gcc/config/i386/i386.h
> ===================================================================
> --- gcc/config/i386/i386.h (revision 135610)
> +++ gcc/config/i386/i386.h (working copy)
> @@ -1486,6 +1486,11 @@ enum reg_class
> #define SSE_VEC_FLOAT_MODE_P(MODE) \
> ((TARGET_SSE && (MODE) == V4SFmode) || (TARGET_SSE2 && (MODE) == V2DFmode))
>
> +#define SSE_VEC_MODE_P(MODE) \
> + ((TARGET_SSE && ((MODE) == V4SImode || (MODE) == V4SFmode)) \
> + || (TARGET_SSE2 && ((MODE) == V16QImode || (MODE) == V8HImode \
> + || (MODE) == V2DImode || (MODE) == V2DFmode)))
> +
> #define MMX_REG_P(XOP) (REG_P (XOP) && MMX_REGNO_P (REGNO (XOP)))
> #define MMX_REGNO_P(N) IN_RANGE ((N), FIRST_MMX_REG, LAST_MMX_REG)
>
> Index: gcc/config/i386/sse.md
> ===================================================================
> --- gcc/config/i386/sse.md (revision 135610)
> +++ gcc/config/i386/sse.md (working copy)
> @@ -79,7 +79,7 @@
> (define_expand "mov<mode>"
> [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
> (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
> - "TARGET_SSE"
> + "SSE_VEC_MODE_P (<MODE>mode)"
> {
> ix86_expand_vector_move (<MODE>mode, operands);
> DONE;
> @@ -88,7 +88,7 @@
> (define_insn "*mov<mode>_internal"
> [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
> (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
> - "TARGET_SSE
> + "SSE_VEC_MODE_P (<MODE>mode)
> && (register_operand (operands[0], <MODE>mode)
> || register_operand (operands[1], <MODE>mode))"
> {
> @@ -187,7 +187,7 @@
>
> (define_expand "push<mode>1"
> [(match_operand:SSEMODE 0 "register_operand" "")]
> - "TARGET_SSE"
> + "SSE_VEC_MODE_P (<MODE>mode)"
> {
> ix86_expand_push (<MODE>mode, operands[0]);
> DONE;
> @@ -196,7 +196,7 @@
> (define_expand "movmisalign<mode>"
> [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
> (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
> - "TARGET_SSE"
> + "SSE_VEC_MODE_P (<MODE>mode)"
> {
> ix86_expand_vector_move_misalign (<MODE>mode, operands);
> DONE;
> @@ -2772,19 +2772,17 @@
> (set_attr "mode" "DF")])
>
> (define_insn "*vec_concatv2df"
> - [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
> + [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
> (vec_concat:V2DF
> - (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
> - (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
> - "TARGET_SSE"
> + (match_operand:DF 1 "nonimmediate_operand" " 0,0,m")
> + (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
> + "TARGET_SSE2"
> "@
> unpcklpd\t{%2, %0|%0, %2}
> movhpd\t{%2, %0|%0, %2}
> - movsd\t{%1, %0|%0, %1}
> - movlhps\t{%2, %0|%0, %2}
> - movhps\t{%2, %0|%0, %2}"
> - [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
> - (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
> + movsd\t{%1, %0|%0, %1}"
> + [(set_attr "type" "sselog,ssemov,ssemov")
> + (set_attr "mode" "V2DF,V1DF,DF")])
>
> (define_expand "vec_setv2df"
> [(match_operand:V2DF 0 "register_operand" "")
> @@ -4808,15 +4806,13 @@
> (set_attr "mode" "TI,V4SF")])
>
> (define_insn "*vec_dupv2di"
> - [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
> + [(set (match_operand:V2DI 0 "register_operand" "=Y2")
> (vec_duplicate:V2DI
> - (match_operand:DI 1 "register_operand" " 0 ,0")))]
> - "TARGET_SSE"
> - "@
> - punpcklqdq\t%0, %0
> - movlhps\t%0, %0"
> - [(set_attr "type" "sselog1,ssemov")
> - (set_attr "mode" "TI,V4SF")])
> + (match_operand:DI 1 "register_operand" " 0")))]
> + "TARGET_SSE2"
> + "punpcklqdq\t%0, %0"
> + [(set_attr "type" "sselog1")
> + (set_attr "mode" "TI")])
>
> (define_insn "*vec_concatv2si_sse4_1"
> [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
> @@ -4879,26 +4875,25 @@
> (set_attr "mode" "TI,V4SF,V2SF")])
>
> (define_insn "vec_concatv2di"
> - [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x")
> + [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x,x")
> (vec_concat:V2DI
> - (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
> - (match_operand:DI 2 "vector_move_operand" " C, C,Y2,x,m,0")))]
> - "!TARGET_64BIT && TARGET_SSE"
> + (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,m")
> + (match_operand:DI 2 "vector_move_operand" " C, C,x,m,0")))]
> + "!TARGET_64BIT && TARGET_SSE2"
> "@
> movq\t{%1, %0|%0, %1}
> movq2dq\t{%1, %0|%0, %1}
> punpcklqdq\t{%2, %0|%0, %2}
> - movlhps\t{%2, %0|%0, %2}
> movhps\t{%2, %0|%0, %2}
> movlps\t{%1, %0|%0, %1}"
> - [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
> - (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
> + [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
> + (set_attr "mode" "TI,TI,TI,V2SF,V2SF")])
>
> (define_insn "*vec_concatv2di_rex64_sse4_1"
> - [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x,x,x")
> + [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x,x")
> (vec_concat:V2DI
> - (match_operand:DI 1 "nonimmediate_operand" " 0,m,r ,*y,0,0,0,m")
> - (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,x,m,0")))]
> + (match_operand:DI 1 "nonimmediate_operand" " 0,m,r ,*y,0,0,m")
> + (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m,0")))]
> "TARGET_64BIT && TARGET_SSE4_1"
> "@
> pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
> @@ -4906,35 +4901,33 @@
> movq\t{%1, %0|%0, %1}
> movq2dq\t{%1, %0|%0, %1}
> punpcklqdq\t{%2, %0|%0, %2}
> - movlhps\t{%2, %0|%0, %2}
> movhps\t{%2, %0|%0, %2}
> movlps\t{%1, %0|%0, %1}"
> - [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
> - (set_attr "prefix_extra" "1,*,*,*,*,*,*,*")
> - (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
> + [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
> + (set_attr "prefix_extra" "1,*,*,*,*,*,*")
> + (set_attr "mode" "TI,TI,TI,TI,TI,V2SF,V2SF")])
>
> (define_insn "*vec_concatv2di_rex64_sse"
> - [(set (match_operand:V2DI 0 "register_operand" "=Y2,Yi,!Y2,Y2,x,x,x")
> + [(set (match_operand:V2DI 0 "register_operand" "=x,Yi,!x,x,x,x")
> (vec_concat:V2DI
> - (match_operand:DI 1 "nonimmediate_operand" " m,r ,*y ,0 ,0,0,m")
> - (match_operand:DI 2 "vector_move_operand" " C,C ,C ,Y2,x,m,0")))]
> - "TARGET_64BIT && TARGET_SSE"
> + (match_operand:DI 1 "nonimmediate_operand" " m,r ,*y,0,0,m")
> + (match_operand:DI 2 "vector_move_operand" " C,C ,C ,x,m,0")))]
> + "TARGET_64BIT && TARGET_SSE2"
> "@
> movq\t{%1, %0|%0, %1}
> movq\t{%1, %0|%0, %1}
> movq2dq\t{%1, %0|%0, %1}
> punpcklqdq\t{%2, %0|%0, %2}
> - movlhps\t{%2, %0|%0, %2}
> movhps\t{%2, %0|%0, %2}
> movlps\t{%1, %0|%0, %1}"
> - [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
> - (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
> + [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
> + (set_attr "mode" "TI,TI,TI,TI,V2SF,V2SF")])
>
> (define_expand "vec_setv2di"
> [(match_operand:V2DI 0 "register_operand" "")
> (match_operand:DI 1 "register_operand" "")
> (match_operand 2 "const_int_operand" "")]
> - "TARGET_SSE"
> + "TARGET_SSE2"
> {
> ix86_expand_vector_set (false, operands[0], operands[1],
> INTVAL (operands[2]));
> @@ -4945,7 +4938,7 @@
> [(match_operand:DI 0 "register_operand" "")
> (match_operand:V2DI 1 "register_operand" "")
> (match_operand 2 "const_int_operand" "")]
> - "TARGET_SSE"
> + "TARGET_SSE2"
> {
> ix86_expand_vector_extract (false, operands[0], operands[1],
> INTVAL (operands[2]));
> @@ -4955,7 +4948,7 @@
> (define_expand "vec_initv2di"
> [(match_operand:V2DI 0 "register_operand" "")
> (match_operand 1 "" "")]
> - "TARGET_SSE"
> + "TARGET_SSE2"
> {
> ix86_expand_vector_init (false, operands[0], operands[1]);
> DONE;
> @@ -4996,7 +4989,7 @@
> [(match_operand:V8HI 0 "register_operand" "")
> (match_operand:HI 1 "register_operand" "")
> (match_operand 2 "const_int_operand" "")]
> - "TARGET_SSE"
> + "TARGET_SSE2"
> {
> ix86_expand_vector_set (false, operands[0], operands[1],
> INTVAL (operands[2]));
> @@ -5007,7 +5000,7 @@
> [(match_operand:HI 0 "register_operand" "")
> (match_operand:V8HI 1 "register_operand" "")
> (match_operand 2 "const_int_operand" "")]
> - "TARGET_SSE"
> + "TARGET_SSE2"
> {
> ix86_expand_vector_extract (false, operands[0], operands[1],
> INTVAL (operands[2]));
> @@ -5017,7 +5010,7 @@
> (define_expand "vec_initv8hi"
> [(match_operand:V8HI 0 "register_operand" "")
> (match_operand 1 "" "")]
> - "TARGET_SSE"
> + "TARGET_SSE2"
> {
> ix86_expand_vector_init (false, operands[0], operands[1]);
> DONE;
> @@ -5027,7 +5020,7 @@
> [(match_operand:V16QI 0 "register_operand" "")
> (match_operand:QI 1 "register_operand" "")
> (match_operand 2 "const_int_operand" "")]
> - "TARGET_SSE"
> + "TARGET_SSE2"
> {
> ix86_expand_vector_set (false, operands[0], operands[1],
> INTVAL (operands[2]));
> @@ -5038,7 +5031,7 @@
> [(match_operand:QI 0 "register_operand" "")
> (match_operand:V16QI 1 "register_operand" "")
> (match_operand 2 "const_int_operand" "")]
> - "TARGET_SSE"
> + "TARGET_SSE2"
> {
> ix86_expand_vector_extract (false, operands[0], operands[1],
> INTVAL (operands[2]));
> @@ -5048,7 +5041,7 @@
> (define_expand "vec_initv16qi"
> [(match_operand:V16QI 0 "register_operand" "")
> (match_operand 1 "" "")]
> - "TARGET_SSE"
> + "TARGET_SSE2"
> {
> ix86_expand_vector_init (false, operands[0], operands[1]);
> DONE;
> Index: gcc/config/i386/i386.c
> ===================================================================
> --- gcc/config/i386/i386.c (revision 135610)
> +++ gcc/config/i386/i386.c (working copy)
> @@ -10749,7 +10749,7 @@ ix86_expand_vector_move_misalign (enum m
> return;
> }
>
> - if (TARGET_SSE2 && mode == V2DFmode)
> + if (mode == V2DFmode)
> {
> rtx zero;
>
> @@ -10831,7 +10831,7 @@ ix86_expand_vector_move_misalign (enum m
> return;
> }
>
> - if (TARGET_SSE2 && mode == V2DFmode)
> + if (mode == V2DFmode)
> {
> m = adjust_address (op0, DFmode, 0);
> emit_insn (gen_sse2_storelpd (m, op1));
> @@ -23802,7 +23802,6 @@ ix86_expand_vector_init_duplicate (bool
> wvmode = V4HImode;
> goto widen;
> case V8HImode:
> - if (TARGET_SSE2)
> {
> rtx tmp1, tmp2;
> /* Extend HImode to SImode using a paradoxical SUBREG. */
> @@ -23834,7 +23833,6 @@ ix86_expand_vector_init_duplicate (bool
> wvmode = V4SImode;
> goto widen;
> case V16QImode:
> - if (TARGET_SSE2)
> {
> rtx tmp1, tmp2;
> /* Extend QImode to SImode using a paradoxical SUBREG. */
> @@ -23909,7 +23907,7 @@ ix86_expand_vector_init_one_nonzero (boo
> use_vector_set = TARGET_SSE4_1;
> break;
> case V8HImode:
> - use_vector_set = TARGET_SSE2;
> + use_vector_set = true;
> break;
> case V4HImode:
> use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
> @@ -24352,9 +24350,6 @@ vec_concat:
> goto vec_interleave;
>
> case V8HImode:
> - if (!TARGET_SSE2)
> - break;
> -
> n = 8;
> goto vec_interleave;
>
> @@ -24629,7 +24624,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx
> return;
>
> case V8HImode:
> - use_vec_merge = TARGET_SSE2;
> + use_vec_merge = true;
> break;
> case V4HImode:
> use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
> @@ -24759,7 +24754,7 @@ ix86_expand_vector_extract (bool mmx_ok,
> break;
>
> case V8HImode:
> - use_vec_extr = TARGET_SSE2;
> + use_vec_extr = true;
> break;
> case V4HImode:
> use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
>
More information about the Gcc-patches
mailing list