This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: PATCH: PR target/40470: unable to find a register to spill in class ‘SSE_FIRST_REG’


On Wed, Jun 17, 2009 at 4:01 PM, H.J. Lu<hongjiu.lu@intel.com> wrote:
> Hi,
>
> Some SSE4 instructions have implicit XMM0 operand. This patch helps RA
> deal with them by forcing operand into XMM0 during expand. OK for trunk
> and 4.4?

I don't think it's a particularly good idea to put things in hard
registers that early.


Richard.

> Thanks.
>
>
> H.J.
> ----
> gcc/
>
> 2009-06-17  H.J. Lu  <hongjiu.lu@intel.com>
>
>        PR target/40470
>        * config/i386/sse.md (sse4_1_blendvp<ssemodesuffixf2c>): Renamed
>        to ...
>        (*sse4_1_blendvp<ssemodesuffixf2c>): This.
>        (sse4_1_pblendvb): Renamed to ...
>        (*sse4_1_pblendvb): This.
>        (sse4_1_blendvp<ssemodesuffixf2c>): New.
>        (sse4_1_pblendvb): Likewise.
>        (sse4_2_pcmpestr): Force operand 1 into XMM0.
>        (sse4_2_pcmpistr): Likewise.
>
> gcc/testsuite/
>
> 2009-06-17  H.J. Lu  <hongjiu.lu@intel.com>
>
>        PR target/40470
>        * gcc.dg/torture/pr40470-1.c: New.
>        * gcc.dg/torture/pr40470-2.c: Likewise.
>
> --- gcc/config/i386/sse.md.xmm0 2009-05-27 07:42:18.000000000 -0700
> +++ gcc/config/i386/sse.md      2009-06-17 06:50:15.000000000 -0700
> @@ -8911,7 +8911,28 @@
>    (set_attr "prefix_extra" "1")
>    (set_attr "mode" "<MODE>")])
>
> -(define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
> +(define_expand "sse4_1_blendvp<ssemodesuffixf2c>"
> +  [(match_operand:SSEMODEF2P 0 "register_operand" "")
> +   (match_operand:SSEMODEF2P 1 "register_operand"  "")
> +   (match_operand:SSEMODEF2P 2 "register_operand" "")
> +   (match_operand:SSEMODEF2P 3 "register_operand" "")]
> +  "TARGET_SSE4_1"
> +{
> +  /* Force operand 3 into XMM0.  */
> +  rtx xmm0 = gen_rtx_REG (<MODE>mode, XMM0_REG);
> +  rtx unspec;
> +
> +  emit_insn (gen_rtx_SET (VOIDmode, xmm0, operands[3]));
> +  unspec = gen_rtx_UNSPEC (<MODE>mode,
> +                          gen_rtvec (3, operands[1],
> +                                     operands[2],
> +                                     xmm0),
> +                          UNSPEC_BLENDV);
> +  emit_insn (gen_rtx_SET (VOIDmode, operands[0], unspec));
> +  DONE;
> +})
> +
> +(define_insn "*sse4_1_blendvp<ssemodesuffixf2c>"
>   [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
>        (unspec:SSEMODEF2P
>          [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
> @@ -9023,7 +9044,28 @@
>    (set_attr "prefix" "vex")
>    (set_attr "mode" "TI")])
>
> -(define_insn "sse4_1_pblendvb"
> +(define_expand "sse4_1_pblendvb"
> +  [(match_operand:V16QI 0 "register_operand" "")
> +   (match_operand:V16QI 1 "register_operand"  "")
> +   (match_operand:V16QI 2 "register_operand" "")
> +   (match_operand:V16QI 3 "register_operand" "")]
> +  "TARGET_SSE4_1"
> +{
> +  /* Force operand 3 into XMM0.  */
> +  rtx xmm0 = gen_rtx_REG (V16QImode, XMM0_REG);
> +  rtx unspec;
> +
> +  emit_insn (gen_rtx_SET (VOIDmode, xmm0, operands[3]));
> +  unspec = gen_rtx_UNSPEC (V16QImode,
> +                          gen_rtvec (3, operands[1],
> +                                     operands[2],
> +                                     xmm0),
> +                          UNSPEC_BLENDV);
> +  emit_insn (gen_rtx_SET (VOIDmode, operands[0], unspec));
> +  DONE;
> +})
> +
> +(define_insn "*sse4_1_pblendvb"
>   [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
>        (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand"  "0")
>                       (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
> @@ -9597,9 +9639,14 @@
>                                     operands[3], operands[4],
>                                     operands[5], operands[6]));
>   if (xmm0)
> -    emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
> -                                    operands[3], operands[4],
> -                                    operands[5], operands[6]));
> +    {
> +      /* Force operand 1 into XMM0.  */
> +      rtx sse0 = gen_rtx_REG (V16QImode, XMM0_REG);
> +      emit_insn (gen_sse4_2_pcmpestrm (sse0, operands[2],
> +                                      operands[3], operands[4],
> +                                      operands[5], operands[6]));
> +      emit_insn (gen_rtx_SET (VOIDmode, operands[1], sse0));
> +    }
>   if (flags && !(ecx || xmm0))
>     emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
>                                           operands[2], operands[3],
> @@ -9722,8 +9769,13 @@
>     emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
>                                     operands[3], operands[4]));
>   if (xmm0)
> -    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
> -                                    operands[3], operands[4]));
> +    {
> +      /* Force operand 1 into XMM0.  */
> +      rtx sse0 = gen_rtx_REG (V16QImode, XMM0_REG);
> +      emit_insn (gen_sse4_2_pcmpistrm (sse0, operands[2],
> +                                      operands[3], operands[4]));
> +      emit_insn (gen_rtx_SET (VOIDmode, operands[1], sse0));
> +    }
>   if (flags && !(ecx || xmm0))
>     emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
>                                           operands[2], operands[3],
> --- gcc/testsuite/gcc.dg/torture/pr40470-1.c.xmm0       2009-06-17 06:45:34.000000000 -0700
> +++ gcc/testsuite/gcc.dg/torture/pr40470-1.c    2009-06-17 06:45:06.000000000 -0700
> @@ -0,0 +1,22 @@
> +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> +/* { dg-options "-msse4" } */
> +#include <nmmintrin.h>
> +__m128i load (char *);
> +char *
> +foo (char *p1, char *p2,
> +     int bmsk, __m128i mask1, __m128i mask2)
> +{
> +  int len = 0;
> +  __m128i frag1, frag2;
> +  int  cmp_s;
> +  if( !p2[0]) return p1;
> +  if( !p1[0] ) return NULL;
> +  frag2 = load (p2);
> +  frag1 = load (p1);
> +  frag2 = _mm_blendv_epi8(frag2, mask2, mask1);
> +  frag1 = _mm_blendv_epi8(frag1, mask2, mask1);
> +  cmp_s = _mm_cmpistrs(frag2, frag1, 0x0c);
> +  if( cmp_s )
> +    __asm("bsfl %[bmsk], %[len]" : [len] "=r" (len) : [bmsk] "r" (bmsk) );
> +  return p1 + len;
> +}
> --- gcc/testsuite/gcc.dg/torture/pr40470-2.c.xmm0       2009-06-17 06:45:39.000000000 -0700
> +++ gcc/testsuite/gcc.dg/torture/pr40470-2.c    2009-06-17 06:49:46.000000000 -0700
> @@ -0,0 +1,27 @@
> +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> +/* { dg-options "-msse4" } */
> +#include <nmmintrin.h>
> +__m128i load (char *);
> +char *
> +foo (const unsigned char *s1, const unsigned char *s2,
> +     int bmsk, __m128i frag2)
> +{
> +  int len = 0;
> +  char *p1 = (char  *) s1;
> +  char *p2 = (char  *) s2;
> +  __m128i frag1, fruc, mask;
> +  int cmp_c, cmp_s;
> +  if( !p2[0]) return (char *) s1;
> +  if( !p1[0] ) return NULL;
> +  if( p2[1]) frag2 = load (p2);
> +  frag1 = load (p1);
> +  fruc = _mm_loadu_si128 ((__m128i *) s1);
> +  mask = _mm_cmpistrm(fruc, frag2, 0x44);
> +  frag2 = _mm_blendv_epi8(frag2, mask, mask);
> +  frag1 = _mm_blendv_epi8(frag1, mask, mask);
> +  cmp_c = _mm_cmpistrc(frag2, frag1, 0x0c);
> +  cmp_s = _mm_cmpistrs(frag2, frag1, 0x0c);
> +  if( cmp_s  & cmp_c  )
> +    __asm("bsfl %[bmsk], %[len]" : [len] "=r" (len) : [bmsk] "r" (bmsk) );
> +  return p2 + len;
> +}
>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]