Re: PATCH: PR target/40470: unable to find a register to spill in class ‘SSE_FIRST_REG’
Richard Guenther
richard.guenther@gmail.com
Wed Jun 17 15:01:00 GMT 2009
On Wed, Jun 17, 2009 at 4:01 PM, H.J. Lu<hongjiu.lu@intel.com> wrote:
> Hi,
>
> Some SSE4 instructions have an implicit XMM0 operand. This patch helps RA
> deal with them by forcing the operand into XMM0 during expand. OK for trunk
> and 4.4?
I don't think it's a particularly good idea to put things in hardregs
that early.
Richard.
> Thanks.
>
>
> H.J.
> ----
> gcc/
>
> 2009-06-17 H.J. Lu <hongjiu.lu@intel.com>
>
> PR target/40470
> * config/i386/sse.md (sse4_1_blendvp<ssemodesuffixf2c>): Renamed
> to ...
> (*sse4_1_blendvp<ssemodesuffixf2c>): This.
> (sse4_1_pblendvb): Renamed to ...
> (*sse4_1_pblendvb): This.
> (sse4_1_blendvp<ssemodesuffixf2c>): New.
> (sse4_1_pblendvb): Likewise.
> (sse4_2_pcmpestr): Force operand 1 into XMM0.
> (sse4_2_pcmpistr): Likewise.
>
> gcc/testsuite/
>
> 2009-06-17 H.J. Lu <hongjiu.lu@intel.com>
>
> PR target/40470
> * gcc.dg/torture/pr40470-1.c: New.
> * gcc.dg/torture/pr40470-2.c: Likewise.
>
> --- gcc/config/i386/sse.md.xmm0 2009-05-27 07:42:18.000000000 -0700
> +++ gcc/config/i386/sse.md 2009-06-17 06:50:15.000000000 -0700
> @@ -8911,7 +8911,28 @@
> (set_attr "prefix_extra" "1")
> (set_attr "mode" "<MODE>")])
>
> -(define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
> +(define_expand "sse4_1_blendvp<ssemodesuffixf2c>"
> + [(match_operand:SSEMODEF2P 0 "register_operand" "")
> + (match_operand:SSEMODEF2P 1 "register_operand" "")
> + (match_operand:SSEMODEF2P 2 "register_operand" "")
> + (match_operand:SSEMODEF2P 3 "register_operand" "")]
> + "TARGET_SSE4_1"
> +{
> + /* Force operand 3 into XMM0. */
> + rtx xmm0 = gen_rtx_REG (<MODE>mode, XMM0_REG);
> + rtx unspec;
> +
> + emit_insn (gen_rtx_SET (VOIDmode, xmm0, operands[3]));
> + unspec = gen_rtx_UNSPEC (<MODE>mode,
> + gen_rtvec (3, operands[1],
> + operands[2],
> + xmm0),
> + UNSPEC_BLENDV);
> + emit_insn (gen_rtx_SET (VOIDmode, operands[0], unspec));
> + DONE;
> +})
> +
> +(define_insn "*sse4_1_blendvp<ssemodesuffixf2c>"
> [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
> (unspec:SSEMODEF2P
> [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
> @@ -9023,7 +9044,28 @@
> (set_attr "prefix" "vex")
> (set_attr "mode" "TI")])
>
> -(define_insn "sse4_1_pblendvb"
> +(define_expand "sse4_1_pblendvb"
> + [(match_operand:V16QI 0 "register_operand" "")
> + (match_operand:V16QI 1 "register_operand" "")
> + (match_operand:V16QI 2 "register_operand" "")
> + (match_operand:V16QI 3 "register_operand" "")]
> + "TARGET_SSE4_1"
> +{
> + /* Force operand 3 into XMM0. */
> + rtx xmm0 = gen_rtx_REG (V16QImode, XMM0_REG);
> + rtx unspec;
> +
> + emit_insn (gen_rtx_SET (VOIDmode, xmm0, operands[3]));
> + unspec = gen_rtx_UNSPEC (V16QImode,
> + gen_rtvec (3, operands[1],
> + operands[2],
> + xmm0),
> + UNSPEC_BLENDV);
> + emit_insn (gen_rtx_SET (VOIDmode, operands[0], unspec));
> + DONE;
> +})
> +
> +(define_insn "*sse4_1_pblendvb"
> [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
> (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
> (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
> @@ -9597,9 +9639,14 @@
> operands[3], operands[4],
> operands[5], operands[6]));
> if (xmm0)
> - emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
> - operands[3], operands[4],
> - operands[5], operands[6]));
> + {
> + /* Compute the result in XMM0, then copy it to operand 1. */
> + rtx sse0 = gen_rtx_REG (V16QImode, XMM0_REG);
> + emit_insn (gen_sse4_2_pcmpestrm (sse0, operands[2],
> + operands[3], operands[4],
> + operands[5], operands[6]));
> + emit_insn (gen_rtx_SET (VOIDmode, operands[1], sse0));
> + }
> if (flags && !(ecx || xmm0))
> emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
> operands[2], operands[3],
> @@ -9722,8 +9769,13 @@
> emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
> operands[3], operands[4]));
> if (xmm0)
> - emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
> - operands[3], operands[4]));
> + {
> + /* Compute the result in XMM0, then copy it to operand 1. */
> + rtx sse0 = gen_rtx_REG (V16QImode, XMM0_REG);
> + emit_insn (gen_sse4_2_pcmpistrm (sse0, operands[2],
> + operands[3], operands[4]));
> + emit_insn (gen_rtx_SET (VOIDmode, operands[1], sse0));
> + }
> if (flags && !(ecx || xmm0))
> emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
> operands[2], operands[3],
> --- gcc/testsuite/gcc.dg/torture/pr40470-1.c.xmm0 2009-06-17 06:45:34.000000000 -0700
> +++ gcc/testsuite/gcc.dg/torture/pr40470-1.c 2009-06-17 06:45:06.000000000 -0700
> @@ -0,0 +1,22 @@
> +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> +/* { dg-options "-msse4" } */
> +#include <nmmintrin.h>
> +__m128i load (char *);
> +char *
> +foo (char *p1, char *p2,
> + int bmsk, __m128i mask1, __m128i mask2)
> +{
> + int len = 0;
> + __m128i frag1, frag2;
> + int cmp_s;
> + if( !p2[0]) return p1;
> + if( !p1[0] ) return NULL;
> + frag2 = load (p2);
> + frag1 = load (p1);
> + frag2 = _mm_blendv_epi8(frag2, mask2, mask1);
> + frag1 = _mm_blendv_epi8(frag1, mask2, mask1);
> + cmp_s = _mm_cmpistrs(frag2, frag1, 0x0c);
> + if( cmp_s )
> + __asm("bsfl %[bmsk], %[len]" : [len] "=r" (len) : [bmsk] "r" (bmsk) );
> + return p1 + len;
> +}
> --- gcc/testsuite/gcc.dg/torture/pr40470-2.c.xmm0 2009-06-17 06:45:39.000000000 -0700
> +++ gcc/testsuite/gcc.dg/torture/pr40470-2.c 2009-06-17 06:49:46.000000000 -0700
> @@ -0,0 +1,27 @@
> +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> +/* { dg-options "-msse4" } */
> +#include <nmmintrin.h>
> +__m128i load (char *);
> +char *
> +foo (const unsigned char *s1, const unsigned char *s2,
> + int bmsk, __m128i frag2)
> +{
> + int len = 0;
> + char *p1 = (char *) s1;
> + char *p2 = (char *) s2;
> + __m128i frag1, fruc, mask;
> + int cmp_c, cmp_s;
> + if( !p2[0]) return (char *) s1;
> + if( !p1[0] ) return NULL;
> + if( p2[1]) frag2 = load (p2);
> + frag1 = load (p1);
> + fruc = _mm_loadu_si128 ((__m128i *) s1);
> + mask = _mm_cmpistrm(fruc, frag2, 0x44);
> + frag2 = _mm_blendv_epi8(frag2, mask, mask);
> + frag1 = _mm_blendv_epi8(frag1, mask, mask);
> + cmp_c = _mm_cmpistrc(frag2, frag1, 0x0c);
> + cmp_s = _mm_cmpistrs(frag2, frag1, 0x0c);
> + if( cmp_s & cmp_c )
> + __asm("bsfl %[bmsk], %[len]" : [len] "=r" (len) : [bmsk] "r" (bmsk) );
> + return p2 + len;
> +}
>
More information about the Gcc-patches
mailing list