Re: PATCH: PR target/40470: unable to find a register to spill in class ‘SSE_FIRST_REG’

Richard Guenther richard.guenther@gmail.com
Wed Jun 17 15:01:00 GMT 2009


On Wed, Jun 17, 2009 at 4:01 PM, H.J. Lu<hongjiu.lu@intel.com> wrote:
> Hi,
>
> Some SSE4 instructions have an implicit XMM0 operand. This patch helps the
> register allocator (RA) deal with them by forcing that operand into XMM0
> during expand. OK for trunk and 4.4?

I don't think it's a particularly good idea to put things in hard
registers that early (i.e., already at expand time).


Richard.

> Thanks.
>
>
> H.J.
> ----
> gcc/
>
> 2009-06-17  H.J. Lu  <hongjiu.lu@intel.com>
>
>        PR target/40470
>        * config/i386/sse.md (sse4_1_blendvp<ssemodesuffixf2c>): Renamed
>        to ...
>        (*sse4_1_blendvp<ssemodesuffixf2c>): This.
>        (sse4_1_pblendvb): Renamed to ...
>        (*sse4_1_pblendvb): This.
>        (sse4_1_blendvp<ssemodesuffixf2c>): New.
>        (sse4_1_pblendvb): Likewise.
>        (sse4_2_pcmpestr): Force operand 1 into XMM0.
>        (sse4_2_pcmpistr): Likewise.
>
> gcc/testsuite/
>
> 2009-06-17  H.J. Lu  <hongjiu.lu@intel.com>
>
>        PR target/40470
>        * gcc.dg/torture/pr40470-1.c: New.
>        * gcc.dg/torture/pr40470-2.c: Likewise.
>
> --- gcc/config/i386/sse.md.xmm0 2009-05-27 07:42:18.000000000 -0700
> +++ gcc/config/i386/sse.md      2009-06-17 06:50:15.000000000 -0700
> @@ -8911,7 +8911,28 @@
>    (set_attr "prefix_extra" "1")
>    (set_attr "mode" "<MODE>")])
>
> -(define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
> +(define_expand "sse4_1_blendvp<ssemodesuffixf2c>"
> +  [(match_operand:SSEMODEF2P 0 "register_operand" "")
> +   (match_operand:SSEMODEF2P 1 "register_operand"  "")
> +   (match_operand:SSEMODEF2P 2 "register_operand" "")
> +   (match_operand:SSEMODEF2P 3 "register_operand" "")]
> +  "TARGET_SSE4_1"
> +{
> +  /* Force operand 3 into XMM0.  */
> +  rtx xmm0 = gen_rtx_REG (<MODE>mode, XMM0_REG);
> +  rtx unspec;
> +
> +  emit_insn (gen_rtx_SET (VOIDmode, xmm0, operands[3]));
> +  unspec = gen_rtx_UNSPEC (<MODE>mode,
> +                          gen_rtvec (3, operands[1],
> +                                     operands[2],
> +                                     xmm0),
> +                          UNSPEC_BLENDV);
> +  emit_insn (gen_rtx_SET (VOIDmode, operands[0], unspec));
> +  DONE;
> +})
> +
> +(define_insn "*sse4_1_blendvp<ssemodesuffixf2c>"
>   [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
>        (unspec:SSEMODEF2P
>          [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
> @@ -9023,7 +9044,28 @@
>    (set_attr "prefix" "vex")
>    (set_attr "mode" "TI")])
>
> -(define_insn "sse4_1_pblendvb"
> +(define_expand "sse4_1_pblendvb"
> +  [(match_operand:V16QI 0 "register_operand" "")
> +   (match_operand:V16QI 1 "register_operand"  "")
> +   (match_operand:V16QI 2 "register_operand" "")
> +   (match_operand:V16QI 3 "register_operand" "")]
> +  "TARGET_SSE4_1"
> +{
> +  /* Force operand 3 into XMM0.  */
> +  rtx xmm0 = gen_rtx_REG (V16QImode, XMM0_REG);
> +  rtx unspec;
> +
> +  emit_insn (gen_rtx_SET (VOIDmode, xmm0, operands[3]));
> +  unspec = gen_rtx_UNSPEC (V16QImode,
> +                          gen_rtvec (3, operands[1],
> +                                     operands[2],
> +                                     xmm0),
> +                          UNSPEC_BLENDV);
> +  emit_insn (gen_rtx_SET (VOIDmode, operands[0], unspec));
> +  DONE;
> +})
> +
> +(define_insn "*sse4_1_pblendvb"
>   [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
>        (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand"  "0")
>                       (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
> @@ -9597,9 +9639,14 @@
>                                     operands[3], operands[4],
>                                     operands[5], operands[6]));
>   if (xmm0)
> -    emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
> -                                    operands[3], operands[4],
> -                                    operands[5], operands[6]));
> +    {
> +      /* Force operand 1 into XMM0.  */
> +      rtx sse0 = gen_rtx_REG (V16QImode, XMM0_REG);
> +      emit_insn (gen_sse4_2_pcmpestrm (sse0, operands[2],
> +                                      operands[3], operands[4],
> +                                      operands[5], operands[6]));
> +      emit_insn (gen_rtx_SET (VOIDmode, operands[1], sse0));
> +    }
>   if (flags && !(ecx || xmm0))
>     emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
>                                           operands[2], operands[3],
> @@ -9722,8 +9769,13 @@
>     emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
>                                     operands[3], operands[4]));
>   if (xmm0)
> -    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
> -                                    operands[3], operands[4]));
> +    {
> +      /* Force operand 1 into XMM0.  */
> +      rtx sse0 = gen_rtx_REG (V16QImode, XMM0_REG);
> +      emit_insn (gen_sse4_2_pcmpistrm (sse0, operands[2],
> +                                      operands[3], operands[4]));
> +      emit_insn (gen_rtx_SET (VOIDmode, operands[1], sse0));
> +    }
>   if (flags && !(ecx || xmm0))
>     emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
>                                           operands[2], operands[3],
> --- gcc/testsuite/gcc.dg/torture/pr40470-1.c.xmm0       2009-06-17 06:45:34.000000000 -0700
> +++ gcc/testsuite/gcc.dg/torture/pr40470-1.c    2009-06-17 06:45:06.000000000 -0700
> @@ -0,0 +1,22 @@
> +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> +/* { dg-options "-msse4" } */
> +#include <nmmintrin.h>
> +__m128i load (char *);
> +char *
> +foo (char *p1, char *p2,
> +     int bmsk, __m128i mask1, __m128i mask2)
> +{
> +  int len = 0;
> +  __m128i frag1, frag2;
> +  int  cmp_s;
> +  if( !p2[0]) return p1;
> +  if( !p1[0] ) return NULL;
> +  frag2 = load (p2);
> +  frag1 = load (p1);
> +  frag2 = _mm_blendv_epi8(frag2, mask2, mask1);
> +  frag1 = _mm_blendv_epi8(frag1, mask2, mask1);
> +  cmp_s = _mm_cmpistrs(frag2, frag1, 0x0c);
> +  if( cmp_s )
> +    __asm("bsfl %[bmsk], %[len]" : [len] "=r" (len) : [bmsk] "r" (bmsk) );
> +  return p1 + len;
> +}
> --- gcc/testsuite/gcc.dg/torture/pr40470-2.c.xmm0       2009-06-17 06:45:39.000000000 -0700
> +++ gcc/testsuite/gcc.dg/torture/pr40470-2.c    2009-06-17 06:49:46.000000000 -0700
> @@ -0,0 +1,27 @@
> +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> +/* { dg-options "-msse4" } */
> +#include <nmmintrin.h>
> +__m128i load (char *);
> +char *
> +foo (const unsigned char *s1, const unsigned char *s2,
> +     int bmsk, __m128i frag2)
> +{
> +  int len = 0;
> +  char *p1 = (char  *) s1;
> +  char *p2 = (char  *) s2;
> +  __m128i frag1, fruc, mask;
> +  int cmp_c, cmp_s;
> +  if( !p2[0]) return (char *) s1;
> +  if( !p1[0] ) return NULL;
> +  if( p2[1]) frag2 = load (p2);
> +  frag1 = load (p1);
> +  fruc = _mm_loadu_si128 ((__m128i *) s1);
> +  mask = _mm_cmpistrm(fruc, frag2, 0x44);
> +  frag2 = _mm_blendv_epi8(frag2, mask, mask);
> +  frag1 = _mm_blendv_epi8(frag1, mask, mask);
> +  cmp_c = _mm_cmpistrc(frag2, frag1, 0x0c);
> +  cmp_s = _mm_cmpistrs(frag2, frag1, 0x0c);
> +  if( cmp_s  & cmp_c  )
> +    __asm("bsfl %[bmsk], %[len]" : [len] "=r" (len) : [bmsk] "r" (bmsk) );
> +  return p2 + len;
> +}
>



More information about the Gcc-patches mailing list