This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
Re: [PATCH] Implement absv2di2 and absv4di2 expanders for pre-avx512vl (PR target/85572)

From: Uros Bizjak <ubizjak at gmail dot com>
To: Jakub Jelinek <jakub at redhat dot com>
Cc: Kirill Yukhin <kirill dot yukhin at gmail dot com>, "gcc-patches at gcc dot gnu dot org" <gcc-patches at gcc dot gnu dot org>
Date: Tue, 8 May 2018 11:11:26 +0200
Subject: Re: [PATCH] Implement absv2di2 and absv4di2 expanders for pre-avx512vl (PR target/85572)
References: <20180430191929.GH8577@tucnak>
On Mon, Apr 30, 2018 at 9:19 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> Before avx512vl we don't have a single instruction to do V2DImode and
> V4DImode abs, but that isn't much different from say V4SImode before SSSE3
> where we also just emit a short sequence that is better than elementwise
> expansion.  Bootstrapped/regtested on x86_64-linux and i686-linux, ok for
> trunk?
>
> 2018-04-30  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/85572
>         * config/i386/i386.c (ix86_expand_sse2_abs): Handle E_V2DImode and
>         E_V4DImode.
>         * config/i386/sse.md (abs<mode>2): Use VI_AVX2 iterator instead of
>         VI1248_AVX512VL_AVX512BW.  Handle V2DImode and V4DImode if not
>         TARGET_AVX512VL using ix86_expand_sse2_abs.  Formatting fixes.
>
>         * g++.dg/other/sse2-pr85572-1.C: New test.
>         * g++.dg/other/sse2-pr85572-2.C: New test.
>         * g++.dg/other/sse4-pr85572-1.C: New test.
>         * g++.dg/other/avx2-pr85572-1.C: New test.

LGTM.

Thanks,
Uros.

> --- gcc/config/i386/i386.c.jj   2018-04-25 15:09:29.895453703 +0200
> +++ gcc/config/i386/i386.c      2018-04-30 18:31:56.027101932 +0200
> @@ -49806,39 +49806,74 @@ ix86_expand_sse2_abs (rtx target, rtx in
>
>    switch (mode)
>      {
> +    case E_V2DImode:
> +    case E_V4DImode:
> +      /* For 64-bit signed integer X, with SSE4.2 use
> +        pxor t0, t0; pcmpgtq X, t0; pxor t0, X; psubq t0, X.
> +        Otherwise handle it similarly to V4SImode, except use 64 as W instead of
> +        32 and use logical instead of arithmetic right shift (which is
> +        unimplemented), then negate the result to form the sign mask.  */
> +      if (TARGET_SSE4_2)
> +       {
> +         tmp0 = gen_reg_rtx (mode);
> +         tmp1 = gen_reg_rtx (mode);
> +         emit_move_insn (tmp1, CONST0_RTX (mode));
> +         if (mode == E_V2DImode)
> +           emit_insn (gen_sse4_2_gtv2di3 (tmp0, tmp1, input));
> +         else
> +           emit_insn (gen_avx2_gtv4di3 (tmp0, tmp1, input));
> +
> +         tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
> +                                     NULL, 0, OPTAB_DIRECT);
> +         x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
> +                                  target, 0, OPTAB_DIRECT);
> +         break;
> +       }
> +
> +      tmp0 = expand_simple_binop (mode, LSHIFTRT, input,
> +                                 GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
> +                                 NULL, 0, OPTAB_DIRECT);
> +      tmp0 = expand_simple_unop (mode, NEG, tmp0, NULL, false);
> +
> +      tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
> +                                 NULL, 0, OPTAB_DIRECT);
> +      x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
> +                              target, 0, OPTAB_DIRECT);
> +      break;
> +
> +    case E_V4SImode:
>        /* For 32-bit signed integer X, the best way to calculate the absolute
>          value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)).  */
> -      case E_V4SImode:
> -       tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
> -                                   GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
> -                                   NULL, 0, OPTAB_DIRECT);
> -       tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
> -                                   NULL, 0, OPTAB_DIRECT);
> -       x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
> -                                target, 0, OPTAB_DIRECT);
> -       break;
> +      tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
> +                                 GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
> +                                 NULL, 0, OPTAB_DIRECT);
> +      tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
> +                                 NULL, 0, OPTAB_DIRECT);
> +      x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
> +                              target, 0, OPTAB_DIRECT);
> +      break;
>
> +    case E_V8HImode:
>        /* For 16-bit signed integer X, the best way to calculate the absolute
>          value of X is max (X, -X), as SSE2 provides the PMAXSW insn.  */
> -      case E_V8HImode:
> -       tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
> +      tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
>
> -       x = expand_simple_binop (mode, SMAX, tmp0, input,
> -                                target, 0, OPTAB_DIRECT);
> -       break;
> +      x = expand_simple_binop (mode, SMAX, tmp0, input,
> +                              target, 0, OPTAB_DIRECT);
> +      break;
>
> +    case E_V16QImode:
>        /* For 8-bit signed integer X, the best way to calculate the absolute
>          value of X is min ((unsigned char) X, (unsigned char) (-X)),
>          as SSE2 provides the PMINUB insn.  */
> -      case E_V16QImode:
> -       tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
> +      tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
>
> -       x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
> -                                target, 0, OPTAB_DIRECT);
> -       break;
> +      x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
> +                              target, 0, OPTAB_DIRECT);
> +      break;
>
> -      default:
> -       gcc_unreachable ();
> +    default:
> +      gcc_unreachable ();
>      }
>
>    if (x != target)
> --- gcc/config/i386/sse.md.jj   2018-04-26 22:03:29.000000000 +0200
> +++ gcc/config/i386/sse.md      2018-04-30 19:09:18.740838508 +0200
> @@ -15174,12 +15174,14 @@ (define_insn "abs<mode>2_mask"
>     (set_attr "mode" "<sseinsnmode>")])
>
>  (define_expand "abs<mode>2"
> -  [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand")
> -       (abs:VI1248_AVX512VL_AVX512BW
> -         (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand")))]
> +  [(set (match_operand:VI_AVX2 0 "register_operand")
> +       (abs:VI_AVX2
> +         (match_operand:VI_AVX2 1 "vector_operand")))]
>    "TARGET_SSE2"
>  {
> -  if (!TARGET_SSSE3)
> +  if (!TARGET_SSSE3
> +      || ((<MODE>mode == V2DImode || <MODE>mode == V4DImode)
> +         && !TARGET_AVX512VL))
>      {
>        ix86_expand_sse2_abs (operands[0], operands[1]);
>        DONE;
> --- gcc/testsuite/g++.dg/other/sse2-pr85572-1.C.jj      2018-04-30 18:40:29.566619137 +0200
> +++ gcc/testsuite/g++.dg/other/sse2-pr85572-1.C 2018-04-30 19:04:04.111419300 +0200
> @@ -0,0 +1,14 @@
> +// PR target/85572
> +// { dg-do compile { target i?86-*-* x86_64-*-* } }
> +// { dg-options "-O2 -msse2 -mno-sse3" }
> +// { dg-final { scan-assembler-times {\mpxor\M} 2 } }
> +// { dg-final { scan-assembler-times {\mpsubq\M} 2 } }
> +// { dg-final { scan-assembler-times {\mpsrlq\M} 1 } }
> +
> +typedef long long V __attribute__((vector_size (16)));
> +
> +V
> +foo (V x)
> +{
> +  return x < 0 ? -x : x;
> +}
> --- gcc/testsuite/g++.dg/other/sse2-pr85572-2.C.jj      2018-04-30 18:43:59.057879751 +0200
> +++ gcc/testsuite/g++.dg/other/sse2-pr85572-2.C 2018-04-30 19:04:55.717493565 +0200
> @@ -0,0 +1,45 @@
> +// PR target/85572
> +// { dg-do run { target i?86-*-* x86_64-*-* } }
> +// { dg-options "-O2 -msse2" }
> +// { dg-require-effective-target sse2_runtime }
> +
> +typedef long long V __attribute__((vector_size (16)));
> +typedef long long W __attribute__((vector_size (32)));
> +
> +__attribute__((noipa)) V
> +foo (V x)
> +{
> +  return x < 0 ? -x : x;
> +}
> +
> +__attribute__((noipa)) void
> +bar (W *x, W *y)
> +{
> +  *y = *x < 0 ? -*x : *x;
> +}
> +
> +int
> +main ()
> +{
> +  V a = { 11LL, -15LL };
> +  V b = foo (a);
> +  if (b[0] != 11LL || b[1] != 15LL)
> +    __builtin_abort ();
> +  V c = { -123456789123456LL, 654321654321654LL };
> +  V d = foo (c);
> +  if (d[0] != 123456789123456LL || d[1] != 654321654321654LL)
> +    __builtin_abort ();
> +  V e = { 0, 1 };
> +  V f = foo (e);
> +  if (f[0] != 0 || f[1] != 1)
> +    __builtin_abort ();
> +  W g = { 17LL, -32LL, -123456789123456LL, 654321654321654LL }, h;
> +  bar (&g, &h);
> +  if (h[0] != 17LL || h[1] != 32LL
> +      || h[2] != 123456789123456LL || h[3] != 654321654321654LL)
> +    __builtin_abort ();
> +  W i = { 0, 1, -1, 0 }, j;
> +  bar (&i, &j);
> +  if (j[0] != 0 || j[1] != 1 || j[2] != 1 || j[3] != 0)
> +    __builtin_abort ();
> +}
> --- gcc/testsuite/g++.dg/other/sse4-pr85572-1.C.jj      2018-04-30 18:50:56.569420770 +0200
> +++ gcc/testsuite/g++.dg/other/sse4-pr85572-1.C 2018-04-30 19:03:02.687330914 +0200
> @@ -0,0 +1,14 @@
> +// PR target/85572
> +// { dg-do compile { target i?86-*-* x86_64-*-* } }
> +// { dg-options "-O2 -msse4 -mno-avx" }
> +// { dg-final { scan-assembler-times {\mpxor\M} 2 } }
> +// { dg-final { scan-assembler-times {\mpcmpgtq\M} 1 } }
> +// { dg-final { scan-assembler-times {\mpsubq\M} 1 } }
> +
> +typedef long long V __attribute__((vector_size (16)));
> +
> +V
> +foo (V x)
> +{
> +  return x < 0 ? -x : x;
> +}
> --- gcc/testsuite/g++.dg/other/avx2-pr85572-1.C.jj      2018-04-30 18:53:14.020598885 +0200
> +++ gcc/testsuite/g++.dg/other/avx2-pr85572-1.C 2018-04-30 19:02:47.316308798 +0200
> @@ -0,0 +1,21 @@
> +// PR target/85572
> +// { dg-do compile { target i?86-*-* x86_64-*-* } }
> +// { dg-options "-O2 -mavx2 -mno-avx512f" }
> +// { dg-final { scan-assembler-times {\mvpxor\M} 4 } }
> +// { dg-final { scan-assembler-times {\mvpcmpgtq\M} 2 } }
> +// { dg-final { scan-assembler-times {\mvpsubq\M} 2 } }
> +
> +typedef long long V __attribute__((vector_size (16)));
> +typedef long long W __attribute__((vector_size (32)));
> +
> +V
> +foo (V x)
> +{
> +  return x < 0 ? -x : x;
> +}
> +
> +W
> +bar (W x)
> +{
> +  return x < 0 ? -x : x;
> +}
>
>         Jakub
Follow-Ups:
- Re: [PATCH] Implement absv2di2 and absv4di2 expanders for pre-avx512vl (PR target/85572)
  - From: Uros Bizjak
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]