This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH, rs6000] (v2) GIMPLE folding for vector compares


On Tue, Nov 14, 2017 at 11:11 PM, Will Schmidt
<will_schmidt@vnet.ibm.com> wrote:
>
> Hi,
>   Add support for gimple folding of vec_cmp_{eq,ge,gt,le,ne}
> for the integer data types.
>
> As part of this change, several define_insn stanzas have been added/updated
> in vsx.md that specify the "ne: -> not: + eq: " combinations to allow for the generation
> of the desired vcmpne[bhw] instructions, where we otherwise
> would have generated a vcmpeq + vnor combination.  The defines
> also obsoleted the need for the UNSPEC versions of the same, so this ends up
> being just an update to those existing defines.
>
> Several entries have been added to the switch statement in
> builtin_function_type to identify the builtins having unsigned arguments.
>
> A handful of existing tests required updates to their specified optimization
> levels to continue to generate the desired code.  builtins-3-p9.c in particular
> has been updated to reflect improved code gen with the higher specified
> optimization level.
> Testcase coverage is otherwise handled by the already-in-tree
> gcc.target/powerpc/fold-vec-cmp-*.c tests.
>
> Per feedback from the prior version, v2 changes also include:
>   * Reworked the actual folding to use a VEC_COND_EXPR.  For cleanliness, I
>   moved this to a new fold_build_vec_cmp() helper function, which itself
>   is based on build_vec_cmp() as found in typeck.c.
>   * Added an additional fold_compare_helper() function to further factor out
>   the steps that are common to all of the vector compare operations.
>
> Testing is currently underway on P6 and newer. OK for trunk?

The folding part looks good to me.

Richard.

> Thanks,
> -Will
>
>
> 2017-11-14  Will Schmidt  <will_schmidt@vnet.ibm.com>
> [gcc]
>         * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add support for
>         folding of vector compares.
>         (fold_build_vec_cmp): New helper function.
>         (fold_compare_helper): New helper function.
>         (builtin_function_type): Add compare builtins to the list of functions
>         having unsigned arguments.
>         * config/rs6000/vsx.md (vcmpneb, vcmpneh, vcmpnew): Update to specify
>         the not+eq combination.
>
>     [testsuite]
>         * gcc.target/powerpc/builtins-3-p9.c: Add -O1, update
>         expected codegen checks.
>         * gcc.target/powerpc/vec-cmp-sel.c: Mark vars as volatile.
>         * gcc.target/powerpc/vsu/vec-cmpne-0.c: Add -O1.
>         * gcc.target/powerpc/vsu/vec-cmpne-1.c: Add -O1.
>         * gcc.target/powerpc/vsu/vec-cmpne-2.c: Add -O1.
>         * gcc.target/powerpc/vsu/vec-cmpne-3.c: Add -O1.
>         * gcc.target/powerpc/vsu/vec-cmpne-4.c: Add -O1.
>         * gcc.target/powerpc/vsu/vec-cmpne-5.c: Add -O1.
>         * gcc.target/powerpc/vsu/vec-cmpne-6.c: Add -O1.
>
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 2c80a2f..0317324 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -16206,10 +16206,40 @@ rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
>      default:
>        return false;
>      }
>  }
>
> +/*  Helper function to handle the gimple folding of a vector compare
> +    operation.  This sets up true/false vectors, and uses the
> +    VEC_COND_EXPR operation.
> +    'code' indicates which comparison is to be made. (EQ, GT, ...).
> +    'type' indicates the type of the result.  */
> +static tree
> +fold_build_vec_cmp (tree_code code, tree type,
> +                   tree arg0, tree arg1)
> +{
> +  tree cmp_type = build_same_sized_truth_vector_type (type);
> +  tree zero_vec = build_zero_cst (type);
> +  tree minus_one_vec = build_minus_one_cst (type);
> +  tree cmp = fold_build2 (code, cmp_type, arg0, arg1);
> +  return fold_build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
> +}
> +
> +/* Helper function to handle the in-between steps for the
> +   vector compare built-ins.  */
> +static void
> +fold_compare_helper (gimple_stmt_iterator *gsi, tree_code code, gimple *stmt)
> +{
> +  tree arg0 = gimple_call_arg (stmt, 0);
> +  tree arg1 = gimple_call_arg (stmt, 1);
> +  tree lhs = gimple_call_lhs (stmt);
> +  gimple *g = gimple_build_assign (lhs,
> +               fold_build_vec_cmp (code, TREE_TYPE (lhs), arg0, arg1));
> +  gimple_set_location (g, gimple_location (stmt));
> +  gsi_replace (gsi, g, true);
> +}
> +
>  /* Fold a machine-dependent built-in in GIMPLE.  (For folding into
>     a constant, use rs6000_fold_builtin.)  */
>
>  bool
>  rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> @@ -16701,10 +16731,67 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>         gimple_set_location (g, gimple_location (stmt));
>         gsi_replace (gsi, g, true);
>         return true;
>        }
>
> +    /* Vector compares; EQ, NE, GE, GT, LE.  */
> +    case ALTIVEC_BUILTIN_VCMPEQUB:
> +    case ALTIVEC_BUILTIN_VCMPEQUH:
> +    case ALTIVEC_BUILTIN_VCMPEQUW:
> +    case P8V_BUILTIN_VCMPEQUD:
> +      {
> +       fold_compare_helper (gsi, EQ_EXPR, stmt);
> +       return true;
> +      }
> +
> +    case P9V_BUILTIN_CMPNEB:
> +    case P9V_BUILTIN_CMPNEH:
> +    case P9V_BUILTIN_CMPNEW:
> +      {
> +       fold_compare_helper (gsi, NE_EXPR, stmt);
> +       return true;
> +      }
> +
> +    case VSX_BUILTIN_CMPGE_16QI:
> +    case VSX_BUILTIN_CMPGE_U16QI:
> +    case VSX_BUILTIN_CMPGE_8HI:
> +    case VSX_BUILTIN_CMPGE_U8HI:
> +    case VSX_BUILTIN_CMPGE_4SI:
> +    case VSX_BUILTIN_CMPGE_U4SI:
> +    case VSX_BUILTIN_CMPGE_2DI:
> +    case VSX_BUILTIN_CMPGE_U2DI:
> +      {
> +       fold_compare_helper (gsi, GE_EXPR, stmt);
> +       return true;
> +      }
> +
> +    case ALTIVEC_BUILTIN_VCMPGTSB:
> +    case ALTIVEC_BUILTIN_VCMPGTUB:
> +    case ALTIVEC_BUILTIN_VCMPGTSH:
> +    case ALTIVEC_BUILTIN_VCMPGTUH:
> +    case ALTIVEC_BUILTIN_VCMPGTSW:
> +    case ALTIVEC_BUILTIN_VCMPGTUW:
> +    case P8V_BUILTIN_VCMPGTUD:
> +    case P8V_BUILTIN_VCMPGTSD:
> +      {
> +       fold_compare_helper (gsi, GT_EXPR, stmt);
> +       return true;
> +      }
> +
> +    case VSX_BUILTIN_CMPLE_16QI:
> +    case VSX_BUILTIN_CMPLE_U16QI:
> +    case VSX_BUILTIN_CMPLE_8HI:
> +    case VSX_BUILTIN_CMPLE_U8HI:
> +    case VSX_BUILTIN_CMPLE_4SI:
> +    case VSX_BUILTIN_CMPLE_U4SI:
> +    case VSX_BUILTIN_CMPLE_2DI:
> +    case VSX_BUILTIN_CMPLE_U2DI:
> +      {
> +       fold_compare_helper (gsi, LE_EXPR, stmt);
> +       return true;
> +      }
> +
>      default:
>         if (TARGET_DEBUG_BUILTIN)
>            fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
>                     fn_code, fn_name1, fn_name2);
>        break;
> @@ -18260,10 +18347,27 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
>      case MISC_BUILTIN_UNPACK_TD:
>      case MISC_BUILTIN_UNPACK_V1TI:
>        h.uns_p[0] = 1;
>        break;
>
> +      /* unsigned arguments, bool return (compares).  */
> +    case ALTIVEC_BUILTIN_VCMPEQUB:
> +    case ALTIVEC_BUILTIN_VCMPEQUH:
> +    case ALTIVEC_BUILTIN_VCMPEQUW:
> +    case P8V_BUILTIN_VCMPEQUD:
> +    case VSX_BUILTIN_CMPGE_U16QI:
> +    case VSX_BUILTIN_CMPGE_U8HI:
> +    case VSX_BUILTIN_CMPGE_U4SI:
> +    case VSX_BUILTIN_CMPGE_U2DI:
> +    case ALTIVEC_BUILTIN_VCMPGTUB:
> +    case ALTIVEC_BUILTIN_VCMPGTUH:
> +    case ALTIVEC_BUILTIN_VCMPGTUW:
> +    case P8V_BUILTIN_VCMPGTUD:
> +      h.uns_p[1] = 1;
> +      h.uns_p[2] = 1;
> +      break;
> +
>        /* unsigned arguments for 128-bit pack instructions.  */
>      case MISC_BUILTIN_PACK_TD:
>      case MISC_BUILTIN_PACK_V1TI:
>        h.uns_p[1] = 1;
>        h.uns_p[2] = 1;
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index 335e5c1..0c00774 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -4596,16 +4596,16 @@
>    emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
>    emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
>    DONE;
>  })
>
> -;; Vector Compare Not Equal Byte
> +;; Vector Compare Not Equal Byte (specified/not+eq:)
>  (define_insn "vcmpneb"
>    [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
> -       (unspec:V16QI [(match_operand:V16QI 1 "altivec_register_operand" "v")
> -                      (match_operand:V16QI 2 "altivec_register_operand" "v")]
> -        UNSPEC_VCMPNEB))]
> +        (not:V16QI
> +          (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
> +                    (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
>    "TARGET_P9_VECTOR"
>    "vcmpneb %0,%1,%2"
>    [(set_attr "type" "vecsimple")])
>
>  ;; Vector Compare Not Equal or Zero Byte
> @@ -4617,16 +4617,16 @@
>          UNSPEC_VCMPNEZB))]
>    "TARGET_P9_VECTOR"
>    "vcmpnezb %0,%1,%2"
>    [(set_attr "type" "vecsimple")])
>
> -;; Vector Compare Not Equal Half Word
> +;; Vector Compare Not Equal Half Word (specified/not+eq:)
>  (define_insn "vcmpneh"
>    [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
> -       (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
> -                     (match_operand:V8HI 2 "altivec_register_operand" "v")]
> -        UNSPEC_VCMPNEH))]
> +       (not:V8HI
> +         (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
> +                  (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
>    "TARGET_P9_VECTOR"
>    "vcmpneh %0,%1,%2"
>    [(set_attr "type" "vecsimple")])
>
>  ;; Vector Compare Not Equal or Zero Half Word
> @@ -4637,17 +4637,16 @@
>          UNSPEC_VCMPNEZH))]
>    "TARGET_P9_VECTOR"
>    "vcmpnezh %0,%1,%2"
>    [(set_attr "type" "vecsimple")])
>
> -;; Vector Compare Not Equal Word
> +;; Vector Compare Not Equal Word (specified/not+eq:)
>  (define_insn "vcmpnew"
>    [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
> -       (unspec:V4SI
> -        [(match_operand:V4SI 1 "altivec_register_operand" "v")
> -         (match_operand:V4SI 2 "altivec_register_operand" "v")]
> -        UNSPEC_VCMPNEH))]
> +       (not:V4SI
> +         (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
> +                  (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
>    "TARGET_P9_VECTOR"
>    "vcmpnew %0,%1,%2"
>    [(set_attr "type" "vecsimple")])
>
>  ;; Vector Compare Not Equal or Zero Word
> diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-3-p9.c b/gcc/testsuite/gcc.target/powerpc/builtins-3-p9.c
> index 46a31ae..9dc53da 100644
> --- a/gcc/testsuite/gcc.target/powerpc/builtins-3-p9.c
> +++ b/gcc/testsuite/gcc.target/powerpc/builtins-3-p9.c
> @@ -1,8 +1,8 @@
>  /* { dg-do compile } */
>  /* { dg-require-effective-target powerpc_p9vector_ok } */
> -/* { dg-options "-mcpu=power9" } */
> +/* { dg-options "-mcpu=power9 -O1" } */
>
>  #include <altivec.h>
>
>  vector bool char
>  test_ne_char (vector bool char x, vector bool char y)
> @@ -51,21 +51,22 @@ test_vull_bperm_vull_vuc (vector unsigned long long x,
>
>       test_ne_char              1 vcmpneb
>       test_ne_short             1 vcmpneh
>       test_ne_int               1 vcmpnew
>       test_ne_long              1 vcmpequd, 1 xxlnor inst
> -     test_nabs_long_long       1 xxspltib, 1 vsubudm, 1 vminsd
>       test_neg_long_long        1 vnegd
>       test_vull_bperm_vull_vuc  1 vbpermd
> -
> +     test_nabs_long_long (-O0) 1 xxspltib, 1 vsubudm, 1 vminsd
> +     test_nabs_long_long (-O1) 1 vnegd, vminsd
> +*/
>
>  /* { dg-final { scan-assembler-times "vcmpneb"  1 } } */
>  /* { dg-final { scan-assembler-times "vcmpneh"  1 } } */
>  /* { dg-final { scan-assembler-times "vcmpnew"  1 } } */
>  /* { dg-final { scan-assembler-times "vcmpequd" 1 } } */
>  /* { dg-final { scan-assembler-times "xxlnor"   1 } } */
> -/* { dg-final { scan-assembler-times "xxspltib" 1 } } */
> -/* { dg-final { scan-assembler-times "vsubudm"  1 } } */
> +/* { dg-final { scan-assembler-times "xxspltib" 0 } } */
> +/* { dg-final { scan-assembler-times "vsubudm"  0 } } */
>  /* { dg-final { scan-assembler-times "vminsd"   1 } } */
> -/* { dg-final { scan-assembler-times "vnegd"    1 } } */
> +/* { dg-final { scan-assembler-times "vnegd"    2 } } */
>  /* { dg-final { scan-assembler-times "vbpermd"  1 } } */
>
> diff --git a/gcc/testsuite/gcc.target/powerpc/vec-cmp-sel.c b/gcc/testsuite/gcc.target/powerpc/vec-cmp-sel.c
> index 6f3c093..f74a117 100644
> --- a/gcc/testsuite/gcc.target/powerpc/vec-cmp-sel.c
> +++ b/gcc/testsuite/gcc.target/powerpc/vec-cmp-sel.c
> @@ -10,12 +10,13 @@
>     into
>       c != {0,...,0} ? b : a  */
>
>  #include <altivec.h>
>
> +volatile vector signed long long x = { 25399, -12900 };
> +volatile vector signed long long y = { 12178, -9987 };
> +
>  vector signed long long foo () {
> -  vector signed long long x = { 25399, -12900 };
> -  vector signed long long y = { 12178, -9987 };
>    vector bool long long b = vec_cmpge (x, y);
>    vector signed long long z = vec_sel (y, x, b);
>    return z;
>  }
> diff --git a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-0.c b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-0.c
> index 8e036e3..5c09c70 100644
> --- a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-0.c
> +++ b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-0.c
> @@ -1,9 +1,9 @@
>  /* { dg-do compile { target { powerpc*-*-* } } } */
>  /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
>  /* { dg-require-effective-target powerpc_p9vector_ok } */
> -/* { dg-options "-mcpu=power9" } */
> +/* { dg-options "-mcpu=power9 -O1" } */
>
>  #include <altivec.h>
>
>  vector bool char
>  fetch_data (vector bool char *arg1_p, vector bool char *arg2_p)
> diff --git a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-1.c b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-1.c
> index e510a44..a74f739 100644
> --- a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-1.c
> +++ b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-1.c
> @@ -1,9 +1,9 @@
>  /* { dg-do compile { target { powerpc*-*-* } } } */
>  /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
>  /* { dg-require-effective-target powerpc_p9vector_ok } */
> -/* { dg-options "-mcpu=power9" } */
> +/* { dg-options "-mcpu=power9 -O1" } */
>
>  #include <altivec.h>
>
>  vector bool char
>  fetch_data (vector signed char *arg1_p, vector signed char *arg2_p)
> diff --git a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-2.c b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-2.c
> index 0ea5aa7..f7f1e0d 100644
> --- a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-2.c
> +++ b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-2.c
> @@ -1,9 +1,9 @@
>  /* { dg-do compile { target { powerpc*-*-* } } } */
>  /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
>  /* { dg-require-effective-target powerpc_p9vector_ok } */
> -/* { dg-options "-mcpu=power9" } */
> +/* { dg-options "-mcpu=power9 -O1" } */
>
>  #include <altivec.h>
>
>  vector bool char
>  fetch_data (vector unsigned char *arg1_p, vector unsigned char *arg2_p)
> diff --git a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-3.c b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-3.c
> index 6bb5ebe..8ec94bd 100644
> --- a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-3.c
> +++ b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-3.c
> @@ -1,9 +1,9 @@
>  /* { dg-do compile { target { powerpc*-*-* } } } */
>  /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
>  /* { dg-require-effective-target powerpc_p9vector_ok } */
> -/* { dg-options "-mcpu=power9" } */
> +/* { dg-options "-mcpu=power9 -O1" } */
>
>  #include <altivec.h>
>
>  vector bool short
>  fetch_data (vector signed short *arg1_p, vector signed short *arg2_p)
> diff --git a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-4.c b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-4.c
> index a8d3f17..2f47697 100644
> --- a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-4.c
> +++ b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-4.c
> @@ -1,9 +1,9 @@
>  /* { dg-do compile { target { powerpc*-*-* } } } */
>  /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
>  /* { dg-require-effective-target powerpc_p9vector_ok } */
> -/* { dg-options "-mcpu=power9" } */
> +/* { dg-options "-mcpu=power9 -O1" } */
>
>  #include <altivec.h>
>
>  vector bool short
>  fetch_data (vector unsigned short *arg1_p, vector unsigned short *arg2_p)
> diff --git a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-5.c b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-5.c
> index dae3e22..1167085 100644
> --- a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-5.c
> +++ b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-5.c
> @@ -1,9 +1,9 @@
>  /* { dg-do compile { target { powerpc*-*-* } } } */
>  /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
>  /* { dg-require-effective-target powerpc_p9vector_ok } */
> -/* { dg-options "-mcpu=power9" } */
> +/* { dg-options "-mcpu=power9 -O1" } */
>
>  #include <altivec.h>
>
>  vector bool int
>  fetch_data (vector signed int *arg1_p, vector signed int *arg2_p)
> diff --git a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-6.c b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-6.c
> index 550a353..031a48f 100644
> --- a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-6.c
> +++ b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-6.c
> @@ -1,9 +1,9 @@
>  /* { dg-do compile { target { powerpc*-*-* } } } */
>  /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
>  /* { dg-require-effective-target powerpc_p9vector_ok } */
> -/* { dg-options "-mcpu=power9" } */
> +/* { dg-options "-mcpu=power9 -O1" } */
>
>  #include <altivec.h>
>
>  vector bool int
>  fetch_data (vector unsigned int *arg1_p, vector unsigned int *arg2_p)
>
>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]