This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH, rs6000] (v2) Fold vector shifts in GIMPLE
- From: Richard Biener <richard dot guenther at gmail dot com>
- To: will_schmidt at vnet dot ibm dot com
- Cc: GCC Patches <gcc-patches at gcc dot gnu dot org>, Segher Boessenkool <segher at kernel dot crashing dot org>, Bill Schmidt <wschmidt at linux dot vnet dot ibm dot com>, David Edelsohn <dje dot gcc at gmail dot com>, Jakub Jelinek <jakub at redhat dot com>
- Date: Tue, 13 Jun 2017 10:03:48 +0200
- Subject: Re: [PATCH, rs6000] (v2) Fold vector shifts in GIMPLE
- Authentication-results: sourceware.org; auth=none
- References: <1497304579.24125.144.camel@brimstone.rchland.ibm.com>
On Mon, Jun 12, 2017 at 11:56 PM, Will Schmidt
<will_schmidt@vnet.ibm.com> wrote:
> Hi,
>
> [PATCH, rs6000] (v2) Fold vector shifts in GIMPLE
>
> Add support for early expansion of vector shifts. Including
> vec_sl (shift left), vec_sr (shift right),
> vec_sra (shift right algebraic), vec_rl (rotate left).
> Part of this includes adding the vector shift right instructions to
> the list of those instructions having an unsigned second argument.
>
> The VSR (vector shift right) folding is a bit more complex than
> the others. This is due to requiring arg0 be unsigned before the
> gimple RSHIFT_EXPR assignment is built, which is required for an
> algebraic shift.
>
> [V2 update] Guard the folding of left shifts with TYPE_OVERFLOW_WRAPS.
> Add -fwrapv test variations for the left shifts.
>
> Sniff-tests passed. full regtest still running. OK for trunk?
>
> Thanks,
> -Will
>
>
>
> [gcc]
>
> 2017-06-12 Will Schmidt <will_schmidt@vnet.ibm.com>
>
> * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add handling
> for early expansion of vector shifts (sl,sr,sra,rl).
> (builtin_function_type): Add vector shift right instructions
> to the unsigned argument list.
>
> [gcc/testsuite]
>
> 2017-06-12 Will Schmidt <will_schmidt@vnet.ibm.com>
>
> * testsuite/gcc.target/powerpc/fold-vec-shift-char.c: New.
> * testsuite/gcc.target/powerpc/fold-vec-shift-int.c: New.
> * testsuite/gcc.target/powerpc/fold-vec-shift-longlong.c: New.
> * testsuite/gcc.target/powerpc/fold-vec-shift-short.c: New.
> * testsuite/gcc.target/powerpc/fold-vec-shift-left.c: New.
> * testsuite/gcc.target/powerpc/fold-vec-shift-left-fwrapv.c: New.
>
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 63ca2d1..55592fb 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -16588,6 +16588,83 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> gsi_replace (gsi, g, true);
> return true;
> }
> + /* Flavors of vec_rotate_left. */
> + case ALTIVEC_BUILTIN_VRLB:
> + case ALTIVEC_BUILTIN_VRLH:
> + case ALTIVEC_BUILTIN_VRLW:
> + case P8V_BUILTIN_VRLD:
> + {
> + arg0 = gimple_call_arg (stmt, 0);
> + arg1 = gimple_call_arg (stmt, 1);
> + lhs = gimple_call_lhs (stmt);
> + gimple *g = gimple_build_assign (lhs, LROTATE_EXPR, arg0, arg1);
> + gimple_set_location (g, gimple_location (stmt));
> + gsi_replace (gsi, g, true);
> + return true;
> + }
> + /* Flavors of vector shift right algebraic.
> + * vec_sra{b,h,w} -> vsra{b,h,w}. */
> + case ALTIVEC_BUILTIN_VSRAB:
> + case ALTIVEC_BUILTIN_VSRAH:
> + case ALTIVEC_BUILTIN_VSRAW:
> + case P8V_BUILTIN_VSRAD:
> + {
> + arg0 = gimple_call_arg (stmt, 0);
> + arg1 = gimple_call_arg (stmt, 1);
> + lhs = gimple_call_lhs (stmt);
> + gimple *g = gimple_build_assign (lhs, RSHIFT_EXPR, arg0, arg1);
> + gimple_set_location (g, gimple_location (stmt));
> + gsi_replace (gsi, g, true);
> + return true;
> + }
> + /* Flavors of vector shift left.
> + * builtin_altivec_vsl{b,h,w} -> vsl{b,h,w}. */
> + case ALTIVEC_BUILTIN_VSLB:
> + case ALTIVEC_BUILTIN_VSLH:
> + case ALTIVEC_BUILTIN_VSLW:
> + case P8V_BUILTIN_VSLD:
> + {
> + arg0 = gimple_call_arg (stmt, 0);
> + if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0)))
> + && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0))))
> + return false;
> + arg1 = gimple_call_arg (stmt, 1);
> + lhs = gimple_call_lhs (stmt);
> + gimple *g = gimple_build_assign (lhs, LSHIFT_EXPR, arg0, arg1);
> + gimple_set_location (g, gimple_location (stmt));
> + gsi_replace (gsi, g, true);
> + return true;
> + }
> + /* Flavors of vector shift right. */
> + case ALTIVEC_BUILTIN_VSRB:
> + case ALTIVEC_BUILTIN_VSRH:
> + case ALTIVEC_BUILTIN_VSRW:
> + case P8V_BUILTIN_VSRD:
> + {
> + arg0 = gimple_call_arg (stmt, 0);
> + arg1 = gimple_call_arg (stmt, 1);
> + lhs = gimple_call_lhs (stmt);
> + gimple *g;
> + /* convert arg0 to unsigned. */
> + arg0 = convert (unsigned_type_for (TREE_TYPE (arg0)), arg0);
Please do not use 'convert', instead do ...
> + tree arg0_uns = create_tmp_reg_or_ssa_name
> + (unsigned_type_for (TREE_TYPE (arg0)));
> + g = gimple_build_assign (arg0_uns, arg0);
g = gimple_build_assign (arg0_uns, VIEW_CONVERT_EXPR, unsigned_type, arg0);
You also want to avoid spitting out useless copies here if the
arg/result is already unsigned,
like via
tree arg0_uns = arg0;
if (! TYPE_UNSIGNED (TREE_TYPE (arg0_uns)))
{
...
}
> + gimple_set_location (g, gimple_location (stmt));
> + gsi_insert_before (gsi, g, GSI_SAME_STMT);
> + /* convert lhs to unsigned and do the shift. */
Just use lhs if it has the same sign as arg0_uns.
> + tree lhs_uns = create_tmp_reg_or_ssa_name
> + (unsigned_type_for (TREE_TYPE (lhs)));
You can re-use the type of arg0_uns here.
> + g = gimple_build_assign (lhs_uns, RSHIFT_EXPR, arg0_uns, arg1);
> + gimple_set_location (g, gimple_location (stmt));
> + gsi_insert_before (gsi, g, GSI_SAME_STMT);
> + /* convert lhs back to a signed type for the return. */
> + lhs_uns = convert (signed_type_for (TREE_TYPE (lhs)),lhs_uns);
> + g = gimple_build_assign (lhs, lhs_uns);
See above for how to perform the conversion.
Note that you could use the gimple_build convenience to shorten the code
sequence above to
gimple_seq stmts = NULL;
tree arg0_uns = gimple_build (&stmts, VIEW_CONVERT_EXPR,
unsigned_type_for (TREE_TYPE (arg0)), arg0);
tree res = gimple_build (&stmts, RSHIFT_EXPR, TREE_TYPE (arg0_uns),
arg0_uns, arg1);
res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res);
gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
update_call_from_tree (gsi, res);
The above gimple_build sequence will fold all the stmts thus remove
useless conversions and apply constant folding, etc.
Richard.
> + gimple_set_location (g, gimple_location (stmt));
> + gsi_replace (gsi, g, true);
> + return true;
> + }
> default:
> break;
> }
> @@ -18090,6 +18167,14 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
> h.uns_p[2] = 1;
> break;
>
> + /* unsigned second arguments (vector shift right). */
> + case ALTIVEC_BUILTIN_VSRB:
> + case ALTIVEC_BUILTIN_VSRH:
> + case ALTIVEC_BUILTIN_VSRW:
> + case P8V_BUILTIN_VSRD:
> + h.uns_p[2] = 1;
> + break;
> +
> default:
> break;
> }
> diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-char.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-char.c
> new file mode 100644
> index 0000000..ebe91e7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-char.c
> @@ -0,0 +1,66 @@
> +/* Verify that overloaded built-ins for vec_sl with char
> + inputs produce the right results. */
> +
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_altivec_ok } */
> +/* { dg-options "-maltivec -O2" } */
> +
> +#include <altivec.h>
> +
> +//# vec_sl - shift left
> +//# vec_sr - shift right
> +//# vec_sra - shift right algebraic
> +//# vec_rl - rotate left
> +
> +vector signed char
> +testsl_signed (vector signed char x, vector unsigned char y)
> +{
> + return vec_sl (x, y);
> +}
> +
> +vector unsigned char
> +testsl_unsigned (vector unsigned char x, vector unsigned char y)
> +{
> + return vec_sl (x, y);
> +}
> +
> +vector signed char
> +testsr_signed (vector signed char x, vector unsigned char y)
> +{
> + return vec_sr (x, y);
> +}
> +
> +vector unsigned char
> +testsr_unsigned (vector unsigned char x, vector unsigned char y)
> +{
> + return vec_sr (x, y);
> +}
> +
> +vector signed char
> +testsra_signed (vector signed char x, vector unsigned char y)
> +{
> + return vec_sra (x, y);
> +}
> +
> +vector unsigned char
> +testsra_unsigned (vector unsigned char x, vector unsigned char y)
> +{
> + return vec_sra (x, y);
> +}
> +
> +vector signed char
> +testrl_signed (vector signed char x, vector unsigned char y)
> +{
> + return vec_rl (x, y);
> +}
> +
> +vector unsigned char
> +testrl_unsigned (vector unsigned char x, vector unsigned char y)
> +{
> + return vec_rl (x, y);
> +}
> +
> +/* { dg-final { scan-assembler-times "vslb" 2 } } */
> +/* { dg-final { scan-assembler-times "vsrb" 2 } } */
> +/* { dg-final { scan-assembler-times "vsrab" 2 } } */
> +/* { dg-final { scan-assembler-times "vrlb" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-int.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-int.c
> new file mode 100644
> index 0000000..e9c5fe1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-int.c
> @@ -0,0 +1,61 @@
> +/* Verify that overloaded built-ins for vec_sl with int
> + inputs produce the right results. */
> +
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_altivec_ok } */
> +/* { dg-options "-maltivec -O2" } */
> +
> +#include <altivec.h>
> +
> +vector signed int
> +testsl_signed (vector signed int x, vector unsigned int y)
> +{
> + return vec_sl (x, y);
> +}
> +
> +vector unsigned int
> +testsl_unsigned (vector unsigned int x, vector unsigned int y)
> +{
> + return vec_sl (x, y);
> +}
> +
> +vector signed int
> +testsr_signed (vector signed int x, vector unsigned int y)
> +{
> + return vec_sr (x, y);
> +}
> +
> +vector unsigned int
> +testsr_unsigned (vector unsigned int x, vector unsigned int y)
> +{
> + return vec_sr (x, y);
> +}
> +
> +vector signed int
> +testsra_signed (vector signed int x, vector unsigned int y)
> +{
> + return vec_sra (x, y);
> +}
> +
> +vector unsigned int
> +testsra_unsigned (vector unsigned int x, vector unsigned int y)
> +{
> + return vec_sra (x, y);
> +}
> +
> +vector signed int
> +testrl_signed (vector signed int x, vector unsigned int y)
> +{
> + return vec_rl (x, y);
> +}
> +
> +vector unsigned int
> +testrl_unsigned (vector unsigned int x, vector unsigned int y)
> +{
> + return vec_rl (x, y);
> +}
> +
> +/* { dg-final { scan-assembler-times "vslw" 2 } } */
> +/* { dg-final { scan-assembler-times "vsrw" 2 } } */
> +/* { dg-final { scan-assembler-times "vsraw" 2 } } */
> +/* { dg-final { scan-assembler-times "vrlw" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-left-fwrapv.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-left-fwrapv.c
> new file mode 100644
> index 0000000..4e0dc66
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-left-fwrapv.c
> @@ -0,0 +1,61 @@
> +/* Verify that overloaded built-ins for vec_sl produce the right results. */
> +/* This test covers the shift left tests with the -fwrapv option. */
> +
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_altivec_ok } */
> +/* { dg-options "-maltivec -O2 -fwrapv" } */
> +
> +#include <altivec.h>
> +
> +vector signed char
> +testsl_signed_char (vector signed char x, vector unsigned char y)
> +{
> + return vec_sl (x, y);
> +}
> +
> +vector unsigned char
> +testsl_unsigned_char (vector unsigned char x, vector unsigned char y)
> +{
> + return vec_sl (x, y);
> +}
> +
> +vector signed short
> +testsl_signed_short (vector signed short x, vector unsigned short y)
> +{
> + return vec_sl (x, y);
> +}
> +
> +vector unsigned short
> +testsl_unsigned_short (vector unsigned short x, vector unsigned short y)
> +{
> + return vec_sl (x, y);
> +}
> +
> +vector signed int
> +testsl_signed_int (vector signed int x, vector unsigned int y)
> +{
> + return vec_sl (x, y);
> +}
> +
> +vector unsigned int
> +testsl_unsigned_int (vector unsigned int x, vector unsigned int y)
> +{
> + return vec_sl (x, y);
> +}
> +
> +vector signed long long
> +testsl_signed_longlong (vector signed long long x, vector unsigned long long y)
> +{
> + return vec_sl (x, y);
> +}
> +
> +vector unsigned long long
> +testsl_unsigned_longlong (vector unsigned long long x, vector unsigned long long y)
> +{
> + return vec_sl (x, y);
> +}
> +
> +/* { dg-final { scan-assembler-times "vslb" 2 } } */
> +/* { dg-final { scan-assembler-times "vslh" 2 } } */
> +/* { dg-final { scan-assembler-times "vslw" 2 } } */
> +/* { dg-final { scan-assembler-times "vsld" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-left.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-left.c
> new file mode 100644
> index 0000000..bbc3f01
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-left.c
> @@ -0,0 +1,61 @@
> +/* cross section of shift tests specific for shift-left.
> + * This is a counterpart to the fold-vec-shift-left-fwrapv test. */
> +
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_altivec_ok } */
> +/* { dg-options "-maltivec -O2" } */
> +
> +#include <altivec.h>
> +
> +vector signed char
> +testsl_signed_char (vector signed char x, vector unsigned char y)
> +{
> + return vec_sl (x, y);
> +}
> +
> +vector unsigned char
> +testsl_unsigned_char (vector unsigned char x, vector unsigned char y)
> +{
> + return vec_sl (x, y);
> +}
> +
> +vector signed short
> +testsl_signed_short (vector signed short x, vector unsigned short y)
> +{
> + return vec_sl (x, y);
> +}
> +
> +vector unsigned short
> +testsl_unsigned_short (vector unsigned short x, vector unsigned short y)
> +{
> + return vec_sl (x, y);
> +}
> +
> +vector signed int
> +testsl_signed_int (vector signed int x, vector unsigned int y)
> +{
> + return vec_sl (x, y);
> +}
> +
> +vector unsigned int
> +testsl_unsigned_int (vector unsigned int x, vector unsigned int y)
> +{
> + return vec_sl (x, y);
> +}
> +
> +vector signed long long
> +testsl_signed_longlong (vector signed long long x, vector unsigned long long y)
> +{
> + return vec_sl (x, y);
> +}
> +
> +vector unsigned long long
> +testsl_unsigned_longlong (vector unsigned long long x, vector unsigned long long y)
> +{
> + return vec_sl (x, y);
> +}
> +
> +/* { dg-final { scan-assembler-times "vslb" 2 } } */
> +/* { dg-final { scan-assembler-times "vslh" 2 } } */
> +/* { dg-final { scan-assembler-times "vslw" 2 } } */
> +/* { dg-final { scan-assembler-times "vsld" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-longlong.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-longlong.c
> new file mode 100644
> index 0000000..97b82cf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-longlong.c
> @@ -0,0 +1,63 @@
> +/* Verify that overloaded built-ins for vec_sl with long long
> + inputs produce the right results. */
> +
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_p8vector_ok } */
> +/* { dg-options "-mpower8-vector -O2" } */
> +
> +#include <altivec.h>
> +
> +vector signed long long
> +testsl_signed (vector signed long long x, vector unsigned long long y)
> +{
> + return vec_sl (x, y);
> +}
> +
> +vector unsigned long long
> +testsl_unsigned (vector unsigned long long x, vector unsigned long long y)
> +{
> + return vec_sl (x, y);
> +}
> +
> +vector signed long long
> +testsr_signed (vector signed long long x, vector unsigned long long y)
> +{
> + return vec_sr (x, y);
> +}
> +
> +vector unsigned long long
> +testsr_unsigned (vector unsigned long long x, vector unsigned long long y)
> +{
> + return vec_sr (x, y);
> +}
> +
> +vector signed long long
> +testsra_signed (vector signed long long x, vector unsigned long long y)
> +{
> + return vec_sra (x, y);
> +}
> +
> +/* watch for PR 79544 here (vsrd / vsrad issue) */
> +vector unsigned long long
> +testsra_unsigned (vector unsigned long long x, vector unsigned long long y)
> +{
> + return vec_sra (x, y);
> +}
> +
> +vector signed long long
> +testrl_signed (vector signed long long x, vector unsigned long long y)
> +{
> + return vec_rl (x, y);
> +}
> +
> +vector unsigned long long
> +testrl_unsigned (vector unsigned long long x, vector unsigned long long y)
> +{
> + return vec_rl (x, y);
> +}
> +
> +/* { dg-final { scan-assembler-times "vsld" 2 } } */
> +/* { dg-final { scan-assembler-times "vsrd" 2 } } */
> +/* { dg-final { scan-assembler-times "vsrad" 2 } } */
> +/* { dg-final { scan-assembler-times "vrld" 2 } } */
> +
> diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-short.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-short.c
> new file mode 100644
> index 0000000..4ca7c18
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-short.c
> @@ -0,0 +1,61 @@
> +/* Verify that overloaded built-ins for vec_sl with short
> + inputs produce the right results. */
> +
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_altivec_ok } */
> +/* { dg-options "-maltivec -O2" } */
> +
> +#include <altivec.h>
> +
> +vector signed short
> +testsl_signed (vector signed short x, vector unsigned short y)
> +{
> + return vec_sl (x, y);
> +}
> +
> +vector unsigned short
> +testsl_unsigned (vector unsigned short x, vector unsigned short y)
> +{
> + return vec_sl (x, y);
> +}
> +
> +vector signed short
> +testsr_signed (vector signed short x, vector unsigned short y)
> +{
> + return vec_sr (x, y);
> +}
> +
> +vector unsigned short
> +testsr_unsigned (vector unsigned short x, vector unsigned short y)
> +{
> + return vec_sr (x, y);
> +}
> +
> +vector signed short
> +testsra_signed (vector signed short x, vector unsigned short y)
> +{
> + return vec_sra (x, y);
> +}
> +
> +vector unsigned short
> +testsra_unsigned (vector unsigned short x, vector unsigned short y)
> +{
> + return vec_sra (x, y);
> +}
> +
> +vector signed short
> +testrl_signed (vector signed short x, vector unsigned short y)
> +{
> + return vec_rl (x, y);
> +}
> +
> +vector unsigned short
> +testrl_unsigned (vector unsigned short x, vector unsigned short y)
> +{
> + return vec_rl (x, y);
> +}
> +
> +/* { dg-final { scan-assembler-times "vslh" 2 } } */
> +/* { dg-final { scan-assembler-times "vsrh" 2 } } */
> +/* { dg-final { scan-assembler-times "vsrah" 2 } } */
> +/* { dg-final { scan-assembler-times "vrlh" 2 } } */
>
>