This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Update interface to TARGET_VECTORIZE_VEC_PERM_CONST_OK
- From: Richard Sandiford <richard dot sandiford at linaro dot org>
- To: gcc-patches at gcc dot gnu dot org
- Date: Fri, 22 Sep 2017 17:34:34 +0100
- Subject: Update interface to TARGET_VECTORIZE_VEC_PERM_CONST_OK
- Authentication-results: sourceware.org; auth=none
This patch makes TARGET_VECTORIZE_VEC_PERM_CONST_OK take the permute
vector in the form of a vec_perm_indices instead of an unsigned char *.
It follows on from the recent patch that did the same in target-independent
code.
It was easy to make ARM and AArch64 use vec_perm_indices internally
as well, and converting AArch64 helps with SVE. I did try doing the same
for the other ports, but the surgery needed was much more invasive and
much less obviously correct.
Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu.
Also tested by comparing the testsuite assembly output on at least one
target per CPU directory. OK to install?
Richard
2017-09-22 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
* target.def (vec_perm_const_ok): Change sel parameter to
vec_perm_indices.
* optabs-query.c (can_vec_perm_p): Update accordingly.
* doc/tm.texi: Regenerate.
* config/aarch64/aarch64.c (expand_vec_perm_d): Change perm to
auto_vec_perm_indices and remove separate nelt field.
(aarch64_evpc_trn, aarch64_evpc_uzp, aarch64_evpc_zip)
(aarch64_evpc_ext, aarch64_evpc_rev, aarch64_evpc_dup)
(aarch64_evpc_tbl, aarch64_expand_vec_perm_const_1)
(aarch64_expand_vec_perm_const): Update accordingly.
(aarch64_vectorize_vec_perm_const_ok): Likewise. Change sel
to vec_perm_indices.
* config/arm/arm.c (expand_vec_perm_d): Change perm to
auto_vec_perm_indices and remove separate nelt field.
(arm_evpc_neon_vuzp, arm_evpc_neon_vzip, arm_evpc_neon_vrev)
(arm_evpc_neon_vtrn, arm_evpc_neon_vext, arm_evpc_neon_vtbl)
(arm_expand_vec_perm_const_1, arm_expand_vec_perm_const): Update
accordingly.
(arm_vectorize_vec_perm_const_ok): Likewise. Change sel
to vec_perm_indices.
* config/i386/i386.c (ix86_vectorize_vec_perm_const_ok): Change
sel to vec_perm_indices.
* config/ia64/ia64.c (ia64_vectorize_vec_perm_const_ok): Likewise.
* config/mips/mips.c (mips_vectorize_vec_perm_const_ok): Likewise.
* config/powerpcspe/powerpcspe.c (rs6000_vectorize_vec_perm_const_ok):
Likewise.
* config/rs6000/rs6000.c (rs6000_vectorize_vec_perm_const_ok):
Likewise.
Index: gcc/target.def
===================================================================
--- gcc/target.def 2017-09-22 17:31:36.935337179 +0100
+++ gcc/target.def 2017-09-22 17:31:56.428954480 +0100
@@ -1847,7 +1847,7 @@ DEFHOOK
DEFHOOK
(vec_perm_const_ok,
"Return true if a vector created for @code{vec_perm_const} is valid.",
- bool, (machine_mode, const unsigned char *sel),
+ bool, (machine_mode, vec_perm_indices),
NULL)
/* Return true if the target supports misaligned store/load of a
Index: gcc/optabs-query.c
===================================================================
--- gcc/optabs-query.c 2017-09-14 17:04:19.080694343 +0100
+++ gcc/optabs-query.c 2017-09-22 17:31:56.428006577 +0100
@@ -367,7 +367,7 @@ can_vec_perm_p (machine_mode mode, bool
if (direct_optab_handler (vec_perm_const_optab, mode) != CODE_FOR_nothing
&& (sel == NULL
|| targetm.vectorize.vec_perm_const_ok == NULL
- || targetm.vectorize.vec_perm_const_ok (mode, &(*sel)[0])))
+ || targetm.vectorize.vec_perm_const_ok (mode, *sel)))
return true;
}
Index: gcc/doc/tm.texi
===================================================================
--- gcc/doc/tm.texi 2017-09-22 17:31:36.933441374 +0100
+++ gcc/doc/tm.texi 2017-09-22 17:31:56.428006577 +0100
@@ -5774,7 +5774,7 @@ correct for most targets.
Return true if vector alignment is reachable (by peeling N iterations) for the given scalar type @var{type}. @var{is_packed} is false if the scalar access using @var{type} is known to be naturally aligned.
@end deftypefn
-@deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST_OK (machine_mode, const unsigned char *@var{sel})
+@deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST_OK (machine_mode, @var{vec_perm_indices})
Return true if a vector created for @code{vec_perm_const} is valid.
@end deftypefn
Index: gcc/config/aarch64/aarch64.c
===================================================================
--- gcc/config/aarch64/aarch64.c 2017-09-21 11:53:16.681759682 +0100
+++ gcc/config/aarch64/aarch64.c 2017-09-22 17:31:56.412840135 +0100
@@ -141,8 +141,8 @@ static void aarch64_elf_asm_constructor
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
-static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
- const unsigned char *sel);
+static bool aarch64_vectorize_vec_perm_const_ok (machine_mode,
+ vec_perm_indices);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
const_tree type,
@@ -13146,9 +13146,8 @@ #define MAX_VECT_LEN 16
struct expand_vec_perm_d
{
rtx target, op0, op1;
- unsigned char perm[MAX_VECT_LEN];
+ auto_vec_perm_indices perm;
machine_mode vmode;
- unsigned char nelt;
bool one_vector_p;
bool testing_p;
};
@@ -13231,7 +13230,7 @@ aarch64_expand_vec_perm (rtx target, rtx
static bool
aarch64_evpc_trn (struct expand_vec_perm_d *d)
{
- unsigned int i, odd, mask, nelt = d->nelt;
+ unsigned int i, odd, mask, nelt = d->perm.length ();
rtx out, in0, in1, x;
rtx (*gen) (rtx, rtx, rtx);
machine_mode vmode = d->vmode;
@@ -13319,7 +13318,7 @@ aarch64_evpc_trn (struct expand_vec_perm
static bool
aarch64_evpc_uzp (struct expand_vec_perm_d *d)
{
- unsigned int i, odd, mask, nelt = d->nelt;
+ unsigned int i, odd, mask, nelt = d->perm.length ();
rtx out, in0, in1, x;
rtx (*gen) (rtx, rtx, rtx);
machine_mode vmode = d->vmode;
@@ -13406,7 +13405,7 @@ aarch64_evpc_uzp (struct expand_vec_perm
static bool
aarch64_evpc_zip (struct expand_vec_perm_d *d)
{
- unsigned int i, high, mask, nelt = d->nelt;
+ unsigned int i, high, mask, nelt = d->perm.length ();
rtx out, in0, in1, x;
rtx (*gen) (rtx, rtx, rtx);
machine_mode vmode = d->vmode;
@@ -13499,7 +13498,7 @@ aarch64_evpc_zip (struct expand_vec_perm
static bool
aarch64_evpc_ext (struct expand_vec_perm_d *d)
{
- unsigned int i, nelt = d->nelt;
+ unsigned int i, nelt = d->perm.length ();
rtx (*gen) (rtx, rtx, rtx, rtx);
rtx offset;
@@ -13563,7 +13562,7 @@ aarch64_evpc_ext (struct expand_vec_perm
static bool
aarch64_evpc_rev (struct expand_vec_perm_d *d)
{
- unsigned int i, j, diff, nelt = d->nelt;
+ unsigned int i, j, diff, nelt = d->perm.length ();
rtx (*gen) (rtx, rtx);
if (!d->one_vector_p)
@@ -13641,7 +13640,7 @@ aarch64_evpc_dup (struct expand_vec_perm
rtx out = d->target;
rtx in0;
machine_mode vmode = d->vmode;
- unsigned int i, elt, nelt = d->nelt;
+ unsigned int i, elt, nelt = d->perm.length ();
rtx lane;
elt = d->perm[0];
@@ -13686,7 +13685,7 @@ aarch64_evpc_tbl (struct expand_vec_perm
{
rtx rperm[MAX_VECT_LEN], sel;
machine_mode vmode = d->vmode;
- unsigned int i, nelt = d->nelt;
+ unsigned int i, nelt = d->perm.length ();
if (d->testing_p)
return true;
@@ -13720,12 +13719,11 @@ aarch64_expand_vec_perm_const_1 (struct
/* The pattern matching functions above are written to look for a small
number to begin the sequence (0, 1, N/2). If we begin with an index
from the second operand, we can swap the operands. */
- if (d->perm[0] >= d->nelt)
+ unsigned int nelt = d->perm.length ();
+ if (d->perm[0] >= nelt)
{
- unsigned i, nelt = d->nelt;
-
gcc_assert (nelt == (nelt & -nelt));
- for (i = 0; i < nelt; ++i)
+ for (unsigned int i = 0; i < nelt; ++i)
d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
std::swap (d->op0, d->op1);
@@ -13764,15 +13762,16 @@ aarch64_expand_vec_perm_const (rtx targe
d.vmode = GET_MODE (target);
gcc_assert (VECTOR_MODE_P (d.vmode));
- d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
d.testing_p = false;
+ nelt = GET_MODE_NUNITS (d.vmode);
+ d.perm.reserve (nelt);
for (i = which = 0; i < nelt; ++i)
{
rtx e = XVECEXP (sel, 0, i);
int ei = INTVAL (e) & (2 * nelt - 1);
which |= (ei < nelt ? 1 : 2);
- d.perm[i] = ei;
+ d.perm.quick_push (ei);
}
switch (which)
@@ -13807,19 +13806,18 @@ aarch64_expand_vec_perm_const (rtx targe
}
static bool
-aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
- const unsigned char *sel)
+aarch64_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
{
struct expand_vec_perm_d d;
unsigned int i, nelt, which;
bool ret;
d.vmode = vmode;
- d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
d.testing_p = true;
- memcpy (d.perm, sel, nelt);
+ d.perm.safe_splice (sel);
/* Calculate whether all elements are in one vector. */
+ nelt = sel.length ();
for (i = which = 0; i < nelt; ++i)
{
unsigned char e = d.perm[i];
Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c 2017-09-22 17:22:08.191305805 +0100
+++ gcc/config/arm/arm.c 2017-09-22 17:31:56.414735941 +0100
@@ -287,8 +287,7 @@ static int arm_cortex_a5_branch_cost (bo
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);
-static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
- const unsigned char *sel);
+static bool arm_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices);
static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
@@ -28657,9 +28656,8 @@ #define MAX_VECT_LEN 16
struct expand_vec_perm_d
{
rtx target, op0, op1;
- unsigned char perm[MAX_VECT_LEN];
+ auto_vec_perm_indices perm;
machine_mode vmode;
- unsigned char nelt;
bool one_vector_p;
bool testing_p;
};
@@ -28766,7 +28764,7 @@ neon_pair_endian_lane_map (machine_mode
static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
- unsigned int i, odd, mask, nelt = d->nelt;
+ unsigned int i, odd, mask, nelt = d->perm.length ();
rtx out0, out1, in0, in1;
rtx (*gen)(rtx, rtx, rtx, rtx);
int first_elem;
@@ -28778,7 +28776,7 @@ arm_evpc_neon_vuzp (struct expand_vec_pe
/* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
big endian pattern on 64 bit vectors, so we correct for that. */
swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
- && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
+ && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
@@ -28837,7 +28835,7 @@ arm_evpc_neon_vuzp (struct expand_vec_pe
static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
- unsigned int i, high, mask, nelt = d->nelt;
+ unsigned int i, high, mask, nelt = d->perm.length ();
rtx out0, out1, in0, in1;
rtx (*gen)(rtx, rtx, rtx, rtx);
int first_elem;
@@ -28912,7 +28910,7 @@ arm_evpc_neon_vzip (struct expand_vec_pe
static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
- unsigned int i, j, diff, nelt = d->nelt;
+ unsigned int i, j, diff, nelt = d->perm.length ();
rtx (*gen)(rtx, rtx);
if (!d->one_vector_p)
@@ -28988,7 +28986,7 @@ arm_evpc_neon_vrev (struct expand_vec_pe
static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
- unsigned int i, odd, mask, nelt = d->nelt;
+ unsigned int i, odd, mask, nelt = d->perm.length ();
rtx out0, out1, in0, in1;
rtx (*gen)(rtx, rtx, rtx, rtx);
@@ -29054,7 +29052,7 @@ arm_evpc_neon_vtrn (struct expand_vec_pe
static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
- unsigned int i, nelt = d->nelt;
+ unsigned int i, nelt = d->perm.length ();
rtx (*gen) (rtx, rtx, rtx, rtx);
rtx offset;
@@ -29128,7 +29126,7 @@ arm_evpc_neon_vtbl (struct expand_vec_pe
{
rtx rperm[MAX_VECT_LEN], sel;
machine_mode vmode = d->vmode;
- unsigned int i, nelt = d->nelt;
+ unsigned int i, nelt = d->perm.length ();
/* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
numbering of elements for big-endian, we must reverse the order. */
@@ -29165,11 +29163,10 @@ arm_expand_vec_perm_const_1 (struct expa
/* The pattern matching functions above are written to look for a small
number to begin the sequence (0, 1, N/2). If we begin with an index
from the second operand, we can swap the operands. */
- if (d->perm[0] >= d->nelt)
+ unsigned int nelt = d->perm.length ();
+ if (d->perm[0] >= nelt)
{
- unsigned i, nelt = d->nelt;
-
- for (i = 0; i < nelt; ++i)
+ for (unsigned int i = 0; i < nelt; ++i)
d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
std::swap (d->op0, d->op1);
@@ -29204,15 +29201,16 @@ arm_expand_vec_perm_const (rtx target, r
d.vmode = GET_MODE (target);
gcc_assert (VECTOR_MODE_P (d.vmode));
- d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
d.testing_p = false;
+ nelt = GET_MODE_NUNITS (d.vmode);
+ d.perm.reserve (nelt);
for (i = which = 0; i < nelt; ++i)
{
rtx e = XVECEXP (sel, 0, i);
int ei = INTVAL (e) & (2 * nelt - 1);
which |= (ei < nelt ? 1 : 2);
- d.perm[i] = ei;
+ d.perm.quick_push (ei);
}
switch (which)
@@ -29249,19 +29247,18 @@ arm_expand_vec_perm_const (rtx target, r
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
static bool
-arm_vectorize_vec_perm_const_ok (machine_mode vmode,
- const unsigned char *sel)
+arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
{
struct expand_vec_perm_d d;
unsigned int i, nelt, which;
bool ret;
d.vmode = vmode;
- d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
d.testing_p = true;
- memcpy (d.perm, sel, nelt);
+ d.perm.safe_splice (sel);
/* Categorize the set of elements in the selector. */
+ nelt = GET_MODE_NUNITS (d.vmode);
for (i = which = 0; i < nelt; ++i)
{
unsigned char e = d.perm[i];
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c 2017-09-22 17:22:08.149305815 +0100
+++ gcc/config/i386/i386.c 2017-09-22 17:31:56.418527551 +0100
@@ -50024,8 +50024,7 @@ ix86_expand_vec_perm_const (rtx operands
/* Implement targetm.vectorize.vec_perm_const_ok. */
static bool
-ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
- const unsigned char *sel)
+ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
{
struct expand_vec_perm_d d;
unsigned int i, nelt, which;
@@ -50096,11 +50095,11 @@ ix86_vectorize_vec_perm_const_ok (machin
/* Extract the values from the vector CST into the permutation
array in D. */
- memcpy (d.perm, sel, nelt);
for (i = which = 0; i < nelt; ++i)
{
- unsigned char e = d.perm[i];
+ unsigned char e = sel[i];
gcc_assert (e < 2 * nelt);
+ d.perm[i] = e;
which |= (e < nelt ? 1 : 2);
}
Index: gcc/config/ia64/ia64.c
===================================================================
--- gcc/config/ia64/ia64.c 2017-09-21 11:53:16.654742357 +0100
+++ gcc/config/ia64/ia64.c 2017-09-22 17:31:56.419475454 +0100
@@ -333,8 +333,7 @@ static machine_mode ia64_get_reg_raw_mod
static section * ia64_hpux_function_section (tree, enum node_frequency,
bool, bool);
-static bool ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
- const unsigned char *sel);
+static bool ia64_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices);
static unsigned int ia64_hard_regno_nregs (unsigned int, machine_mode);
static bool ia64_hard_regno_mode_ok (unsigned int, machine_mode);
@@ -11824,8 +11823,7 @@ ia64_expand_vec_perm_const (rtx operands
/* Implement targetm.vectorize.vec_perm_const_ok. */
static bool
-ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
- const unsigned char *sel)
+ia64_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
{
struct expand_vec_perm_d d;
unsigned int i, nelt, which;
@@ -11837,10 +11835,10 @@ ia64_vectorize_vec_perm_const_ok (machin
/* Extract the values from the vector CST into the permutation
array in D. */
- memcpy (d.perm, sel, nelt);
for (i = which = 0; i < nelt; ++i)
{
- unsigned char e = d.perm[i];
+ unsigned char e = sel[i];
+ d.perm[i] = e;
gcc_assert (e < 2 * nelt);
which |= (e < nelt ? 1 : 2);
}
Index: gcc/config/mips/mips.c
===================================================================
--- gcc/config/mips/mips.c 2017-09-21 11:53:16.776320319 +0100
+++ gcc/config/mips/mips.c 2017-09-22 17:31:56.421371259 +0100
@@ -21470,8 +21470,7 @@ mips_sched_reassociation_width (unsigned
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
static bool
-mips_vectorize_vec_perm_const_ok (machine_mode vmode,
- const unsigned char *sel)
+mips_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
{
struct expand_vec_perm_d d;
unsigned int i, nelt, which;
@@ -21480,12 +21479,12 @@ mips_vectorize_vec_perm_const_ok (machin
d.vmode = vmode;
d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
d.testing_p = true;
- memcpy (d.perm, sel, nelt);
/* Categorize the set of elements in the selector. */
for (i = which = 0; i < nelt; ++i)
{
- unsigned char e = d.perm[i];
+ unsigned char e = sel[i];
+ d.perm[i] = e;
gcc_assert (e < 2 * nelt);
which |= (e < nelt ? 1 : 2);
}
Index: gcc/config/powerpcspe/powerpcspe.c
===================================================================
--- gcc/config/powerpcspe/powerpcspe.c 2017-09-21 11:53:16.643935427 +0100
+++ gcc/config/powerpcspe/powerpcspe.c 2017-09-22 17:31:56.424214967 +0100
@@ -38731,8 +38731,7 @@ rs6000_expand_vec_perm_const (rtx operan
/* Test whether a constant permutation is supported. */
static bool
-rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
- const unsigned char *sel)
+rs6000_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
{
/* AltiVec (and thus VSX) can handle arbitrary permutations. */
if (TARGET_ALTIVEC)
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c 2017-09-21 11:53:16.730390867 +0100
+++ gcc/config/rs6000/rs6000.c 2017-09-22 17:31:56.427058675 +0100
@@ -35594,8 +35594,7 @@ rs6000_expand_vec_perm_const (rtx operan
/* Test whether a constant permutation is supported. */
static bool
-rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
- const unsigned char *sel)
+rs6000_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
{
/* AltiVec (and thus VSX) can handle arbitrary permutations. */
if (TARGET_ALTIVEC)