Update interface to TARGET_VECTORIZE_VEC_PERM_CONST_OK


This patch makes TARGET_VECTORIZE_VEC_PERM_CONST_OK take the permute
vector in the form of a vec_perm_indices instead of an unsigned char *.
It follows on from the recent patch that did the same in target-independent
code.
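
For targets the change is mostly mechanical: the selector now arrives as
a vec_perm_indices (a vector of element indices) rather than a raw
pointer, so its length comes from the selector itself instead of a
separate nelt argument.  As a rough illustration only -- the hook name
and the trivial check below are made up rather than taken from any
port -- a hook using the new interface would look something like:

  static bool
  example_vectorize_vec_perm_const_ok (machine_mode vmode,
                                       vec_perm_indices sel)
  {
    /* The element count is carried by the selector itself.  */
    unsigned int nelt = sel.length ();
    if (nelt != (unsigned int) GET_MODE_NUNITS (vmode))
      return false;

    /* Each index selects an element of the concatenated <op0, op1>,
       so it must be below 2 * nelt.  */
    for (unsigned int i = 0; i < nelt; ++i)
      if (sel[i] >= 2 * nelt)
        return false;

    return true;
  }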

It was easy to make ARM and AArch64 use vec_perm_indices internally
as well, and converting AArch64 helps with SVE.  I did try doing the same
for the other ports, but the surgery needed was much more invasive and
much less obviously correct.
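
For anyone skimming the ARM/AArch64 hunks below, the internal conversion
boils down to one pattern, condensed here from the patch itself:

  struct expand_vec_perm_d
  {
    rtx target, op0, op1;
    auto_vec_perm_indices perm;  /* was: unsigned char perm[MAX_VECT_LEN]; */
    machine_mode vmode;          /* ...and the separate nelt field is gone.  */
    bool one_vector_p;
    bool testing_p;
  };
  /* ...so reads of d->nelt become d->perm.length () and the memcpy from
     the raw selector becomes d.perm.safe_splice (sel).  */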

Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu.
Also tested by comparing the testsuite assembly output on at least one
target per CPU directory.  OK to install?

Richard


2017-09-22  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* target.def (vec_perm_const_ok): Change sel parameter to
	vec_perm_indices.
	* optabs-query.c (can_vec_perm_p): Update accordingly.
	* doc/tm.texi: Regenerate.
	* config/aarch64/aarch64.c (expand_vec_perm_d): Change perm to
	auto_vec_perm_indices and remove separate nelt field.
	(aarch64_evpc_trn, aarch64_evpc_uzp, aarch64_evpc_zip)
	(aarch64_evpc_ext, aarch64_evpc_rev, aarch64_evpc_dup)
	(aarch64_evpc_tbl, aarch64_expand_vec_perm_const_1)
	(aarch64_expand_vec_perm_const): Update accordingly.
	(aarch64_vectorize_vec_perm_const_ok): Likewise.  Change sel
	to vec_perm_indices.
	* config/arm/arm.c (expand_vec_perm_d): Change perm to
	auto_vec_perm_indices and remove separate nelt field.
	(arm_evpc_neon_vuzp, arm_evpc_neon_vzip, arm_evpc_neon_vrev)
	(arm_evpc_neon_vtrn, arm_evpc_neon_vext, arm_evpc_neon_vtbl)
	(arm_expand_vec_perm_const_1, arm_expand_vec_perm_const): Update
	accordingly.
	(arm_vectorize_vec_perm_const_ok): Likewise.  Change sel
	to vec_perm_indices.
	* config/i386/i386.c (ix86_vectorize_vec_perm_const_ok): Change
	sel to vec_perm_indices.
	* config/ia64/ia64.c (ia64_vectorize_vec_perm_const_ok): Likewise.
	* config/mips/mips.c (mips_vectorize_vec_perm_const_ok): Likewise.
	* config/powerpcspe/powerpcspe.c (rs6000_vectorize_vec_perm_const_ok):
	Likewise.
	* config/rs6000/rs6000.c (rs6000_vectorize_vec_perm_const_ok):
	Likewise.

Index: gcc/target.def
===================================================================
--- gcc/target.def	2017-09-22 17:31:36.935337179 +0100
+++ gcc/target.def	2017-09-22 17:31:56.428954480 +0100
@@ -1847,7 +1847,7 @@ DEFHOOK
 DEFHOOK
 (vec_perm_const_ok,
  "Return true if a vector created for @code{vec_perm_const} is valid.",
- bool, (machine_mode, const unsigned char *sel),
+ bool, (machine_mode, vec_perm_indices),
  NULL)
 
 /* Return true if the target supports misaligned store/load of a
Index: gcc/optabs-query.c
===================================================================
--- gcc/optabs-query.c	2017-09-14 17:04:19.080694343 +0100
+++ gcc/optabs-query.c	2017-09-22 17:31:56.428006577 +0100
@@ -367,7 +367,7 @@ can_vec_perm_p (machine_mode mode, bool
       if (direct_optab_handler (vec_perm_const_optab, mode) != CODE_FOR_nothing
 	  && (sel == NULL
 	      || targetm.vectorize.vec_perm_const_ok == NULL
-	      || targetm.vectorize.vec_perm_const_ok (mode, &(*sel)[0])))
+	      || targetm.vectorize.vec_perm_const_ok (mode, *sel)))
 	return true;
     }
 
Index: gcc/doc/tm.texi
===================================================================
--- gcc/doc/tm.texi	2017-09-22 17:31:36.933441374 +0100
+++ gcc/doc/tm.texi	2017-09-22 17:31:56.428006577 +0100
@@ -5774,7 +5774,7 @@ correct for most targets.
 Return true if vector alignment is reachable (by peeling N iterations) for the given scalar type @var{type}.  @var{is_packed} is false if the scalar access using @var{type} is known to be naturally aligned.
 @end deftypefn
 
-@deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST_OK (machine_mode, const unsigned char *@var{sel})
+@deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST_OK (machine_mode, @var{vec_perm_indices})
 Return true if a vector created for @code{vec_perm_const} is valid.
 @end deftypefn
 
Index: gcc/config/aarch64/aarch64.c
===================================================================
--- gcc/config/aarch64/aarch64.c	2017-09-21 11:53:16.681759682 +0100
+++ gcc/config/aarch64/aarch64.c	2017-09-22 17:31:56.412840135 +0100
@@ -141,8 +141,8 @@ static void aarch64_elf_asm_constructor
 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
 static void aarch64_override_options_after_change (void);
 static bool aarch64_vector_mode_supported_p (machine_mode);
-static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
-						 const unsigned char *sel);
+static bool aarch64_vectorize_vec_perm_const_ok (machine_mode,
+						 vec_perm_indices);
 static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
 static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
 							 const_tree type,
@@ -13146,9 +13146,8 @@ #define MAX_VECT_LEN 16
 struct expand_vec_perm_d
 {
   rtx target, op0, op1;
-  unsigned char perm[MAX_VECT_LEN];
+  auto_vec_perm_indices perm;
   machine_mode vmode;
-  unsigned char nelt;
   bool one_vector_p;
   bool testing_p;
 };
@@ -13231,7 +13230,7 @@ aarch64_expand_vec_perm (rtx target, rtx
 static bool
 aarch64_evpc_trn (struct expand_vec_perm_d *d)
 {
-  unsigned int i, odd, mask, nelt = d->nelt;
+  unsigned int i, odd, mask, nelt = d->perm.length ();
   rtx out, in0, in1, x;
   rtx (*gen) (rtx, rtx, rtx);
   machine_mode vmode = d->vmode;
@@ -13319,7 +13318,7 @@ aarch64_evpc_trn (struct expand_vec_perm
 static bool
 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
 {
-  unsigned int i, odd, mask, nelt = d->nelt;
+  unsigned int i, odd, mask, nelt = d->perm.length ();
   rtx out, in0, in1, x;
   rtx (*gen) (rtx, rtx, rtx);
   machine_mode vmode = d->vmode;
@@ -13406,7 +13405,7 @@ aarch64_evpc_uzp (struct expand_vec_perm
 static bool
 aarch64_evpc_zip (struct expand_vec_perm_d *d)
 {
-  unsigned int i, high, mask, nelt = d->nelt;
+  unsigned int i, high, mask, nelt = d->perm.length ();
   rtx out, in0, in1, x;
   rtx (*gen) (rtx, rtx, rtx);
   machine_mode vmode = d->vmode;
@@ -13499,7 +13498,7 @@ aarch64_evpc_zip (struct expand_vec_perm
 static bool
 aarch64_evpc_ext (struct expand_vec_perm_d *d)
 {
-  unsigned int i, nelt = d->nelt;
+  unsigned int i, nelt = d->perm.length ();
   rtx (*gen) (rtx, rtx, rtx, rtx);
   rtx offset;
 
@@ -13563,7 +13562,7 @@ aarch64_evpc_ext (struct expand_vec_perm
 static bool
 aarch64_evpc_rev (struct expand_vec_perm_d *d)
 {
-  unsigned int i, j, diff, nelt = d->nelt;
+  unsigned int i, j, diff, nelt = d->perm.length ();
   rtx (*gen) (rtx, rtx);
 
   if (!d->one_vector_p)
@@ -13641,7 +13640,7 @@ aarch64_evpc_dup (struct expand_vec_perm
   rtx out = d->target;
   rtx in0;
   machine_mode vmode = d->vmode;
-  unsigned int i, elt, nelt = d->nelt;
+  unsigned int i, elt, nelt = d->perm.length ();
   rtx lane;
 
   elt = d->perm[0];
@@ -13686,7 +13685,7 @@ aarch64_evpc_tbl (struct expand_vec_perm
 {
   rtx rperm[MAX_VECT_LEN], sel;
   machine_mode vmode = d->vmode;
-  unsigned int i, nelt = d->nelt;
+  unsigned int i, nelt = d->perm.length ();
 
   if (d->testing_p)
     return true;
@@ -13720,12 +13719,11 @@ aarch64_expand_vec_perm_const_1 (struct
   /* The pattern matching functions above are written to look for a small
      number to begin the sequence (0, 1, N/2).  If we begin with an index
      from the second operand, we can swap the operands.  */
-  if (d->perm[0] >= d->nelt)
+  unsigned int nelt = d->perm.length ();
+  if (d->perm[0] >= nelt)
     {
-      unsigned i, nelt = d->nelt;
-
       gcc_assert (nelt == (nelt & -nelt));
-      for (i = 0; i < nelt; ++i)
+      for (unsigned int i = 0; i < nelt; ++i)
 	d->perm[i] ^= nelt; /* Keep the same index, but in the other vector.  */
 
       std::swap (d->op0, d->op1);
@@ -13764,15 +13762,16 @@ aarch64_expand_vec_perm_const (rtx targe
 
   d.vmode = GET_MODE (target);
   gcc_assert (VECTOR_MODE_P (d.vmode));
-  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
   d.testing_p = false;
 
+  nelt = GET_MODE_NUNITS (d.vmode);
+  d.perm.reserve (nelt);
   for (i = which = 0; i < nelt; ++i)
     {
       rtx e = XVECEXP (sel, 0, i);
       int ei = INTVAL (e) & (2 * nelt - 1);
       which |= (ei < nelt ? 1 : 2);
-      d.perm[i] = ei;
+      d.perm.quick_push (ei);
     }
 
   switch (which)
@@ -13807,19 +13806,18 @@ aarch64_expand_vec_perm_const (rtx targe
 }
 
 static bool
-aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
-				     const unsigned char *sel)
+aarch64_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
 {
   struct expand_vec_perm_d d;
   unsigned int i, nelt, which;
   bool ret;
 
   d.vmode = vmode;
-  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
   d.testing_p = true;
-  memcpy (d.perm, sel, nelt);
+  d.perm.safe_splice (sel);
 
   /* Calculate whether all elements are in one vector.  */
+  nelt = sel.length ();
   for (i = which = 0; i < nelt; ++i)
     {
       unsigned char e = d.perm[i];
Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c	2017-09-22 17:22:08.191305805 +0100
+++ gcc/config/arm/arm.c	2017-09-22 17:31:56.414735941 +0100
@@ -287,8 +287,7 @@ static int arm_cortex_a5_branch_cost (bo
 static int arm_cortex_m_branch_cost (bool, bool);
 static int arm_cortex_m7_branch_cost (bool, bool);
 
-static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
-					     const unsigned char *sel);
+static bool arm_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices);
 
 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
 
@@ -28657,9 +28656,8 @@ #define MAX_VECT_LEN 16
 struct expand_vec_perm_d
 {
   rtx target, op0, op1;
-  unsigned char perm[MAX_VECT_LEN];
+  auto_vec_perm_indices perm;
   machine_mode vmode;
-  unsigned char nelt;
   bool one_vector_p;
   bool testing_p;
 };
@@ -28766,7 +28764,7 @@ neon_pair_endian_lane_map (machine_mode
 static bool
 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
 {
-  unsigned int i, odd, mask, nelt = d->nelt;
+  unsigned int i, odd, mask, nelt = d->perm.length ();
   rtx out0, out1, in0, in1;
   rtx (*gen)(rtx, rtx, rtx, rtx);
   int first_elem;
@@ -28778,7 +28776,7 @@ arm_evpc_neon_vuzp (struct expand_vec_pe
   /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
      big endian pattern on 64 bit vectors, so we correct for that.  */
   swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
-    && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
+    && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
 
   first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
 
@@ -28837,7 +28835,7 @@ arm_evpc_neon_vuzp (struct expand_vec_pe
 static bool
 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
 {
-  unsigned int i, high, mask, nelt = d->nelt;
+  unsigned int i, high, mask, nelt = d->perm.length ();
   rtx out0, out1, in0, in1;
   rtx (*gen)(rtx, rtx, rtx, rtx);
   int first_elem;
@@ -28912,7 +28910,7 @@ arm_evpc_neon_vzip (struct expand_vec_pe
 static bool
 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
 {
-  unsigned int i, j, diff, nelt = d->nelt;
+  unsigned int i, j, diff, nelt = d->perm.length ();
   rtx (*gen)(rtx, rtx);
 
   if (!d->one_vector_p)
@@ -28988,7 +28986,7 @@ arm_evpc_neon_vrev (struct expand_vec_pe
 static bool
 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
 {
-  unsigned int i, odd, mask, nelt = d->nelt;
+  unsigned int i, odd, mask, nelt = d->perm.length ();
   rtx out0, out1, in0, in1;
   rtx (*gen)(rtx, rtx, rtx, rtx);
 
@@ -29054,7 +29052,7 @@ arm_evpc_neon_vtrn (struct expand_vec_pe
 static bool
 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
 {
-  unsigned int i, nelt = d->nelt;
+  unsigned int i, nelt = d->perm.length ();
   rtx (*gen) (rtx, rtx, rtx, rtx);
   rtx offset;
 
@@ -29128,7 +29126,7 @@ arm_evpc_neon_vtbl (struct expand_vec_pe
 {
   rtx rperm[MAX_VECT_LEN], sel;
   machine_mode vmode = d->vmode;
-  unsigned int i, nelt = d->nelt;
+  unsigned int i, nelt = d->perm.length ();
 
   /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
      numbering of elements for big-endian, we must reverse the order.  */
@@ -29165,11 +29163,10 @@ arm_expand_vec_perm_const_1 (struct expa
   /* The pattern matching functions above are written to look for a small
      number to begin the sequence (0, 1, N/2).  If we begin with an index
      from the second operand, we can swap the operands.  */
-  if (d->perm[0] >= d->nelt)
+  unsigned int nelt = d->perm.length ();
+  if (d->perm[0] >= nelt)
     {
-      unsigned i, nelt = d->nelt;
-
-      for (i = 0; i < nelt; ++i)
+      for (unsigned int i = 0; i < nelt; ++i)
 	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
 
       std::swap (d->op0, d->op1);
@@ -29204,15 +29201,16 @@ arm_expand_vec_perm_const (rtx target, r
 
   d.vmode = GET_MODE (target);
   gcc_assert (VECTOR_MODE_P (d.vmode));
-  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
   d.testing_p = false;
 
+  nelt = GET_MODE_NUNITS (d.vmode);
+  d.perm.reserve (nelt);
   for (i = which = 0; i < nelt; ++i)
     {
       rtx e = XVECEXP (sel, 0, i);
       int ei = INTVAL (e) & (2 * nelt - 1);
       which |= (ei < nelt ? 1 : 2);
-      d.perm[i] = ei;
+      d.perm.quick_push (ei);
     }
 
   switch (which)
@@ -29249,19 +29247,18 @@ arm_expand_vec_perm_const (rtx target, r
 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */
 
 static bool
-arm_vectorize_vec_perm_const_ok (machine_mode vmode,
-				 const unsigned char *sel)
+arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
 {
   struct expand_vec_perm_d d;
   unsigned int i, nelt, which;
   bool ret;
 
   d.vmode = vmode;
-  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
   d.testing_p = true;
-  memcpy (d.perm, sel, nelt);
+  d.perm.safe_splice (sel);
 
   /* Categorize the set of elements in the selector.  */
+  nelt = GET_MODE_NUNITS (d.vmode);
   for (i = which = 0; i < nelt; ++i)
     {
       unsigned char e = d.perm[i];
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c	2017-09-22 17:22:08.149305815 +0100
+++ gcc/config/i386/i386.c	2017-09-22 17:31:56.418527551 +0100
@@ -50024,8 +50024,7 @@ ix86_expand_vec_perm_const (rtx operands
 /* Implement targetm.vectorize.vec_perm_const_ok.  */
 
 static bool
-ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
-				  const unsigned char *sel)
+ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
 {
   struct expand_vec_perm_d d;
   unsigned int i, nelt, which;
@@ -50096,11 +50095,11 @@ ix86_vectorize_vec_perm_const_ok (machin
 
   /* Extract the values from the vector CST into the permutation
      array in D.  */
-  memcpy (d.perm, sel, nelt);
   for (i = which = 0; i < nelt; ++i)
     {
-      unsigned char e = d.perm[i];
+      unsigned char e = sel[i];
       gcc_assert (e < 2 * nelt);
+      d.perm[i] = e;
       which |= (e < nelt ? 1 : 2);
     }
 
Index: gcc/config/ia64/ia64.c
===================================================================
--- gcc/config/ia64/ia64.c	2017-09-21 11:53:16.654742357 +0100
+++ gcc/config/ia64/ia64.c	2017-09-22 17:31:56.419475454 +0100
@@ -333,8 +333,7 @@ static machine_mode ia64_get_reg_raw_mod
 static section * ia64_hpux_function_section (tree, enum node_frequency,
 					     bool, bool);
 
-static bool ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
-					      const unsigned char *sel);
+static bool ia64_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices);
 
 static unsigned int ia64_hard_regno_nregs (unsigned int, machine_mode);
 static bool ia64_hard_regno_mode_ok (unsigned int, machine_mode);
@@ -11824,8 +11823,7 @@ ia64_expand_vec_perm_const (rtx operands
 /* Implement targetm.vectorize.vec_perm_const_ok.  */
 
 static bool
-ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
-				  const unsigned char *sel)
+ia64_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
 {
   struct expand_vec_perm_d d;
   unsigned int i, nelt, which;
@@ -11837,10 +11835,10 @@ ia64_vectorize_vec_perm_const_ok (machin
 
   /* Extract the values from the vector CST into the permutation
      array in D.  */
-  memcpy (d.perm, sel, nelt);
   for (i = which = 0; i < nelt; ++i)
     {
-      unsigned char e = d.perm[i];
+      unsigned char e = sel[i];
+      d.perm[i] = e;
       gcc_assert (e < 2 * nelt);
       which |= (e < nelt ? 1 : 2);
     }
Index: gcc/config/mips/mips.c
===================================================================
--- gcc/config/mips/mips.c	2017-09-21 11:53:16.776320319 +0100
+++ gcc/config/mips/mips.c	2017-09-22 17:31:56.421371259 +0100
@@ -21470,8 +21470,7 @@ mips_sched_reassociation_width (unsigned
 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */
 
 static bool
-mips_vectorize_vec_perm_const_ok (machine_mode vmode,
-				  const unsigned char *sel)
+mips_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
 {
   struct expand_vec_perm_d d;
   unsigned int i, nelt, which;
@@ -21480,12 +21479,12 @@ mips_vectorize_vec_perm_const_ok (machin
   d.vmode = vmode;
   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
   d.testing_p = true;
-  memcpy (d.perm, sel, nelt);
 
   /* Categorize the set of elements in the selector.  */
   for (i = which = 0; i < nelt; ++i)
     {
-      unsigned char e = d.perm[i];
+      unsigned char e = sel[i];
+      d.perm[i] = e;
       gcc_assert (e < 2 * nelt);
       which |= (e < nelt ? 1 : 2);
     }
Index: gcc/config/powerpcspe/powerpcspe.c
===================================================================
--- gcc/config/powerpcspe/powerpcspe.c	2017-09-21 11:53:16.643935427 +0100
+++ gcc/config/powerpcspe/powerpcspe.c	2017-09-22 17:31:56.424214967 +0100
@@ -38731,8 +38731,7 @@ rs6000_expand_vec_perm_const (rtx operan
 /* Test whether a constant permutation is supported.  */
 
 static bool
-rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
-				    const unsigned char *sel)
+rs6000_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
 {
   /* AltiVec (and thus VSX) can handle arbitrary permutations.  */
   if (TARGET_ALTIVEC)
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	2017-09-21 11:53:16.730390867 +0100
+++ gcc/config/rs6000/rs6000.c	2017-09-22 17:31:56.427058675 +0100
@@ -35594,8 +35594,7 @@ rs6000_expand_vec_perm_const (rtx operan
 /* Test whether a constant permutation is supported.  */
 
 static bool
-rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
-				    const unsigned char *sel)
+rs6000_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
 {
   /* AltiVec (and thus VSX) can handle arbitrary permutations.  */
   if (TARGET_ALTIVEC)

