[PATCH, rs6000] Support vrotr<mode>3 for int vector types

Kewen.Lin linkw@linux.ibm.com
Wed Jul 17 08:42:00 GMT 2019


Hi all,

This patch follows the suggestion to improve the rs6000 backend instead of
the generic expander.  Is this the better solution?  I had thought that a
generic expander change might benefit other targets suffering from similar
issues, but the previous RFC was restricted to constant rotation
counts (although it could be extended).  Any comments on the pros and
cons of the two approaches would be really helpful to me (a noob).

Regression testing has just been launched.  Is it OK for trunk if it
bootstraps and passes regression testing on powerpc64le-unknown-linux-gnu?


Thanks,
Kewen

---- 

gcc/ChangeLog

2019-07-17  Kewen Lin  <linkw@gcc.gnu.org>

	* config/rs6000/predicates.md (vint_reg_or_const_vector): New predicate.
	* config/rs6000/vector.md (vrotr<mode>3): New define_expand.

gcc/testsuite/ChangeLog

2019-07-17  Kewen Lin  <linkw@gcc.gnu.org>

	* gcc.target/powerpc/vec_rotate-1.c: New test.
	* gcc.target/powerpc/vec_rotate-2.c: New test.

on 2019/7/16 下午4:45, Kewen.Lin wrote:
> Hi all,
> 
> Based on the previous comments (thank you!), I tried to update the 
> handling in expander and vectorizer.  Middle-end optimizes lrotate
> with const rotation count to rrotate all the time, it makes vectorizer
> fail to vectorize if rrotate isn't supported on the target.  We can at
> least teach it on const rotation count, the cost should be the same? 
> At the same time, the expander already tries to use the opposite 
> rotation optable for scalar, we can teach it to deal with vector as well.
> 
> Is it on the right track and reasonable?
> 
> 
> Thanks,
> Kewen
> 
-------------- next part --------------
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 8ca98299950..c4c74630d26 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -163,6 +163,17 @@
   return VINT_REGNO_P (REGNO (op));
 })
 
+;; Return 1 if op is accepted by vint_operand (an integer vector register)
+;; or is a CONST_VECTOR and the requested mode is an integer vector mode.
+(define_predicate "vint_reg_or_const_vector"
+  (match_code "reg,subreg,const_vector")
+{
+  bool int_const_vec_p = (GET_CODE (op) == CONST_VECTOR
+			  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
+
+  return int_const_vec_p ? 1 : vint_operand (op, mode);
+})
+
 ;; Return 1 if op is a vector register to do logical operations on (and, or,
 ;; xor, etc.)
 (define_predicate "vlogical_operand"
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index 70bcfe02e22..5c6a344e452 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -1260,6 +1260,32 @@
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
   "")
 
+;; Expand rotatert as vrotl by the negated count (folded to BITS - N when the
+;; count is constant); the vrl[bhwd] insns use only the low bits of each count.
+(define_expand "vrotr<mode>3"
+  [(set (match_operand:VEC_I 0 "vint_operand")
+	(rotatert:VEC_I (match_operand:VEC_I 1 "vint_operand")
+		      (match_operand:VEC_I 2 "vint_reg_or_const_vector")))]
+  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+{
+  rtx rot_count;
+  if (GET_CODE (operands[2]) == CONST_VECTOR)
+    {
+      unsigned int bits = GET_MODE_PRECISION (GET_MODE_INNER (<MODE>mode));
+      rtx bits_vec = gen_const_vec_duplicate (<MODE>mode, GEN_INT (bits));
+      rot_count = simplify_const_binary_operation (MINUS, <MODE>mode, bits_vec,
+						   operands[2]);
+      rot_count = force_reg (<MODE>mode, rot_count);
+    }
+  else
+    {
+      rot_count = gen_reg_rtx (<MODE>mode);
+      emit_insn (gen_neg<mode>2 (rot_count, operands[2]));
+    }
+  emit_insn (gen_vrotl<mode>3 (operands[0], operands[1], rot_count));
+  DONE;
+})
+
 ;; Expanders for arithmetic shift left on each vector element
 (define_expand "vashl<mode>3"
   [(set (match_operand:VEC_I 0 "vint_operand")
diff --git a/gcc/testsuite/gcc.target/powerpc/vec_rotate-1.c b/gcc/testsuite/gcc.target/powerpc/vec_rotate-1.c
new file mode 100644
index 00000000000..80aca1a94a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec_rotate-1.c
@@ -0,0 +1,46 @@
+/* { dg-options "-O3" } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+
+/* Check that the vectorizer can use the Power vector rotate instructions when
+   every loop rotates its elements right by a compile-time constant.  */
+
+#define N 256  /* Array length; the loops below hard-code the same 256.  */
+unsigned long long sud[N], rud[N];
+unsigned int suw[N], ruw[N];
+unsigned short suh[N], ruh[N];
+unsigned char sub[N], rub[N];
+
+void
+testULL ()  /* rud[i] = sud[i] rotated right by 8 bits.  */
+{
+  for (int i = 0; i < 256; ++i)
+    rud[i] = (sud[i] >> 8) | (sud[i] << (sizeof (sud[0]) * 8 - 8));
+}
+
+void
+testUW ()  /* ruw[i] = suw[i] rotated right by 8 bits.  */
+{
+  for (int i = 0; i < 256; ++i)
+    ruw[i] = (suw[i] >> 8) | (suw[i] << (sizeof (suw[0]) * 8 - 8));
+}
+
+void
+testUH ()  /* ruh[i] = suh[i] rotated right by 9 bits.  */
+{
+  for (int i = 0; i < 256; ++i)
+    ruh[i] = (unsigned short) (suh[i] >> 9)
+	     | (unsigned short) (suh[i] << (sizeof (suh[0]) * 8 - 9));
+}
+
+void
+testUB ()  /* rub[i] = sub[i] rotated right by 5 bits.  */
+{
+  for (int i = 0; i < 256; ++i)
+    rub[i] = (unsigned char) (sub[i] >> 5)
+	     | (unsigned char) (sub[i] << (sizeof (sub[0]) * 8 - 5));
+}
+
+/* { dg-final { scan-assembler {\mvrld\M} } } */
+/* { dg-final { scan-assembler {\mvrlw\M} } } */
+/* { dg-final { scan-assembler {\mvrlh\M} } } */
+/* { dg-final { scan-assembler {\mvrlb\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec_rotate-2.c b/gcc/testsuite/gcc.target/powerpc/vec_rotate-2.c
new file mode 100644
index 00000000000..affda6c023b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec_rotate-2.c
@@ -0,0 +1,47 @@
+/* { dg-options "-O3" } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+
+/* Check that the vectorizer can use the Power vector rotate instructions when
+   the rotate count is the run-time value rot_cnt rather than a constant.  */
+
+#define N 256  /* Array length; the loops below hard-code the same 256.  */
+unsigned long long sud[N], rud[N];
+unsigned int suw[N], ruw[N];
+unsigned short suh[N], ruh[N];
+unsigned char sub[N], rub[N];
+extern unsigned char rot_cnt;  /* Rotate count; not defined in this file.  */
+
+void
+testULL ()  /* Rotate right by rot_cnt; a count of 0 left-shifts by full width.  */
+{
+  for (int i = 0; i < 256; ++i)
+    rud[i] = (sud[i] >> rot_cnt) | (sud[i] << (sizeof (sud[0]) * 8 - rot_cnt));
+}
+
+void
+testUW ()  /* Rotate right by rot_cnt; a count of 0 left-shifts by full width.  */
+{
+  for (int i = 0; i < 256; ++i)
+    ruw[i] = (suw[i] >> rot_cnt) | (suw[i] << (sizeof (suw[0]) * 8 - rot_cnt));
+}
+
+void
+testUH ()  /* ruh[i] = suh[i] rotated right by rot_cnt bits.  */
+{
+  for (int i = 0; i < 256; ++i)
+    ruh[i] = (unsigned short) (suh[i] >> rot_cnt)
+	     | (unsigned short) (suh[i] << (sizeof (suh[0]) * 8 - rot_cnt));
+}
+
+void
+testUB ()  /* rub[i] = sub[i] rotated right by rot_cnt bits.  */
+{
+  for (int i = 0; i < 256; ++i)
+    rub[i] = (unsigned char) (sub[i] >> rot_cnt)
+	     | (unsigned char) (sub[i] << (sizeof (sub[0]) * 8 - rot_cnt));
+}
+
+/* { dg-final { scan-assembler {\mvrld\M} } } */
+/* { dg-final { scan-assembler {\mvrlw\M} } } */
+/* { dg-final { scan-assembler {\mvrlh\M} } } */
+/* { dg-final { scan-assembler {\mvrlb\M} } } */


More information about the Gcc-patches mailing list