[PATCH] [rtl-optimization] Simplify vector shift/rotate with const_vec_duplicate to vector shift/rotate with const_int element.

liuhongt hongtao.liu@intel.com
Fri Aug 6 07:04:50 GMT 2021


Hi:
  Bootstrapped and regtested on x86_64-linux-gnu{-m32,}
  Ok for trunk?

gcc/ChangeLog:

	PR rtl-optimization/101796
	* simplify-rtx.c
	(simplify_context::simplify_binary_operation_1): Simplify
	vector shift/rotate with const_vec_duplicate to vector
	shift/rotate with const_int element.

gcc/testsuite/ChangeLog:

	PR rtl-optimization/101796
	* gcc.target/i386/pr101796.c: New test.
---
 gcc/simplify-rtx.c                       | 15 ++++++
 gcc/testsuite/gcc.target/i386/pr101796.c | 65 ++++++++++++++++++++++++
 2 files changed, 80 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr101796.c

diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index a719f57870f..75f3e455562 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -3970,6 +3970,21 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
 	    return simplify_gen_binary (code, mode, op0,
 					gen_int_shift_amount (mode, val));
 	}
+
+      /* Optimize vector shift/rotate with const_vec_duplicate
+	 to vector shift/rotate with const_int element.
+      /* TODO: vec_duplicate with variable can also be simplified,
+	 but GCC only require operand 2 of shift/rotate to be a scalar type
+	 which can have different modes in different backends, it makes
+	 simplication difficult to decide which mode should be choosed
+	 for shift/rotate count.  */
+      if ((code == ASHIFTRT || code == LSHIFTRT
+	   || code == ASHIFT || code == ROTATERT
+	   || code == ROTATE)
+	  && const_vec_duplicate_p (op1))
+	return simplify_gen_binary (code, mode, op0,
+				    unwrap_const_vec_duplicate (op1));
+
       break;
 
     case ASHIFT:
diff --git a/gcc/testsuite/gcc.target/i386/pr101796.c b/gcc/testsuite/gcc.target/i386/pr101796.c
new file mode 100644
index 00000000000..c22d6267fe5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101796.c
@@ -0,0 +1,65 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2 " } */
+/* { dg-final { scan-assembler-not "vpbroadcast" } }  */
+/* { dg-final { scan-assembler-not "vpsrlv\[dwq\]" } }  */
+/* { dg-final { scan-assembler-not "vpsllv\[dwq\]" } }  */
+/* { dg-final { scan-assembler-not "vpsrav\[dwq\]" } }  */
+/* { dg-final { scan-assembler-times "vpsrl\[dwq\]" 3 } }  */
+/* { dg-final { scan-assembler-times "vpsll\[dwq\]" 3 } }  */
+/* { dg-final { scan-assembler-times "vpsra\[dwq\]" 3 } }  */
+
+#include <immintrin.h>
+
+__m512i
+foo (__m512i a)
+{
+  return _mm512_srlv_epi16 (a, _mm512_set1_epi16 (3));
+}
+
+__m512i
+foo1 (__m512i a)
+{
+  return _mm512_srlv_epi32 (a, _mm512_set1_epi32 (3));
+}
+
+__m512i
+foo2 (__m512i a, long long b)
+{
+  return _mm512_srlv_epi64 (a, _mm512_set1_epi64 (3));
+}
+
+__m512i
+foo3 (__m512i a)
+{
+  return _mm512_srav_epi16 (a, _mm512_set1_epi16 (3));
+}
+
+__m512i
+foo4 (__m512i a)
+{
+  return _mm512_srav_epi32 (a, _mm512_set1_epi32 (3));
+}
+
+__m512i
+foo5 (__m512i a, long long b)
+{
+  return _mm512_srav_epi64 (a, _mm512_set1_epi64 (3));
+}
+
+__m512i
+foo6 (__m512i a)
+{
+  return _mm512_sllv_epi16 (a, _mm512_set1_epi16 (3));
+}
+
+__m512i
+foo7 (__m512i a)
+{
+  return _mm512_sllv_epi32 (a, _mm512_set1_epi32 (3));
+}
+
+__m512i
+foo8 (__m512i a, long long b)
+{
+  return _mm512_sllv_epi64 (a, _mm512_set1_epi64 (3));
+}
-- 
2.27.0



More information about the Gcc-patches mailing list