[PATCH] i386: Optimize vpsubusw compared to 0 into vpcmpleuw or vpcmpnltuw[PR96906]

Hongtao Liu crazylht@gmail.com
Mon Nov 30 13:11:10 GMT 2020


Hi:
  This patch is quite similar to what Jakub did in
https://gcc.gnu.org/pipermail/gcc-patches/2020-November/560151.html
but for the AVX512BW target.

i.e. for -mavx512bw -mavx512vl, transform code from

        vpsubusw        %xmm1, %xmm0, %xmm0
        vpxor   %xmm1, %xmm1, %xmm1
        vpcmpw  $0, %xmm1, %xmm0, %k0
to
        vpcmpleuw       %xmm1, %xmm0, %k0

   Bootstrap and regression testing on x86_64-linux are OK.

gcc/ChangeLog
        PR target/96906
        * config/i386/sse.md
        (<avx512>_ucmp<mode>3<mask_scalar_merge_name>): Add a new
        define_split after this insn.

gcc/testsuite/ChangeLog

        * gcc.target/i386/avx512bw-pr96906-1.c: New test.
        * gcc.target/i386/pr96906-1.c: Add -mno-avx512f.

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 4aad462f882..eebc3750584 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -3006,6 +3006,30 @@ (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])

+;; PR96906 - optimize vpsubus[bw] compared to 0 into vpcmp{le,nlt}u[bw].
+(define_split
+  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
+        (unspec:<avx512fmaskmode>
+          [(us_minus:VI12_AVX512VL
+             (match_operand:VI12_AVX512VL 1 "vector_operand")
+             (match_operand:VI12_AVX512VL 2 "vector_operand"))
+           (match_operand:VI12_AVX512VL 3 "const0_operand")
+           (match_operand:SI 4 "const0_operand")]
+          UNSPEC_PCMP))]
+  "TARGET_AVX512BW && ix86_binary_operator_ok (US_MINUS, <MODE>mode, operands)"
+  [(const_int 0)]
+  {
+    /* Compare immediates: 2 = LE; 5 = NLT, used when operands are swapped.  */
+    rtx cmp_predicate = GEN_INT (2);
+    if (MEM_P (operands[1]))
+      {
+        std::swap (operands[1], operands[2]);
+        cmp_predicate = GEN_INT (5);
+      }
+    emit_insn (gen_<avx512>_ucmp<mode>3 (operands[0], operands[1],
+                                        operands[2], cmp_predicate));
+    DONE;
+  })
+
 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
   [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
         (and:<avx512fmaskmode>
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr96906-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr96906-1.c
new file mode 100644
index 00000000000..ae7ec7abed1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr96906-1.c
@@ -0,0 +1,84 @@
+/* PR target/96906 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -masm=att" } */
+/* { dg-final { scan-assembler-times {(?n)vpcmpub[ \t]*\$2} 6 } } */
+/* { dg-final { scan-assembler-times {(?n)vpcmpuw[ \t]*\$2} 6 } } */
+
+/* Odd-numbered functions compare a saturating unsigned subtraction against
+   zero (predicate 0); even-numbered ones call the explicit unsigned <=
+   intrinsic on the same operands.  All twelve are expected to emit the
+   unsigned compare with immediate 2: six byte and six word variants.  */
+
+#include<immintrin.h>
+
+__mmask8
+ff1 (__m128i x, __m128i y)
+{
+  return _mm_cmp_epi16_mask (_mm_subs_epu16 (x, y), _mm_setzero_si128 (), 0);
+}
+
+__mmask8
+ff2 (__m128i x, __m128i y)
+{
+  return _mm_cmple_epu16_mask (x, y);
+}
+
+__mmask16
+ff3 (__m128i x, __m128i y)
+{
+  return _mm_cmp_epi8_mask (_mm_subs_epu8 (x, y), _mm_setzero_si128 (), 0);
+}
+
+__mmask16
+ff4 (__m128i x, __m128i y)
+{
+  return _mm_cmple_epu8_mask (x, y);
+}
+
+__mmask16
+ff5 (__m256i x, __m256i y)
+{
+  return _mm256_cmp_epi16_mask (_mm256_subs_epu16 (x, y), _mm256_setzero_si256 (), 0);
+}
+
+__mmask16
+ff6 (__m256i x, __m256i y)
+{
+  return _mm256_cmple_epu16_mask (x, y);
+}
+
+__mmask32
+ff7 (__m256i x, __m256i y)
+{
+  return _mm256_cmp_epi8_mask (_mm256_subs_epu8 (x, y), _mm256_setzero_si256 (), 0);
+}
+
+__mmask32
+ff8 (__m256i x, __m256i y)
+{
+  return _mm256_cmple_epu8_mask (x, y);
+}
+
+__mmask32
+ff9 (__m512i x, __m512i y)
+{
+  return _mm512_cmp_epi16_mask (_mm512_subs_epu16 (x, y), _mm512_setzero_si512 (), 0);
+}
+
+__mmask32
+ff10 (__m512i x, __m512i y)
+{
+  return _mm512_cmple_epu16_mask (x, y);
+}
+
+__mmask64
+ff11 (__m512i x, __m512i y)
+{
+  return _mm512_cmp_epi8_mask (_mm512_subs_epu8 (x, y), _mm512_setzero_si512 (), 0);
+}
+
+__mmask64
+ff12 (__m512i x, __m512i y)
+{
+  return _mm512_cmple_epu8_mask (x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr96906-1.c b/gcc/testsuite/gcc.target/i386/pr96906-1.c
index 9d836eb2bdd..b1b41bf522d 100644
--- a/gcc/testsuite/gcc.target/i386/pr96906-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr96906-1.c
@@ -1,6 +1,6 @@
 /* PR target/96906 */
 /* { dg-do compile } */
-/* { dg-options "-O2 -mavx2" } */
+/* { dg-options "-O2 -mavx2 -mno-avx512f" } */
 /* { dg-final { scan-assembler-times "\tvpminub\[^\n\r]*xmm" 2 } } */
 /* { dg-final { scan-assembler-times "\tvpminuw\[^\n\r]*xmm" 2 } } */
 /* { dg-final { scan-assembler-times "\tvpminub\[^\n\r]*ymm" 2 } } */
-- 
2.18.1


More information about the Gcc-patches mailing list