[Bug target/97770] [ICELAKE]Missing vectorization for vpopcnt

crazylht at gmail dot com gcc-bugzilla@gcc.gnu.org
Tue Nov 10 03:04:37 GMT 2020


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97770

--- Comment #2 from Hongtao.liu <crazylht at gmail dot com> ---
After adding expander, successfully vectorize the loop.
---
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b153a87fb98..e8159997c40 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -22678,6 +22678,12 @@ (define_insn "avx5124vnniw_vp4dpwssds_maskz"
     (set_attr ("prefix") ("evex"))
     (set_attr ("mode") ("TI"))])

+(define_expand "popcount<mode>2"
+  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
+       (popcount:VI48_AVX512VL
+         (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")))]
+  "TARGET_AVX512VPOPCNTDQ")
+
 (define_insn "vpopcount<mode><mask_name>"
   [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
        (popcount:VI48_AVX512VL
@@ -22722,6 +22728,12 @@ (define_insn "*restore_multiple_leave_return<mode>"
   "TARGET_SSE && TARGET_64BIT"
   "jmp\t%P1")

+(define_insn "popcount<mode>2"
+  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
+       (popcount:VI12_AVX512VL
+         (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
+  "TARGET_AVX512BITALG")
+
 (define_insn "vpopcount<mode><mask_name>"
   [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
        (popcount:VI12_AVX512VL

---

But for vector byte/word/quadword, vectorizer still use vpopcntd, but not
vpopcnt{b,w,q}, missing corresponding ifn?

void
fooq(long long* __restrict dest, long long* src)
{
  for (int i = 0; i != 4; i++)
    dest[i] = __builtin_popcount (src[i]);
}

void
foow(short* __restrict dest, short* src)
{
  for (int i = 0; i != 16; i++)
    dest[i] = __builtin_popcount (src[i]);
}

void
foob(char* __restrict dest, char* src)
{
  for (int i = 0; i != 32; i++)
    dest[i] = __builtin_popcount (src[i]);
}


dump of test.c.164.vect

;; Function foow (foow, funcdef_no=0, decl_uid=4228, cgraph_uid=1,
symbol_order=0)

Merging blocks 2 and 6
foow (short int * restrict dest, short int * src)
{
  vector(8) short int * vectp_dest.10;
  vector(8) short int * vectp_dest.9;
  vector(8) short int vect__8.8;
  vector(4) int vect__6.7;
  vector(4) unsigned int vect__5.6;
  vector(8) short int vect__4.5;
  vector(8) short int * vectp_src.4;
  vector(8) short int * vectp_src.3;
  int i;
  long unsigned int _1;
  long unsigned int _2;
  short int * _3;
  short int _4;
  unsigned int _5;
  int _6;
  short int * _7;
  short int _8;
  unsigned int ivtmp_26;
  unsigned int ivtmp_28;
  unsigned int ivtmp_34;
  unsigned int ivtmp_35;

  <bb 2> [local count: 119292720]:

  <bb 3> [local count: 119292719]:
  # i_19 = PHI <i_15(5), 0(2)>
  # ivtmp_35 = PHI <ivtmp_34(5), 8(2)>
  # vectp_src.3_24 = PHI <vectp_src.3_23(5), src_12(D)(2)>
  # vectp_dest.9_9 = PHI <vectp_dest.9_29(5), dest_13(D)(2)>
  # ivtmp_26 = PHI <ivtmp_28(5), 0(2)>
  _1 = (long unsigned int) i_19;
  _2 = _1 * 2;
  _3 = src_12(D) + _2;
  vect__4.5_22 = MEM <vector(8) short int> [(short int *)vectp_src.3_24];
  _4 = *_3;
  vect__5.6_21 = [vec_unpack_lo_expr] vect__4.5_22;
  vect__5.6_18 = [vec_unpack_hi_expr] vect__4.5_22;
  _5 = (unsigned int) _4;
  vect__6.7_17 = .POPCOUNT (vect__5.6_21);
  vect__6.7_16 = .POPCOUNT (vect__5.6_18);
  _6 = 0;
  _7 = dest_13(D) + _2;
  vect__8.8_10 = VEC_PACK_TRUNC_EXPR <vect__6.7_17, vect__6.7_16>;
  _8 = (short int) _6;
  MEM <vector(8) short int> [(short int *)vectp_dest.9_9] = vect__8.8_10;
  i_15 = i_19 + 1;
  ivtmp_34 = ivtmp_35 - 1;
  vectp_src.3_23 = vectp_src.3_24 + 16;
  vectp_dest.9_29 = vectp_dest.9_9 + 16;
  ivtmp_28 = ivtmp_26 + 1;
  if (ivtmp_28 < 1)
    goto <bb 5>; [0.00%]
  else
    goto <bb 4>; [100.00%]

  <bb 5> [local count: 0]:
  goto <bb 3>; [100.00%]

  <bb 4> [local count: 119292720]:
  return;

}


More information about the Gcc-bugs mailing list