[Bug target/97770] [ICELAKE]Missing vectorization for vpopcnt
crazylht at gmail dot com
gcc-bugzilla@gcc.gnu.org
Tue Nov 10 03:04:37 GMT 2020
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97770
--- Comment #2 from Hongtao.liu <crazylht at gmail dot com> ---
After adding the expanders below, the loop is vectorized successfully.
---
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b153a87fb98..e8159997c40 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -22678,6 +22678,12 @@ (define_insn "avx5124vnniw_vp4dpwssds_maskz"
(set_attr ("prefix") ("evex"))
(set_attr ("mode") ("TI"))])
+(define_expand "popcount<mode>2"
+ [(set (match_operand:VI48_AVX512VL 0 "register_operand")
+ (popcount:VI48_AVX512VL
+ (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")))]
+ "TARGET_AVX512VPOPCNTDQ")
+
(define_insn "vpopcount<mode><mask_name>"
[(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
(popcount:VI48_AVX512VL
@@ -22722,6 +22728,12 @@ (define_insn "*restore_multiple_leave_return<mode>"
"TARGET_SSE && TARGET_64BIT"
"jmp\t%P1")
+(define_insn "popcount<mode>2"
+ [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
+ (popcount:VI12_AVX512VL
+ (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512BITALG")
+
(define_insn "vpopcount<mode><mask_name>"
[(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
(popcount:VI12_AVX512VL
---
But for vector byte/word/quadword elements, the vectorizer still uses vpopcntd
rather than vpopcnt{b,w,q}. Is the corresponding ifn missing?
/* Reproducer (quadword case): for each of the 4 elements, stores
   __builtin_popcount (src[i]) into dest[i].  dest is __restrict-qualified.
   NOTE(review): GCC documents __builtin_popcount as taking unsigned int, so
   the long long element is converted to unsigned int before counting; the
   long long variant is __builtin_popcountll -- confirm which was intended.  */
void
fooq(long long* __restrict dest, long long* src)
{
for (int i = 0; i != 4; i++)
dest[i] = __builtin_popcount (src[i]);
}
/* Reproducer (word case): for each of the 16 elements, stores
   __builtin_popcount (src[i]) into dest[i].  dest is __restrict-qualified.
   The vect dump quoted in this message shows the short elements being
   unpacked to unsigned int vectors before .POPCOUNT and the results being
   packed back to short with VEC_PACK_TRUNC_EXPR.  */
void
foow(short* __restrict dest, short* src)
{
for (int i = 0; i != 16; i++)
dest[i] = __builtin_popcount (src[i]);
}
/* Reproducer (byte case): for each of the 32 elements, stores
   __builtin_popcount (src[i]) into dest[i].  dest is __restrict-qualified.
   NOTE(review): GCC documents __builtin_popcount as taking unsigned int, so
   each char element is converted to unsigned int before counting (plain
   char signedness is implementation-defined) -- confirm this is intended.  */
void
foob(char* __restrict dest, char* src)
{
for (int i = 0; i != 32; i++)
dest[i] = __builtin_popcount (src[i]);
}
dump of test.c.164.vect
;; Function foow (foow, funcdef_no=0, decl_uid=4228, cgraph_uid=1,
symbol_order=0)
Merging blocks 2 and 6
foow (short int * restrict dest, short int * src)
{
vector(8) short int * vectp_dest.10;
vector(8) short int * vectp_dest.9;
vector(8) short int vect__8.8;
vector(4) int vect__6.7;
vector(4) unsigned int vect__5.6;
vector(8) short int vect__4.5;
vector(8) short int * vectp_src.4;
vector(8) short int * vectp_src.3;
int i;
long unsigned int _1;
long unsigned int _2;
short int * _3;
short int _4;
unsigned int _5;
int _6;
short int * _7;
short int _8;
unsigned int ivtmp_26;
unsigned int ivtmp_28;
unsigned int ivtmp_34;
unsigned int ivtmp_35;
<bb 2> [local count: 119292720]:
<bb 3> [local count: 119292719]:
# i_19 = PHI <i_15(5), 0(2)>
# ivtmp_35 = PHI <ivtmp_34(5), 8(2)>
# vectp_src.3_24 = PHI <vectp_src.3_23(5), src_12(D)(2)>
# vectp_dest.9_9 = PHI <vectp_dest.9_29(5), dest_13(D)(2)>
# ivtmp_26 = PHI <ivtmp_28(5), 0(2)>
_1 = (long unsigned int) i_19;
_2 = _1 * 2;
_3 = src_12(D) + _2;
vect__4.5_22 = MEM <vector(8) short int> [(short int *)vectp_src.3_24];
_4 = *_3;
vect__5.6_21 = [vec_unpack_lo_expr] vect__4.5_22;
vect__5.6_18 = [vec_unpack_hi_expr] vect__4.5_22;
_5 = (unsigned int) _4;
vect__6.7_17 = .POPCOUNT (vect__5.6_21);
vect__6.7_16 = .POPCOUNT (vect__5.6_18);
_6 = 0;
_7 = dest_13(D) + _2;
vect__8.8_10 = VEC_PACK_TRUNC_EXPR <vect__6.7_17, vect__6.7_16>;
_8 = (short int) _6;
MEM <vector(8) short int> [(short int *)vectp_dest.9_9] = vect__8.8_10;
i_15 = i_19 + 1;
ivtmp_34 = ivtmp_35 - 1;
vectp_src.3_23 = vectp_src.3_24 + 16;
vectp_dest.9_29 = vectp_dest.9_9 + 16;
ivtmp_28 = ivtmp_26 + 1;
if (ivtmp_28 < 1)
goto <bb 5>; [0.00%]
else
goto <bb 4>; [100.00%]
<bb 5> [local count: 0]:
goto <bb 3>; [100.00%]
<bb 4> [local count: 119292720]:
return;
}
More information about the Gcc-bugs
mailing list