[Bug tree-optimization/98113] New: [11 Regression] popcnt is not vectorized on s390 since f5e18dd9c7da
iii at linux dot ibm.com
gcc-bugzilla@gcc.gnu.org
Thu Dec 3 01:28:53 GMT 2020
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98113
Bug ID: 98113
Summary: [11 Regression] popcnt is not vectorized on s390 since
f5e18dd9c7da
Product: gcc
Version: 11.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: iii at linux dot ibm.com
Target Milestone: ---
The s390 test vxe/popcount-1.c began to fail after the fix for PR96789.
The reason is that for the following source code
uv4si __attribute__((noinline))
vpopctf (uv4si a)
{
uv4si r;
int i;
for (i = 0; i < 4; i++)
r[i] = __builtin_popcount (a[i]);
return r;
}
FRE turned
_4 = BIT_FIELD_REF <aD.2283, 32, 0>;
_11 = __builtin_popcountD.1211 (_4);
_18 = (unsigned intD.9) _11;
BIT_FIELD_REF <rD.2286, 32, 0> = _18;
i_20 = 1;
ivtmp_21 = 3;
_25 = VIEW_CONVERT_EXPR<unsigned intD.9[4]>(aD.2283)[i_20];
_26 = __builtin_popcountD.1211 (_25);
_27 = (unsigned intD.9) _26;
VIEW_CONVERT_EXPR<unsigned intD.9[4]>(rD.2286)[i_20] = _27;
i_29 = i_20 + 1;
ivtmp_30 = ivtmp_21 + 4294967295;
_34 = VIEW_CONVERT_EXPR<unsigned intD.9[4]>(aD.2283)[i_29];
_35 = __builtin_popcountD.1211 (_34);
_36 = (unsigned intD.9) _35;
VIEW_CONVERT_EXPR<unsigned intD.9[4]>(rD.2286)[i_29] = _36;
i_38 = i_29 + 1;
ivtmp_39 = ivtmp_30 + 4294967295;
_1 = VIEW_CONVERT_EXPR<unsigned intD.9[4]>(aD.2283)[i_38];
_2 = __builtin_popcountD.1211 (_1);
_3 = (unsigned intD.9) _2;
VIEW_CONVERT_EXPR<unsigned intD.9[4]>(rD.2286)[i_38] = _3;
i_10 = i_38 + 1;
ivtmp_16 = ivtmp_39 + 4294967295;
_7 = rD.2286;
rD.2286 ={v} {CLOBBER};
return _7;
into
_4 = BIT_FIELD_REF <a_17(D), 32, 0>;
_11 = __builtin_popcountD.1211 (_4);
_18 = (unsigned intD.9) _11;
r_14 = BIT_INSERT_EXPR <r_15(D), _18, 0 (32 bits)>;
_25 = BIT_FIELD_REF <a_17(D), 32, 32>;
_26 = __builtin_popcountD.1211 (_25);
_27 = (unsigned intD.9) _26;
r_33 = BIT_INSERT_EXPR <r_14, _27, 32 (32 bits)>;
_34 = BIT_FIELD_REF <a_17(D), 32, 64>;
_35 = __builtin_popcountD.1211 (_34);
_36 = (unsigned intD.9) _35;
r_32 = BIT_INSERT_EXPR <r_33, _36, 64 (32 bits)>;
_1 = BIT_FIELD_REF <a_17(D), 32, 96>;
_2 = __builtin_popcountD.1211 (_1);
_3 = (unsigned intD.9) _2;
r_31 = BIT_INSERT_EXPR <r_32, _3, 96 (32 bits)>;
_7 = r_31;
return _7;
that is, FRE replaced a sequence of stores with a sequence of
BIT_INSERT_EXPRs.
slp1 now says: "missed: not vectorized: no grouped stores in basic
block", presumably because it doesn't understand BIT_INSERT_EXPRs.
More information about the Gcc-bugs
mailing list