[Bug target/93183] New: SVE only makes the last thing conditional with ifconversion
pinskia at gcc dot gnu.org
gcc-bugzilla@gcc.gnu.org
Tue Jan 7 10:35:00 GMT 2020
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93183
Bug ID: 93183
Summary: SVE only makes the last thing conditional with ifconversion
Product: gcc
Version: 10.0
Status: UNCONFIRMED
Keywords: missed-optimization
Severity: enhancement
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: pinskia at gcc dot gnu.org
Target Milestone: ---
Target: aarch64-linux-gnu
Take:
typedef unsigned char uint8_t;
/*
 * Reduced test case for the report: returns either x itself or a 0/1 value
 * derived from the negation of x, depending on whether x has any bit set
 * above its low six bits.
 */
static inline uint8_t x264_clip_uint8( uint8_t x )
{
/* -x is evaluated in int and truncated back to 8 bits, i.e. (256 - x) mod 256. */
uint8_t t = -x;
/* Keep only the top bit of the negated value, so t is now 0 or 1. */
t = t>>7;
/* ~63 clears the low six bits; for an 8-bit x the mask is 0xc0, so t1 != 0 iff x >= 64. */
uint8_t t1 = x&(~63);
/* x >= 64 yields the 0/1 value t; smaller x is returned unchanged. */
return t1 ? t : x;
}
/*
 * Stores x264_clip_uint8(src[i]) into dst[i] for every i in [0, n*16).
 * dst and src are both __restrict-qualified, i.e. declared not to alias.
 * The loop body does not execute when n*16 <= 0.
 */
void mc_weight( uint8_t *__restrict dst, uint8_t *__restrict src, int n)
{
/* Byte-wise loop whose vectorized SVE form is quoted in the report. */
for( int x = 0; x < n*16; x++ )
dst[x] = x264_clip_uint8(src[x]);
}
---- CUT ----
Currently (with -O3 -mcpu=generic+sve), GCC generates the following inner loop:
// Inner loop as currently emitted by GCC for mc_weight (verbatim compiler output).
// Per AAPCS64 argument order, x0 = dst and x1 = src; x3 is used as the byte index.
// p0 is the loop predicate; p1 and w2 are set before this excerpt
// (presumably an all-true predicate and the n*16 bound -- confirm).
.L3:
// z0 = src bytes for lanes active in p0; inactive lanes are zeroed.
ld1b z0.b, p0/z, [x1, x3]
// z2 = z0 & 0xc0, i.e. t1 = x & ~63 (movprfx copies z0 so the destructive AND can write z2).
movprfx z2, z0
and z2.b, z2.b, #0xc0
// z1 = -z0 under p1, not under the select predicate p2: this is the
// negation the report wants to be made conditional.
neg z1.b, p1/m, z0.b
// p2 = lanes (within p1) where t1 != 0.
cmpne p2.b, p1/z, z2.b, #0
// For lanes in p2: z0 = z1 >> 7 (logical). Other lanes keep the loaded x.
movprfx z0.b, p2/m, z1.b
lsr z0.b, p2/m, z0.b, #7
// Store the selected bytes to dst for lanes active in p0.
st1b z0.b, p0, [x0, x3]
// Advance the index by the vector length in bytes.
incb x3
// p0 = lanes whose index (w3 + lane) is below w2, compared as unsigned.
whilelo p0.b, w3, w2
// Loop again while at least one lane is still active.
b.any .L3
--- CUT ---
But we should be able to generate:
// Inner loop the reporter proposes instead: the negate is predicated on p2,
// so both the neg and the shift operate in place on z0 and z1 is not needed.
// The ";;;" annotations below are the reporter's own notes.
.L3:
// z0 = src bytes for lanes active in p0; inactive lanes are zeroed.
ld1b z0.b, p0/z, [x1, x3]
// z2 = z0 & 0xc0, i.e. t1 = x & ~63.
// NOTE(review): SVE AND (immediate) is a destructive form (Zdn is both source
// and destination), so this line probably still needs the movprfx shown in
// the current output -- confirm against the Arm ISA reference.
and z2.b, z0.b, #0xc0
// p2 = lanes where t1 != 0.
cmpne p2.b, p1/z, z2.b, #0 ;;; << I think p1/z here really should be
p0/z also
// For lanes in p2: z0 = -z0. Other lanes keep the loaded x.
neg z0.b, p2/m, z0.b ;;; <<< This one should be conditional
// For lanes in p2: z0 = z0 >> 7 (logical).
lsr z0.b, p2/m, z0.b, #7
// Store the selected bytes to dst for lanes active in p0.
st1b z0.b, p0, [x0, x3]
// Advance the index by the vector length in bytes.
incb x3
// p0 = lanes whose index (w3 + lane) is below w2, compared as unsigned.
whilelo p0.b, w3, w2
// Loop again while at least one lane is still active.
b.any .L3
More information about the Gcc-bugs
mailing list