[Bug target/95524] New: Suboptimal codegen for shift by constant for v16qi/v32qi under -march=skylake
crazylht at gmail dot com
gcc-bugzilla@gcc.gnu.org
Thu Jun 4 06:24:50 GMT 2020
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95524
Bug ID: 95524
Summary: Suboptimal codegen for shift by constant for v16qi/v32qi
under -march=skylake
Product: gcc
Version: 11.0
Status: UNCONFIRMED
Keywords: missed-optimization
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: crazylht at gmail dot com
Target Milestone: ---
Target: x86_64-*-* i?86-*-*
cat test.c
---
typedef char v16qi __attribute__ ((vector_size (16)));
typedef char v32qi __attribute__ ((vector_size (32)));
typedef unsigned char v16uqi __attribute__ ((vector_size (16)));
typedef unsigned char v32uqi __attribute__ ((vector_size (32)));
v16qi
ashift (v16qi a)
{
return a<<5;
}
v32qi
ashift2 (v32qi a, v32qi b)
{
return a<<5;
}
v16qi
ashiftrt (v16qi a)
{
return a>>5;
}
v32qi
arshiftrt2 (v32qi a)
{
return a>>5;
}
v16uqi
lshiftrt (v16uqi a)
{
return a>>5;
}
v32uqi
lshiftrt2 (v32uqi a)
{
return a>>5;
}
---
gcc11 -O2 -march=skylake
---
ashift(char __vector(16)):
vpaddb xmm0, xmm0, xmm0
vpaddb xmm0, xmm0, xmm0
vpaddb xmm0, xmm0, xmm0
vpaddb xmm0, xmm0, xmm0
vpaddb xmm0, xmm0, xmm0
ret
ashift2(char __vector(32), char __vector(32)):
vpaddb ymm0, ymm0, ymm0
vpaddb ymm0, ymm0, ymm0
vpaddb ymm0, ymm0, ymm0
vpaddb ymm0, ymm0, ymm0
vpaddb ymm0, ymm0, ymm0
ret
ashiftrt(char __vector(16)):
vpmovsxbw xmm2, xmm0
vpsrldq xmm1, xmm0, 8
vpmovsxbw xmm1, xmm1
vpsraw xmm0, xmm2, 5
vmovdqa xmm2, XMMWORD PTR .LC0[rip]
vpsraw xmm1, xmm1, 5
vpand xmm0, xmm2, xmm0
vpand xmm2, xmm2, xmm1
vpackuswb xmm0, xmm0, xmm2
ret
arshiftrt2(char __vector(32)):
vmovdqa ymm1, ymm0
vextracti128 xmm1, ymm1, 0x1
vmovdqa ymm2, YMMWORD PTR .LC1[rip]
vpmovsxbw ymm0, xmm0
vpmovsxbw ymm1, xmm1
vpsraw ymm1, ymm1, 5
vpsraw ymm0, ymm0, 5
vpand ymm0, ymm2, ymm0
vpand ymm2, ymm2, ymm1
vpackuswb ymm0, ymm0, ymm2
vpermq ymm0, ymm0, 216
ret
lshiftrt(unsigned char __vector(16)):
vpmovzxbw xmm2, xmm0
vpsrldq xmm1, xmm0, 8
vpmovzxbw xmm1, xmm1
vpsrlw xmm0, xmm2, 5
vmovdqa xmm2, XMMWORD PTR .LC0[rip]
vpsrlw xmm1, xmm1, 5
vpand xmm0, xmm2, xmm0
vpand xmm2, xmm2, xmm1
vpackuswb xmm0, xmm0, xmm2
ret
lshiftrt2(unsigned char __vector(32)):
vmovdqa ymm1, ymm0
vextracti128 xmm1, ymm1, 0x1
vmovdqa ymm2, YMMWORD PTR .LC1[rip]
vpmovzxbw ymm0, xmm0
vpmovzxbw ymm1, xmm1
vpsrlw ymm1, ymm1, 5
vpsrlw ymm0, ymm0, 5
vpand ymm0, ymm2, ymm0
vpand ymm2, ymm2, ymm1
vpackuswb ymm0, ymm0, ymm2
vpermq ymm0, ymm0, 216
ret
.LC0:
.value 255
.value 255
.value 255
.value 255
.value 255
.value 255
.value 255
.value 255
.LC1:
.value 255
.value 255
.value 255
.value 255
.value 255
.value 255
.value 255
.value 255
.value 255
.value 255
.value 255
.value 255
.value 255
.value 255
.value 255
.value 255
---
icc has
---
ashift(char __vector(16)):
vpsllw xmm1, xmm0, 5 #9.16
vpand xmm0, xmm1, XMMWORD PTR .L_2il0floatpacket.0[rip] #9.16
ret #9.16
ashift2(char __vector(32), char __vector(32)):
vpsllw ymm2, ymm0, 5 #15.16
vpand ymm0, ymm2, YMMWORD PTR .L_2il0floatpacket.1[rip] #15.16
ret #15.16
ashiftrt(char __vector(16)):
vpsrlw xmm1, xmm0, 5 #21.16
vpand xmm0, xmm1, XMMWORD PTR .L_2il0floatpacket.2[rip] #21.16
ret #21.16
arshiftrt2(char __vector(32)):
vpsrlw ymm1, ymm0, 5 #27.16
vpand ymm0, ymm1, YMMWORD PTR .L_2il0floatpacket.3[rip] #27.16
ret #27.16
lshiftrt(unsigned char __vector(16)):
vpsrlw xmm1, xmm0, 5 #33.16
vpand xmm0, xmm1, XMMWORD PTR .L_2il0floatpacket.2[rip] #33.16
ret #33.16
lshiftrt2(unsigned char __vector(32)):
vpsrlw ymm1, ymm0, 5 #39.16
vpand ymm0, ymm1, YMMWORD PTR .L_2il0floatpacket.3[rip] #39.16
ret #39.16
.L_2il0floatpacket.1:
.long 0xe0e0e0e0,0xe0e0e0e0,0xe0e0e0e0,0xe0e0e0e0,0xe0e0e0e0,0xe0e0e0e0,0xe0e0e0e0,0xe0e0e0e0
.L_2il0floatpacket.3:
.long 0x07070707,0x07070707,0x07070707,0x07070707,0x07070707,0x07070707,0x07070707,0x07070707
.L_2il0floatpacket.0:
.long 0xe0e0e0e0,0xe0e0e0e0,0xe0e0e0e0,0xe0e0e0e0
.L_2il0floatpacket.2:
.long 0x07070707,0x07070707,0x07070707,0x07070707
---
icc takes far fewer instructions than gcc.
More information about the Gcc-bugs
mailing list