[Bug middle-end/99395] New: s116 benchmark of TSVC is vectorized by clang and not by gcc
hubicka at gcc dot gnu.org
gcc-bugzilla@gcc.gnu.org
Thu Mar 4 23:01:14 GMT 2021
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99395
Bug ID: 99395
Summary: s116 benchmark of TSVC is vectorized by clang and not
by gcc
Product: gcc
Version: 11.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: middle-end
Assignee: unassigned at gcc dot gnu.org
Reporter: hubicka at gcc dot gnu.org
Target Milestone: ---
The s116 loop is:
// TSVC kernel s116 (the reproducer for this report).
//
// Initialises the arrays, stores a start timestamp in func_args->t1, runs the
// kernel loop iterations*10 times, stores an end timestamp in func_args->t2,
// and returns the value of calc_checksum(__func__).
real_t s116(struct args_t * func_args)
{
// linear dependence testing
initialise_arrays(__func__);
gettimeofday(&func_args->t1, NULL);
for (int nl = 0; nl < iterations*10; nl++) {
// The body is written out five times by hand and i advances by 5 per trip.
// Each statement reads a[k + 1] before a later statement (or the next trip,
// for a[i + 5]) overwrites it, so within one pass every load sees the value
// the element had before that pass started.
for (int i = 0; i < LEN_1D - 5; i += 5) {
a[i] = a[i + 1] * a[i];
a[i + 1] = a[i + 2] * a[i + 1];
a[i + 2] = a[i + 3] * a[i + 2];
a[i + 3] = a[i + 4] * a[i + 3];
a[i + 4] = a[i + 5] * a[i + 4];
}
// NOTE(review): dummy() is not shown here; presumably an opaque call that
// stops the compiler from collapsing the repeated passes -- confirm against
// the TSVC sources.
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
gettimeofday(&func_args->t2, NULL);
return calc_checksum(__func__);
}
The vectorized code produced by clang 11 is about 2 times faster on a Zen 3 machine:
0000000000401d00 <s116>:
401d00: 41 56 push %r14
401d02: 53 push %rbx
401d03: 50 push %rax
401d04: 49 89 fe mov %rdi,%r14
401d07: bf 66 e1 42 00 mov $0x42e166,%edi
401d0c: e8 ff 58 01 00 call 417610 <initialise_arrays>
401d11: 31 db xor %ebx,%ebx
401d13: 4c 89 f7 mov %r14,%rdi
401d16: 31 f6 xor %esi,%esi
401d18: e8 43 f3 ff ff call 401060 <gettimeofday@plt>
401d1d: eb 47 jmp 401d66 <s116+0x66>
401d1f: 90 nop
401d20: bf 00 25 45 00 mov $0x452500,%edi
401d25: be 00 31 43 00 mov $0x433100,%esi
401d2a: ba 00 19 47 00 mov $0x471900,%edx
401d2f: b9 00 0d 49 00 mov $0x490d00,%ecx
401d34: 41 b8 00 01 4b 00 mov $0x4b0100,%r8d
401d3a: 41 b9 00 f5 4c 00 mov $0x4cf500,%r9d
401d40: c5 f8 57 c0 vxorps %xmm0,%xmm0,%xmm0
401d44: 68 00 f5 54 00 push $0x54f500
401d49: 68 00 f5 50 00 push $0x50f500
401d4e: e8 6d 3c 01 00 call 4159c0 <dummy>
401d53: 48 83 c4 10 add $0x10,%rsp
401d57: 83 c3 01 add $0x1,%ebx
401d5a: 81 fb 40 42 0f 00 cmp $0xf4240,%ebx
401d60: 0f 84 9a 00 00 00 je 401e00 <s116+0x100>
401d66: c5 fa 10 05 92 07 05 vmovss 0x50792(%rip),%xmm0 # 452500 <a>
401d6d: 00
401d6e: 31 c0 xor %eax,%eax
401d70: c5 fa 10 0c 85 04 25 vmovss 0x452504(,%rax,4),%xmm1
401d77: 45 00
401d79: c5 fa 59 c1 vmulss %xmm1,%xmm0,%xmm0
401d7d: c5 fa 11 04 85 00 25 vmovss %xmm0,0x452500(,%rax,4)
401d84: 45 00
401d86: c5 f8 10 04 85 08 25 vmovups 0x452508(,%rax,4),%xmm0
401d8d: 45 00
401d8f: c5 f0 c6 c8 00 vshufps $0x0,%xmm0,%xmm1,%xmm1
401d94: c5 f0 c6 c8 98 vshufps $0x98,%xmm0,%xmm1,%xmm1
401d99: c5 f8 59 c9 vmulps %xmm1,%xmm0,%xmm1
401d9d: c5 f8 11 0c 85 04 25 vmovups %xmm1,0x452504(,%rax,4)
401da4: 45 00
401da6: 48 3d f5 7c 00 00 cmp $0x7cf5,%rax
401dac: 0f 87 6e ff ff ff ja 401d20 <s116+0x20>
401db2: c4 e3 79 04 c0 e7 vpermilps $0xe7,%xmm0,%xmm0
401db8: c5 fa 10 0c 85 18 25 vmovss 0x452518(,%rax,4),%xmm1
401dbf: 45 00
401dc1: c5 fa 59 c1 vmulss %xmm1,%xmm0,%xmm0
401dc5: c5 fa 11 04 85 14 25 vmovss %xmm0,0x452514(,%rax,4)
401dcc: 45 00
401dce: c5 f8 10 04 85 1c 25 vmovups 0x45251c(,%rax,4),%xmm0
401dd5: 45 00
401dd7: c5 f0 c6 c8 00 vshufps $0x0,%xmm0,%xmm1,%xmm1
401ddc: c5 f0 c6 c8 98 vshufps $0x98,%xmm0,%xmm1,%xmm1
401de1: c5 f8 59 c9 vmulps %xmm1,%xmm0,%xmm1
401de5: c5 fa 10 04 85 28 25 vmovss 0x452528(,%rax,4),%xmm0
401dec: 45 00
401dee: c5 f8 11 0c 85 18 25 vmovups %xmm1,0x452518(,%rax,4)
401df5: 45 00
401df7: 48 83 c0 0a add $0xa,%rax
401dfb: e9 70 ff ff ff jmp 401d70 <s116+0x70>
401e00: 49 83 c6 10 add $0x10,%r14
401e04: 4c 89 f7 mov %r14,%rdi
401e07: 31 f6 xor %esi,%esi
401e09: e8 52 f2 ff ff call 401060 <gettimeofday@plt>
401e0e: bf 66 e1 42 00 mov $0x42e166,%edi
401e13: 48 83 c4 08 add $0x8,%rsp
401e17: 5b pop %rbx
401e18: 41 5e pop %r14
401e1a: e9 e1 51 02 00 jmp 427000 <calc_checksum>
401e1f: 90 nop
More information about the Gcc-bugs
mailing list