[Bug c++/64704] New: software crashed when using vectorizing optimization
zhangyajie_koy at 126 dot com
gcc-bugzilla@gcc.gnu.org
Wed Jan 21 02:37:00 GMT 2015
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64704
Bug ID: 64704
Summary: software crashed when using vectorizing optimization
Product: gcc
Version: 4.8.2
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: c++
Assignee: unassigned at gcc dot gnu.org
Reporter: zhangyajie_koy at 126 dot com
When executing the following for() loop, the software crashes.
// Adds up the buffer located by findPayloadType() one uint16 word at a time,
// starting from an initial value of 0xffff, and returns the 32-bit running
// sum implicitly narrowed to the uint16 return type.
// `update` is not referenced anywhere in the body shown here.
uint16 MessageBuffer::icmp6Checksum(int update)
{
TRACE_FUNCTION_ENTRY("");
register uint32 sum = 0xffff;
struct icmp6_hdr *icmp6Ptr = NULL;
// findPayloadType() is handed the address of icmp6Ptr, so presumably it
// stores the payload location there (verify against its definition).
// `type` is not used again in this excerpt.
// NOTE(review): if icmp6Ptr is still NULL after this call, the loop below
// dereferences a null pointer whenever len >= 2 -- confirm the helper
// always sets it.
uint8 type = findPayloadType((void**)&icmp6Ptr);
register int i;
uint16 len = getLength();
// NOTE(review): this cast lets the compiler assume the address is suitably
// aligned for uint16. The vectorized listing in this report derives its
// scalar peel count from (address & 0xf) >> 1 and then loads with movdqa
// (an aligned 16-byte load), which would fault if the address were odd.
// Confirm the alignment of the pointer produced by findPayloadType().
register uint16 *ptr = (uint16 *)icmp6Ptr;
// i advances by 2 for every word read, so len is effectively treated as a
// byte count (assuming uint16 is a 2-byte type -- TODO confirm). The bound
// `len - 1` stops before a trailing odd byte, which is never added.
for (i = 0; i < len - 1; i += 2)
{
sum += *ptr++;
}
// No carry folding or complement is applied in this excerpt; the upper
// 16 bits of sum are dropped by the conversion to uint16.
return (sum);
}
This code runs OK when the compiler is 4.4.1 and the OS is Ubuntu 9.10. When the
compiler is 4.8.2 and the OS is Ubuntu 14.04, it crashes. I checked the assembly
code of this for() loop: with 4.8.2 it is optimized in two ways. First, loop
unrolling (it is pre-unrolled 10 times); second, auto-vectorization.
After several tests, I found that when the actual number of loop iterations is
less than 10, it runs OK, while if it is greater than 10, it crashes. So there
must be something wrong with the auto-vectorization.
When I modify the makefile to disable the auto-vectorization optimization using
-O3 -fno-tree-vectorize, it is OK. The assembly code for the for() loop is shown
below.
for loop unrolling optimization begin:
13081bc: 45 8d 4d ff lea -0x1(%r13),%r9d
13081c0: 45 85 c9 test %r9d,%r9d
13081c3: 0f 8e 9e 02 00 00 jle 1308467
<_ZN13MessageBuffer13icmp6ChecksumEi+0x3a7>
13081c9: 41 8d 75 fe lea -0x2(%r13),%esi
13081cd: 48 89 da mov %rbx,%rdx
13081d0: 83 e2 0f and $0xf,%edx
13081d3: d1 ee shr %esi
13081d5: 48 d1 ea shr %rdx
13081d8: 8d 7e 01 lea 0x1(%rsi),%edi
13081db: 48 f7 da neg %rdx
13081de: 83 e2 07 and $0x7,%edx
13081e1: 39 d7 cmp %edx,%edi
13081e3: 89 f9 mov %edi,%ecx
13081e5: 0f 46 d7 cmovbe %edi,%edx
13081e8: 83 ff 0a cmp $0xa,%edi
13081eb: 0f 87 0f 02 00 00 ja 1308400
<_ZN13MessageBuffer13icmp6ChecksumEi+0x340>
13081f1: 44 0f b7 03 movzwl (%rbx),%r8d
13081f5: 48 8d 53 02 lea 0x2(%rbx),%rdx
13081f9: 44 01 c0 add %r8d,%eax
13081fc: 83 f9 01 cmp $0x1,%ecx
13081ff: 0f 86 95 02 00 00 jbe 130849a
<_ZN13MessageBuffer13icmp6ChecksumEi+0x3da>
1308205: 44 0f b7 43 02 movzwl 0x2(%rbx),%r8d
130820a: 48 8d 53 04 lea 0x4(%rbx),%rdx
130820e: 44 01 c0 add %r8d,%eax
1308211: 83 f9 02 cmp $0x2,%ecx
1308214: 0f 86 75 02 00 00 jbe 130848f
<_ZN13MessageBuffer13icmp6ChecksumEi+0x3cf>
130821a: 44 0f b7 43 04 movzwl 0x4(%rbx),%r8d
130821f: 48 8d 53 06 lea 0x6(%rbx),%rdx
1308223: 44 01 c0 add %r8d,%eax
1308226: 83 f9 03 cmp $0x3,%ecx
1308229: 0f 86 97 02 00 00 jbe 13084c6
<_ZN13MessageBuffer13icmp6ChecksumEi+0x406>
130822f: 44 0f b7 43 06 movzwl 0x6(%rbx),%r8d
1308234: 48 8d 53 08 lea 0x8(%rbx),%rdx
1308238: 44 01 c0 add %r8d,%eax
130823b: 83 f9 04 cmp $0x4,%ecx
130823e: 0f 86 77 02 00 00 jbe 13084bb
<_ZN13MessageBuffer13icmp6ChecksumEi+0x3fb>
1308244: 44 0f b7 43 08 movzwl 0x8(%rbx),%r8d
1308249: 48 8d 53 0a lea 0xa(%rbx),%rdx
130824d: 44 01 c0 add %r8d,%eax
1308250: 83 f9 05 cmp $0x5,%ecx
1308253: 0f 86 57 02 00 00 jbe 13084b0
<_ZN13MessageBuffer13icmp6ChecksumEi+0x3f0>
1308259: 44 0f b7 43 0a movzwl 0xa(%rbx),%r8d
130825e: 48 8d 53 0c lea 0xc(%rbx),%rdx
1308262: 44 01 c0 add %r8d,%eax
1308265: 83 f9 06 cmp $0x6,%ecx
1308268: 0f 86 37 02 00 00 jbe 13084a5
<_ZN13MessageBuffer13icmp6ChecksumEi+0x3e5>
130826e: 44 0f b7 43 0c movzwl 0xc(%rbx),%r8d
1308273: 48 8d 53 0e lea 0xe(%rbx),%rdx
1308277: 44 01 c0 add %r8d,%eax
130827a: 83 f9 07 cmp $0x7,%ecx
130827d: 0f 86 f6 01 00 00 jbe 1308479
<_ZN13MessageBuffer13icmp6ChecksumEi+0x3b9>
1308283: 44 0f b7 43 0e movzwl 0xe(%rbx),%r8d
1308288: 48 8d 53 10 lea 0x10(%rbx),%rdx
130828c: 44 01 c0 add %r8d,%eax
130828f: 83 f9 08 cmp $0x8,%ecx
1308292: 0f 86 d6 01 00 00 jbe 130846e
<_ZN13MessageBuffer13icmp6ChecksumEi+0x3ae>
1308298: 44 0f b7 43 10 movzwl 0x10(%rbx),%r8d
130829d: 48 8d 53 12 lea 0x12(%rbx),%rdx
13082a1: 44 01 c0 add %r8d,%eax
13082a4: 83 f9 09 cmp $0x9,%ecx
13082a7: 0f 86 d7 01 00 00 jbe 1308484
<_ZN13MessageBuffer13icmp6ChecksumEi+0x3c4>
13082ad: 44 0f b7 43 12 movzwl 0x12(%rbx),%r8d
13082b2: 48 8d 53 14 lea 0x14(%rbx),%rdx
13082b6: 44 01 c0 add %r8d,%eax
13082b9: 41 b8 14 00 00 00 mov $0x14,%r8d
13082bf: 39 f9 cmp %edi,%ecx
13082c1: 0f 84 e2 00 00 00 je 13083a9
<_ZN13MessageBuffer13icmp6ChecksumEi+0x2e9>
for loop unrolling optimization end:
for loop auto vectorizing optimization begin:
13082c7: 41 89 fe mov %edi,%r14d
13082ca: 41 89 ca mov %ecx,%r10d
13082cd: 41 29 ce sub %ecx,%r14d
13082d0: 44 89 f5 mov %r14d,%ebp
13082d3: c1 ed 03 shr $0x3,%ebp
13082d6: 8d 0c ed 00 00 00 00 lea 0x0(,%rbp,8),%ecx
13082dd: 85 c9 test %ecx,%ecx
13082df: 74 69 je 130834a
<_ZN13MessageBuffer13icmp6ChecksumEi+0x28a>
13082e1: 66 0f ef c0 pxor %xmm0,%xmm0
13082e5: 4e 8d 1c 53 lea (%rbx,%r10,2),%r11
13082e9: 66 0f ef d2 pxor %xmm2,%xmm2
13082ed: 45 31 d2 xor %r10d,%r10d
13082f0: 66 41 0f 6f 0b movdqa (%r11),%xmm1
13082f5: 41 83 c2 01 add $0x1,%r10d
13082f9: 49 83 c3 10 add $0x10,%r11
13082fd: 44 39 d5 cmp %r10d,%ebp
1308300: 66 0f 6f e1 movdqa %xmm1,%xmm4
1308304: 66 0f 69 ca punpckhwd %xmm2,%xmm1
1308308: 66 0f 61 e2 punpcklwd %xmm2,%xmm4
130830c: 66 0f fe c4 paddd %xmm4,%xmm0
1308310: 66 0f fe c1 paddd %xmm1,%xmm0
1308314: 77 da ja 13082f0
<_ZN13MessageBuffer13icmp6ChecksumEi+0x230>
1308316: 66 0f 6f e8 movdqa %xmm0,%xmm5
130831a: 41 89 ca mov %ecx,%r10d
130831d: 45 8d 04 48 lea (%r8,%rcx,2),%r8d
1308321: 4a 8d 14 52 lea (%rdx,%r10,2),%rdx
1308325: 66 0f 73 dd 08 psrldq $0x8,%xmm5
130832a: 66 0f fe c5 paddd %xmm5,%xmm0
130832e: 66 0f 6f f0 movdqa %xmm0,%xmm6
1308332: 66 0f 73 de 04 psrldq $0x4,%xmm6
1308337: 66 0f fe c6 paddd %xmm6,%xmm0
130833b: 66 0f 7e 44 24 0c movd %xmm0,0xc(%rsp)
1308341: 03 44 24 0c add 0xc(%rsp),%eax
1308345: 41 39 ce cmp %ecx,%r14d
1308348: 74 5f je 13083a9
<_ZN13MessageBuffer13icmp6ChecksumEi+0x2e9>
130834a: 0f b7 0a movzwl (%rdx),%ecx
130834d: 01 c8 add %ecx,%eax
130834f: 41 8d 48 02 lea 0x2(%r8),%ecx
1308353: 44 39 c9 cmp %r9d,%ecx
1308356: 7d 51 jge 13083a9
<_ZN13MessageBuffer13icmp6ChecksumEi+0x2e9>
1308358: 0f b7 4a 02 movzwl 0x2(%rdx),%ecx
130835c: 01 c8 add %ecx,%eax
130835e: 41 8d 48 04 lea 0x4(%r8),%ecx
1308362: 41 39 c9 cmp %ecx,%r9d
1308365: 7e 42 jle 13083a9
<_ZN13MessageBuffer13icmp6ChecksumEi+0x2e9>
1308367: 0f b7 4a 04 movzwl 0x4(%rdx),%ecx
130836b: 01 c8 add %ecx,%eax
130836d: 41 8d 48 06 lea 0x6(%r8),%ecx
1308371: 41 39 c9 cmp %ecx,%r9d
1308374: 7e 33 jle 13083a9
<_ZN13MessageBuffer13icmp6ChecksumEi+0x2e9>
1308376: 0f b7 4a 06 movzwl 0x6(%rdx),%ecx
130837a: 01 c8 add %ecx,%eax
130837c: 41 8d 48 08 lea 0x8(%r8),%ecx
1308380: 41 39 c9 cmp %ecx,%r9d
1308383: 7e 24 jle 13083a9
<_ZN13MessageBuffer13icmp6ChecksumEi+0x2e9>
1308385: 0f b7 4a 08 movzwl 0x8(%rdx),%ecx
1308389: 01 c8 add %ecx,%eax
130838b: 41 8d 48 0a lea 0xa(%r8),%ecx
130838f: 41 39 c9 cmp %ecx,%r9d
1308392: 7e 15 jle 13083a9
<_ZN13MessageBuffer13icmp6ChecksumEi+0x2e9>
1308394: 0f b7 4a 0a movzwl 0xa(%rdx),%ecx
1308398: 41 83 c0 0c add $0xc,%r8d
130839c: 01 c8 add %ecx,%eax
130839e: 45 39 c1 cmp %r8d,%r9d
13083a1: 7e 06 jle 13083a9
<_ZN13MessageBuffer13icmp6ChecksumEi+0x2e9>
13083a3: 0f b7 52 0c movzwl 0xc(%rdx),%edx
13083a7: 01 d0 add %edx,%eax
13083a9: 48 8d 5c 73 02 lea 0x2(%rbx,%rsi,2),%rbx
13083ae: 01 ff add %edi,%edi
for loop auto vectorizing optimization end:
Our CPU info is:
Intel(R) Xeon(R) CPU E5-2687W 0 @ 3.10GHz
I have 2 CPUs, with 8 cores each.
More information about the Gcc-bugs
mailing list