This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Potential builtin memcpy bug in 4.9
- From: Zan Lynx <zlynx at acm dot org>
- To: gcc at gcc dot gnu dot org
- Date: Tue, 10 Mar 2015 15:01:42 -0600
- Subject: Potential builtin memcpy bug in 4.9
- Authentication-results: sourceware.org; auth=none
I am trying to track down a bug that I only see on Fedora 21 with the
GCC 4.9.2 compiler building x86_64 code. It might have started happening
earlier. GCC 4.8 built without this problem.
I am building the c-ares library as part of a larger project and getting
malloc failures. Valgrind claims that code is writing outside its
allocated blocks. I traced it to the memcpy call:
memcpy(query->tcpbuf + 2, qbuf, qlen);
In that call qlen == 35. I checked the malloc and it allocates 37 bytes
for tcpbuf. And it has worked on older compilers for a long time.
As best I can tell the builtin memcpy that is being used here (and it is
definitely the builtin because turning off builtins builds working code)
is writing way past the end of the buffer.
But for whatever reason I can't seem to build a stand-alone example.
Looking for some ideas. Maybe someone could audit the ASM code for the
memcpy builtin, see if anything jumps out at you. I haven't tried that
yet. Is it all one piece, or is it multiple chunks? Could it have bad
ASM specifications which are allowing the optimizer to write into a
register that should be preserved?
Here's the asm for the function along with some commentary:
00000000001b1a57 <ares_send>:
1b1a57: 41 57 push %r15
1b1a59: 41 56 push %r14
1b1a5b: 41 55 push %r13
1b1a5d: 41 54 push %r12
1b1a5f: 55 push %rbp
1b1a60: 53 push %rbx
1b1a61: 48 83 ec 28 sub $0x28,%rsp
1b1a65: 89 d5 mov %edx,%ebp
1b1a67: 49 89 ce mov %rcx,%r14
1b1a6a: 4d 89 c5 mov %r8,%r13
1b1a6d: 8d 42 f4 lea -0xc(%rdx),%eax
1b1a70: 3d f3 ff 00 00 cmp $0xfff3,%eax
1b1a75: 76 21 jbe 1b1a98 <ares_send+0x41>
1b1a77: 45 31 c0 xor %r8d,%r8d
1b1a7a: 31 c9 xor %ecx,%ecx
1b1a7c: 31 d2 xor %edx,%edx
1b1a7e: be 07 00 00 00 mov $0x7,%esi
1b1a83: 4c 89 ef mov %r13,%rdi
1b1a86: 41 ff d6 callq *%r14
1b1a89: 48 83 c4 28 add $0x28,%rsp
1b1a8d: 5b pop %rbx
1b1a8e: 5d pop %rbp
1b1a8f: 41 5c pop %r12
1b1a91: 41 5d pop %r13
1b1a93: 41 5e pop %r14
1b1a95: 41 5f pop %r15
1b1a97: c3 retq
1b1a98: 49 89 fc mov %rdi,%r12
1b1a9b: 49 89 f7 mov %rsi,%r15
1b1a9e: bf c8 00 00 00 mov $0xc8,%edi
1b1aa3: e8 78 f4 ea ff callq 60f20 <malloc@plt>
1b1aa8: 48 89 c3 mov %rax,%rbx
1b1aab: 48 85 c0 test %rax,%rax
1b1aae: 0f 84 a0 02 00 00 je 1b1d54 <ares_send+0x2fd>
1b1ab4: 8d 45 02 lea 0x2(%rbp),%eax
1b1ab7: 89 44 24 0c mov %eax,0xc(%rsp)
1b1abb: 48 63 f8 movslq %eax,%rdi
1b1abe: e8 5d f4 ea ff callq 60f20 <malloc@plt>
1b1ac3: 48 89 43 78 mov %rax,0x78(%rbx)
1b1ac7: 48 85 c0 test %rax,%rax
1b1aca: 0f 84 7c 02 00 00 je 1b1d4c <ares_send+0x2f5>
1b1ad0: 48 89 04 24 mov %rax,(%rsp)
1b1ad4: 49 63 bc 24 98 00 00 movslq 0x98(%r12),%rdi
1b1adb: 00
1b1adc: 89 7c 24 08 mov %edi,0x8(%rsp)
1b1ae0: 48 c1 e7 03 shl $0x3,%rdi
1b1ae4: e8 37 f4 ea ff callq 60f20 <malloc@plt>
1b1ae9: 48 89 c7 mov %rax,%rdi
1b1aec: 48 89 83 b0 00 00 00 mov %rax,0xb0(%rbx)
1b1af3: 48 85 c0 test %rax,%rax
1b1af6: 8b 4c 24 08 mov 0x8(%rsp),%ecx
1b1afa: 48 8b 14 24 mov (%rsp),%rdx
1b1afe: 0f 84 40 02 00 00 je 1b1d44 <ares_send+0x2ed>
1b1b04: 41 0f b6 07 movzbl (%r15),%eax
1b1b08: c1 e0 08 shl $0x8,%eax
1b1b0b: 45 0f b6 47 01 movzbl 0x1(%r15),%r8d
1b1b10: 44 09 c0 or %r8d,%eax
1b1b13: 66 89 03 mov %ax,(%rbx)
1b1b16: 48 c7 43 08 00 00 00 movq $0x0,0x8(%rbx)
1b1b1d: 00
1b1b1e: 48 c7 43 10 00 00 00 movq $0x0,0x10(%rbx)
1b1b25: 00
1b1b26: 89 e8 mov %ebp,%eax
1b1b28: c1 f8 08 sar $0x8,%eax
1b1b2b: 88 02 mov %al,(%rdx)
1b1b2d: 40 88 6a 01 mov %bpl,0x1(%rdx)
*** HERE IS WHERE IT STARTS: Getting query->tcpbuf + 2 ***
*** ebp has the length value 35 ***
*** memcpy(query->tcpbuf + 2, qbuf, qlen); ***
1b1b31: 4c 8d 4a 02 lea 0x2(%rdx),%r9
1b1b35: 89 e8 mov %ebp,%eax
1b1b37: 4d 89 c8 mov %r9,%r8
1b1b3a: 4c 89 fe mov %r15,%rsi
1b1b3d: 83 fd 20 cmp $0x20,%ebp
1b1b40: 0f 83 96 01 00 00 jae 1b1cdc <ares_send+0x285>
*** That jae took the jump down to 1b1cdc ***
1b1b46: 83 e0 1f and $0x1f,%eax
1b1b49: 74 15 je 1b1b60 <ares_send+0x109>
1b1b4b: 31 d2 xor %edx,%edx
1b1b4d: 41 89 d2 mov %edx,%r10d
1b1b50: 46 0f b6 1c 16 movzbl (%rsi,%r10,1),%r11d
1b1b55: 47 88 1c 10 mov %r11b,(%r8,%r10,1)
1b1b59: 83 c2 01 add $0x1,%edx
1b1b5c: 39 c2 cmp %eax,%edx
1b1b5e: 72 ed jb 1b1b4d <ares_send+0xf6>
1b1b60: 8b 44 24 0c mov 0xc(%rsp),%eax
1b1b64: 89 83 80 00 00 00 mov %eax,0x80(%rbx)
1b1b6a: 4c 89 8b 88 00 00 00 mov %r9,0x88(%rbx)
1b1b71: 89 ab 90 00 00 00 mov %ebp,0x90(%rbx)
1b1b77: 4c 89 b3 98 00 00 00 mov %r14,0x98(%rbx)
1b1b7e: 4c 89 ab a0 00 00 00 mov %r13,0xa0(%rbx)
1b1b85: c7 83 a8 00 00 00 00 movl $0x0,0xa8(%rbx)
1b1b8c: 00 00 00
1b1b8f: 41 8b 84 24 b0 01 00 mov 0x1b0(%r12),%eax
1b1b96: 00
1b1b97: 89 83 ac 00 00 00 mov %eax,0xac(%rbx)
1b1b9d: 41 83 7c 24 10 01 cmpl $0x1,0x10(%r12)
1b1ba3: 0f 84 cc 01 00 00 je 1b1d75 <ares_send+0x31e>
1b1ba9: 85 c9 test %ecx,%ecx
1b1bab: 7e 64 jle 1b1c11 <ares_send+0x1ba>
1b1bad: 8d 51 fe lea -0x2(%rcx),%edx
1b1bb0: d1 ea shr %edx
1b1bb2: 83 c2 01 add $0x1,%edx
1b1bb5: 8d 04 12 lea (%rdx,%rdx,1),%eax
1b1bb8: 8d 71 ff lea -0x1(%rcx),%esi
1b1bbb: 83 fe 01 cmp $0x1,%esi
1b1bbe: 0f 86 db 01 00 00 jbe 1b1d9f <ares_send+0x348>
1b1bc4: 31 f6 xor %esi,%esi
1b1bc6: 66 0f ef c0 pxor %xmm0,%xmm0
1b1bca: 49 89 f0 mov %rsi,%r8
1b1bcd: 49 c1 e0 04 shl $0x4,%r8
1b1bd1: f3 42 0f 7f 04 07 movdqu %xmm0,(%rdi,%r8,1)
1b1bd7: 48 83 c6 01 add $0x1,%rsi
1b1bdb: 39 f2 cmp %esi,%edx
1b1bdd: 77 eb ja 1b1bca <ares_send+0x173>
1b1bdf: 39 c8 cmp %ecx,%eax
1b1be1: 74 2e je 1b1c11 <ares_send+0x1ba>
1b1be3: 48 63 d0 movslq %eax,%rdx
1b1be6: 48 8d 14 d7 lea (%rdi,%rdx,8),%rdx
1b1bea: c7 02 00 00 00 00 movl $0x0,(%rdx)
1b1bf0: c7 42 04 00 00 00 00 movl $0x0,0x4(%rdx)
1b1bf7: 83 c0 01 add $0x1,%eax
1b1bfa: 39 c1 cmp %eax,%ecx
1b1bfc: 7e 13 jle 1b1c11 <ares_send+0x1ba>
1b1bfe: 48 98 cltq
1b1c00: 48 8d 04 c7 lea (%rdi,%rax,8),%rax
1b1c04: c7 00 00 00 00 00 movl $0x0,(%rax)
1b1c0a: c7 40 04 00 00 00 00 movl $0x0,0x4(%rax)
1b1c11: 41 8b 04 24 mov (%r12),%eax
1b1c15: f6 c4 01 test $0x1,%ah
1b1c18: 0f 85 6a 01 00 00 jne 1b1d88 <ares_send+0x331>
1b1c1e: ba 00 02 00 00 mov $0x200,%edx
1b1c23: a8 01 test $0x1,%al
1b1c25: 0f 85 6a 01 00 00 jne 1b1d95 <ares_send+0x33e>
1b1c2b: 31 c0 xor %eax,%eax
1b1c2d: 39 ea cmp %ebp,%edx
1b1c2f: 0f 9c c0 setl %al
1b1c32: 89 83 b8 00 00 00 mov %eax,0xb8(%rbx)
1b1c38: c7 83 bc 00 00 00 0b movl $0xb,0xbc(%rbx)
1b1c3f: 00 00 00
1b1c42: c7 83 c0 00 00 00 00 movl $0x0,0xc0(%rbx)
1b1c49: 00 00 00
1b1c4c: 48 8d 6b 18 lea 0x18(%rbx),%rbp
1b1c50: 48 89 de mov %rbx,%rsi
1b1c53: 48 89 ef mov %rbp,%rdi
1b1c56: e8 b9 ca ff ff callq 1ae714 <ares__init_list_node>
1b1c5b: 48 8d 7b 30 lea 0x30(%rbx),%rdi
1b1c5f: 48 89 de mov %rbx,%rsi
1b1c62: e8 ad ca ff ff callq 1ae714 <ares__init_list_node>
1b1c67: 48 8d 7b 48 lea 0x48(%rbx),%rdi
1b1c6b: 48 89 de mov %rbx,%rsi
1b1c6e: e8 a1 ca ff ff callq 1ae714 <ares__init_list_node>
1b1c73: 4c 8d 6b 60 lea 0x60(%rbx),%r13
1b1c77: 48 89 de mov %rbx,%rsi
1b1c7a: 4c 89 ef mov %r13,%rdi
1b1c7d: e8 92 ca ff ff callq 1ae714 <ares__init_list_node>
1b1c82: 49 8d b4 24 b8 01 00 lea 0x1b8(%r12),%rsi
1b1c89: 00
1b1c8a: 4c 89 ef mov %r13,%rdi
1b1c8d: e8 ac ca ff ff callq 1ae73e <ares__insert_in_list>
1b1c92: 0f b7 03 movzwl (%rbx),%eax
1b1c95: 25 ff 07 00 00 and $0x7ff,%eax
1b1c9a: 48 8d 04 40 lea (%rax,%rax,2),%rax
1b1c9e: 49 8d b4 c4 d0 01 00 lea 0x1d0(%r12,%rax,8),%rsi
1b1ca5: 00
1b1ca6: 48 89 ef mov %rbp,%rdi
1b1ca9: e8 90 ca ff ff callq 1ae73e <ares__insert_in_list>
1b1cae: e8 78 0e 00 00 callq 1b2b2b <ares__tvnow>
1b1cb3: 48 89 44 24 10 mov %rax,0x10(%rsp)
1b1cb8: 48 89 54 24 18 mov %rdx,0x18(%rsp)
1b1cbd: 48 8d 54 24 10 lea 0x10(%rsp),%rdx
1b1cc2: 48 89 de mov %rbx,%rsi
1b1cc5: 4c 89 e7 mov %r12,%rdi
1b1cc8: e8 c1 de ff ff callq 1afb8e <ares__send_query>
1b1ccd: 48 83 c4 28 add $0x28,%rsp
1b1cd1: 5b pop %rbx
1b1cd2: 5d pop %rbp
1b1cd3: 41 5c pop %r12
1b1cd5: 41 5d pop %r13
1b1cd7: 41 5e pop %r14
1b1cd9: 41 5f pop %r15
1b1cdb: c3 retq
*** Jumped here from the jae at 1b1b40 ***
*** I assume this tests the destination alignment ***
1b1cdc: 41 f6 c1 02 test $0x2,%r9b
1b1ce0: 0f 85 d5 00 00 00 jne 1b1dbb <ares_send+0x364>
*** It takes the above jump to 1b1dbb ***
*** Code at 1b1dbb jumps back to here ***
1b1ce6: 41 f6 c0 04 test $0x4,%r8b
1b1cea: 0f 85 b6 00 00 00 jne 1b1da6 <ares_send+0x34f>
*** It takes the above jump ***
*** Jumped here from 1b1db6 ***
1b1cf0: 89 c2 mov %eax,%edx
1b1cf2: 83 e2 e0 and $0xffffffe0,%edx
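*** Here is where I think it actually goes wrong. edx was 0x1d (the 29
bytes still left to copy after the 2-byte and 4-byte alignment copies).
The and instruction makes edx == 0.
The loop that follows, though, is entered without a pre-check and copies
32 (0x20) bytes before it tests the count. That is more than 0x1d. ***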
1b1cf5: 89 54 24 08 mov %edx,0x8(%rsp)
1b1cf9: 45 31 d2 xor %r10d,%r10d
1b1cfc: 89 04 24 mov %eax,(%rsp)
1b1cff: 44 89 d2 mov %r10d,%edx
1b1d02: 4c 8b 7c 16 08 mov 0x8(%rsi,%rdx,1),%r15
1b1d07: 4c 8b 5c 16 10 mov 0x10(%rsi,%rdx,1),%r11
1b1d0c: 48 8b 44 16 18 mov 0x18(%rsi,%rdx,1),%rax
1b1d11: 49 89 44 10 18 mov %rax,0x18(%r8,%rdx,1)
1b1d16: 48 8b 04 16 mov (%rsi,%rdx,1),%rax
1b1d1a: 49 89 04 10 mov %rax,(%r8,%rdx,1)
1b1d1e: 4d 89 7c 10 08 mov %r15,0x8(%r8,%rdx,1)
1b1d23: 4d 89 5c 10 10 mov %r11,0x10(%r8,%rdx,1)
1b1d28: 41 83 c2 20 add $0x20,%r10d
1b1d2c: 44 3b 54 24 08 cmp 0x8(%rsp),%r10d
1b1d31: 72 cc jb 1b1cff <ares_send+0x2a8>
1b1d33: 8b 04 24 mov (%rsp),%eax
1b1d36: 44 89 d2 mov %r10d,%edx
1b1d39: 49 01 d0 add %rdx,%r8
1b1d3c: 48 01 d6 add %rdx,%rsi
1b1d3f: e9 02 fe ff ff jmpq 1b1b46 <ares_send+0xef>
1b1d44: 48 89 d7 mov %rdx,%rdi
1b1d47: e8 24 f0 ea ff callq 60d70 <free@plt>
1b1d4c: 48 89 df mov %rbx,%rdi
1b1d4f: e8 1c f0 ea ff callq 60d70 <free@plt>
1b1d54: 45 31 c0 xor %r8d,%r8d
1b1d57: 31 c9 xor %ecx,%ecx
1b1d59: 31 d2 xor %edx,%edx
1b1d5b: be 0f 00 00 00 mov $0xf,%esi
1b1d60: 4c 89 ef mov %r13,%rdi
1b1d63: 41 ff d6 callq *%r14
1b1d66: 48 83 c4 28 add $0x28,%rsp
1b1d6a: 5b pop %rbx
1b1d6b: 5d pop %rbp
1b1d6c: 41 5c pop %r12
1b1d6e: 41 5d pop %r13
1b1d70: 41 5e pop %r14
1b1d72: 41 5f pop %r15
1b1d74: c3 retq
1b1d75: 83 c0 01 add $0x1,%eax
1b1d78: 99 cltd
1b1d79: f7 f9 idiv %ecx
1b1d7b: 41 89 94 24 b0 01 00 mov %edx,0x1b0(%r12)
1b1d82: 00
1b1d83: e9 21 fe ff ff jmpq 1b1ba9 <ares_send+0x152>
1b1d88: 41 8b 54 24 50 mov 0x50(%r12),%edx
1b1d8d: a8 01 test $0x1,%al
1b1d8f: 0f 84 96 fe ff ff je 1b1c2b <ares_send+0x1d4>
1b1d95: b8 01 00 00 00 mov $0x1,%eax
1b1d9a: e9 93 fe ff ff jmpq 1b1c32 <ares_send+0x1db>
1b1d9f: 31 c0 xor %eax,%eax
1b1da1: e9 3d fe ff ff jmpq 1b1be3 <ares_send+0x18c>
*** Jumped here from 1b1cea ***
1b1da6: 8b 16 mov (%rsi),%edx
1b1da8: 41 89 10 mov %edx,(%r8)
1b1dab: 49 83 c0 04 add $0x4,%r8
1b1daf: 48 83 c6 04 add $0x4,%rsi
1b1db3: 83 e8 04 sub $0x4,%eax
1b1db6: e9 35 ff ff ff jmpq 1b1cf0 <ares_send+0x299>
*** Jumped here from 1b1ce0 ***
1b1dbb: 41 0f b7 07 movzwl (%r15),%eax
1b1dbf: 66 89 42 02 mov %ax,0x2(%rdx)
1b1dc3: 4c 8d 42 04 lea 0x4(%rdx),%r8
1b1dc7: 48 83 c6 02 add $0x2,%rsi
1b1dcb: 8d 45 fe lea -0x2(%rbp),%eax
1b1dce: e9 13 ff ff ff jmpq 1b1ce6 <ares_send+0x28f>