This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug c++/64704] software crashed when using vectorizing optimization
- From: "maltsevm at gmail dot com" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: Sat, 24 Jan 2015 03:31:08 +0000
- Subject: [Bug c++/64704] software crashed when using vectorizing optimization
- Auto-submitted: auto-generated
- References: <bug-64704-4 at http dot gcc dot gnu dot org/bugzilla/>
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64704
--- Comment #9 from Mikhail Maltsev <maltsevm at gmail dot com> ---
>what can i do to make the ptr aligned by 16-byte.
Well, you may skip the first few bytes (of course, not just discard them, but
process them one by one).
Fortunately, you don't need to do it manually; the compiler can do it for you.
The problem is that when you use a pointer to uint16, GCC assumes that it is
already aligned to a 2-byte boundary (if that is not true, the behavior is
undefined). Consider this program:
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <linux/icmpv6.h>
/* Short aliases for the fixed-width unsigned integer types from <stdint.h>. */
typedef uint8_t uint8;
typedef uint16_t uint16;
typedef uint32_t uint32;
/* Input bytes handed to MessageBuffer in main().  Only the first five
   elements are given explicit values; the remaining elements are
   zero-initialized. */
uint8 buf[1024] = { 0xFF, 0x01, 0x00, 0x02, 0x00 };
/* Non-owning view of a message: a byte pointer plus a 16-bit length.
   The payload handed out by findPayloadType() starts one byte past data_,
   so its address has the opposite parity of data_. */
class MessageBuffer
{
public:
    /* data: first byte of the message; the pointer is stored, not copied.
       len:  stored as given; getLength() reports len - 1. */
    MessageBuffer(uint8 *data, uint16 len) :
        data_(data), len_(len) { }

    /* Returns len_ - 1, converted back to uint16. */
    uint16 getLength() { return len_ - 1; }

    /* Both checksum routines are defined out of line and kept out of the
       inliner so each one is optimized as a separate function. */
    uint16 __attribute__((noinline)) icmp6Checksum_ub (int update);
    uint16 __attribute__((noinline)) icmp6Checksum_naive (int update);

    /* Stores data_ + 1 into *payloadStart and returns ICMPV6_ECHO_REQUEST.
       The addition is done in inline asm on purpose: written as plain
       `p = data_ + 1;` GCC could see the relationship between the two
       pointers, and the point of this helper is that it does not use that
       information during tree optimization. */
    uint8 __attribute__((noinline)) findPayloadType (void **payloadStart)
    {
        uint8 *p;
        /* Operand %0 is the output (p) and %1 is the input (data_).  In
           AT&T syntax the destination comes last, so the input must be the
           address operand and the output the destination.  The previous
           form, "leaq 1(%0), %1", had them swapped and only produced
           data_ + 1 when both operands happened to share one register. */
        asm volatile ("leaq 1(%1), %0" : "=r"(p) : "r"(data_) : );
        *payloadStart = p;
        return ICMPV6_ECHO_REQUEST;
    }
private:
    uint8 *data_;   /* start of the message; not owned */
    uint16 len_;    /* length as passed to the constructor */
};
/* Sums the payload as 16-bit words, starting from 0xffff, and returns the
   low 16 bits of the 32-bit total.  The unnamed int parameter is unused.

   This variant reads the payload through a uint16 pointer.  If the payload
   address is not 2-byte aligned, each dereference is undefined behavior; an
   optimizer that vectorizes the loop may rely on the assumed alignment and
   emit loads that fault. */
uint16 MessageBuffer::icmp6Checksum_ub(int)
{
register uint32 sum = 0xffff;
struct icmp6_hdr *icmp6Ptr = NULL;
/* findPayloadType() overwrites icmp6Ptr with the payload start address. */
uint8 type = findPayloadType((void**)&icmp6Ptr);
(void)type; /* inhibit warning */
register int i;
uint16 len = getLength();
/* Reinterprets the payload address as a pointer to uint16 without checking
   its alignment. */
register uint16 *ptr = (uint16 *)icmp6Ptr;
/* One 16-bit word per iteration; i counts bytes and stops before a trailing
   odd byte. */
for (i = 0; i < len - 1; i += 2) {
sum += *ptr++;
}
/* Implicit conversion to uint16 keeps only the low 16 bits of sum. */
return (sum);
}
/* Sums the payload as 16-bit words assembled from individual bytes,
   starting from 0xffff, and returns the low 16 bits of the 32-bit total.
   The unnamed int parameter is unused.

   Every access is a single-byte load, so this routine places no alignment
   requirement on the payload address. */
uint16 MessageBuffer::icmp6Checksum_naive(int)
{
    /* No `register` here: the storage class is deprecated since C++11 and
       removed in C++17, and it has no effect on the generated code. */
    uint32 sum = 0xffff;

    /* findPayloadType() overwrites data with the payload start address. */
    uint8 *data = NULL;
    findPayloadType((void**)&data);

    uint16 len = getLength();
    /* i counts bytes; each step combines two bytes with the first one as
       the low half, and the loop stops before a trailing odd byte. */
    for (int i = 0; i < len - 1; i += 2) {
        sum += data[i] | (data[i + 1] << 8);
    }

    /* Implicit conversion to uint16 keeps only the low 16 bits of sum. */
    return (sum);
}
int main()
{
MessageBuffer buffer(buf, 1000);
printf("0x%.4x\n", buffer.icmp6Checksum_naive(0));
printf("0x%.4x\n", buffer.icmp6Checksum_ub(0));
}
icmp6Checksum_naive calculates the checksum (at least I hope so), and
icmp6Checksum_ub causes a segfault (I tried with g++ -O3 -funroll-loops -msse2,
GCC 4.8.2).
>i heard of that it is not necesary to aligned by 16-byte in x86
Maybe you are confusing movdqa with movdqu (or some other instruction)? movdqa
requires a 16-byte-aligned memory operand, while movdqu does not.
Here is a universal implementation from the Linux kernel (there are also
platform-specific versions):
http://lxr.free-electrons.com/source/lib/checksum.c
Notice that the case when the address is odd is handled separately (especially
in platform-specific code).