This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug c++/64704] software crashed when using vectorizing optimization


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64704

--- Comment #9 from Mikhail Maltsev <maltsevm at gmail dot com> ---
>what can i do to make the ptr aligned by 16-byte.
Well, you may skip the first few bytes (of course, not just discard them, but
process them one by one).
Fortunately, you don't need to do it manually; it can be done by the compiler.
The problem is that when you use a pointer to uint16, GCC assumes that it is
already aligned on a 2-byte boundary (if that is not true, the behavior is
undefined). Consider this program:

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <linux/icmpv6.h>

typedef uint8_t uint8;
typedef uint16_t uint16;
typedef uint32_t uint32;

uint8 buf[1024] = { 0xFF, 0x01, 0x00, 0x02, 0x00 };

/*
 * Minimal stand-in for a packet buffer, used only to reproduce the crash.
 * Holds a non-owning pointer to the raw bytes together with their length.
 */
class MessageBuffer
{
public:
            /* Stores the pointer and length as given; nothing is copied. */
            MessageBuffer(uint8 *data, uint16 len) :
                            data_(data), len_(len) { }
    /* Returns len_ - 1, matching the one-byte offset that findPayloadType()
       applies to data_. */
    uint16  getLength() { return len_ - 1; }
    /* Sums the payload through a uint16 pointer (defined out of class). */
    uint16  __attribute__((noinline)) icmp6Checksum_ub (int update);
    /* Sums the payload byte by byte (defined out of class). */
    uint16  __attribute__((noinline)) icmp6Checksum_naive (int update);
    /*
     * Writes the address data_ + 1 to *payloadStart and returns
     * ICMPV6_ECHO_REQUEST.  Kept noinline, and the address is computed in
     * inline asm, so that the optimizer cannot see how the returned pointer
     * relates to data_.
     */
    uint8   __attribute__((noinline)) findPayloadType (void **payloadStart)
    {
        uint8 *p;
        /* p = data_ + 1, hidden from GCC so the information is not used during
           tree optimization.  Operands are numbered outputs first: %0 is p and
           %1 is data_.  AT&T syntax puts the destination last, so the address
           is formed from %1 and the result is written to %0. */
        asm volatile ("leaq 1(%1), %0" : "=r"(p) : "r"(data_) : );
        *payloadStart = p;
        return ICMPV6_ECHO_REQUEST;
    }
private:
    uint8 *data_;   /* start of the raw bytes (not owned) */
    uint16 len_;    /* byte count passed to the constructor */
};

/*
 * Intentionally faulty variant used to reproduce the reported crash.
 * It walks the payload through a uint16 pointer even though the address
 * handed back by findPayloadType() is not guaranteed to be 2-byte aligned.
 * Dereferencing a misaligned uint16 pointer is undefined behavior, and the
 * compiler is entitled to assume it never happens.
 * The int parameter is unused.
 */
uint16 MessageBuffer::icmp6Checksum_ub(int)
{
    register uint32 sum = 0xffff;

    struct icmp6_hdr *icmp6Ptr = NULL;
    /* findPayloadType() overwrites icmp6Ptr with the payload start address. */
    uint8 type = findPayloadType((void**)&icmp6Ptr);
    (void)type; /* inhibit warning */
    register int i;
    uint16 len = getLength();
    /* The cast tells the compiler the address is suitably aligned for uint16. */
    register uint16 *ptr = (uint16 *)icmp6Ptr;
    /* One 16-bit word per iteration; i counts bytes. */
    for (i = 0; i < len - 1; i += 2) {
        sum += *ptr++;
    }
    /* The 32-bit sum is truncated to 16 bits by the return type. */
    return (sum);
}

/*
 * Byte-wise variant: builds each 16-bit word from two individual byte loads
 * (low byte first), so no uint16 pointer is ever dereferenced.
 * The int parameter is unused.  The 32-bit sum is truncated to 16 bits on
 * return.
 */
uint16 MessageBuffer::icmp6Checksum_naive(int)
{
    uint32 acc = 0xffff;

    uint8 *payload;
    findPayloadType((void**)&payload);

    /* Byte offset must stay below this bound; promoted to int as before. */
    const int limit = getLength() - 1;

    int pos = 0;
    while (pos < limit) {
        const int low = payload[pos];
        const int high = payload[pos + 1];
        acc += low | (high << 8);
        pos += 2;
    }
    return (acc);
}

/*
 * Driver for the reproducer.  The byte-wise version runs and prints first, so
 * its output is emitted before the uint16-pointer version is called.
 */
int main()
{
    /* Wrap the global buffer with a length of 1000. */
    MessageBuffer buffer(buf, 1000);
    printf("0x%.4x\n", buffer.icmp6Checksum_naive(0));
    printf("0x%.4x\n", buffer.icmp6Checksum_ub(0));
}

icmp6Checksum_naive calculates the checksum (I hope, at least), and
icmp6Checksum_ub causes a segfault (I tried with g++ -O3 -funroll-loops -msse2, GCC
4.8.2).

>i heard of that it is not necesary to aligned by 16-byte in x86
Maybe you are confusing movdqa with movdqu (or some other instruction)?

Here is a universal implementation from the Linux kernel (there are also
platform-specific versions):
http://lxr.free-electrons.com/source/lib/checksum.c
Notice that the case where the address is odd is handled separately (especially in
the platform-specific code).


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]