This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Memory buffer handling - additional optimization proposal


Hi
I have checked how gcc treats temporary buffers allocated in different ways (local buffer on stack, malloced locally in function, malloced outside and passed via argument) and found that gcc could do better work there. I have few proposals how to make things better:

1. Introduce __builtin_assume_temporary(ptr) and/or __attribute__((temporary)) which will tell gcc that given function argument or variable is a temporary buffer and its contents after function execution will not be used. By doing so gcc could treat it as if it was allocated locally in function and optimize out unnecessary memory writes.

2. One of function versions used malloc() to allocate buffer of constant size at beginning of function, and free() to delete it at the end. When this function was inlined and called twice, its body was inserted twice in my code. Other function versions with buffer on stack or malloced outside and passed via argument were inserted once. I am not sure if this is some bug or feature, someone familiar with this should check it.

3. If there is valid reason to actually duplicate this code (e.g. loop unrolling), gcc could detect that code calls malloc and free in loop and repeatedly allocate/free buffer of the same size. In such case gcc could optimize this to call malloc and free once. If someone will use calloc, gcc could simply clear buffer before each loop iteration.

I tested this with gcc 5.4.0 shipped by default in Cygwin 64 bit. Test code used by me is below.

Regards,
Daniel

#include <stdio.h>
#include <stdlib.h>

#define CNT 4

#define ATTR_INLINE __attribute__((hot, always_inline)) static inline
//#define ATTR_INLINE

ATTR_INLINE int func_1(int* data)
{
    int buff[CNT * CNT];

    for (int i = 0; i < CNT; ++i)
    {
        for (int j = 0; j < CNT; ++j)
        {
            buff[i * CNT + j] = data[i];
        }
    }

    for (int n = 0; n < CNT; ++n)
    {
        for (int i = 0; i < n; ++i)
        {
            for (int j = 0; j < n; ++j)
            {
buff[i * CNT + j] = buff[(i+1) * CNT + j] + buff[i * CNT + j+1];
            }
        }
    }

    return buff[0];
}

ATTR_INLINE int func_2(int* data)
{
    int* buff = (int*)malloc(CNT * CNT * sizeof(int));

    for (int i = 0; i < CNT; ++i)
    {
        for (int j = 0; j < CNT; ++j)
        {
            buff[i * CNT + j] = data[i];
        }
    }

    for (int n = 0; n < CNT; ++n)
    {
        for (int i = 0; i < n; ++i)
        {
            for (int j = 0; j < n; ++j)
            {
buff[i * CNT + j] = buff[(i+1) * CNT + j] + buff[i * CNT + j+1];
            }
        }
    }

    int result = buff[0];
    free(buff);
    return result;
}

ATTR_INLINE int func_3(int* __restrict__ data, int* __restrict__ buff)
{
    for (int i = 0; i < CNT; ++i)
    {
        for (int j = 0; j < CNT; ++j)
        {
            buff[i * CNT + j] = data[i];
        }
    }

    for (int n = 0; n < CNT; ++n)
    {
        for (int i = 0; i < n; ++i)
        {
            for (int j = 0; j < n; ++j)
            {
buff[i * CNT + j] = buff[(i+1) * CNT + j] + buff[i * CNT + j+1];
            }
        }
    }

    return buff[0];
}

ATTR_INLINE int func_4(int* data, int* buff)
{
    for (int i = 0; i < CNT; ++i)
    {
        for (int j = 0; j < CNT; ++j)
        {
            buff[i * CNT + j] = data[i];
        }
    }

    for (int n = 0; n < CNT; ++n)
    {
        for (int i = 0; i < n; ++i)
        {
            for (int j = 0; j < n; ++j)
            {
buff[i * CNT + j] = buff[(i+1) * CNT + j] + buff[i * CNT + j+1];
            }
        }
    }

    return buff[0];
}

int main()
{
    int* data = (int*)malloc(CNT * sizeof(int));
    int* buff = (int*)malloc(CNT * sizeof(int));
    for (int n = 0; n < CNT; ++n)
        data[n] = rand();

    int sum = 0;
    printf("1");
    sum += func_1(data);
    sum += func_1(data);
    printf("2");
    sum += func_2(data);
    sum += func_2(data);
    printf("3");
    sum += func_3(data, buff);
    sum += func_3(data, buff);
    printf("4");
    sum += func_4(data, buff);
    sum += func_4(data, buff);
    printf("4");

    return sum;
}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]