This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug other/26546] New: Passing unions of _vector_ types and struct or array of the same size as value to inline functions causes unnecessary load/stores on the stack even if no members except the _vector_ are accessed


Consider this example:

#include <xmmintrin.h>

/* Four floats that can be viewed as one SSE vector, as an indexed array,
   or as individually named components; all three views share storage.  */
typedef union
{
  /* The whole value as a single SSE vector.  */
  __m128 vec;

  /* The same four floats, addressed by index.  */
  float data[4];

  /* The same four floats, addressed by name (anonymous struct member).  */
  struct
  {
    float x;
    float y;
    float z;
    float w;
  };
} vec4f_t;

/* Return the sum of the four float lanes of SRC.vec.

   SRC is received by value, so overwriting SRC.vec below only changes the
   local copy.  Only the `vec' member of the union is ever touched.  */
static inline float __attribute__((__always_inline__))
acc(vec4f_t src)
{
  float a;

  /* Fold the upper two lanes onto the lower two:
     lane 0 becomes v0 + v2, lane 1 becomes v1 + v3.  */
  src.vec = _mm_add_ps(src.vec, _mm_movehl_ps(src.vec, src.vec));

  /* The shuffle places lane 1 into lane 0; the scalar add then yields
     (v0 + v2) + (v1 + v3) in lane 0, which is stored to `a'.  */
  _mm_store_ss(&a, _mm_add_ss(src.vec, _mm_shuffle_ps(src.vec, src.vec,
                                                      _MM_SHUFFLE(3,2,1,1))));
  return a;
}

/* Test driver: passes a vec4f_t by value to the always-inline acc() and
   prints the float it returns.

   NOTE(review): `b' is never written before it is read, so the printed
   value is indeterminate.  Presumably this is deliberate, so the compiler
   cannot constant-fold the computation away -- confirm before changing it.
   NOTE(review): printf is called here, but no <stdio.h> include is visible
   in this snippet.  */
int
main(int argc, char *argv[])
{
  vec4f_t b;
  printf("%f\n", acc(b));
  return 0;
}

This gets compiled to:

        .section        .rodata.str1.1,"aMS",@progbits,1
.LC0:
        .string "%f\n"
        .text
        .p2align 4,,15
.globl main
        .type   main, @function
main:
.LFB506:
        subq    $40, %rsp
.LCFI0:
        movl    $.LC0, %edi
        movq    16(%rsp), %rax
        movq    %rax, (%rsp)
        movq    24(%rsp), %rax
        movq    %rax, 8(%rsp)
        movl    $1, %eax
        movaps  (%rsp), %xmm1
        movaps  %xmm1, %xmm0
        movhlps %xmm1, %xmm0
        addps   %xmm1, %xmm0
        movaps  %xmm0, %xmm1
        shufps  $229, %xmm0, %xmm1
        addss   %xmm1, %xmm0
        cvtss2sd        %xmm0, %xmm0
        call    printf
        xorl    %eax, %eax
        addq    $40, %rsp
        ret

As we can see, the union is passed on the stack instead of as a value in %xmm0.
This would make sense if this were not an inline function and if members other
than the __m128 were accessed.

Using the same code as above but passing __m128 directly instead of the union
gets compiled to:

        .section        .rodata.str1.1,"aMS",@progbits,1
.LC0:
        .string "%f\n"
        .text
        .p2align 4,,15
.globl main
        .type   main, @function
main:
.LFB506:
        movhlps %xmm0, %xmm0
        subq    $8, %rsp
.LCFI0:
        movl    $.LC0, %edi
        movl    $1, %eax
        addps   %xmm0, %xmm0
        movaps  %xmm0, %xmm1
        shufps  $229, %xmm0, %xmm1
        addss   %xmm1, %xmm0
        cvtss2sd        %xmm0, %xmm0
        call    printf
        xorl    %eax, %eax
        addq    $8, %rsp
        ret


-- 
           Summary: Passing unions of _vector_ types and struct or array of
                    the same size as value to inline functions causes
                    unnecessary load/stores on the stack even if no members
                    except the _vector_ are accessed
           Product: gcc
           Version: 4.1.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: other
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: j_daniel at rbg dot informatik dot tu-darmstadt dot de
 GCC build triplet: x86_64-pc-linux-gnu
  GCC host triplet: x86_64-pc-linux-gnu
GCC target triplet: x86_64-pc-linux-gnu


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26546


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]