This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug other/26546] New: Passing unions of _vector_ types and struct or array of the same size as value to inline functions causes unnecessary load/stores on the stack even if no members except the _vector_ is accessed
- From: "j_daniel at rbg dot informatik dot tu-darmstadt dot de" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: 3 Mar 2006 11:44:27 -0000
- Subject: [Bug other/26546] New: Passing unions of _vector_ types and struct or array of the same size as value to inline functions causes unnecessary load/stores on the stack even if no members except the _vector_ is accessed
- Reply-to: gcc-bugzilla at gcc dot gnu dot org
Consider this example:
#include <xmmintrin.h>
/* Four floats that can be viewed as one SSE vector, as an array, or as named components. */
typedef union
{
/* The whole value as a single __m128 vector. */
__m128 vec;
/* The same storage viewed as an array of four floats. */
float data[4];
/* Anonymous struct: the same storage viewed as the named floats x, y, z, w. */
struct
{
float x,y,z,w;
};
} vec4f_t;
/*
 * Returns the sum of the four float lanes of src.vec.
 *
 * src is taken by value; only its __m128 member is read or written, the
 * array and struct views of the union are never touched.
 */
static inline float __attribute__((__always_inline__))
acc(vec4f_t src)
{
    float a;
    /* Add the upper two lanes onto the lower two: lane 0 = v0+v2, lane 1 = v1+v3. */
    src.vec = _mm_add_ps(src.vec, _mm_movehl_ps(src.vec, src.vec));
    /* Move lane 1 into lane 0 of a temporary, add it to lane 0, and store that scalar. */
    _mm_store_ss(&a, _mm_add_ss(src.vec, _mm_shuffle_ps(src.vec, src.vec,
                                                       _MM_SHUFFLE(3,2,1,1))));
    return a;
}
/*
 * Reproducer entry point: passes a vec4f_t by value to the always-inline
 * acc() and prints the float it returns.
 * NOTE(review): b is never initialized before it is passed to acc(), and
 * printf is called although <xmmintrin.h> is the only header this snippet
 * includes; presumably neither matters for the code-generation issue being
 * reported -- confirm before reusing this as a test case.
 */
int
main(int argc, char *argv[])
{
vec4f_t b;
printf("%f\n", acc(b));
return 0;
}
This gets compiled to:
.section .rodata.str1.1,"aMS",@progbits,1
.LC0:
.string "%f\n"
.text
.p2align 4,,15
.globl main
.type main, @function
# Compiler output for main() when acc() takes the union by value.
main:
.LFB506:
# Reserve 40 bytes of stack.
subq $40, %rsp
.LCFI0:
# First printf argument: address of the "%f\n" format string.
movl $.LC0, %edi
# Copy the 16 bytes at 16(%rsp) to 0(%rsp) through %rax, 8 bytes at a time.
# These are the stack loads/stores the report describes as unnecessary.
movq 16(%rsp), %rax
movq %rax, (%rsp)
movq 24(%rsp), %rax
movq %rax, 8(%rsp)
# %eax = 1 ahead of the variadic printf call.
movl $1, %eax
# Load the copied 16 bytes from the stack into %xmm1.
movaps (%rsp), %xmm1
# movhlps + addps: add the upper two lanes onto the lower two.
movaps %xmm1, %xmm0
movhlps %xmm1, %xmm0
addps %xmm1, %xmm0
# shufps $229 (= _MM_SHUFFLE(3,2,1,1)) + addss: add lane 1 into lane 0.
movaps %xmm0, %xmm1
shufps $229, %xmm0, %xmm1
addss %xmm1, %xmm0
# Convert the float result to double for printf's "%f".
cvtss2sd %xmm0, %xmm0
call printf
# Return 0 and release the stack frame.
xorl %eax, %eax
addq $40, %rsp
ret
As we can see, the union is passed on the stack instead of as a value in %xmm0.
This would make sense if this were not an inline function and members other than
the __m128 were accessed.
Using the same code as above but passing __m128 directly instead of the union
gets compiled to:
.section .rodata.str1.1,"aMS",@progbits,1
.LC0:
.string "%f\n"
.text
.p2align 4,,15
.globl main
.type main, @function
# Compiler output for main() when acc() takes a plain __m128 by value.
# The value is used directly in %xmm0; no 16-byte stack copy appears here.
main:
.LFB506:
movhlps %xmm0, %xmm0
# Reserve 8 bytes of stack.
subq $8, %rsp
.LCFI0:
# First printf argument: address of the "%f\n" format string.
movl $.LC0, %edi
# %eax = 1 ahead of the variadic printf call.
movl $1, %eax
addps %xmm0, %xmm0
# shufps $229 (= _MM_SHUFFLE(3,2,1,1)) + addss: add lane 1 into lane 0.
movaps %xmm0, %xmm1
shufps $229, %xmm0, %xmm1
addss %xmm1, %xmm0
# Convert the float result to double for printf's "%f".
cvtss2sd %xmm0, %xmm0
call printf
# Return 0 and release the stack frame.
xorl %eax, %eax
addq $8, %rsp
ret
--
Summary: Passing unions of _vector_ types and struct or array of
the same size as value to inline functions causes
unnecessary load/stores on the stack even if no members
except the _vector_ is accessed
Product: gcc
Version: 4.1.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: other
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: j_daniel at rbg dot informatik dot tu-darmstadt dot de
GCC build triplet: x86_64-pc-linux-gnu
GCC host triplet: x86_64-pc-linux-gnu
GCC target triplet: x86_64-pc-linux-gnu
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26546