[Bug rtl-optimization/29881] union causes inefficient code
pinskia at gcc dot gnu dot org
gcc-bugzilla@gcc.gnu.org
Sat Nov 18 00:14:00 GMT 2006
------- Comment #1 from pinskia at gcc dot gnu dot org 2006-11-18 00:14 -------
The problem here I think is unions.
Here is how I would have written this code (without using unions in fact):
/*
 * For each of the `length` __m128 vectors read from `src`, compute per
 * 32-bit lane:
 *   (bits 23..30 of the lane, minus 0x7f, converted to float)
 *   + a degree-4 polynomial in the lane with bits 23..30 replaced by 0x7f
 * and store the result vector to `dst`. Both pointers advance by one
 * vector per iteration. No unions are used; the float<->integer
 * reinterpretation is done with vector casts instead.
 *
 * NOTE(review): bits 23..30 are the exponent field of an IEEE-754 single,
 * so this looks like a per-lane log2 approximation (unbiased exponent plus
 * a polynomial in the mantissa) -- confirm against the original report.
 * NOTE(review): a negative `length` is not guarded against; the loop only
 * stops when the counter equals 0.
 */
void array_sample_fun(__m128 *dst, const __m128 *src, int length) {
/* Polynomial coefficients, each broadcast to all four lanes. */
__m128 af = _mm_set1_ps(1.20f);
__m128 bf = _mm_set1_ps(2.88f);
__m128 cf = _mm_set1_ps(-2.44f);
__m128 df = _mm_set1_ps(4.06f);
__m128 ef = _mm_set1_ps(-12.04f);
/* mask = 0x7f800000 selects bits 23..30 of every lane. */
__m128i mask = _mm_set1_epi32(0xff << 23);
/* bias = 0x3f800000: the value 0x7f placed in bits 23..30. */
__m128i bias = _mm_set1_epi32(0x7f << 23);
/* NOTE(review): `t` is declared but never used in this function. */
__m128i t;
while (length-- != 0) {
__m128 vec;
vec = (*src++);
/*
 * arg = (float)(((bits(vec) & mask) - bias) >> 23), per lane.
 * The casts between __m128 and __m128i reinterpret the bits rather than
 * converting values; the shift is arithmetic, so a negative difference
 * keeps its sign.
 */
__m128 arg =
_mm_cvtepi32_ps(_mm_srai_epi32(_mm_sub_epi32(_mm_and_si128((__m128i)vec, mask),
bias),
23));
/* Clear bits 23..30 of vec and OR in bias; all other bits are kept. */
vec = (__m128)_mm_or_si128(_mm_andnot_si128(mask,
(__m128i)vec), bias);
/*
 * Horner evaluation of
 *   (((af*vec + bf)*vec + cf)*vec + df)*vec + ef
 * added to arg, then stored to *dst.
 */
*dst++ = _mm_add_ps(arg,
_mm_add_ps(_mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_add_ps(
_mm_mul_ps(af, vec), bf), vec), cf), vec), df),
vec), ef));
}
}
----------------------
The above gives good results for 32bit:
.L4:
movaps (%eax), %xmm0
movdqa %xmm4, %xmm1
addl $1, %ecx
addl $16, %eax
movdqa %xmm0, %xmm2
pandn %xmm0, %xmm1
movaps .LC0, %xmm0
por %xmm3, %xmm1
pand %xmm4, %xmm2
psubd %xmm3, %xmm2
mulps %xmm1, %xmm0
psrad $23, %xmm2
cvtdq2ps %xmm2, %xmm2
addps .LC1, %xmm0
mulps %xmm1, %xmm0
addps %xmm7, %xmm0
mulps %xmm1, %xmm0
addps %xmm6, %xmm0
mulps %xmm1, %xmm0
addps %xmm5, %xmm0
addps %xmm0, %xmm2
movaps %xmm2, (%edx)
addl $16, %edx
cmpl %ebx, %ecx
jne .L4
Your original example, by contrast, has a store to the stack on 32-bit.
So this is just a case where the union causes a missed optimization.
--
pinskia at gcc dot gnu dot org changed:
What |Removed |Added
----------------------------------------------------------------------------
GCC build triplet| x86_64-unknown-linux-gnu |x86_64-unknown-linux-gnu
GCC host triplet| x86_64-unknown-linux-gnu |x86_64-unknown-linux-gnu
GCC target triplet| x86_64-unknown-linux-gnu |x86_64-unknown-linux-gnu
Summary|inefficient/incorrect xmm |union causes inefficient
|registers usage |code
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=29881
More information about the Gcc-bugs
mailing list