PATCH: Optimize V8HImode/V16QImode initialization
H.J. Lu
hjl.tools@gmail.com
Fri May 16 00:25:00 GMT 2008
Hi,
This patch optimizes V8HImode/V16QImode initialization. Before
the change, I got the following code for this test case:
[hjl@gnu-6 sse-1]$ cat v8hi-1.c
#include <emmintrin.h>
__m128i
foo1 (short x1, short x2, short x3, short x4,
short x5, short x6, short x7, short x8)
{
return _mm_set_epi16 (x1, x2, x3, x4, x5, x6, x7, x8);
}
[hjl@gnu-6 sse-1]$ /usr/gcc-4.4/bin/gcc -S -O2 v8hi-1.c
[hjl@gnu-6 sse-1]$ cat v8hi-1.s
.file "v8hi-1.c"
.text
.p2align 4,,15
.globl foo1
.type foo1, @function
foo1:
.LFB518:
movzwl 8(%rsp), %eax
movzwl %r8w, %r8d
movzwl %di, %edi
movzwl %r9w, %r9d
salq $16, %r8
movzwl %si, %esi
salq $16, %rdi
orq %r9, %r8
movzwl %dx, %edx
orq %rsi, %rdi
salq $16, %r8
movzwl 16(%rsp), %r9d
salq $16, %rdi
orq %rax, %r8
movzwl %cx, %ecx
orq %rdx, %rdi
salq $16, %r8
salq $16, %rdi
movq %r8, %rax
movq %rdi, %rdx
orq %r9, %rax
orq %rcx, %rdx
movq %rax, -24(%rsp)
movq %rdx, -16(%rsp)
movdqa -24(%rsp), %xmm0
ret
After the change, I got:
[hjl@gnu-6 sse-1]$ cat v8hi-1.s
.file "v8hi-1.c"
.text
.p2align 4,,15
.globl foo1
.type foo1, @function
foo1:
.LFB518:
pxor %xmm3, %xmm3
movq %r9, -8(%rsp)
movq -8(%rsp), %xmm1
movq %rcx, -8(%rsp)
pxor %xmm2, %xmm2
movss %xmm1, %xmm3
pxor %xmm1, %xmm1
movq -8(%rsp), %xmm4
movq %rsi, -8(%rsp)
movd 16(%rsp), %xmm0
movss %xmm4, %xmm2
movq -8(%rsp), %xmm4
pinsrw $1, 8(%rsp), %xmm0
movss %xmm4, %xmm1
pinsrw $1, %r8d, %xmm3
pinsrw $1, %edx, %xmm2
pinsrw $1, %edi, %xmm1
punpckldq %xmm3, %xmm0
punpckldq %xmm1, %xmm2
punpcklqdq %xmm2, %xmm0
ret
There is a similar improvement for V16QI. OK for trunk?
Thanks.
H.J.
---
gcc/
2008-05-15 H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386.c (ix86_expand_vector_init_general): Optimize
V8HImode for SSE2 and V16QImode for SSE4.1.
gcc/testsuite/
2008-05-15 H.J. Lu <hongjiu.lu@intel.com>
* gcc.target/i386/m128-check.h: New.
* gcc.target/i386/set-v16qi-1.h: Likewise.
* gcc.target/i386/set-v16qi-2.h: Likewise.
* gcc.target/i386/set-v8hi-1.h: Likewise.
* gcc.target/i386/set-v8hi-2.h: Likewise.
* gcc.target/i386/sse2-set-v16qi-1.c: Likewise.
* gcc.target/i386/sse2-set-v16qi-2.c: Likewise.
* gcc.target/i386/sse2-set-v8hi-1.c: Likewise.
* gcc.target/i386/sse2-set-v8hi-2.c: Likewise.
* gcc.target/i386/sse4_1-set-v16qi-1.c: Likewise.
* gcc.target/i386/sse4_1-set-v16qi-2.c: Likewise.
* gcc.target/i386/sse2-check.h: Include m128-check.h. Don't
include <stdio.h>.
* gcc.target/i386/sse4_1-check.h: Likewise.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: gcc-sse-insr-1.patch
Type: text/x-patch
Size: 13122 bytes
Desc: not available
URL: <http://gcc.gnu.org/pipermail/gcc-patches/attachments/20080516/a3a51846/attachment.bin>
More information about the Gcc-patches
mailing list