PATCH: Optimize V8HImode/V16QImode initialization

H.J. Lu hjl.tools@gmail.com
Fri May 16 00:25:00 GMT 2008


Hi,

This patch optimizes V8HImode/V16QImode initialization. Before
the change, I got the following code:


[hjl@gnu-6 sse-1]$ cat v8hi-1.c
#include <emmintrin.h>

__m128i
foo1 (short x1, short x2, short x3, short x4,
      short x5, short x6, short x7, short x8)
{
  return _mm_set_epi16 (x1, x2, x3, x4, x5, x6, x7, x8);
}
[hjl@gnu-6 sse-1]$ /usr/gcc-4.4/bin/gcc -S -O2 v8hi-1.c
[hjl@gnu-6 sse-1]$ cat v8hi-1.s
        .file   "v8hi-1.c"
        .text
        .p2align 4,,15
.globl foo1
        .type   foo1, @function
foo1:
.LFB518:
        movzwl  8(%rsp), %eax
        movzwl  %r8w, %r8d
        movzwl  %di, %edi
        movzwl  %r9w, %r9d
        salq    $16, %r8
        movzwl  %si, %esi
        salq    $16, %rdi
        orq     %r9, %r8
        movzwl  %dx, %edx
        orq     %rsi, %rdi
        salq    $16, %r8
        movzwl  16(%rsp), %r9d
        salq    $16, %rdi
        orq     %rax, %r8
        movzwl  %cx, %ecx
        orq     %rdx, %rdi
        salq    $16, %r8
        salq    $16, %rdi
        movq    %r8, %rax
        movq    %rdi, %rdx
        orq     %r9, %rax
        orq     %rcx, %rdx
        movq    %rax, -24(%rsp)
        movq    %rdx, -16(%rsp)
        movdqa  -24(%rsp), %xmm0
        ret

After the change,

[hjl@gnu-6 sse-1]$ cat v8hi-1.s
        .file   "v8hi-1.c"
        .text
        .p2align 4,,15
.globl foo1
        .type   foo1, @function
foo1:
.LFB518:
        pxor    %xmm3, %xmm3
        movq    %r9, -8(%rsp)
        movq    -8(%rsp), %xmm1
        movq    %rcx, -8(%rsp)
        pxor    %xmm2, %xmm2
        movss   %xmm1, %xmm3
        pxor    %xmm1, %xmm1
        movq    -8(%rsp), %xmm4
        movq    %rsi, -8(%rsp)
        movd    16(%rsp), %xmm0
        movss   %xmm4, %xmm2
        movq    -8(%rsp), %xmm4
        pinsrw  $1, 8(%rsp), %xmm0
        movss   %xmm4, %xmm1
        pinsrw  $1, %r8d, %xmm3
        pinsrw  $1, %edx, %xmm2
        pinsrw  $1, %edi, %xmm1
        punpckldq       %xmm3, %xmm0
        punpckldq       %xmm1, %xmm2
        punpcklqdq      %xmm2, %xmm0
        ret

There is a similar improvement for V16QI. OK for trunk?

Thanks.


H.J.
---
gcc/

2008-05-15  H.J. Lu  <hongjiu.lu@intel.com>

        * config/i386/i386.c (ix86_expand_vector_init_general): Optimize
        V8HImode for SSE2 and V16QImode for SSE4.1.

gcc/testsuite/

2008-05-15  H.J. Lu  <hongjiu.lu@intel.com>

        * gcc.target/i386/m128-check.h: New.
        * gcc.target/i386/set-v16qi-1.h: Likewise.
        * gcc.target/i386/set-v16qi-2.h: Likewise.
        * gcc.target/i386/set-v8hi-1.h: Likewise.
        * gcc.target/i386/set-v8hi-2.h: Likewise.
        * gcc.target/i386/sse2-set-v16qi-1.c: Likewise.
        * gcc.target/i386/sse2-set-v16qi-2.c: Likewise.
        * gcc.target/i386/sse2-set-v8hi-1.c: Likewise.
        * gcc.target/i386/sse2-set-v8hi-2.c: Likewise.
        * gcc.target/i386/sse4_1-set-v16qi-1.c: Likewise.
        * gcc.target/i386/sse4_1-set-v16qi-2.c: Likewise.

        * gcc.target/i386/sse2-check.h: Include m128-check.h. Don't
        include <stdio.h>.
        * gcc.target/i386/sse4_1-check.h: Likewise.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: gcc-sse-insr-1.patch
Type: text/x-patch
Size: 13122 bytes
Desc: not available
URL: <http://gcc.gnu.org/pipermail/gcc-patches/attachments/20080516/a3a51846/attachment.bin>


More information about the Gcc-patches mailing list