This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug middle-end/32725] New: Unnecessary reg-reg moves


Compiling the following code

// g++-4.3-070710 -O3 -msse3 -mtune=core2 -S
#include <emmintrin.h>
typedef unsigned long long u64;
// Reproducer for the redundant-move report.  For each of the three entries
// in indexes, loads src[indexes[i]], masks it with m1, multiplies by e at
// 128-bit width, and stores the upper 64 bits of the product, masked with
// m2, into dest[i].  The scalars m1, e and m2 are extracted from the
// __m128i arguments with _mm_cvtsi128_si64x.
void foo(int* dest, unsigned short* src, long* indexes, __m128i _m1, __m128i
_e, __m128i _m2) {
  // required by the API, and makes the bug worse
   u64 e = _mm_cvtsi128_si64x(_e);
   u64 m1 =_mm_cvtsi128_si64x(_m1);
   u64 m2 = _mm_cvtsi128_si64x(_m2);

   for(long i=0; i < 3; i++) {
      // the unsigned short element is widened to u64 here
      u64 data = src[indexes[i]];
      // the cast applies to (data & m1), so the multiply is done at 128 bits
      __uint128_t result = (__uint128_t) (data & m1) * e;
      // take the high 64 bits of the product; assigning to int narrows it
      dest[i] = (result >> 64) & m2;
   }
}

produces redundant reg-reg moves (marked << 1, << 2 and << 3 in the listing below):

_Z3fooPiPtPlU8__vectorxS2_S2_:
.LFB527:
        pushq   %rbx
.LCFI0:
        movq    %rdx, %r11
        movd    %xmm1, %r10
        movd    %xmm0, %r8
        movd    %xmm2, %r9
        movq    (%rdx), %rax
        movzwl  (%rsi,%rax,2), %eax
        movq    %rax, %rbx              << 1
        andq    %r8, %rbx
        movq    %rbx, %rax              << 2
        mulq    %r10
        movl    %r9d, %eax              << 3
        andl    %edx, %eax
        movl    %eax, (%rdi)
        movq    8(%r11), %rax
        movzwl  (%rsi,%rax,2), %eax
        movq    %rax, %rbx              << 1
        andq    %r8, %rbx
        movq    %rbx, %rax              << 2
        popq    %rbx
        mulq    %r10
        movl    %r9d, %eax              << 3
        andl    %edx, %eax
        movl    %eax, 4(%rdi)
        movq    16(%r11), %rax
        movzwl  (%rsi,%rax,2), %eax
        andq    %rax, %r8               << Almost what 1 should be
        movq    %r8, %rax               << 2
        mulq    %r10 
        andl    %edx, %r9d              << Essentially what 3 should be
        movl    %r9d, 8(%rdi)
        ret

The output of a single iteration should look something like this (33% fewer
instructions):
        movq    8(%r11), %rax
        movzwl  (%rsi,%rax,2), %eax
        andq    %r8, %rax
        mulq    %r10
        andl    %r9d, %edx
        movl    %edx, 4(%rdi)

Methinks cases (2) and (3) are related to bug 15158 and bug 21202, but that
case (1) is something else.

There's also that odd choice to use %rbx (which has to be pushed and popped),
even though there are plenty of call-clobbered regs to use instead...


-- 
           Summary: Unnecessary reg-reg moves
           Product: gcc
           Version: 4.3.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: middle-end
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: scovich at gmail dot com
GCC target triplet: x86_64-linux-gnu


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=32725


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]