This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug rtl-optimization/53687] New: _mm_cmpistri generates redundant movslq %ecx,%rcx on x86-64


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53687

             Bug #: 53687
           Summary: _mm_cmpistri generates redundant movslq %ecx,%rcx on
                    x86-64
    Classification: Unclassified
           Product: gcc
           Version: 4.8.0
            Status: UNCONFIRMED
          Severity: enhancement
          Priority: P3
         Component: rtl-optimization
        AssignedTo: unassigned@gcc.gnu.org
        ReportedBy: jbemmel@zonnet.nl


Compile the following strcmp() implementation with -O5 -march=corei7:

#include <nmmintrin.h>

// Compare the strings cs and ct 16 bytes at a time with the SSE4.2
// _mm_cmpistri intrinsic.
//
// Returns 0 when neither branch of the asm goto below is taken; otherwise
// returns the difference between the byte of cs and the byte of ct at the
// index reported by _mm_cmpistri.
//
// NOTE(review): every iteration reads a full 16 bytes from both strings, so
// the reads can extend past the terminator -- confirm callers tolerate that.
static inline int __strcmp(const char * cs, const char * ct)
{
    // Works for both 32-bit and 64-bit code

    // see http://www.strchr.com/strcmp_and_strlen_using_sse_4.2

    // Distance between the two strings: ct+diff addresses the byte of cs
    // that corresponds to the current position in ct, so only ct has to be
    // advanced inside the loop.
    long diff = cs-ct;
    long nextbytes = 16;
    // Step back one chunk so that the "ct += nextbytes" at the top of the
    // loop makes the first iteration start at the original ct.
    ct -= 16;

    // NOTE(review): a label directly followed by a declaration is valid C++
    // but not valid C before C23 -- confirm which language mode is intended.
loop:
    // Advance ct by one chunk and load 16 bytes from it (unaligned load).
    __m128i ct16cs = _mm_loadu_si128( (const __m128i *) (ct += nextbytes) );
    // Compare against the matching 16 bytes of cs, which are dereferenced
    // directly instead of going through _mm_loadu_si128. With EQUAL_EACH and
    // NEGATIVE_POLARITY the returned index is presumably that of the first
    // byte that differs -- TODO confirm against the intrinsic documentation.
    int offset = _mm_cmpistri( ct16cs, * (const __m128i *) (ct+diff),   
                      _SIDD_CMP_EQUAL_EACH | _SIDD_NEGATIVE_POLARITY );
    // Branch on the CPU flags: "ja" jumps back to loop, "jc" jumps to
    // not_equal, and falling through returns 0.
    // NOTE(review): this relies on the flags still being those produced by
    // the instruction emitted for _mm_cmpistri; the asm statement declares no
    // inputs, so nothing tells the compiler about that dependency. Presumably
    // "ja" (CF=0 and ZF=0) means "no difference and no terminator yet" and
    // "jc" (CF=1) means "difference found" -- verify against the PCMPISTRI
    // flag description.
    __asm__ __volatile__ goto( "ja %l[loop] \n jc %l[not_equal]" : : :  
             "memory" : loop, not_equal );

    // Neither branch taken: report the strings as equal.
    return 0;

not_equal:
    // ct[diff+offset] is the byte of cs at the reported index and ct[offset]
    // is the corresponding byte of ct; both are read as plain char.
    return ct[diff+offset] - ct[offset];
}

GCC generates the following code:
00000000004007c0 <strcmp>:
  4007c0:    48 29 f7                 sub    %rsi,%rdi
  4007c3:    48 83 ee 10              sub    $0x10,%rsi
  4007c7:    48 83 c6 10              add    $0x10,%rsi
  4007cb:    f3 0f 6f 06              movdqu (%rsi),%xmm0
  4007cf:    66 0f 3a 63 04 3e 18     pcmpistri $0x18,(%rsi,%rdi,1),%xmm0
  4007d6:    77 ef                    ja     4007c7 <strcmp+0x7>
  4007d8:    72 06                    jb     4007e0 <strcmp+0x20>
  4007da:    31 c0                    xor    %eax,%eax
  4007dc:    c3                       retq   
  4007dd:    0f 1f 00                 nopl   (%rax)
* 4007e0:    48 63 c9                 movslq %ecx,%rcx
  4007e3:    48 01 f7                 add    %rsi,%rdi
  4007e6:    0f be 04 0f              movsbl (%rdi,%rcx,1),%eax
  4007ea:    0f be 14 0e              movsbl (%rsi,%rcx,1),%edx
  4007ee:    29 d0                    sub    %edx,%eax
  4007f0:    c3                       retq   
  4007f1:    66 66 66 66 66 66 2e     data32 data32 data32 data32 data32 nopw   
  4007f8:    0f 1f 84 00 00 00 00    %cs:0x0(%rax,%rax,1)
  4007ff:    00 

The "movslq" instruction (marked with * in the listing above) is redundant,
because pcmpistri clears the upper bits of RCX when generating an index
(verified using gdb).


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]