This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug target/32414] New: [4.1/4.2 Regression] Poor code for inner loop on i386


/* { dg-do compile } */
/* { dg-options "-O2 -m32 -mtune=generic" } */

typedef unsigned short int uint16_t;
typedef unsigned int uint32_t;

extern int get_src_stride(void);
extern int get_dst_stride(void);

/* Copy HEIGHT rows of WIDTH 32-bit words from pSrc to pDst, OR-ing
   0xFF000000 into every word that is copied.  The distance between the
   starts of consecutive rows is obtained from get_src_stride () and
   get_dst_stride (); the strides are added to uint32_t pointers, so
   they count 32-bit elements rather than bytes.

   This function is a code-generation testcase: the pointer-walking
   inner loop driven by a 16-bit down-counter is the construct being
   examined, so its shape is deliberate.  */
void
foo (uint32_t *pSrc, uint32_t *pDst, uint16_t width, uint16_t height)
{
  /* dstLine/srcLine point at the start of the next row to process;
     dst/src are the cursors that walk along the current row.  */
  uint32_t *dstLine;
  register uint32_t *dst;
  uint32_t *srcLine;
  register uint32_t *src;
  int dstStride, srcStride;
  uint16_t w;

  /* The strides are fetched once, before the row loop.  */
  srcStride = get_src_stride ();
  dstStride = get_dst_stride ();
  dstLine = pDst;
  srcLine = pSrc;

  /* One iteration per row; HEIGHT itself is used as the down-counter.  */
  while (height--)
    {
      /* Latch the current row start into the cursors, then advance the
	 row pointers so they already refer to the following row.  */
      dst = dstLine;
      dstLine += dstStride;
      src = srcLine;
      srcLine += srcStride;
      w = width;

      /* Inner loop under test: post-decrement of a uint16_t counter.
	 When w reaches 0 the test fails and w wraps to 0xFFFF.  */
      while (w--)
        *dst++ = *src++ | 0xFF000000;
    }
}

The testcase above generates extremely poor code for the inner loop with GCC 4.1 and 4.2:
.L6:
        movl    -16(%ebp), %eax # src,
        subw    $1, -34(%ebp)   #, w
        addl    $4, -16(%ebp)   #, src
        movl    (%eax), %ecx    #,
        movl    -20(%ebp), %eax # dst,
        orl     $-16777216, %ecx        #,
        movl    %ecx, (%eax)    #,
        addl    $4, %eax        #,
        cmpw    $-1, -34(%ebp)  #, w
        movl    %eax, -20(%ebp) #, dst
        je      .L4     #,
        jmp     .L6     #

I believe this has been introduced by the
http://gcc.gnu.org/ml/gcc-patches/2005-07/msg02021.html
patch and fixed by
http://gcc.gnu.org/ml/gcc-patches/2007-01/msg02095.html
on the trunk.  The generated loop isn't perfect on the trunk:
.L4:
        movl    (%ebx), %eax    #* src, tmp82
        addl    $4, %ebx        #, src
        subw    $1, -14(%ebp)   #, w
        orl     $-16777216, %eax        #, tmp82
        movl    %eax, (%edi)    # tmp82,* dst
        addl    $4, %edi        #, dst
        cmpw    $-1, -14(%ebp)  #, w
        je      .L3     #,
        jmp     .L4     #
but still far better than what 4.1 and 4.2 generate.

A slightly modified testcase (w widened to uint32_t and the inner loop rewritten as an indexed for loop):
typedef unsigned short int uint16_t;
typedef unsigned int uint32_t;

extern int get_src_stride(void);
extern int get_dst_stride(void);

/* Copy HEIGHT rows of WIDTH 32-bit words from pSrc to pDst, OR-ing
   0xFF000000 into every word that is copied.  The distance between the
   starts of consecutive rows is obtained from get_src_stride () and
   get_dst_stride (); the strides are added to uint32_t pointers, so
   they count 32-bit elements rather than bytes.

   This variant of the testcase uses a 32-bit up-counter and array
   indexing in the inner loop instead of walking the src/dst pointers
   with a 16-bit down-counter.  The loop shape is the construct being
   examined, so it is deliberate.  */
void
foo (uint32_t *pSrc, uint32_t *pDst, uint16_t width, uint16_t height)
{
  /* dstLine/srcLine point at the start of the next row to process;
     dst/src hold the start of the current row and are indexed by w.  */
  uint32_t *dstLine;
  register uint32_t *dst;
  uint32_t *srcLine;
  register uint32_t *src;
  int dstStride, srcStride;
  uint32_t w;

  /* The strides are fetched once, before the row loop.  */
  srcStride = get_src_stride ();
  dstStride = get_dst_stride ();
  dstLine = pDst;
  srcLine = pSrc;

  /* One iteration per row; HEIGHT itself is used as the down-counter.  */
  while (height--)
    {
      /* Latch the current row start, then advance the row pointers so
	 they already refer to the following row.  */
      dst = dstLine;
      dstLine += dstStride;
      src = srcLine;
      srcLine += srcStride;
      /* Inner loop under test: w counts up from 0 to WIDTH - 1 and the
	 row base pointers are left unmodified.  */
      for (w = 0; w < width; w++)
        dst[w] = src[w] | 0xFF000000;
    }
}
This version generates more compact code for the inner loop:
.L4:
        movl    (%edx,%ecx,4), %eax     #* srcLine, tmp79
        orl     $-16777216, %eax        #, tmp79
        movl    %eax, (%ebx,%ecx,4)     # tmp79,* dstLine
        addl    $1, %ecx        #, w
        cmpl    %esi, %ecx      # width, w
        jae     .L3     #,
        jmp     .L4     #


-- 
           Summary: [4.1/4.2 Regression] Poor code for inner loop on i386
           Product: gcc
           Version: 4.1.2
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: jakub at gcc dot gnu dot org
GCC target triplet: i386-linux


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=32414


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]