This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug c++/15857] New: Wrong code with optimization >= -O1


The included source file "fail_alpha.cc" implements alpha blending on i386/mmx
platforms using the mmx intrinsics from <mmintrin.h>. If compiled without
optimizations (but with -march=pentium2 or -march=athlon to enable the mmx
intrinsics), the program terminates; however, if compiled with any optimization
level (-O1, -O2, -O3), the program does not terminate.

Example: 
 g++ -march=athlon fail_alpha.cc -o fail_alpha
 ./fail_alpha 
 => terminates

 g++ -O1 -march=athlon fail_alpha.cc -o fail_alpha
 ./fail_alpha 
 => hangs

I've also checked the 3.3.2 compiler and found that it shows the same behaviour.

Here is my config line:
% /opt/gcc-3.4.0/bin/g++ -v
Reading specs from /opt/gcc-3.4.0/lib/gcc/i386-slackware-linux/3.4.0/specs
Configured with: ../source/gcc-3.4.0/configure --prefix=/opt/gcc-3.4.0
--enable-shared --enable-threads=posix --enable-__cxa_atexit --disable-checking
--with-gnu-ld --verbose --target=i386-slackware-linux --host=i386-slackware-linux
Thread model: posix
gcc version 3.4.0

/*--- begin fail_alpha.cc ---*/
#include <mmintrin.h>

typedef unsigned char byte;

// Describes a pixel buffer as consumed by blt(): a base pointer plus the
// layout fields needed to locate a pixel inside it.
struct Surface
{
  byte *pixels;                  // first byte of the pixel data
  unsigned int pitch;            // bytes added to a row pointer to reach the next row (see blt())
  unsigned int w,h;              // set from W and H in main(); not read by blt()
  unsigned int bytes_per_pixel;  // scales Rect x/w from pixels to bytes in blt()
}; 

// Rectangle passed to blt(). blt() multiplies x and w by bytes-per-pixel and
// y by the surface pitch, and iterates h rows.
struct Rect
{
  int x,y,w,h; 
}; 

// Blends a source value towards/away from a destination value by a fixed
// 8-bit alpha. Offers an MMX path that processes 8 bytes at once and a scalar
// path that processes a single byte.
struct calpha_blender
{
  __m64 valpha;  // alpha replicated into each 16-bit lane
  __m64 one;     // the constant 1 replicated into each 16-bit lane
  byte alpha;    // alpha as passed to the constructor; used by the scalar path
  // Stores alpha and precomputes the two lane-broadcast constants used by the
  // MMX blend.
  calpha_blender(byte _alpha) : alpha(_alpha) 
  {
    valpha=_mm_set1_pi16(alpha); 
    one=_mm_set1_pi16(1); 
  }
  // MMX path: blends the 8 bytes of s with the 8 bytes of d and returns the
  // 8 result bytes. Per byte it computes d + (((t + (t >> 8)) >> 8)) with
  // t = (s - d) * alpha + 1, all in 16-bit lanes.
  // Note that this overload is virtual while the scalar overload is not.
  virtual  __m64 blend(__m64 s, __m64 d)
  {
    __m64 lo=_mm_setzero_si64(); 
    __m64 hi=_mm_setzero_si64(); 
    
    // Interleaving with the zero vector widens the low four bytes of s and d
    // to 16-bit lanes; then form (s - d) * alpha, keeping the low 16 bits.
    lo=_mm_sub_pi16(_mm_unpacklo_pi8(s,lo),_mm_unpacklo_pi8(d,lo));
    lo=_mm_mullo_pi16(lo,valpha); 
    
    // Same for the high four bytes.
    hi=_mm_sub_pi16(_mm_unpackhi_pi8(s,hi),_mm_unpackhi_pi8(d,hi));
    hi=_mm_mullo_pi16(hi,valpha); 
    
    lo=_mm_add_pi16(lo,one); 
    hi=_mm_add_pi16(hi,one); 
    
    // t = t + (t >> 8), then t >> 8; both shifts are logical (zero-filling).
    lo=_mm_add_pi16(lo,_mm_srli_pi16(lo,8));
    hi=_mm_add_pi16(hi,_mm_srli_pi16(hi,8));
    lo=_mm_srli_pi16(lo,8); 
    hi=_mm_srli_pi16(hi,8); 
    
    // Narrow the eight 16-bit lanes back to bytes and add d with 8-bit
    // wrap-around.
    hi=_mm_packs_pu16(lo,hi);
    hi=_mm_add_pi8(d,hi); 
    return hi;     
  }
  // Scalar path: blends a single byte. s - d is evaluated as int (and may be
  // negative) before the product is converted to unsigned int.
  // NOTE(review): this path works in 32 bits while the MMX path works in
  // 16-bit lanes, so for s < d the two results can differ by one (e.g.
  // alpha=128, s=0, d=255) - confirm whether that matters to callers.
  inline byte blend(byte s, byte d) 
  {
    unsigned int c=alpha*(s-d)+1; 
    // '+' binds tighter than '>>': this is ((c + (c >> 8)) >> 8) + d, and the
    // result is truncated to a byte on return.
    return (c+(c>>8)>>8)+d; 
  }

}; 

// Alpha-blends a rectangle of surface s into surface d.
//   s, sr  - source surface and the rectangle to read from it
//   d, dr  - destination surface; only dr.x and dr.y are used (the size comes
//            from sr)
//   alpha  - blend factor handed to calpha_blender
// The bytes-per-pixel value is taken from s only and applied to both surfaces.
void blt(Surface &s,const Rect &sr,Surface &d,const Rect &dr, byte alpha)
{
  byte *sp=s.pixels; 
  byte *dp=d.pixels; 
  int bpp=s.bytes_per_pixel; 
  // Move each pointer to the byte offset of (x, y) within its surface.
  sp=sp+(sr.x*bpp+sr.y*s.pitch); 
  dp=dp+(dr.x*bpp+dr.y*d.pitch); 
  // Number of bytes to blend in each row.
  int lw=sr.w*bpp; 

  calpha_blender blender(alpha); 

  for (int y=0;y<sr.h;y++) 
    {
      int x=0;

      // Vector loop: blend 8 bytes per iteration through __m64 loads/stores.
      // NOTE(review): the condition is x<lw with a step of 8, so when lw is
      // not a multiple of 8 the final iteration reads and writes up to 7
      // bytes beyond lw. The reproducer itself uses lw = 2400, a multiple of 8.
      for (;x<lw;x+=8) 
	{
	  *(__m64*)(dp+x)=blender.blend(*(__m64*)(sp+x),*(__m64*)(dp+x));
	}
      // Scalar loop for remaining bytes.
      // NOTE(review): the loop above only exits once x>=lw, so this loop body
      // never executes as written.
      for (;x<lw;x++) 
	{
	  dp[x]=blender.blend(sp[x],dp[x]); 
	}
      // Step both pointers to the next row.
      sp+=s.pitch; 
      dp+=d.pitch; 
    }
  // Clear the MMX state (EMMS) after the MMX code above.
  _mm_empty(); 
}

// Reproducer driver: builds one 800x600 surface with 3 bytes per pixel and
// blends it onto itself with alpha 128. According to the report, the program
// terminates without optimization and hangs at -O1 and above.
int main() 
{
  const int W=800; 
  const int H=600;
  const int bpp=3; 
  // 800*600*3 = 1,440,000 bytes of automatic storage; the contents are never
  // initialized before blt() reads them.
  byte data[W*H*bpp]; 
  Surface x={data,W*bpp,W,H,bpp}; 
  Rect r={0,0,W,H}; 
  // Source and destination are the same surface and the same rectangle.
  blt(x,r,x,r,128); 
}

/*--- end fail_alpha.cc ---*/

-- 
           Summary: Wrong code with optimization >= -O1
           Product: gcc
           Version: 3.4.0
            Status: UNCONFIRMED
          Severity: critical
          Priority: P2
         Component: c++
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: marco at technoboredom dot net
                CC: gcc-bugs at gcc dot gnu dot org
 GCC build triplet: i386-slackware-linux
  GCC host triplet: i386-slackware-linux
GCC target triplet: i386-slackware-linux


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=15857


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]