This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

[Bug c/41424] New: Heavily optimized x86_64-w64 binary produces negative effects

From: "xxcv07 at gmail dot com" <gcc-bugzilla at gcc dot gnu dot org>
To: gcc-bugs at gcc dot gnu dot org
Date: 21 Sep 2009 02:16:52 -0000
Subject: [Bug c/41424] New: Heavily optimized x86_64-w64 binary produces negative effects
Reply-to: gcc-bugzilla at gcc dot gnu dot org

Hello:
I found that the optimized binary created by gcc-4_4-branch and trunk is
unstable in some way.

Program received signal SIGSEGV, Segmentation fault.
[Switching to thread 4116.0x15d4]
0x0000000008d8f304 in ?? ()
(gdb) bt
#0  0x0000000008d8f304 in ?? ()
#1  0x0000000000000000 in ?? ()
(gdb) disass $pc-30 $pc+30
Dump of assembler code from 0x8d8f2e6 to 0x8d8f322:
0x0000000008d8f2e6:     outsl  %ds:(%rsi),(%dx)
0x0000000008d8f2e7:     cltd
0x0000000008d8f2e8:     pushq  $0xf000020
0x0000000008d8f2ed:     outsl  %ds:(%rsi),(%dx)
0x0000000008d8f2ee:     jrcxz  0x8d8f338
0x0000000008d8f2f0:     lea    0x1058(%rcx),%edx
0x0000000008d8f2f6:     mov    (%rdx),%rsi
0x0000000008d8f2f9:     nopl   0x0(%rax)
0x0000000008d8f300:     movq   0x8(%rdx),%mm0
0x0000000008d8f304:     movq   (%rsi,%rax,2),%mm2
0x0000000008d8f308:     movq   0x8(%rsi,%rax,2),%mm5
0x0000000008d8f30d:     add    $0x10,%rdx
0x0000000008d8f311:     mov    (%rdx),%rsi
0x0000000008d8f314:     test   %rsi,%rsi
0x0000000008d8f317:     pmulhw %mm0,%mm2
0x0000000008d8f31a:     pmulhw %mm0,%mm5
0x0000000008d8f31d:     paddw  %mm2,%mm3
0x0000000008d8f320:     paddw  %mm5,%mm4
End of assembler dump.
(gdb) info all-registers
rax            0x29a8   10664
rcx            0xd8e6958        227436888
rdx            0xd8e79c0        227441088
rbx            0x133f4b30       322915120
rsp            0xdbbdcd0        230415568
rbp            0xd7e2e90        226373264
rsi            0x1340ccb0       323013808
rdi            0x133f9530       322934064
r8             0xa2470f8        170160376
r9             0x0      0
r10            0xd7b4f90        226185104
r11            0xd55ec58        223734872
r12            0xa2470f8        170160376
r13            0x133f9530       322934064
r14            0x133fdf30       322953008
r15            0x24f    591
rip            0x8d8f304        0x8d8f304
eflags         0x10202  [ IF RF ]
cs             0x33     51
ss             0x2b     43
ds             0x2b     43
es             0x2b     43
fs             0x53     83
gs             0x2b     43
st0            -nan(0x5d205d205d205d2)  (raw 0xffff05d205d205d205d2)
st1            -nan(0x35ab0dd2fc830000) (raw 0xffff35ab0dd2fc830000)
st2            -nan(0xffffffffffffffff) (raw 0xffffffffffffffffffff)
st3            -nan(0x3000300030003)    (raw 0xffff0003000300030003)
st4            -nan(0x3000300030003)    (raw 0xffff0003000300030003)
st5            -nan(0xffffffffffffffff) (raw 0xffffffffffffffffffff)
st6            -nan(0x8282828182828181) (raw 0xffff8282828182828181)
st7            -inf     (raw 0xffff0000000000000000)
fctrl          0xff0420027f     1095285867135
fstat          0xff0420 16712736
ftag           0xff     255
fiseg          0x2300000000     150323855360
fioff          0x0      0
foseg          0x1f8000000000   34634616274944
fooff          0x0      0
fop            0x2700000000     167503724544
xmm0           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
 v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0,
   0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0},
 uint128 = 0x00000000000000000000000000000000}
xmm1           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
 v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0,
   0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0},
 uint128 = 0x00000000000000000000000000000000}
xmm2           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
 v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0,
   0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0},
 uint128 = 0x00000000000000000000000000000000}
xmm3           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
 v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0,
   0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0},
 uint128 = 0x00000000000000000000000000000000}
xmm4           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
 v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0,
   0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0},
 uint128 = 0x00000000000000000000000000000000}
xmm5           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
 v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0,
   0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0},
 uint128 = 0x00000000000000000000000000000000}
xmm6           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
 v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0,
   0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0},
 uint128 = 0x00000000000000000000000000000000}
xmm7           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
 v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0,
   0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0},
 uint128 = 0x00000000000000000000000000000000}
xmm8           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
 v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0,
   0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0},
 uint128 = 0x00000000000000000000000000000000}
xmm9           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
 v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0,
   0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0},
 uint128 = 0x00000000000000000000000000000000}
xmm10          {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
 v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0,
   0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0},
 uint128 = 0x00000000000000000000000000000000}
xmm11          {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
 v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0,
   0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0},
 uint128 = 0x00000000000000000000000000000000}
xmm12          {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
 v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0,
   0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0},
 uint128 = 0x00000000000000000000000000000000}
xmm13          {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
 v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0,
   0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0},
 uint128 = 0x00000000000000000000000000000000}
xmm14          {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
 v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0,
   0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0},
 uint128 = 0x00000000000000000000000000000000}
xmm15          {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
 v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0,
   0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0},
 uint128 = 0x00000000000000000000000000000000}
mxcsr          0x1f80   [ IM DM ZM OM UM PM ]
mm0            {uint64 = 0x5d205d205d205d2, v2_int32 = {0x5d205d2,
   0x5d205d2}, v4_int16 = {0x5d2, 0x5d2, 0x5d2, 0x5d2}, v8_int8 = {0xd2,
   0x5, 0xd2, 0x5, 0xd2, 0x5, 0xd2, 0x5}}
mm1            {uint64 = 0x35ab0dd2fc830000, v2_int32 = {0xfc830000,
   0x35ab0dd2}, v4_int16 = {0x0, 0xfc83, 0xdd2, 0x35ab}, v8_int8 = {0x0,
   0x0, 0x83, 0xfc, 0xd2, 0xd, 0xab, 0x35}}
mm2            {uint64 = 0xffffffffffffffff, v2_int32 = {0xffffffff,
   0xffffffff}, v4_int16 = {0xffff, 0xffff, 0xffff, 0xffff}, v8_int8 = {
   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}
mm3            {uint64 = 0x3000300030003, v2_int32 = {0x30003, 0x30003},
 v4_int16 = {0x3, 0x3, 0x3, 0x3}, v8_int8 = {0x3, 0x0, 0x3, 0x0, 0x3, 0x0,
   0x3, 0x0}}
mm4            {uint64 = 0x3000300030003, v2_int32 = {0x30003, 0x30003},
 v4_int16 = {0x3, 0x3, 0x3, 0x3}, v8_int8 = {0x3, 0x0, 0x3, 0x0, 0x3, 0x0,
   0x3, 0x0}}
mm5            {uint64 = 0xffffffffffffffff, v2_int32 = {0xffffffff,
   0xffffffff}, v4_int16 = {0xffff, 0xffff, 0xffff, 0xffff}, v8_int8 = {
   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}
mm6            {uint64 = 0x8282828182828181, v2_int32 = {0x82828181,
   0x82828281}, v4_int16 = {0x8181, 0x8282, 0x8281, 0x8282}, v8_int8 = {
   0x81, 0x81, 0x82, 0x82, 0x81, 0x82, 0x82, 0x82}}
mm7            {uint64 = 0x0, v2_int32 = {0x0, 0x0}, v4_int16 = {0x0, 0x0,
   0x0, 0x0}, v8_int8 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}

Problem signature:
 Problem Event Name:    APPCRASH
 Application Name:    vlc.exe
 Application Version:    1.1.0.99
 Application Timestamp:    4ab4ef27
 Fault Module Name:    StackHash_cba3
 Fault Module Version:    6.0.6002.18005
 Fault Module Timestamp:    49e0421d
 Exception Code:    c0000374
 Exception Offset:    00000000000aef37
 OS Version:    6.0.6002.2.2.0.256.1
 Locale ID:    3081
 Additional Information 1:    cba3
 Additional Information 2:    95dd1a4107ffce4ffe1cd01c7386d009
 Additional Information 3:    1fe0
 Additional Information 4:    b2900c91fd3762da661e2a29e78195ba
[0x630e0c8] main subpicture error: subpicture heap full
[0x630e0c8] main subpicture error: subpicture heap full
[0x630e0c8] main subpicture error: subpicture heap full
[0x630e0c8] main subpicture error: subpicture heap full
[0x630e0c8] main subpicture error: subpicture heap full

This type of crash occurs with an AVI file; an idx/sub subtitle is required to
trigger it.
I found that optimized binaries don't work well.
1) It crashes when taking snapshots (always reproducible outside of gdb, not
always reproducible inside gdb); when triggered it will just max out the CPU
usage. It occurs when the binary is heavily optimized. I found that it is again
reproducible with a binary optimized with gcc-trunk.
2) It crashes when an idx/sub subtitle is loaded with an AVI file; the crash is
triggered randomly at runtime or after seeking, and is much harder to reproduce
inside gdb. See the detailed gdb output above. When running outside of gdb it
will just max out the CPU, leaving the system unresponsive.

3) After the binary is heavily optimized, the program doesn't function
properly. For example, the subtitle auto-loading feature doesn't work: with
filename.avi, filename.idx and filename.sub in the same directory, the subtitle
gets auto-loaded when the binary is not optimized.

For some reason this bug did not trigger when running inside gdb (almost
unreproducible); I tried many times and it rarely triggered.

This bug report was tested with the optimized binary created by gcc-trunk and
gcc-4_4-branch + Kai's xmmrestore patch.
This bug also exists in the optimized binary created by gcc-trunk.

However, this bug is really weird. It can exhibit a number of behaviors
(polymorphic):
1) Crash, which brings up the "vlc.exe has stopped working" dialog.
2) It can push the CPU to its theoretical maximum limit in 2 threads of a
dual-core system, i.e. the O/S, GUI, mouse, keyboard (almost) and "task manager"
become unresponsive. It becomes really hard to even terminate the vlc.exe
process.
3) It sometimes DoSes the system using only one thread of a dual-core machine.

Process Explorer shows that the cycles delta is extremely high.

This is more catastrophic than the "Massive memory jump" bug
(http://mailman.videolan.org/pipermail/vlc-devel/2009-September/066435.html),
which is fixed by the patch at
http://gcc.gnu.org/ml/gcc-patches/2009-09/msg01007.html. However, while the
memory overflow is gone, it still produces a segfault.


-- 
           Summary: Heavily optimized x86_64-w64 binary produces negative
                    effects
           Product: gcc
           Version: 4.5.0
            Status: UNCONFIRMED
          Severity: blocker
          Priority: P3
         Component: c
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: xxcv07 at gmail dot com
 GCC build triplet: i486-slackware-linux
  GCC host triplet: x86_64-w64-mingw32
GCC target triplet: x86_64-w64-mingw32


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=41424

Follow-Ups:
- [Bug target/41424] Heavily optimized x86_64-w64 binary produces negative effects
  - From: pinskia at gcc dot gnu dot org
- [Bug target/41424] Heavily optimized x86_64-w64 binary produces negative effects
  - From: xxcv07 at gmail dot com
- [Bug target/41424] Heavily optimized x86_64-w64 binary produces negative effects
  - From: xxcv07 at gmail dot com
- [Bug target/41424] Heavily optimized x86_64-w64 binary produces negative effects
  - From: xxcv07 at gmail dot com
- [Bug target/41424] Optimized x86_64-w64 -O1 -foptimize-sibling-calls binary produces negative effects
  - From: xxcv07 at gmail dot com
- [Bug target/41424] Optimized x86_64-w64 -O1 -foptimize-sibling-calls binary produces negative effects
  - From: xxcv07 at gmail dot com
- [Bug target/41424] Optimized x86_64-w64 -O1 -foptimize-sibling-calls binary produces negative effects
  - From: xxcv07 at gmail dot com
- [Bug target/41424] Optimized x86_64-w64 -O1 -foptimize-sibling-calls binary produces negative effects
  - From: ubizjak at gmail dot com
- [Bug target/41424] Optimized x86_64-w64 -O1 -foptimize-sibling-calls binary produces negative effects
  - From: xxcv07 at gmail dot com
- [Bug target/41424] Optimized x86_64-w64 -O1 -foptimize-sibling-calls binary produces negative effects
  - From: xxcv07 at gmail dot com

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]