This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Bad performance (regression) on oopack's Complex test


Hi everyone,

I'm noticing bad performance on this test. Moreover, the
numbers are worse with 3.4 than with 3.2.1.

On my P4-2400, with 200000 iterations at -O2 (ideally, the ratio
should be 1 :-( ):

3.2.1
-----
                        Seconds      Mflops
Test      Iterations      C   OOP      C   OOP  Ratio
----      ----------  -----------  -----------  -----
Complex       200000    1.6  21.4  981.6  74.8   13.1


3.4
---
                        Seconds      Mflops
Test      Iterations      C   OOP      C   OOP  Ratio
----      ----------  -----------  -----------  -----
Complex       200000    1.6  29.2  993.8  54.8   18.1


Dumps of the relevant loops follow:

3.2.1 C
-------
000003d0 <ComplexBenchmark::c_style() const>:
3f0: 89 d0 mov %edx,%eax
3f2: d9 c1 fld %st(1)
3f4: c1 e0 04 shl $0x4,%eax
3f7: dd 80 00 00 00 00 fldl 0x0(%eax)
3fd: 42 inc %edx
3fe: d9 c2 fld %st(2)
400: dd 80 08 00 00 00 fldl 0x8(%eax)
406: d9 cb fxch %st(3)
408: d8 ca fmul %st(2),%st
40a: d9 c9 fxch %st(1)
40c: 81 fa e7 03 00 00 cmp $0x3e7,%edx
412: d8 cb fmul %st(3),%st
414: d9 cb fxch %st(3)
416: d8 cd fmul %st(5),%st
418: d9 ca fxch %st(2)
41a: d8 cc fmul %st(4),%st
41c: d9 c9 fxch %st(1)
41e: dc 80 00 00 00 00 faddl 0x0(%eax)
424: d9 ca fxch %st(2)
426: dc 80 08 00 00 00 faddl 0x8(%eax)
42c: d9 ca fxch %st(2)
42e: de e3 fsubp %st,%st(3)
430: de c1 faddp %st,%st(1)
432: d9 c9 fxch %st(1)
434: dd 98 00 00 00 00 fstpl 0x0(%eax)
43a: dd 98 08 00 00 00 fstpl 0x8(%eax)
440: 7e ae jle 3f0 <ComplexBenchmark::c_style() const+0x20>

3.2.1 OOP
---------
00000450 <ComplexBenchmark::oop_style() const>:
490: 89 7d 9c mov %edi,0xffffff9c(%ebp)
493: 89 ca mov %ecx,%edx
495: c1 e2 04 shl $0x4,%edx
498: 89 75 a0 mov %esi,0xffffffa0(%ebp)
49b: 8b 82 00 00 00 00 mov 0x0(%edx),%eax
4a1: 41 inc %ecx
4a2: 89 5d a4 mov %ebx,0xffffffa4(%ebp)
4a5: 81 f9 e7 03 00 00 cmp $0x3e7,%ecx
4ab: 89 45 b8 mov %eax,0xffffffb8(%ebp)
4ae: 8b 82 04 00 00 00 mov 0x4(%edx),%eax
4b4: dd 45 a0 fldl 0xffffffa0(%ebp)
4b7: 89 45 bc mov %eax,0xffffffbc(%ebp)
4ba: 8b 82 08 00 00 00 mov 0x8(%edx),%eax
4c0: d9 c0 fld %st(0)
4c2: 89 45 c0 mov %eax,0xffffffc0(%ebp)
4c5: 8b 82 0c 00 00 00 mov 0xc(%edx),%eax
4cb: 89 45 c4 mov %eax,0xffffffc4(%ebp)
4ce: 8b 45 84 mov 0xffffff84(%ebp),%eax
4d1: 89 45 98 mov %eax,0xffffff98(%ebp)
4d4: 8b 82 00 00 00 00 mov 0x0(%edx),%eax
4da: dd 45 98 fldl 0xffffff98(%ebp)
4dd: 89 45 88 mov %eax,0xffffff88(%ebp)
4e0: 8b 82 04 00 00 00 mov 0x4(%edx),%eax
4e6: d9 c0 fld %st(0)
4e8: 89 45 8c mov %eax,0xffffff8c(%ebp)
4eb: 8b 82 08 00 00 00 mov 0x8(%edx),%eax
4f1: dd 45 88 fldl 0xffffff88(%ebp)
4f4: 89 45 90 mov %eax,0xffffff90(%ebp)
4f7: 8b 82 0c 00 00 00 mov 0xc(%edx),%eax
4fd: dc c9 fmul %st,%st(1)
4ff: 89 45 94 mov %eax,0xffffff94(%ebp)
502: de cc fmulp %st,%st(4)
504: dd 45 90 fldl 0xffffff90(%ebp)
507: dc cb fmul %st,%st(3)
509: de ca fmulp %st,%st(2)
50b: de e2 fsubp %st,%st(2)
50d: de c2 faddp %st,%st(2)
50f: dc 45 b8 faddl 0xffffffb8(%ebp)
512: d9 c9 fxch %st(1)
514: dc 45 c0 faddl 0xffffffc0(%ebp)
517: d9 c9 fxch %st(1)
519: dd 5d c8 fstpl 0xffffffc8(%ebp)
51c: 8b 45 c8 mov 0xffffffc8(%ebp),%eax
51f: dd 5d d0 fstpl 0xffffffd0(%ebp)
522: 89 82 00 00 00 00 mov %eax,0x0(%edx)
528: 8b 45 cc mov 0xffffffcc(%ebp),%eax
52b: 89 82 04 00 00 00 mov %eax,0x4(%edx)
531: 8b 45 d0 mov 0xffffffd0(%ebp),%eax
534: 89 82 08 00 00 00 mov %eax,0x8(%edx)
53a: 8b 45 d4 mov 0xffffffd4(%ebp),%eax
53d: 89 82 0c 00 00 00 mov %eax,0xc(%edx)
543: 0f 8e 47 ff ff ff jle 490 <ComplexBenchmark::oop_style() const+0x40>


3.4 C
-----
000003a0 <ComplexBenchmark::c_style() const>:
3c0: 89 d0 mov %edx,%eax
3c2: d9 c1 fld %st(1)
3c4: c1 e0 04 shl $0x4,%eax
3c7: dd 80 00 00 00 00 fldl 0x0(%eax)
3cd: 42 inc %edx
3ce: d9 c2 fld %st(2)
3d0: dd 80 08 00 00 00 fldl 0x8(%eax)
3d6: d9 cb fxch %st(3)
3d8: 81 fa e7 03 00 00 cmp $0x3e7,%edx
3de: d8 ca fmul %st(2),%st
3e0: d9 c9 fxch %st(1)
3e2: d8 cb fmul %st(3),%st
3e4: d9 cb fxch %st(3)
3e6: d8 cd fmul %st(5),%st
3e8: d9 ca fxch %st(2)
3ea: d8 cc fmul %st(4),%st
3ec: d9 c9 fxch %st(1)
3ee: dc 80 00 00 00 00 faddl 0x0(%eax)
3f4: d9 ca fxch %st(2)
3f6: dc 80 08 00 00 00 faddl 0x8(%eax)
3fc: d9 ca fxch %st(2)
3fe: de e3 fsubp %st,%st(3)
400: de c1 faddp %st,%st(1)
402: d9 c9 fxch %st(1)
404: dd 98 00 00 00 00 fstpl 0x0(%eax)
40a: dd 98 08 00 00 00 fstpl 0x8(%eax)
410: 7e ae jle 3c0 <ComplexBenchmark::c_style() const+0x20>

3.4 OOP
-------
00000420 <ComplexBenchmark::oop_style() const>:
460: 89 7d 9c mov %edi,0xffffff9c(%ebp)
463: 89 ca mov %ecx,%edx
465: c1 e2 04 shl $0x4,%edx
468: 89 75 a0 mov %esi,0xffffffa0(%ebp)
46b: 8b 82 00 00 00 00 mov 0x0(%edx),%eax
471: 41 inc %ecx
472: 89 5d a4 mov %ebx,0xffffffa4(%ebp)
475: 81 f9 e7 03 00 00 cmp $0x3e7,%ecx
47b: dd 45 a0 fldl 0xffffffa0(%ebp)
47e: 89 45 b8 mov %eax,0xffffffb8(%ebp)
481: 8b 82 04 00 00 00 mov 0x4(%edx),%eax
487: d9 c0 fld %st(0)
489: 89 45 bc mov %eax,0xffffffbc(%ebp)
48c: 8b 82 08 00 00 00 mov 0x8(%edx),%eax
492: 89 45 c0 mov %eax,0xffffffc0(%ebp)
495: 8b 82 0c 00 00 00 mov 0xc(%edx),%eax
49b: 89 45 c4 mov %eax,0xffffffc4(%ebp)
49e: 8b 85 74 ff ff ff mov 0xffffff74(%ebp),%eax
4a4: 89 45 98 mov %eax,0xffffff98(%ebp)
4a7: 8b 82 00 00 00 00 mov 0x0(%edx),%eax
4ad: dd 45 98 fldl 0xffffff98(%ebp)
4b0: 89 45 88 mov %eax,0xffffff88(%ebp)
4b3: 8b 82 04 00 00 00 mov 0x4(%edx),%eax
4b9: d9 c0 fld %st(0)
4bb: 89 45 8c mov %eax,0xffffff8c(%ebp)
4be: 8b 82 08 00 00 00 mov 0x8(%edx),%eax
4c4: dd 45 88 fldl 0xffffff88(%ebp)
4c7: 89 45 90 mov %eax,0xffffff90(%ebp)
4ca: 8b 82 0c 00 00 00 mov 0xc(%edx),%eax
4d0: dc c9 fmul %st,%st(1)
4d2: de cc fmulp %st,%st(4)
4d4: 89 45 94 mov %eax,0xffffff94(%ebp)
4d7: dd 45 90 fldl 0xffffff90(%ebp)
4da: dc cb fmul %st,%st(3)
4dc: de ca fmulp %st,%st(2)
4de: de e2 fsubp %st,%st(2)
4e0: de c2 faddp %st,%st(2)
4e2: dd 9d 78 ff ff ff fstpl 0xffffff78(%ebp)
4e8: 8b 85 78 ff ff ff mov 0xffffff78(%ebp),%eax
4ee: dd 5d 80 fstpl 0xffffff80(%ebp)
4f1: 89 45 a8 mov %eax,0xffffffa8(%ebp)
4f4: 8b 85 7c ff ff ff mov 0xffffff7c(%ebp),%eax
4fa: 89 45 ac mov %eax,0xffffffac(%ebp)
4fd: 8b 45 80 mov 0xffffff80(%ebp),%eax
500: 89 45 b0 mov %eax,0xffffffb0(%ebp)
503: 8b 45 84 mov 0xffffff84(%ebp),%eax
506: 89 45 b4 mov %eax,0xffffffb4(%ebp)
509: dd 45 a8 fldl 0xffffffa8(%ebp)
50c: dc 45 b8 faddl 0xffffffb8(%ebp)
50f: dd 45 b0 fldl 0xffffffb0(%ebp)
512: dc 45 c0 faddl 0xffffffc0(%ebp)
515: d9 c9 fxch %st(1)
517: dd 9d 78 ff ff ff fstpl 0xffffff78(%ebp)
51d: 8b 85 78 ff ff ff mov 0xffffff78(%ebp),%eax
523: dd 5d 80 fstpl 0xffffff80(%ebp)
526: 89 82 00 00 00 00 mov %eax,0x0(%edx)
52c: 8b 85 7c ff ff ff mov 0xffffff7c(%ebp),%eax
532: 89 82 04 00 00 00 mov %eax,0x4(%edx)
538: 8b 45 80 mov 0xffffff80(%ebp),%eax
53b: 89 82 08 00 00 00 mov %eax,0x8(%edx)
541: 8b 45 84 mov 0xffffff84(%ebp),%eax
544: 89 82 0c 00 00 00 mov %eax,0xc(%edx)
54a: 0f 8e 10 ff ff ff jle 460 <ComplexBenchmark::oop_style() const+0x40>


Any ideas?

Thanks,
Paolo.

Attachment: oopack_v1p8.C.bz2
Description: Binary data


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]