This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.

Index Nav: Message Nav: [Date Index] [Subject Index] [Author Index] [Thread Index] [Date Prev] [Date Next] [Thread Prev] [Thread Next]

# Re: Haney's real matrix test regression

• To: Richard Kenner <kenner at vlsi1 dot ultra dot nyu dot edu>
• Subject: Re: Haney's real matrix test regression
• From: Paolo Carlini <pcarlini at unitus dot it>
• Date: Fri, 26 Oct 2001 01:17:15 +0200
• CC: gcc at gcc dot gnu dot org
• Organization: Universita' della Tuscia
• References: <10110251737.AA14862@vlsi1.ultra.nyu.edu>
• Reply-To: pcarlini at unitus dot it

```
Richard Kenner wrote:

> Look at the code and see why it's slower.

So, this is the "O-O C++" kernel in question, a straightforward matrix
multiplication:

////////
// Mutable element accessor: returns a reference into d for the index pair
// (i, j). Both indices have 1 subtracted before use, so (1, 1) maps to d[0].
// Consecutive values of i are adjacent in d; consecutive values of j are
// n[0] elements apart.
// NOTE(review): d and n are members declared outside this excerpt; no bounds
// checking is performed here.
Real &index(Integer i, Integer j)
{
return d[i - 1 + n[0] * (j - 1)];
}

// Read-only element accessor: same offset computation as the non-const
// overload, (i - 1) + n[0] * (j - 1), but returns the element by value.
// NOTE(review): d and n are members declared outside this excerpt; no bounds
// checking is performed here.
Real index(Integer i, Integer j) const
{
return d[i - 1 + n[0] * (j - 1)];
}

// Matrix product kernel: clears t, then accumulates
//   t(i, j) += b(k, j) * a(i, k)
// for i in 1..M, j in 1..N, k in 1..K, where M = a.dim(1), N = b.dim(2) and
// K = b.dim(1).
//
// Parameters:
//   t - destination matrix; zeroed and then written through t.index().
//   a - left operand, read through a.index(i, k).
//   b - right operand, read through b.index(k, j).
//
// NOTE(review): a.dim(2) is never read, so agreement between a's second
// dimension and K is not checked here; t is presumably expected to hold at
// least M * N elements - confirm against the callers.
void rmatMul(RealMatrix &t, const RealMatrix &a, const RealMatrix &b)
{
Integer i, j, k;
const Integer M = a.dim(1), N = b.dim(2), K = b.dim(1);
Real temp;
const Real zero = Real(0.0);

// Clear the first M * N elements of the destination before accumulating.
// NOTE(review): t is a RealMatrix&, so this call relies on RealMatrix
// converting to a pointer to its element storage (not visible in this
// excerpt), and on an all-zero byte pattern representing Real 0.0 - confirm.
memset(t, 0, M * N * sizeof(Real));

// Loop order is j, k, i: i is innermost, so the index offsets used for both
// t(i, j) and a(i, k) advance by one element per inner iteration.
for (j = 1; j <= N; j++)
{
for (k = 1; k <= K; k++)
{
// b(k, j) does not depend on i, so it is read once per (j, k) pair.
temp = b.index(k, j);
// A zero factor would add temp * a(i, k) with temp == 0; skip the whole
// inner loop in that case.
if (temp != zero)
{
for (i = 1; i <= M; i++)
t.index(i, j) += temp * a.index(i, k);
}
}
}
}
////////

The two innermost loops, those over k and i, are compiled (-O2
-fomit-frame-pointer) in very different ways by 3.0.2 (and, by extrapolation,
3.1 2001102{2,3}) and 3.1 20011025; it is obvious that the 3.0.2 version is
smaller and faster (~2x, in fact).

Are there any discernible patterns that may help you diagnose the problem?

Thanks,
Paolo Carlini.

3.0.2
-----
410: 8b 44 24 18           mov    0x18(%esp,1),%eax
414: 8b 4c 24 48           mov    0x48(%esp,1),%ecx
418: 8d 14 38              lea    (%eax,%edi,1),%edx
41b: 8b 41 04              mov    0x4(%ecx),%eax
41e: d9 44 90 fc           flds   0xfffffffc(%eax,%edx,4)
422: dd e1                 fucom  %st(1)
424: df e0                 fnstsw %ax
426: 9e                    sahf
427: 7a 02                 jp     42b <rmatMul(RealMatrix&, RealMatrix const&,
RealMatrix const&)+0xcb>
429: 74 5e                 je     489 <rmatMul(RealMatrix&, RealMatrix const&,
RealMatrix const&)+0x129>
42b: b9 01 00 00 00        mov    $0x1,%ecx
430: 3b 4c 24 24           cmp    0x24(%esp,1),%ecx
434: 7f 53                 jg     489 <rmatMul(RealMatrix&, RealMatrix const&,
RealMatrix const&)+0x129>
436: 8b 54 24 40           mov    0x40(%esp,1),%edx
43a: 8b 44 24 14           mov    0x14(%esp,1),%eax
43e: 8b 72 08              mov    0x8(%edx),%esi
441: 8b 54 24 44           mov    0x44(%esp,1),%edx
445: 0f af c6              imul   %esi,%eax
448: 8b 72 08              mov    0x8(%edx),%esi
44b: 8b 54 24 40           mov    0x40(%esp,1),%edx
44f: c1 e0 02              shl    $0x2,%eax
452: 8b 5a 04              mov    0x4(%edx),%ebx
455: 0f af f5              imul   %ebp,%esi
45a: 8d 50 04              lea    0x4(%eax),%edx
45d: 8b 44 24 44           mov    0x44(%esp,1),%eax
461: 8b 58 04              mov    0x4(%eax),%ebx
464: 8d b6 00 00 00 00     lea    0x0(%esi),%esi
46a: 8d bf 00 00 00 00     lea    0x0(%edi),%edi

470: 8d 04 0e              lea    (%esi,%ecx,1),%eax
473: d9 c0                 fld    %st(0)
475: 41                    inc    %ecx
476: d8 4c 83 fc           fmuls  0xfffffffc(%ebx,%eax,4)
47a: d8 42 fc              fadds  0xfffffffc(%edx)
47d: d9 5a fc              fstps  0xfffffffc(%edx)
480: 83 c2 04              add    $0x4,%edx
483: 3b 4c 24 24           cmp    0x24(%esp,1),%ecx
487: 7e e7                 jle    470 <rmatMul(RealMatrix&, RealMatrix const&,
RealMatrix const&)+0x110>

489: dd d8                 fstp   %st(0)
48b: 47                    inc    %edi
48c: 45                    inc    %ebp
48d: 3b 7c 24 1c           cmp    0x1c(%esp,1),%edi
491: 0f 8e 79 ff ff ff     jle    410 <rmatMul(RealMatrix&, RealMatrix const&,
RealMatrix const&)+0xb0>

3.1 20011025
------------
420: 8b 54 24 48           mov    0x48(%esp,1),%edx
424: 8b 5c 24 24           mov    0x24(%esp,1),%ebx
428: 8b 42 08              mov    0x8(%edx),%eax
42b: 8b 52 04              mov    0x4(%edx),%edx
42e: 0f af c5              imul   %ebp,%eax
433: d9 44 82 fc           flds   0xfffffffc(%edx,%eax,4)
437: dd e1                 fucom  %st(1)
439: df e0                 fnstsw %ax
43b: 9e                    sahf
43c: 7a 02                 jp     440 <rmatMul(RealMatrix&, RealMatrix const&,
RealMatrix const&)+0xb0>
43e: 74 58                 je     498 <rmatMul(RealMatrix&, RealMatrix const&,
RealMatrix const&)+0x108>
440: bb 01 00 00 00        mov    $0x1,%ebx
445: 39 fb                 cmp    %edi,%ebx
447: 7f 4f                 jg     498 <rmatMul(RealMatrix&, RealMatrix const&,
RealMatrix const&)+0x108>
449: 8b 54 24 24           mov    0x24(%esp,1),%edx
44d: 4a                    dec    %edx
44e: 89 54 24 18           mov    %edx,0x18(%esp,1)
452: 8d b4 26 00 00 00 00  lea    0x0(%esi,1),%esi
459: 8d bc 27 00 00 00 00  lea    0x0(%edi,1),%edi

460: 8b 4c 24 40           mov    0x40(%esp,1),%ecx
464: d9 c0                 fld    %st(0)
466: 8b 74 24 44           mov    0x44(%esp,1),%esi
46a: 8b 51 08              mov    0x8(%ecx),%edx
46d: 8b 41 04              mov    0x4(%ecx),%eax
470: 8b 4e 08              mov    0x8(%esi),%ecx
473: 0f af d5              imul   %ebp,%edx
476: 8d 14 1a              lea    (%edx,%ebx,1),%edx
479: 8d 14 90              lea    (%eax,%edx,4),%edx
47c: 8b 44 24 18           mov    0x18(%esp,1),%eax
480: 0f af c1              imul   %ecx,%eax
483: 8b 4e 04              mov    0x4(%esi),%ecx
486: 8d 04 18              lea    (%eax,%ebx,1),%eax
489: 43                    inc    %ebx
48a: 39 fb                 cmp    %edi,%ebx
48c: d8 4c 81 fc           fmuls  0xfffffffc(%ecx,%eax,4)
490: d8 42 fc              fadds  0xfffffffc(%edx)
493: d9 5a fc              fstps  0xfffffffc(%edx)
496: 7e c8                 jle    460 <rmatMul(RealMatrix&, RealMatrix const&,
RealMatrix const&)+0xd0>

498: dd d8                 fstp   %st(0)
49a: ff 44 24 24           incl   0x24(%esp,1)
49e: 8b 44 24 1c           mov    0x1c(%esp,1),%eax
4a2: 39 44 24 24           cmp    %eax,0x24(%esp,1)
4a6: 0f 8e 74 ff ff ff     jle    420 <rmatMul(RealMatrix&, RealMatrix const&,
RealMatrix const&)+0x90>

```

Index Nav: Message Nav: [Date Index] [Subject Index] [Author Index] [Thread Index] [Date Prev] [Date Next] [Thread Prev] [Thread Next]