This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Re: Haney's real matrix test regression


Richard Kenner wrote:

> Look at the code and see why it's slower.

So, this is the "O-O C++" kernel in question, a straightforward matrix
multiplication:

////////
// Mutable element accessor: returns a reference to the element of d at
// offset (i - 1) + n[0] * (j - 1), so both i and j are 1-based.
// NOTE(review): d and n are members declared outside this excerpt;
// presumably n[0] is the leading (row) dimension of a column-major
// layout -- confirm against the class definition.
Real &index(Integer i, Integer j)
  {
    return d[i - 1 + n[0] * (j - 1)];
  }

// Read-only element accessor: same 1-based offset computation as the
// non-const overload, (i - 1) + n[0] * (j - 1), but returns the element
// by value so it can be called on a const object.
// NOTE(review): d and n are members declared outside this excerpt.
Real index(Integer i, Integer j) const
  {
    return d[i - 1 + n[0] * (j - 1)];
  }

// Matrix multiplication kernel: accumulates into t the products
// b(k, j) * a(i, k) over k, for every (i, j), after zeroing t.
//
//   t  destination matrix, written through t.index(i, j)
//   a  left operand,  read through a.index(i, k); supplies M = a.dim(1)
//   b  right operand, read through b.index(k, j); supplies K = b.dim(1)
//      and N = b.dim(2)
//
// All loop indices are 1-based, matching index().
void rmatMul(RealMatrix &t, const RealMatrix &a, const RealMatrix &b)
{
  Integer i, j, k;
  const Integer M = a.dim(1), N = b.dim(2), K = b.dim(1);
  Real temp;
  const Real zero = Real(0.0);

  // Clear the destination before accumulating into it.
  // NOTE(review): t is a RealMatrix&, so this call relies on a conversion
  // from RealMatrix to a pointer that is not shown in this excerpt, and
  // assumes t holds at least M * N contiguous Reals -- confirm.
  memset(t, 0, M * N * sizeof(Real));

  for (j = 1; j <= N; j++)
    {
      for (k = 1; k <= K; k++)
        {
          // b(k, j) does not depend on i, so it is read once per (j, k).
          temp = b.index(k, j);
          // A zero factor contributes nothing; skip the whole i loop.
          if (temp != zero)
            {
              // i is the innermost index, and index() maps consecutive i
              // to consecutive offsets in both t and a.
              for (i = 1; i <= M; i++)
                t.index(i, j) += temp * a.index(i, k);
            }
        }
    }
}
////////

The two innermost loops (those over k and i) are compiled, at -O2
-fomit-frame-pointer, in very different ways by 3.0.2 (and, by extrapolation,
by the 3.1 snapshots of 20011022 and 20011023) on the one hand and by
3.1 20011025 on the other; the 3.0.2 version is clearly smaller and faster
(~2x, in fact).

Are there any discernible patterns that may help you diagnose the problem?

Thanks,
Paolo Carlini.

3.0.2
-----
 410: 8b 44 24 18           mov    0x18(%esp,1),%eax
 414: 8b 4c 24 48           mov    0x48(%esp,1),%ecx
 418: 8d 14 38              lea    (%eax,%edi,1),%edx
 41b: 8b 41 04              mov    0x4(%ecx),%eax
 41e: d9 44 90 fc           flds   0xfffffffc(%eax,%edx,4)
 422: dd e1                 fucom  %st(1)
 424: df e0                 fnstsw %ax
 426: 9e                    sahf
 427: 7a 02                 jp     42b <rmatMul(RealMatrix&, RealMatrix const&, RealMatrix const&)+0xcb>
 429: 74 5e                 je     489 <rmatMul(RealMatrix&, RealMatrix const&, RealMatrix const&)+0x129>
 42b: b9 01 00 00 00        mov    $0x1,%ecx
 430: 3b 4c 24 24           cmp    0x24(%esp,1),%ecx
 434: 7f 53                 jg     489 <rmatMul(RealMatrix&, RealMatrix const&, RealMatrix const&)+0x129>
 436: 8b 54 24 40           mov    0x40(%esp,1),%edx
 43a: 8b 44 24 14           mov    0x14(%esp,1),%eax
 43e: 8b 72 08              mov    0x8(%edx),%esi
 441: 8b 54 24 44           mov    0x44(%esp,1),%edx
 445: 0f af c6              imul   %esi,%eax
 448: 8b 72 08              mov    0x8(%edx),%esi
 44b: 8b 54 24 40           mov    0x40(%esp,1),%edx
 44f: c1 e0 02              shl    $0x2,%eax
 452: 8b 5a 04              mov    0x4(%edx),%ebx
 455: 0f af f5              imul   %ebp,%esi
 458: 01 d8                 add    %ebx,%eax
 45a: 8d 50 04              lea    0x4(%eax),%edx
 45d: 8b 44 24 44           mov    0x44(%esp,1),%eax
 461: 8b 58 04              mov    0x4(%eax),%ebx
 464: 8d b6 00 00 00 00     lea    0x0(%esi),%esi
 46a: 8d bf 00 00 00 00     lea    0x0(%edi),%edi

 470: 8d 04 0e              lea    (%esi,%ecx,1),%eax
 473: d9 c0                 fld    %st(0)
 475: 41                    inc    %ecx
 476: d8 4c 83 fc           fmuls  0xfffffffc(%ebx,%eax,4)
 47a: d8 42 fc              fadds  0xfffffffc(%edx)
 47d: d9 5a fc              fstps  0xfffffffc(%edx)
 480: 83 c2 04              add    $0x4,%edx
 483: 3b 4c 24 24           cmp    0x24(%esp,1),%ecx
 487: 7e e7                 jle    470 <rmatMul(RealMatrix&, RealMatrix const&, RealMatrix const&)+0x110>

 489: dd d8                 fstp   %st(0)
 48b: 47                    inc    %edi
 48c: 45                    inc    %ebp
 48d: 3b 7c 24 1c           cmp    0x1c(%esp,1),%edi
 491: 0f 8e 79 ff ff ff     jle    410 <rmatMul(RealMatrix&, RealMatrix const&, RealMatrix const&)+0xb0>


3.1 20011025
------------
 420: 8b 54 24 48           mov    0x48(%esp,1),%edx
 424: 8b 5c 24 24           mov    0x24(%esp,1),%ebx
 428: 8b 42 08              mov    0x8(%edx),%eax
 42b: 8b 52 04              mov    0x4(%edx),%edx
 42e: 0f af c5              imul   %ebp,%eax
 431: 01 d8                 add    %ebx,%eax
 433: d9 44 82 fc           flds   0xfffffffc(%edx,%eax,4)
 437: dd e1                 fucom  %st(1)
 439: df e0                 fnstsw %ax
 43b: 9e                    sahf
 43c: 7a 02                 jp     440 <rmatMul(RealMatrix&, RealMatrix const&, RealMatrix const&)+0xb0>
 43e: 74 58                 je     498 <rmatMul(RealMatrix&, RealMatrix const&, RealMatrix const&)+0x108>
 440: bb 01 00 00 00        mov    $0x1,%ebx
 445: 39 fb                 cmp    %edi,%ebx
 447: 7f 4f                 jg     498 <rmatMul(RealMatrix&, RealMatrix const&, RealMatrix const&)+0x108>
 449: 8b 54 24 24           mov    0x24(%esp,1),%edx
 44d: 4a                    dec    %edx
 44e: 89 54 24 18           mov    %edx,0x18(%esp,1)
 452: 8d b4 26 00 00 00 00  lea    0x0(%esi,1),%esi
 459: 8d bc 27 00 00 00 00  lea    0x0(%edi,1),%edi

 460: 8b 4c 24 40           mov    0x40(%esp,1),%ecx
 464: d9 c0                 fld    %st(0)
 466: 8b 74 24 44           mov    0x44(%esp,1),%esi
 46a: 8b 51 08              mov    0x8(%ecx),%edx
 46d: 8b 41 04              mov    0x4(%ecx),%eax
 470: 8b 4e 08              mov    0x8(%esi),%ecx
 473: 0f af d5              imul   %ebp,%edx
 476: 8d 14 1a              lea    (%edx,%ebx,1),%edx
 479: 8d 14 90              lea    (%eax,%edx,4),%edx
 47c: 8b 44 24 18           mov    0x18(%esp,1),%eax
 480: 0f af c1              imul   %ecx,%eax
 483: 8b 4e 04              mov    0x4(%esi),%ecx
 486: 8d 04 18              lea    (%eax,%ebx,1),%eax
 489: 43                    inc    %ebx
 48a: 39 fb                 cmp    %edi,%ebx
 48c: d8 4c 81 fc           fmuls  0xfffffffc(%ecx,%eax,4)
 490: d8 42 fc              fadds  0xfffffffc(%edx)
 493: d9 5a fc              fstps  0xfffffffc(%edx)
 496: 7e c8                 jle    460 <rmatMul(RealMatrix&, RealMatrix const&, RealMatrix const&)+0xd0>

 498: dd d8                 fstp   %st(0)
 49a: ff 44 24 24           incl   0x24(%esp,1)
 49e: 8b 44 24 1c           mov    0x1c(%esp,1),%eax
 4a2: 39 44 24 24           cmp    %eax,0x24(%esp,1)
 4a6: 0f 8e 74 ff ff ff     jle    420 <rmatMul(RealMatrix&, RealMatrix const&, RealMatrix const&)+0x90>



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]