This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Re: Performance of Integer Multiplication on PIII



For those of you who are interested I refined my code a bit and here are
the results.

$ gcc -O2 -march=i686 read-empty.c read.hand.s t.c && ./a.out
Loop: 1.33, Code: 1.94
Clocks: 14.45

$ icc read-empty.c read.c t.c && ./a.out
Loop: 1.10, Code: 1.97
Clocks: 14.68

$ gcc -O2 -march=i686 read-empty.c read.c t.c && ./a.out
Loop: 1.33, Code: 3.57
Clocks: 26.60

The file read.intel.s is the assembly output from the Intel compiler after
being converted into Intel syntax.  Similarly the file read.gcc.s is the
output from gcc. The file read.hand.s is my assembler code which is
modified a bit from what I posted last time.

When looking at the code from the Intel compiler, it is doing almost the
same thing I am, except that it first moves the values from the array
into a register before multiplying.

For those of you who forgot, this is on a Pentium III running at 500 MHz.

Finally, just for comparison's sake:

$ gcc -O2 read-empty.c read.c t.c && ./a.out
Loop: 1.30, Code: 5.48
Clocks: 40.82


#include <stdio.h>
#include <assert.h>
#include <time.h>

unsigned read(const unsigned *spl);

static const unsigned iter      = 0X4000000;
static const double   cpu_speed = 499.947*1e6;
static       double   loop_time;

int main()
{
  time_t start,stop;
  double time;
  unsigned a[8] = {1, 2, 3, 4, 
		   1, 2, 3, 4};
  unsigned i;
  i = read(a);
  if (i != 2366519)
    printf("Warning Wrong Value: %f\n", i);
  start = clock();
  for (i = 0; i != iter; ++i)
    read_empty(a);
  stop = clock();
  loop_time = (stop-start)/(double)CLOCKS_PER_SEC;
  start = clock();
  for (i = 0; i != iter; ++i)
    read(a);
  stop = clock();
  time = (stop-start)/(double)CLOCKS_PER_SEC - loop_time;
  printf("Loop: %.2f, Code: %.2f\n", loop_time, time);
  printf("Clocks: %.2f\n", time*cpu_speed/iter);
  return 0;
}
/*
 * Return the weighted sum of the eight values spl[0..7].
 *
 * The weights are 1, 7, 7^2, 7^3, 7^4, 7^4*6, 7^4*6^2 and 7^4*6^3.
 * All arithmetic is unsigned, so the result wraps on overflow.
 */
unsigned read(const unsigned * spl)
{
  /* Each weight is the previous one times 7 (first four steps) or
     times 6 (last three steps). */
  enum {
    W1 = 7,
    W2 = W1 * 7,
    W3 = W2 * 7,
    W4 = W3 * 7,
    W5 = W4 * 6,
    W6 = W5 * 6,
    W7 = W6 * 6
  };
  unsigned total = spl[0];

  total += spl[1] * W1;
  total += spl[2] * W2;
  total += spl[3] * W3;
  total += spl[4] * W4;
  total += spl[5] * W5;
  total += spl[6] * W6;
  total += spl[7] * W7;

  return total;
}
# Hand-written version of read() for GNU as, using Intel operand order
# (destination first, no register prefixes).
#
# C equivalent: unsigned read(const unsigned *spl)
#   returns spl[0] + spl[1]*7 + spl[2]*7^2 + spl[3]*7^3 + spl[4]*7^4
#         + spl[5]*7^4*6 + spl[6]*7^4*6^2 + spl[7]*7^4*6^3
#
# In:      [esp+4] = spl (first stack argument)
# Out:     eax = the sum
# Writes:  eax, ecx, edx, flags.  Uses no stack and saves no registers.
#
# Every term except spl[0] is formed by a three-operand imul that reads the
# array element straight from memory, then added into the running sum.
.intel_syntax noprefix
.globl read
	.type	 read,@function
read:
	# edx = spl; eax = running sum, started with spl[0]
	mov edx,[esp+4]
	mov eax,[edx+0*4]
	# ecx is the scratch register for each product spl[i] * weight
	imul ecx,[edx+1*4],7
	add eax,ecx
	imul ecx,[edx+2*4],7*7
	add eax,ecx	
	imul ecx,[edx+3*4],7*7*7
	add eax,ecx	
	imul ecx,[edx+4*4],7*7*7*7
	add eax,ecx	
	# from spl[5] onwards each weight grows by a factor of 6 instead of 7
	imul ecx,[edx+5*4],7*7*7*7*6
	add eax,ecx	
	imul ecx,[edx+6*4],7*7*7*7*6*6
	add eax,ecx	
	imul ecx,[edx+7*4],7*7*7*7*6*6*6
	add eax,ecx
	ret
# End marker, used only to compute the symbol size below.
.end_read:
	.size	 read,.end_read-read
        ;FILE "read.c"
; gcc's code for read() from read.c (compiler version in the IDENT line at
; the end), shown here after conversion to Intel syntax.
;
; C equivalent: unsigned read(const unsigned *spl)
; In:   [ebp+8] = spl once the frame is set up
; Out:  eax = spl[0] + 7*spl[1] + 49*spl[2] + 343*spl[3] + 2401*spl[4]
;           + 14406*spl[5] + 86436*spl[6] + 518616*spl[7]
; Saves and restores ebp, edi, esi and ebx; also writes ecx, edx and flags.
;
; Only the three largest weights use imul.  The multiplications by 7, 49,
; 343 and 2401 are built from lea / shift / add / sub sequences, interleaved
; with the loads.
gcc2_compiled.: 
SECTION .text
        ALIGN 16
GLOBAL read
        GLOBAL read:function
read: 
        push    ebp
        mov     ebp,esp
        push    edi
        ; edi = spl for the rest of the function
        mov     edi, [ebp+8]
        push    esi
        push    ebx
        ; eax = spl[5], edx = spl[6], ecx = spl[4]
        mov     eax, [edi+20]
        mov     edx, [edi+24]
        mov     ecx, [edi+16]
        imul    eax,eax,14406
        imul    edx,edx,86436
        ; ebx = 5*spl[4] (first step of spl[4]*2401, continued below)
        lea     ebx, [ecx+ecx*4]
        ; eax = 14406*spl[5] + 86436*spl[6]
        add     eax,edx
        ; esi = 343*spl[3], built as 9x -> 19x -> 171x -> 343x
        mov     edx, [edi+12]
        lea     esi, [edx+edx*8]
        lea     esi, [edx+esi*2]
        lea     esi, [esi+esi*8]
        lea     esi, [edx+esi*2]
        ; edx = 2401*spl[4], built as 5x -> 80x -> 75x -> 2400x -> 2401x
        mov     edx,ebx
        sal     edx,4
        sub     edx,ebx
        ; ebx is reused: ebx = spl[2]
        mov     ebx, [edi+8]
        sal     edx,5
        add     edx,ecx
        ; esi = 343*spl[3] + 2401*spl[4]
        add     esi,edx
        ; ecx = 7*spl[1], built as 8x - x
        mov     edx, [edi+4]
        lea     ecx, [edx*8+0]
        sub     ecx,edx
        ; edx = 49*spl[2], built as 3x -> 48x -> 49x
        lea     edx, [ebx+ebx*2]
        sal     edx,4
        add     edx,ebx
        pop     ebx
        ; ecx = spl[0] + 7*spl[1] + 49*spl[2]
        add     ecx,edx
        mov     edx, [edi]
        add     ecx,edx
        ; edx = spl[7]; its multiply is issued a few instructions later
        mov     edx, [edi+28]
        ; fold the partial sums together: eax = everything except the spl[7] term
        add     esi,ecx
        add     eax,esi
        pop     esi
        imul    edx,edx,518616
        pop     edi
        ; add the last term; eax now holds the return value
        add     eax,edx
        pop     ebp
        ret
; End-of-function label, used in the size expression on the next line.
.Lfe1: 
        GLOBAL   read:function (.Lfe1-read)
        ;IDENT "GCC: (GNU) 2.96 20000731 (Mandrake Linux 8.1 2.96-0.62mdk)"

; -- Machine type PX
; mark_description "Intel(R) C++ Compiler for 32-bit applications, Version 5.0.1   Build 010730D0";
; mark_description "-tp p6 -long_double -D__int64=long long -S";
        ;IDENT "Intel(R) C++ Compiler for 32-bit applications, Version 5.0.1   Build 010730D0"
        ;IDENT "-tp p6 -long_double -D__int64=long long -S"
        ;FILE "read.c"
; Intel compiler's code for read() from read.c, shown here after conversion
; to Intel syntax.
;
; C equivalent: unsigned read(const unsigned *spl)
; In:   [esp+4] = spl
; Out:  eax = spl[0] + 7*spl[1] + 49*spl[2] + 343*spl[3] + 2401*spl[4]
;           + 14406*spl[5] + 86436*spl[6] + 518616*spl[7]
; Writes eax, ecx, edx and flags; uses no stack frame.
;
; Each array element is loaded into a register first and then multiplied
; with a register-source imul.  Only the multiplication by 7 is done
; without imul.
; NOTE(review): the trailing ";N.M" comments look like source line.column
; positions emitted by the compiler - confirm.
        SECTION .text
        SECTION .data
        ALIGN 4
        SECTION .bss
        ALIGN 4
        ;IDENT "-?comment:Intel(R) C++ Compiler for 32-bit applications, Version 5.0.1   Build 010730D0  : read.c : -tp p6 -long_double -D__int64=long long -S"
        SECTION .data
        SECTION .text
; -- Begin  read
; mark_begin;
       ALIGN 4, db 090h
; parameter 1: 4 + %esp
        GLOBAL   read
read: 
.B1.1:                           ; Preds .B1.0
        ; ecx = spl
        mov       ecx, [esp+4]                                  ;1.10
        ; eax = 7*spl[1], built as 2x -> 4x -> 8x -> 8x - x
        mov       edx, [ecx+4]                                  ;5.5
        lea       eax, [edx+edx]                                ;5.5
        add       eax,eax                                       ;5.5
        add       eax,eax                                       ;5.5
        sub       eax,edx                                       ;5.5
        ; load spl[2] early, then add spl[0] into the sum straight from memory
        mov       edx, [ecx+8]                                  ;6.5
        add       eax, [ecx]                                    ;5.5
        imul      edx,edx,49                                    ;6.5
        add       eax,edx                                       ;6.5
        ; remaining terms up to spl[5]: load, multiply by the weight, accumulate
        mov       edx, [ecx+12]                                 ;7.5
        imul      edx,edx,343                                   ;7.5
        add       eax,edx                                       ;7.5
        mov       edx, [ecx+16]                                 ;8.5
        imul      edx,edx,2401                                  ;8.5
        add       eax,edx                                       ;8.5
        mov       edx, [ecx+20]                                 ;9.5
        imul      edx,edx,14406                                 ;9.5
        add       eax,edx                                       ;9.5
        ; the last two products are summed together first (ecx is overwritten
        ; with spl[7] since the pointer is no longer needed), then added to eax
        mov       edx, [ecx+24]                                 ;10.5
        imul      edx,edx,86436                                 ;10.5
        mov       ecx, [ecx+28]                                 ;11.5
        imul      ecx,ecx,518616                                ;11.5
        add       edx,ecx                                       ;10.5
        add       eax,edx                                       ;11.5
        ret                                                     ;11.5
        ALIGN 4, db 090h
                                ; LOE
; mark_end;
        GLOBAL read:function
        GLOBAL  read:function (.-read)
        SECTION .data
; -- End  read
        SECTION .data
; End

/*
 * Do-nothing stand-in with the same signature as read().  The benchmark
 * driver times a loop of calls to this function to measure loop and call
 * overhead.
 *
 * spl is ignored.  Returns 0 so that the function has a defined return
 * value instead of falling off the end of a non-void function.
 */
unsigned read_empty(const unsigned * spl)
{
  (void)spl;
  return 0;
}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]