This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

-O4 needed


I compared the running times of a mergesort program (follows below)
compiled with cc -fast (UltraSparcI-Sun 5.6) and gcc -O3 (egcs-1.1a):

100000 elements
   cc -fast:   200 ms
   gcc -O3 :   242 ms

Hmm... not good.

Then I tried to use more optimization flags:

100000 elements
   cc -fast -xO5:   195 ms
   gcc -O3 -fstrength-reduce -fthread-jumps -fcse-follow-jumps
-fcse-skip-blocks -frerun-cse-after-loop -frerun-loop-opt -fgcse
-fexpensive-optimizations -fregmove -fschedule-insns -fschedule-insns2
-ffunction-sections -fcaller-saves -funroll-loops -fmove-all-movables
-freduce-all-givs -fstrict-aliasing
                :   202 ms

Hmm... much better. 20% better than just -O3, and almost as fast as
Suns compiler.

My question is: why not just put all those extra optimizations flags
(or almost all) that not are used in -O3 in a new -O4 flag. I think
that 20% i a very significant improvement.

Thanks,
Patrik Hägglund



/* log, ceil */
# include <math.h>

/* gethrtime */
#include <sys/time.h>

/* printf */
#include <stdio.h>

/* malloc, rand */
#include <stdlib.h>

/* alloca */
#include <alloca.h>

typedef int item;

typedef struct { int *a; int n; } array_t;

/* #define COMP_COUNT */

#ifdef COMP_COUNT
int comp=0;
#endif

#ifdef COMP_COUNT
#define ordered(a, b) comp++, a <= b
#else
#define ordered(a, b) a <= b
#endif

array_t tmp;

/* n=10000: cc -fast 200 ms, gcc -O3 242 ms  */
void
merge(array_t a1, array_t a2) 
{
    
    int p1, p2, r;

/*     if( ordered(*(a1.past-1), *a2.a) 
	return; 
	*/

    /* copy one part to temporary storage */
/*     tmp.a = alloca(a1.n * sizeof(item)); */
    tmp.n = a1.n;

    for(r = 0 ; r < a1.n ; r++)
	tmp.a[r]= a1.a[r];
    
    /* merge */
    p1 = 0;
    p2 = 0;
    r = 0;
    
    for(;;) 
	if( ordered(tmp.a[p1], a2.a[p2]) ) {
	    a1.a[r++] = tmp.a[p1++];
	    if( p1 >= tmp.n ) 
		break;
	}
	else {
	    a1.a[r++] = a2.a[p2++];
	    if( p2 >= a2.n ) 
		break;
	    
	}

    /* move tmp-tail */
    for(; p1 < tmp.n;)
	a1.a[r++] = tmp.a[p1++];
}

void
mergesort(array_t array) 
{
    array_t left, right;
    
    left.a = array.a;
    left.n = array.n/2;
    right.a = left.a + left.n;
    right.n = array.n - left.n;
    if( left.n > 1 ) 
	mergesort(left);
    if( right.n > 1 ) 
	mergesort(right);
    merge(left, right);
}

int
main(int argc, char *argv[]) 
{
    array_t *lists;
    int i, j, nr, mul;
    hrtime_t t1, t2;
    double c;
    int max=0, min=10000000, sum=0;
    
    /* number of elements */
    nr = atoi(argv[1]);
    /* number of test iterations */
    mul = atoi(argv[2]);
    lists = alloca(mul * sizeof(array_t));

    tmp.a = alloca( (nr/2+1) * sizeof(int) );

    for(j = 0; j < mul; j++) {
	
	lists[j].a = malloc(nr * sizeof(int));
	lists[j].n = nr;
    
	for(i = 0; i < nr; i++) {
	    lists[j].a[i] = rand();
	}

    }

    t1 = gethrtime();

    for(j = 0; j < mul; j++) {
#ifdef COMP_COUNT
	comp = 0;
#endif
	mergesort(lists[j]);
#ifdef COMP_COUNT
	sum += comp;
	if (max < comp) 
	    max = comp;
	if (min > comp)
	    min = comp;
	
#endif
    }
    
    t2 = gethrtime();

    printf("n=%d, msec: %f\n", nr, (t2 - t1)/1.0e6/mul);

#ifdef COMP_COUNT
    c = 0;
    for(i = 0; i < nr; i++)
	c += log (i+1);
	
    printf("number of comp, max %d, min %d, avg %d (theoretical: %d)\n", max, min, sum/mul, (int)ceil(c/log(2)) );
#endif
    
    if (argc > 3) {
	for(j = 0; j < mul; j++) {

	    for(i = 0; i < nr; i++) {
		printf("%6d ", lists[j].a[i]);
		
	    }
	    printf("\n\n");
	}
	
    }
    
    return 0;
}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]