This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
-O4 needed
- To: egcs at cygnus dot com
- Subject: -O4 needed
- From: Patrik Hagglund <patha at ida dot liu dot se>
- Date: Fri, 11 Sep 1998 10:41:30 +0200
I compared the running times of a mergesort program (follows below)
compiled with cc -fast (UltraSparcI-Sun 5.6) and gcc -O3 (egcs-1.1a):
100000 elements
cc -fast: 200 ms
gcc -O3 : 242 ms
Hmm... not good.
Then I tried to use more optimization flags:
100000 elements
cc -fast -xO5: 195 ms
gcc -O3 -fstrength-reduce -fthread-jumps -fcse-follow-jumps
-fcse-skip-blocks -frerun-cse-after-loop -frerun-loop-opt -fgcse
-fexpensive-optimizations -fregmove -fschedule-insns -fschedule-insns2
-ffunction-sections -fcaller-saves -funroll-loops -fmove-all-movables
-freduce-all-givs -fstrict-aliasing
: 202 ms
Hmm... much better. 20% better than just -O3, and almost as fast as
Suns compiler.
My question is: why not just put all those extra optimizations flags
(or almost all) that not are used in -O3 in a new -O4 flag. I think
that 20% i a very significant improvement.
Thanks,
Patrik Hägglund
/* log, ceil */
# include <math.h>
/* gethrtime */
#include <sys/time.h>
/* printf */
#include <stdio.h>
/* malloc, rand */
#include <stdlib.h>
/* alloca */
#include <alloca.h>
typedef int item;
typedef struct { int *a; int n; } array_t;
/* #define COMP_COUNT */
#ifdef COMP_COUNT
int comp=0;
#endif
#ifdef COMP_COUNT
#define ordered(a, b) comp++, a <= b
#else
#define ordered(a, b) a <= b
#endif
array_t tmp;
/* n=10000: cc -fast 200 ms, gcc -O3 242 ms */
void
merge(array_t a1, array_t a2)
{
int p1, p2, r;
/* if( ordered(*(a1.past-1), *a2.a)
return;
*/
/* copy one part to temporary storage */
/* tmp.a = alloca(a1.n * sizeof(item)); */
tmp.n = a1.n;
for(r = 0 ; r < a1.n ; r++)
tmp.a[r]= a1.a[r];
/* merge */
p1 = 0;
p2 = 0;
r = 0;
for(;;)
if( ordered(tmp.a[p1], a2.a[p2]) ) {
a1.a[r++] = tmp.a[p1++];
if( p1 >= tmp.n )
break;
}
else {
a1.a[r++] = a2.a[p2++];
if( p2 >= a2.n )
break;
}
/* move tmp-tail */
for(; p1 < tmp.n;)
a1.a[r++] = tmp.a[p1++];
}
void
mergesort(array_t array)
{
array_t left, right;
left.a = array.a;
left.n = array.n/2;
right.a = left.a + left.n;
right.n = array.n - left.n;
if( left.n > 1 )
mergesort(left);
if( right.n > 1 )
mergesort(right);
merge(left, right);
}
int
main(int argc, char *argv[])
{
array_t *lists;
int i, j, nr, mul;
hrtime_t t1, t2;
double c;
int max=0, min=10000000, sum=0;
/* number of elements */
nr = atoi(argv[1]);
/* number of test iterations */
mul = atoi(argv[2]);
lists = alloca(mul * sizeof(array_t));
tmp.a = alloca( (nr/2+1) * sizeof(int) );
for(j = 0; j < mul; j++) {
lists[j].a = malloc(nr * sizeof(int));
lists[j].n = nr;
for(i = 0; i < nr; i++) {
lists[j].a[i] = rand();
}
}
t1 = gethrtime();
for(j = 0; j < mul; j++) {
#ifdef COMP_COUNT
comp = 0;
#endif
mergesort(lists[j]);
#ifdef COMP_COUNT
sum += comp;
if (max < comp)
max = comp;
if (min > comp)
min = comp;
#endif
}
t2 = gethrtime();
printf("n=%d, msec: %f\n", nr, (t2 - t1)/1.0e6/mul);
#ifdef COMP_COUNT
c = 0;
for(i = 0; i < nr; i++)
c += log (i+1);
printf("number of comp, max %d, min %d, avg %d (theoretical: %d)\n", max, min, sum/mul, (int)ceil(c/log(2)) );
#endif
if (argc > 3) {
for(j = 0; j < mul; j++) {
for(i = 0; i < nr; i++) {
printf("%6d ", lists[j].a[i]);
}
printf("\n\n");
}
}
return 0;
}