This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: gomp slowness
On Thu, 2007-10-18 at 12:02 +0800, Biplab Kumar Modak wrote:
> skaller wrote:
> > On Wed, 2007-10-17 at 18:14 +0100, Biagio Lucini wrote:
> >> skaller wrote:
> >
> >> It would be interesting to try with another compiler. Do you have access
> >> to another OpenMP-enabled compiler?
> >
> > Unfortunately no, unless MSVC++ in VS2005 has openMP.
> > I have an Intel licence but they're too tied up with commercial
> > vendors and it doesn't work on Ubuntu (it's built for Fedora and Suse).
> >
> If possible, you can post the source code. I've a MSVC 2005 license (I
> bought it to get OpenMP working with it).
>
> I can then give it a try. I have a dual core PC. :)
OK, attached.
--
John Skaller <skaller at users dot sf dot net>
Felix, successor to C++: http://felix.sf.net
/*
* LU.c
*
* A program to do an LU decomposition.
*
*/
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#define SIZE 800

/*
 * Benchmark driver: fills a SIZE x SIZE matrix with rand() values, performs
 * an in-place LU decomposition (row k is scaled by the pivot, then the
 * trailing sub-matrix is updated), and prints the time spent.
 *
 * No pivoting is performed, so a zero pivot produces inf/NaN entries; the
 * numeric result is never printed, only the timing.
 *
 * The time is measured with clock(), which reports processor time used by
 * the process rather than wall-clock time.  When the update loop runs on
 * several threads the reported figure therefore need not shrink even if the
 * program finishes sooner.
 *
 * argc and argv are unused.  Always returns 0.
 */
int main(int argc, char *argv[])
{
    /* Static storage: A alone is SIZE*SIZE doubles (about 5 MB for SIZE 800),
       which is too large to place safely on the stack. */
    static double A[SIZE][SIZE];
    static double col[SIZE], row[SIZE];
    clock_t start, stop; /* for keeping track of running time */
    int i, j, k, n;

    (void)argc;
    (void)argv;

    /* preload A with random values */
    for (i = 0; i < SIZE; i++) {
        for (j = 0; j < SIZE; j++) {
            A[i][j] = rand();
        }
    }

    /* time start now */
    start = clock();

    /* The core algorithm */
    for (k = 0; k < SIZE - 1; k++) {
        /* col[n] = A[n][k]: snapshot of column k from the pivot downwards */
        for (n = k; n < SIZE; n++) {
            col[n] = A[n][k];
        }

        /* scale row k to the right of the pivot by the pivot col[k] */
        for (n = k + 1; n < SIZE; n++) {
            A[k][n] /= col[k];
        }

        /* row[n] = A[k][n]: snapshot of the scaled row k */
        for (n = k + 1; n < SIZE; n++) {
            row[n] = A[k][n];
        }

        /* Update the trailing sub-matrix: A[i][j] -= A[i][k] * A[k][j].
           The element updates are independent, so rows can be handled by
           different threads.  j must be private: it is declared outside the
           loop and would otherwise be shared (and raced on) by all threads;
           only the parallelised loop's own index i is made private
           automatically. */
#pragma omp parallel for private(j) shared(A, row, col)
        for (i = k + 1; i < SIZE; i++) {
            for (j = k + 1; j < SIZE; j++) {
                A[i][j] -= col[i] * row[j];
            }
        }
    }

    /* we're done so stop the timer */
    stop = clock();
    printf("Completed decomposition in %.3f seconds\n",
           (double)(stop - start) / CLOCKS_PER_SEC);
    return 0;
}
/*
* combined.c
*
* This program combines what we saw before. It calculates e and pi
* and then integrates a polynomial. We also print the elapsed time, in
* clock() ticks (milliseconds where CLOCKS_PER_SEC is 1000), at several
* points in the program. The integrand y=x^2 has been replaced
* with a more complex polynomial 3x^3 + 2x^2 + x.
*/
#include <stdio.h>
#include <time.h>
#define num_steps 10000000 /* steps to use in taylor expansions */
#define int_steps (1<<30) /* steps to use in integration */

/*
 * Computes e and pi from series expansions in two OpenMP sections, then
 * integrates 3x^3 + 2x^2 + x over [0, 2) with a left Riemann sum of
 * int_steps slices shared among the threads, and prints e*pi, the integral
 * and the elapsed time.
 *
 * Progress lines report clock() ticks since the start.  clock() measures
 * processor time used by the process, not wall-clock time, so with several
 * threads the figures do not directly show the speed-up.
 *
 * Without OpenMP the pragmas are ignored and the same code runs serially.
 *
 * argc and argv are unused.  Always returns 0.
 */
int main(int argc, char *argv[])
{
    clock_t start, stop; /* times of beginning and end of procedure */

    /* Values for part 1 */
    double e = 1.0, pi = 0.0, factorial = 1.0, product;

    /* Values for part 2 */
    /* sum must be initialised here: the reduction adds the per-thread
       partial sums onto this original value when the region ends. */
    double sum = 0.0;
    const double dx = 2.0 / (double)int_steps; /* width of one slice */

    (void)argc;
    (void)argv;

    /* start the timer */
    start = clock();

#pragma omp parallel reduction(+: sum)
    {
        /* Declared inside the region so every thread has its own counter;
           a counter declared outside would be shared by both sections. */
        int i;

#pragma omp sections nowait
        {
#pragma omp section
            {
                /* First we calculate e from its taylor expansion */
                printf("e started at %.0f\n", (double)(clock() - start));
                e = 1;
                factorial = 1;
                for (i = 1; i < num_steps; i++) {
                    factorial *= i;
                    e += 1.0 / factorial;
                }
                printf("e done at %.0f\n", (double)(clock() - start));
            }
#pragma omp section
            {
                /* Then we calculate pi from its taylor expansion */
                printf("pi started at %.0f\n", (double)(clock() - start));
                pi = 0;
                for (i = 0; i < num_steps * 20; i++) {
                    pi += 1.0 / (i * 4.0 + 1.0);
                    pi -= 1.0 / (i * 4.0 + 3.0);
                }
                pi = pi * 4.0;
                printf("pi done at %.0f\n", (double)(clock() - start));
            }
        } /* sections */

        /* Now we integrate the function (every thread announces itself) */
        printf("integration started at %.0f\n", (double)(clock() - start));

        /* No nowait here: the implicit barrier at the end of the loop makes
           sure every slice has been added before "done" is reported. */
#pragma omp for
        for (i = 0; i < int_steps; i++) {
            /* x is local to the iteration, hence private to each thread */
            const double x = 2.0 * (double)i / (double)(int_steps);
            /* slice height times slice width */
            sum += (3 * x * x * x + 2 * x * x + x) * dx;
        }

#pragma omp single /* we only need to print this once */
        printf("integration done at %.0f\n", (double)(clock() - start));
    } /* omp parallel */

    /* The end of the parallel region is a barrier, so both sections have
       finished; the product is computed once, by the initial thread. */
    product = e * pi;

    /* we're done so stop the timer */
    stop = clock();
    printf("Values: e*pi = %f, integral = %f\n", product, sum);
    printf("Total elapsed time: %.3f seconds\n",
           (double)(stop - start) / CLOCKS_PER_SEC);
    return 0;
}