This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

Re: gomp slowness

From: skaller <skaller at users dot sourceforge dot net>
To: Biplab Kumar Modak <bkmodak at gmail dot com>
Cc: gcc at gcc dot gnu dot org
Date: Thu, 18 Oct 2007 14:47:44 +1000
Subject: Re: gomp slowness
References: <1192640402.10798.11.camel@rosella.wigram> <4716430A.5010204@swansea.ac.uk> <1192668349.25512.2.camel@rosella.wigram> <ff6m44$9g2$1@ger.gmane.org>

On Thu, 2007-10-18 at 12:02 +0800, Biplab Kumar Modak wrote:
> skaller wrote:
> > On Wed, 2007-10-17 at 18:14 +0100, Biagio Lucini wrote:
> >> skaller wrote:
> > 
> >> It would be interesting to try with another compiler. Do you have access 
> >> to another OpenMP-enabled compiler?
> > 
> > Unfortunately no, unless MSVC++ in VS2005 has openMP.
> > I have an Intel licence but they're too tied up with commercial
> > vendors and it doesn't work on Ubuntu (it's built for Fedora and Suse).
> > 
> If possible, you can post the source code. I've a MSVC 2005 license (I 
> bought it to get OpenMP working with it).
> 
> I can then give it a try. I have a dual core PC. :)

OK, attached.


-- 
John Skaller <skaller at users dot sf dot net>
Felix, successor to C++: http://felix.sf.net

/*
 * LU.c
 * 
 * A program to do an LU decomposition.
 *
 */

#include <time.h>
#include <stdio.h>
#include <stdlib.h>

#define SIZE 800

/*
 * Benchmark entry point: fills a SIZE x SIZE matrix with rand() values,
 * performs an in-place LU decomposition (no pivoting) and prints the time
 * reported by clock() for the decomposition.
 *
 * The command line arguments are not used.  Always returns 0.
 */
int main(int argc, char *argv[])
{
  double start, stop; /* for keeping track of running time */
  /* static storage: SIZE*SIZE doubles is several megabytes at SIZE 800,
     which is too much to rely on fitting in the stack of main() */
  static double A[SIZE][SIZE];
  double col[SIZE], row[SIZE];
  int i, j, k, n;

  (void)argc;
  (void)argv;

  /* preload A with random values */
  for (i = 0; i<SIZE; i++)
    for (j = 0; j<SIZE; j++)
      A[i][j] = rand();

  /* time start now.
     NOTE(review): clock() reports processor time used by the program, not
     wall-clock time, so a multi-threaded run may appear slower than it
     really is; a wall-clock timer is the better measure for OpenMP code. */
  start = clock();

  /* The core algorithm.
     NOTE(review): there is no pivoting, so a zero pivot in col[k] leads
     to a division by zero in the scaling step. */
  for (k = 0; k<SIZE-1; k++) {
    /* set col values to column k of A */
    for (n = k; n<SIZE; n++) {
      col[n] = A[n][k];
    }

    /* scale values of A by multiplier */
    for (n = k+1; n<SIZE; n++) {
      A[k][n] /= col[k];
    }

    /* set row values to row k of A */
    for (n = k+1; n<SIZE; n++) {
      row[n] = A[k][n];
    }

    /* Here we update A by subtracting the appropriate values from row
       and column.  Note that these adjustments to A can be done in
       any order.

       col[i] is A[i][k] and row[j] is the scaled A[k][j], so the
       elimination step is A[i][j] -= col[i] * row[j].

       j must be private: only the loop variable of the parallelised
       loop (i) is made private automatically, and a shared inner
       counter would be raced on by all threads. */
#pragma omp parallel for shared(A, row, col) private(j)
    for (i = k+1; i<SIZE; i++) {
      for (j = k+1; j<SIZE; j++) {
        A[i][j] -= col[i] * row[j];
      }
    }
  }

  /* we're done so stop the timer */
  stop = clock();

  printf("Completed decomposition in %.3f seconds\n", (stop-start)/CLOCKS_PER_SEC);

  return 0;
}

/*
 * combined.c
 *
 * This program combines what we saw before.  It calculates e and pi
 * and then integrates the x^2.  We also print out the elapsed time in
 * clock() ticks at several points in our program.  We have replaced the function y=x^2
 * with a more complex polynomial 3x^3 + 2x^2 + x.
 */

#include <stdio.h>
#include <time.h>

#define num_steps 10000000 /* steps to use in taylor expansions */
#define int_steps (1<<30)  /* steps to use in integration */

/*
 * Demo entry point: inside one OpenMP parallel region it computes e and pi
 * from series expansions in two separate sections, and accumulates a sum
 * over the polynomial 3x^3 + 2x^2 + x with a work-shared loop.  Progress
 * messages print the clock() ticks elapsed since the start; the final
 * lines print e*pi, the accumulated sum and the total time in seconds.
 *
 * The command line arguments are not used.  Always returns 0.
 */
int main(int argc, char *argv[])
{
  double start, stop; /* times of beginning and end of procedure */

  /* Values for part 1 */
  double e = 0.0, pi = 0.0, product;

  /* Values for part 2.  sum must be initialised here: the reduction
     combines the per-thread partial sums with the value the variable had
     before the parallel region. */
  double sum = 0.0;
  int i;

  (void)argc;
  (void)argv;

  /* start the timer */
  start = clock();

#pragma omp parallel reduction(+: sum)
  {
#pragma omp sections nowait
    {
#pragma omp section
      {
        /* First we calculate e from its taylor expansion.  The loop
           counter and factorial are local to this section so that they
           are not shared with the pi section running concurrently. */
        double factorial = 1;
        int n;

        printf("e started at %.0f\n", clock()-start);
        e = 1;
        for (n = 1; n<num_steps; n++) {
          factorial *= n;
          e += 1.0/factorial;
        }
        printf("e done at %.0f\n", clock()-start);
      }
#pragma omp section
      {
        /* Then we calculate pi from its taylor expansion, again with a
           section-local loop counter. */
        int n;

        printf("pi started at %.0f\n", clock()-start);

        pi = 0;
        for (n = 0; n < num_steps*20; n++) {
          pi += 1.0/(n*4.0 + 1.0);
          pi -= 1.0/(n*4.0 + 3.0);
        }
        pi = pi * 4.0;
        printf("pi done at %.0f\n", clock()-start);
      }
    } /* sections */

    /* Now we integrate the function.  Each thread's private copy of sum
       starts at zero because of the reduction clause. */
    printf("integration started at %.0f\n", clock()-start);

    /* No nowait here: the barrier at the end of the loop makes sure the
       whole integration has finished before "done" is reported. */
#pragma omp for
    for (i = 0; i<int_steps; i++) {
      /* x is declared inside the loop body so every thread has its own */
      double x = 2.0 * (double)i / (double)(int_steps); /* value of x */

      /* NOTE(review): x advances in steps of 2/int_steps but each term
         is weighted by 1/int_steps, so this accumulates the mean of the
         polynomial over [0, 2) rather than its integral -- confirm which
         was intended. */
      sum += ( 3*x*x*x + 2*x*x + x ) / int_steps;
    }

#pragma omp single /* we only need to print this once */
    printf("integration done at %.0f\n", clock()-start);

  } /* omp parallel */

  /* The end of the parallel region is a barrier, so e and pi are complete
     here; doing the multiplication once, outside the region, avoids every
     thread writing to product. */
  product = e * pi;

  /* we're done so stop the timer */
  stop = clock();

  printf("Values: e*pi = %f,  integral = %f\n", product, sum);
  /* clock() counts in units of 1/CLOCKS_PER_SEC, which is not 1000 on
     every platform */
  printf("Total elapsed time: %.3f seconds\n", (stop-start)/CLOCKS_PER_SEC);

  return 0;
}

Follow-Ups:
- Re: gomp slowness
  - From: Biplab Kumar Modak
- Re: gomp slowness
  - From: Jakub Jelinek
- Re: gomp slowness
  - From: Tim Prince
- Re: gomp slowness
  - From: Tomash Brechko

References:
- gomp slowness
  - From: skaller
- Re: gomp slowness
  - From: Biagio Lucini
- Re: gomp slowness
  - From: skaller
- Re: gomp slowness
  - From: Biplab Kumar Modak

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]