This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

Re: [4.5] Better support for widening multiplies

From: Bernd Schmidt <bernds_cb1 at t-online dot de>
To: Paolo Bonzini <bonzini at gnu dot org>
Cc: Steven Bosscher <stevenb dot gcc at gmail dot com>, GCC Patches <gcc-patches at gcc dot gnu dot org>
Date: Tue, 17 Feb 2009 22:00:04 +0100
Subject: Re: [4.5] Better support for widening multiplies
References: <4999B411.6010902@t-online.de> <4999CE9C.2020508@gmail.com> <4999E204.4040808@t-online.de> <4999F629.7070402@gmail.com> <499A0D24.3020801@t-online.de> <499AF2E7.2040409@gnu.org>

Paolo Bonzini wrote:
> I don't ask you to look at it right now, just if you post some testcases
> (any target will do) I'll check if my code optimizes them.

Ok.  The source is from
  http://www.edn.com/archives/1997/060597/12df_02.htm

I'm attaching edn.i below.

Note that you typically won't see a difference in generated assembly yet
between the two versions of my patch, so you'll have to compare RTL.  I
have some future plans for which it would be useful to get rid of the
unnecessary sign extensions (unfortunately the Blackfin allows fewer
base registers for a memory load with extension than for a plain
halfword load).

Bernd
-- 
This footer brought to you by insane German lawmakers.
Analog Devices GmbH      Wilhelm-Wagenfeld-Str. 6      80807 Muenchen
Sitz der Gesellschaft Muenchen, Registergericht Muenchen HRB 40368
Geschaeftsfuehrer Thomas Wessel, William A. Martin, Margaret Seif

# 1 "edn.c"
# 1 "<built-in>"
# 1 "<command line>"
# 1 "edn.c"







# 1 "edn.h" 1



/* Adds (scaler * x[i]) >> 15 to each of y[0..149]. */
void vec_mpy1(short y[], const short x[], short scaler);
/* Over 150 elements, adds a[i] * b[i] into *sum; returns sqr plus the sum of b[i] * b[i]. */
int mac(const short *a, const short *b, int sqr, int *sum);
/* Writes output[0..49]; output[i] is the 50-term sum of array1[i + j] * coeff[j], shifted right by 15. */
void fir(const short array1[], const short coeff[], short output[]);
/* Writes y[0..99], two outputs per outer step, each a 32-term sum of x[] * h[] shifted right by 15. */
void fir_no_red_ld(const short x[], const short h[], short y[]);
/* Updates b[0..n-1] in place from k[] and the running value f; returns the final f converted to int. */
int latsynth(short b[], const short k[], long int n, long int f);
/* Runs 50 stages, each reading 4 entries of coefs and updating 2 entries of state; stores the final sample to *optr. */
void iir1(const short *coefs, const short *input, short *optr, short *state);
/* For j in bitchanged+1..numbasis, adds or subtracts d[bitchanged * ddim + j] to g; returns g. */
int codebook(int mask, int bitchanged, int numbasis, int codeword, int g, const short *d, short ddim, short theta);
/* Rewrites 64 shorts at d in place using r[0..11]. */
void jpegdct(short *d, const short *r);
# 9 "edn.c" 2





/*
 * Scale-and-accumulate over a fixed 150-element vector.
 *
 * For every index n in 0..149 the product scaler * x[n] is formed in int,
 * shifted right by 15, and added to y[n]; the sum is narrowed back to
 * short when stored.
 */
void vec_mpy1(short y[], const short x[], short scaler)
{
 int n = 0;

 while (n < 150) {
  const int scaled = (scaler * x[n]) >> 15;

  y[n] = y[n] + scaled;
  n++;
 }
}





/*
 * Multiply-accumulate over two fixed 150-element vectors.
 *
 * Starting from *sum, accumulates a[n] * b[n] and writes the total back
 * through sum.  Starting from sqr, accumulates b[n] * b[n] and returns
 * that total.  *sum is only written once, after the loop.
 */
int mac(const short *a, const short *b, int sqr, int *sum)
{
 const short *pa = a;
 const short *pb = b;
 const short *const b_end = b + 150;
 int dot = *sum;
 int energy = sqr;

 while (pb != b_end) {
  const int bv = *pb++;

  dot += bv * *pa++;
  energy += bv * bv;
 }

 *sum = dot;
 return energy;
}





/*
 * 50-tap filter producing 50 outputs.
 *
 * output[n] (n in 0..49) is the int sum of array1[n + j] * coeff[j] for
 * j in 0..49, shifted right by 15 and narrowed to short.  The highest
 * input index read is array1[98].
 */
void fir(const short array1[], const short coeff[], short output[])
{
 int n;

 for (n = 0; n < 50; ++n) {
  const short *window = array1 + n;
  int acc = 0;
  int tap;

  for (tap = 0; tap < 50; ++tap)
   acc += window[tap] * coeff[tap];

  output[n] = acc >> 15;
 }
}
# 64 "edn.c"
/*
 * 32-tap filter that produces two adjacent outputs per outer step while
 * carrying one input sample over between tap pairs instead of reloading it.
 *
 * For each even j in 0..98:
 *   y[j]     = (sum over t in 0..31 of x[j + t]     * h[t]) >> 15
 *   y[j + 1] = (sum over t in 0..31 of x[j + 1 + t] * h[t]) >> 15
 * Sums are kept in long; each product is formed in int.  The highest
 * input index read is x[130].
 */
void fir_no_red_ld(const short x[], const short h[], short y[])
{
 int out;

 for (out = 0; out < 100; out += 2) {
  const short *win = x + out;
  long acc_even = 0;
  long acc_odd = 0;
  short carried = win[0];
  int tap;

  for (tap = 0; tap < 32; tap += 2) {
   const short next = win[tap + 1];
   const short c_lo = h[tap];
   const short c_hi = h[tap + 1];

   acc_even += carried * c_lo;
   acc_odd += next * c_lo;
   /* This load is reused as the first sample of the next tap pair. */
   carried = win[tap + 2];
   acc_even += next * c_hi;
   acc_odd += carried * c_hi;
  }

  y[out] = acc_even >> 15;
  y[out + 1] = acc_odd >> 15;
 }
}
# 96 "edn.c"
/*
 * Walks stages n-1 down to 0, updating b[] in place.
 *
 * f is first reduced by b[n-1] * k[n-1].  Then for each stage i from n-2
 * down to 0, f is reduced by b[i] * k[i] and b[i + 1] is set to
 * b[i] + ((k[i] * (f >> 16)) >> 16).  Finally b[0] receives f >> 16.
 * Returns the final f converted to int.
 */
int latsynth(short b[], const short k[], long int n, long int f)
{
 int stage;

 f = f - b[n - 1] * k[n - 1];

 stage = (int)(n - 2);
 while (stage >= 0) {
  const short coeff = k[stage];
  const short tap = b[stage];

  f = f - tap * coeff;
  /* coeff * (f >> 16) is evaluated in long because f is long. */
  b[stage + 1] = tap + ((coeff * (f >> 16)) >> 16);
  stage--;
 }

 b[0] = f >> 16;
 return f;
}




/*
 * Passes one input sample through a cascade of 50 stages.
 *
 * Stage s reads coefs[4s .. 4s+3] and state[2s .. 2s+1]:
 *   t = x + ((c[2] * s0 + c[3] * s1) >> 15)
 *   x = t + ((c[0] * s0 + c[1] * s1) >> 15)
 * then stores s0 into state[2s+1] and t into state[2s].  Both t and x are
 * narrowed to short at every stage.  The final x is written to *optr.
 */
void iir1(const short *coefs, const short *input, short *optr,
 short *state)
{
 short sample = input[0];
 int stage;

 for (stage = 0; stage < 50; ++stage) {
  const short *c = coefs + 4 * stage;
  short *s = state + 2 * stage;
  const short s0 = s[0];
  const short s1 = s[1];
  const short mid = sample + ((c[2] * s0 + c[3] * s1) >> 15);

  sample = mid + ((c[0] * s0 + c[1] * s1) >> 15);

  s[1] = s0;
  s[0] = mid;
 }

 *optr = sample;
}




/*
 * Adjusts g by a run of entries from d and returns the result.
 *
 * For each j from bitchanged + 1 through numbasis (inclusive), the entry
 * d[bitchanged * ddim + j] is added to g when theta equals the 0/1 value
 * of the tested bit of codeword, and subtracted otherwise.  The tested
 * bit starts at mask << 1 and moves one position left per step.
 */
int codebook(int mask, int bitchanged, int numbasis, int codeword,
  int g, const short *d, short ddim, short theta)
{
 int probe = mask << 1;
 int col = bitchanged + 1;

 while (col <= numbasis) {
  const short entry = *(d + bitchanged * ddim + col);
  const int bit_is_set = (codeword & probe) != 0;

  if (theta != bit_is_set)
   g -= entry;
  else
   g += entry;

  probe <<= 1;
  col++;
 }

 return g;
}





/*
 * Transforms 64 shorts at d in place, using the twelve coefficients
 * r[0..11] (presumably an 8x8 forward DCT, as the name suggests --
 * confirm against the original benchmark).
 *
 * The outer loop runs exactly twice:
 *   pass 1: k = 1, m = 0, n = 13, p = 8  (elements 1 apart, groups 8 apart)
 *   pass 2: k = 8, m = 3, n = 16, p = 1  (elements 8 apart, groups 1 apart)
 * k is the stride between the 8 elements of one group, p the step from one
 * group to the next, and m / n the right-shift amounts applied to results.
 *
 * d is a local copy of the caller's pointer; the caller's pointer is not
 * changed.
 */
void
jpegdct(short *d, const short *r)
{
 int t[12];
 int i, j, k, m, n, p;

 /*
  * After the 8 inner steps of pass 1, d has advanced by 8 * 8 = 64, so the
  * "d -= 64" in the step expression restores it for pass 2.  After pass 2
  * (advance of 8 * 1 = 8) the same step leaves d 56 elements before its
  * starting value; it is not dereferenced after that.
  * NOTE(review): forming that final pointer is formally out of range --
  * confirm whether this matters for the intended use.
  */
 for (k = 1, m = 0, n = 13, p = 8;
  k <= 8;
  k += 7, m += 3, n += 3, p -= 7, d -= 64) {
  for (i = 0; i < 8; i++, d += p) {
   /* t[0..3]: sums of mirrored pairs; t[4..7]: their differences. */
   for (j = 0; j < 4; j++) {
    t[j] = d[k * j] + d[k * (7 - j)];
    t[7 - j] = d[k * j] - d[k * (7 - j)];
   }
   /* Outputs 0, 4, 2, 6 are built from the sums t[0..3]. */
   t[8] = t[0] + t[3];
   t[9] = t[0] - t[3];
   t[10] = t[1] + t[2];
   t[11] = t[1] - t[2];
   d[0] = (t[8] + t[10]) >> m;
   d[4 * k] = (t[8] - t[10]) >> m;
   /* The (short) cast applies to the parenthesised sum only, before the multiply. */
   t[8] = (short) (t[11] + t[9]) * r[10];
   d[2 * k] = t[8] + (short) ((t[9] * r[9]) >> n);
   d[6 * k] = t[8] + (short) ((t[11] * r[11]) >> n);
   /*
    * Outputs 7, 5, 3, 1 are built from the differences t[4..7].
    * t[0..3] and t[8] are reused as scratch from here on, so the
    * statement order below matters.
    */
   t[0] = (short) (t[4] + t[7]) * r[2];
   t[1] = (short) (t[5] + t[6]) * r[0];
   t[2] = t[4] + t[6];
   t[3] = t[5] + t[7];
   t[8] = (short) (t[2] + t[3]) * r[8];
   t[2] = (short) t[2] * r[1] + t[8];
   t[3] = (short) t[3] * r[3] + t[8];
   /*
    * NOTE(review): the cast binds tighter than >>, so each sum is first
    * narrowed to short and only then shifted right by n (13 or 16).
    * Confirm this is the intended order.
    */
   d[7 * k] = (short) (t[4] * r[4] + t[0] + t[2]) >> n;
   d[5 * k] = (short) (t[5] * r[6] + t[1] + t[3]) >> n;
   d[3 * k] = (short) (t[6] * r[5] + t[1] + t[2]) >> n;
   d[1 * k] = (short) (t[7] * r[7] + t[0] + t[3]) >> n;
  }
 }
}

References:
- [4.5] Better support for widening multiplies
  - From: Bernd Schmidt
- Re: [4.5] Better support for widening multiplies
  - From: Dave Korn
- Re: [4.5] Better support for widening multiplies
  - From: Bernd Schmidt
- Re: [4.5] Better support for widening multiplies
  - From: Dave Korn
- Re: [4.5] Better support for widening multiplies
  - From: Bernd Schmidt
- Re: [4.5] Better support for widening multiplies
  - From: Paolo Bonzini

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]