This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: -fnew-ra -- which architectures?
- From: Falk Hueffner <falk dot hueffner at student dot uni-tuebingen dot de>
- To: Daniel Berlin <dberlin at dberlin dot org>
- Cc: gcc at gcc dot gnu dot org
- Date: 16 Jul 2002 02:00:17 +0200
- Subject: Re: -fnew-ra -- which architectures?
- References: <Pine.LNX.4.44.0207151920300.8950-100000@dberlin.org>
Daniel Berlin <dberlin@dberlin.org> writes:
> > Thanks, this fixes the problem. Improvements are moderate on my test
> > case, though:
> >
> > number of stack writes
> > gcc -O1 0
> > Compaq Compiler 11 (the caller saved registers)
> > gcc -O3 119
> > gcc -O3 -fnew-ra 101
> >
> > Are you interested in the test case?
>
> Sure, if you like, though a 20% reduction is nothing to sneeze
> at.
It would be great normally, but in this case, most or all of the
stores could be eliminated, as -O1 shows, and I had hoped the new
regalloc would manage that.
> There are also a few subreg-related changes to df.c that might help,
> if your test case's RTL has subregs.
It doesn't seem to affect this function...
--
Falk
/*
 * Fixed-point parameters for j_rev_dct().
 *
 * CONST_BITS_JA is the power-of-two scale applied to terms that are not
 * multiplied by one of the C* / R constants; ALLBITS is the total number of
 * bits discarded when a result is converted back to a short.
 * TWO, Q, C4Q and C4R are not referenced by j_rev_dct() in this file.
 * NOTE(review): C6/C4/Q/R look like cosine-derived fixed-point factors --
 * confirm against the original derivation before changing them.
 */
#define CONST_BITS_JA 11
#define VAL_BITS 11
#define ALLBITS (CONST_BITS_JA + VAL_BITS)
#define TWO (1 + CONST_BITS_JA)
#define C6 1567
#define C4 2896
#define Q 2217
#define C4Q 3135
#define R 5352
#define C4R 7568
/* Accumulator type used for all intermediate sums and products. */
#define INT long
/* Half of one output unit, added before the shift so TOSHORT rounds. */
#define DOTFIVE ((1 << ALLBITS) / 2)
/* Drop the ALLBITS fractional bits.  Right-shifting a negative value is
   implementation-defined in C; this relies on an arithmetic shift. */
#define TOSHORT(x) ((x) >> ALLBITS)
#define ROUND(x) TOSHORT((x) + DOTFIVE)

/*
 * Expand one input coefficient into the eight signed offsets it contributes
 * along one axis of the 8x8 output.  The offsets are antisymmetric:
 * offs[7 - i] == -offs[i].  A zero coefficient yields eight zeros, so no
 * special case is needed for it.
 */
static void odd_term_offsets(short coeff, INT offs[8])
{
    const INT scaled = (INT)355 * coeff;
    const INT c6_term = C6 * scaled;
    const INT plain = scaled * (1 << CONST_BITS_JA);
    const INT t2 = (R * scaled - c6_term) - plain;
    const INT t1 = t2 - C4 * scaled;
    const INT t7 = -c6_term - t1;

    offs[0] = plain;
    offs[1] = t2;
    offs[2] = -t1;
    offs[3] = -t7;
    offs[4] = t7;
    offs[5] = t1;
    offs[6] = -t2;
    offs[7] = -plain;
}

/*
 * In-place transform of an 8x8 block stored row-major in shortcoeff[0..63].
 *
 * Only three inputs are read: shortcoeff[0], shortcoeff[1] and shortcoeff[8];
 * every other input value is ignored.  All 64 entries are overwritten with
 *
 *     ROUND(dc + row_offs[r] + col_offs[c])      at index 8 * r + c
 *
 * where row_offs is derived from shortcoeff[8] and col_offs from
 * shortcoeff[1].
 *
 * shortcoeff must point to at least 64 writable shorts.
 */
void j_rev_dct(short* shortcoeff)
{
    INT row_offs[8];
    INT col_offs[8];

    /* Widen before multiplying: in plain int this product overflows (UB)
       once |shortcoeff[0]| reaches 4096.  With a 32-bit INT the same limit
       still applies. */
    const INT dc = (INT)256 * shortcoeff[0] * (1 << CONST_BITS_JA);

    /* All inputs are consumed here, before any output is stored, because
       the block is transformed in place. */
    odd_term_offsets(shortcoeff[8], row_offs);
    odd_term_offsets(shortcoeff[1], col_offs);

    for (int r = 0; r < 8; r++) {
        const INT row_base = dc + row_offs[r];

        for (int c = 0; c < 8; c++) {
            shortcoeff[8 * r + c] = (short)ROUND(row_base + col_offs[c]);
        }
    }
}