This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: -fnew-ra -- which architectures?


Daniel Berlin <dberlin@dberlin.org> writes:

> > Thanks, this fixes the problem. Improvements are moderate on my test
> > case, though:
> > 
> >                   number of stack writes
> > gcc -O1             0
> > Compaq Compiler    11 (the caller saved registers)
> > gcc -O3           119
> > gcc -O3 -fnew-ra  101
> > 
> > Are you interested in the test case?
>
> Sure, if you like, though a 20% reduction is nothing to sneeze
> at.

It would be great normally, but in this case, most or all of the
stores could be eliminated, as -O1 shows, and I had hoped the new
regalloc would manage that.

> There are also a few subreg-related changes to df.c that might help,
> if your test case's rtl has subregs.

It doesn't seem to affect this function...

-- 
	Falk

/*
 * Fixed-point scaling.  Intermediate values carry ALLBITS (= 22) fractional
 * bits; ROUND() removes them again before a result is stored as a short.
 */
#define CONST_BITS_JA 11
#define VAL_BITS 11
#define ALLBITS (CONST_BITS_JA + VAL_BITS)
#define TWO (1 + CONST_BITS_JA)

/* Integer multipliers.  j_rev_dct() below uses only C6, C4 and R. */
#define C6  1567
#define C4  2896
#define Q   2217
#define C4Q 3135
#define R   5352
#define C4R 7568

/*
 * Working integer type.  It has to hold products such as R * (355 * coeff),
 * which exceed 32 bits for ordinary short inputs.  `long long` is at least
 * 64 bits on every ABI, whereas plain `long` is only 32 bits on some.
 */
#define INT long long

/* Half of one output unit, expressed with ALLBITS fractional bits. */
#define DOTFIVE ((1 << ALLBITS) / 2)

/*
 * Drop the ALLBITS fractional bits.  NOTE: `>>` applied to a negative signed
 * value is implementation-defined in C; the results documented here assume
 * the usual arithmetic (sign-propagating) shift, i.e. rounding toward
 * negative infinity.
 */
#define TOSHORT(x) ((x) >> ALLBITS)

/* Round to nearest by adding one half before dropping the fraction. */
#define ROUND(x) TOSHORT((x) + DOTFIVE)

/*
 * Expand one input coefficient into the four terms that, taken with both
 * signs, form its eight per-position contributions to the output.
 *
 * coeff  input coefficient; 0 yields four zero terms.
 * full   receives the scaled coefficient shifted left by CONST_BITS_JA.
 * t2, t1, t7
 *        receive the three derived terms (see the formulas in the body).
 */
static void j_rev_dct_terms(short coeff, INT *full, INT *t2, INT *t1, INT *t7)
{
    INT scaled;
    INT c6_part;

    if (!coeff) {
        *full = 0;
        *t2 = 0;
        *t1 = 0;
        *t7 = 0;
        return;
    }

    /* Widen before multiplying so no product is formed in plain int. */
    scaled = 355 * (INT)coeff;
    c6_part = C6 * scaled;

    *full = scaled * ((INT)1 << CONST_BITS_JA);
    *t2 = (R * scaled - c6_part) - *full;
    *t1 = *t2 - C4 * scaled;
    *t7 = -c6_part - *t1;
}

/*
 * Store eight consecutive outputs at dst[0..7]: `row` combined with the
 * eight signed column terms, each rounded back to integer scale.
 *
 * The narrowing to short is implementation-defined if a rounded value does
 * not fit; nothing is clamped here.
 */
static void j_rev_dct_store_row(short *dst, INT row,
                                INT full, INT t2, INT t1, INT t7)
{
    dst[0] = (short)ROUND(row + full);
    dst[1] = (short)ROUND(row + t2);
    dst[2] = (short)ROUND(row - t1);
    dst[3] = (short)ROUND(row - t7);
    dst[4] = (short)ROUND(row + t7);
    dst[5] = (short)ROUND(row + t1);
    dst[6] = (short)ROUND(row - t2);
    dst[7] = (short)ROUND(row - full);
}

/*
 * Rebuild a 64-entry block in place from three of its input coefficients.
 *
 * Only shortcoeff[0], shortcoeff[1] and shortcoeff[8] are read; every other
 * input entry is ignored.  All 64 entries shortcoeff[0..63] are overwritten,
 * so the buffer must hold at least 64 shorts.
 *
 * Output entry [8*r + c] is ROUND(dc + row_term[r] + col_term[c]), where
 * dc comes from shortcoeff[0], row_term from shortcoeff[8] and col_term
 * from shortcoeff[1].
 *
 * NOTE(review): judging by the name this is a reduced reverse DCT for blocks
 * with only these three coefficients set -- confirm with the author.
 */
void j_rev_dct(short *shortcoeff)
{
    INT dc;
    INT row_full, row_t2, row_t1, row_t7;
    INT col_full, col_t2, col_t1, col_t7;

    /*
     * All three inputs are consumed before the first store, because the
     * stores below overwrite shortcoeff[0], [1] and [8].
     *
     * The DC product is formed in INT: evaluated in plain int (as
     * 256 * coeff * 2048) it overflows once |coeff| >= 4096 where int is
     * 32 bits, which is undefined behavior.
     */
    dc = (INT)shortcoeff[0] * 256 * ((INT)1 << CONST_BITS_JA);
    j_rev_dct_terms(shortcoeff[8], &row_full, &row_t2, &row_t1, &row_t7);
    j_rev_dct_terms(shortcoeff[1], &col_full, &col_t2, &col_t1, &col_t7);

    j_rev_dct_store_row(shortcoeff +  0, dc + row_full,
                        col_full, col_t2, col_t1, col_t7);
    j_rev_dct_store_row(shortcoeff +  8, dc + row_t2,
                        col_full, col_t2, col_t1, col_t7);
    j_rev_dct_store_row(shortcoeff + 16, dc - row_t1,
                        col_full, col_t2, col_t1, col_t7);
    j_rev_dct_store_row(shortcoeff + 24, dc - row_t7,
                        col_full, col_t2, col_t1, col_t7);
    j_rev_dct_store_row(shortcoeff + 32, dc + row_t7,
                        col_full, col_t2, col_t1, col_t7);
    j_rev_dct_store_row(shortcoeff + 40, dc + row_t1,
                        col_full, col_t2, col_t1, col_t7);
    j_rev_dct_store_row(shortcoeff + 48, dc - row_t2,
                        col_full, col_t2, col_t1, col_t7);
    j_rev_dct_store_row(shortcoeff + 56, dc - row_full,
                        col_full, col_t2, col_t1, col_t7);
}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]