This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: big slowdown in egcs-1.1.2->gcc-2.95 on alpha
- To: lucier at math dot purdue dot edu (Brad Lucier)
- Subject: Re: big slowdown in egcs-1.1.2->gcc-2.95 on alpha
- From: Joern Rennecke <amylaar at cygnus dot co dot uk>
- Date: Thu, 4 Nov 1999 06:29:25 +0000 (GMT)
- Cc: rth at cygnus dot com, lucier at math dot purdue dot edu, gcc at gcc dot gnu dot org, gcc-bugs at gcc dot gnu dot org, staff at math dot purdue dot edu, hosking at cs dot purdue dot edu, wilker at math dot purdue dot edu, bernds at cygnus dot com, gcc-patches at gcc dot gnu dot org
> Each sample counts as 0.000976562 seconds.
> % cumulative self self total
> time seconds seconds calls ms/call ms/call name
> 44.25 56.94 56.94 16 3558.59 3559.59 prune_preferences
> 8.93 68.43 11.49 202789 0.06 0.06 record_one_conflict
> 2.05 71.07 2.64 40979048 0.00 0.00 bitmap_bit_p
> 1.88 73.49 2.42 8 303.10 16018.62 yyparse
> 1.67 75.65 2.15 27806760 0.00 0.00 count_pseudo
> 1.58 77.68 2.03 15315 0.13 0.47 order_regs_for_reload
I think we don't have to worry about the quadratic algorithm for now if
we can gain a linear factor of 32. prune_preferences is so slow because
it tests every bit in the array separately. We can process them one
word at a time instead, like record_one_conflict / record_conflicts do.
Moreover, CONFLICTP and SET_CONFLICT use a signed division instead of a
shift. A simple cast to unsigned should remove some unnecessary overhead.
Could you benchmark this patch?
Thu Nov 4 06:15:40 1999 J"orn Rennecke <amylaar@cygnus.co.uk>
* global.c (CONFLICTP, SET_CONFLICT): Avoid signed division.
(mirror_conflicts): New function.
(global_alloc): Call it.
(expand_preferences): Remove redundant CONFLICTP test.
(find_reg, dump_conflicts): Likewise.
(prune_preferences): Process conflicts one word at a time.
Index: global.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/global.c,v
retrieving revision 1.39
diff -p -r1.39 global.c
*** global.c 1999/11/01 23:19:44 1.39
--- global.c 1999/11/04 06:12:01
*************** static int allocno_row_words;
*** 127,138 ****
/* Two macros to test or store 1 in an element of `conflicts'. */
#define CONFLICTP(I, J) \
! (conflicts[(I) * allocno_row_words + (J) / INT_BITS] \
! & ((INT_TYPE) 1 << ((J) % INT_BITS)))
#define SET_CONFLICT(I, J) \
! (conflicts[(I) * allocno_row_words + (J) / INT_BITS] \
! |= ((INT_TYPE) 1 << ((J) % INT_BITS)))
/* Set of hard regs currently live (during scan of all insns). */
--- 127,138 ----
/* Two macros to test or store 1 in an element of `conflicts'. */
#define CONFLICTP(I, J) \
! (conflicts[(I) * allocno_row_words + (unsigned)(J) / INT_BITS] \
! & ((INT_TYPE) 1 << ((unsigned)(J) % INT_BITS)))
#define SET_CONFLICT(I, J) \
! (conflicts[(I) * allocno_row_words + (unsigned)(J) / INT_BITS] \
! |= ((INT_TYPE) 1 << ((unsigned)(J) % INT_BITS)))
/* Set of hard regs currently live (during scan of all insns). */
*************** static HARD_REG_SET eliminable_regset;
*** 259,264 ****
--- 259,265 ----
static int allocno_compare PROTO((const PTR, const PTR));
static void global_conflicts PROTO((void));
+ static void mirror_conflicts PROTO((void));
static void expand_preferences PROTO((void));
static void prune_preferences PROTO((void));
static void find_reg PROTO((int, HARD_REG_SET, int, int, int));
*************** global_alloc (file)
*** 486,491 ****
--- 487,494 ----
global_conflicts ();
+ mirror_conflicts ();
+
/* Eliminate conflicts between pseudos and eliminable registers. If
the register is not eliminated, the pseudo won't really be able to
live in the eliminable register, so the conflict doesn't matter.
*************** expand_preferences ()
*** 818,826 ****
&& GET_CODE (XEXP (link, 0)) == REG
&& reg_allocno[REGNO (XEXP (link, 0))] >= 0
&& ! CONFLICTP (reg_allocno[REGNO (SET_DEST (set))],
! reg_allocno[REGNO (XEXP (link, 0))])
! && ! CONFLICTP (reg_allocno[REGNO (XEXP (link, 0))],
! reg_allocno[REGNO (SET_DEST (set))]))
{
int a1 = reg_allocno[REGNO (SET_DEST (set))];
int a2 = reg_allocno[REGNO (XEXP (link, 0))];
--- 821,827 ----
&& GET_CODE (XEXP (link, 0)) == REG
&& reg_allocno[REGNO (XEXP (link, 0))] >= 0
&& ! CONFLICTP (reg_allocno[REGNO (SET_DEST (set))],
! reg_allocno[REGNO (XEXP (link, 0))]))
{
int a1 = reg_allocno[REGNO (SET_DEST (set))];
int a2 = reg_allocno[REGNO (XEXP (link, 0))];
*************** prune_preferences ()
*** 856,861 ****
--- 857,863 ----
{
int i, j;
int allocno;
+ int *allocno_to_order = (int *) xmalloc (max_allocno * sizeof (int));
/* Scan least most important to most important.
For each allocno, remove from preferences registers that cannot be used,
*************** prune_preferences ()
*** 864,872 ****
for (i = max_allocno - 1; i >= 0; i--)
{
! HARD_REG_SET temp, temp2;
allocno = allocno_order[i];
COPY_HARD_REG_SET (temp, hard_reg_conflicts[allocno]);
if (allocno_calls_crossed[allocno] == 0)
--- 866,875 ----
for (i = max_allocno - 1; i >= 0; i--)
{
! HARD_REG_SET temp;
allocno = allocno_order[i];
+ allocno_to_order[allocno] = i;
COPY_HARD_REG_SET (temp, hard_reg_conflicts[allocno]);
if (allocno_calls_crossed[allocno] == 0)
*************** prune_preferences ()
*** 881,909 ****
AND_COMPL_HARD_REG_SET (hard_reg_preferences[allocno], temp);
AND_COMPL_HARD_REG_SET (hard_reg_copy_preferences[allocno], temp);
AND_COMPL_HARD_REG_SET (hard_reg_full_preferences[allocno], temp);
/* Merge in the preferences of lower-priority registers (they have
already been pruned). If we also prefer some of those registers,
don't exclude them unless we are of a smaller size (in which case
we want to give the lower-priority allocno the first chance for
these registers). */
CLEAR_HARD_REG_SET (temp);
CLEAR_HARD_REG_SET (temp2);
! for (j = i + 1; j < max_allocno; j++)
! if (CONFLICTP (allocno, allocno_order[j])
! || CONFLICTP (allocno_order[j], allocno))
! {
! if (allocno_size[allocno_order[j]] <= allocno_size[allocno])
! IOR_HARD_REG_SET (temp,
! hard_reg_full_preferences[allocno_order[j]]);
! else
! IOR_HARD_REG_SET (temp2,
! hard_reg_full_preferences[allocno_order[j]]);
! }
AND_COMPL_HARD_REG_SET (temp, hard_reg_full_preferences[allocno]);
IOR_HARD_REG_SET (temp, temp2);
COPY_HARD_REG_SET (regs_someone_prefers[allocno], temp);
}
}
/* Assign a hard register to ALLOCNO; look for one that is the beginning
--- 884,932 ----
AND_COMPL_HARD_REG_SET (hard_reg_preferences[allocno], temp);
AND_COMPL_HARD_REG_SET (hard_reg_copy_preferences[allocno], temp);
AND_COMPL_HARD_REG_SET (hard_reg_full_preferences[allocno], temp);
+ }
+ for (i = max_allocno - 1; i >= 0; i--)
+ {
/* Merge in the preferences of lower-priority registers (they have
already been pruned). If we also prefer some of those registers,
don't exclude them unless we are of a smaller size (in which case
we want to give the lower-priority allocno the first chance for
these registers). */
+ HARD_REG_SET temp, temp2;
+ INT_TYPE *p;
+ int allocno2, allocno3;
+
+ allocno = allocno_order[i];
+ p = conflicts + allocno * allocno_row_words;
+
CLEAR_HARD_REG_SET (temp);
CLEAR_HARD_REG_SET (temp2);
!
! for (j = allocno_row_words - 1, allocno2 = 0; j >= 0;
! j--, allocno2 += INT_BITS)
! {
! unsigned INT_TYPE word = (unsigned INT_TYPE) *p++;
!
! for (allocno3 = allocno2; word; word >>= 1, allocno3++)
! {
! if (word & 1 && allocno_to_order[allocno3] > i)
! {
! if (allocno_size[allocno3] <= allocno_size[allocno])
! IOR_HARD_REG_SET (temp,
! hard_reg_full_preferences[allocno3]);
! else
! IOR_HARD_REG_SET (temp2,
! hard_reg_full_preferences[allocno3]);
! }
! }
! }
!
AND_COMPL_HARD_REG_SET (temp, hard_reg_full_preferences[allocno]);
IOR_HARD_REG_SET (temp, temp2);
COPY_HARD_REG_SET (regs_someone_prefers[allocno], temp);
}
+ free (allocno_to_order);
}
/* Assign a hard register to ALLOCNO; look for one that is the beginning
*************** find_reg (allocno, losers, alt_regs_p, a
*** 1219,1225 ****
mark it as conflicting with the hard regs this one occupies. */
lim = allocno;
for (j = 0; j < max_allocno; j++)
! if (CONFLICTP (lim, j) || CONFLICTP (j, lim))
{
IOR_HARD_REG_SET (hard_reg_conflicts[j], this_reg);
}
--- 1242,1248 ----
mark it as conflicting with the hard regs this one occupies. */
lim = allocno;
for (j = 0; j < max_allocno; j++)
! if (CONFLICTP (j, lim))
{
IOR_HARD_REG_SET (hard_reg_conflicts[j], this_reg);
}
*************** record_conflicts (allocno_vec, len)
*** 1327,1332 ****
--- 1350,1387 ----
conflicts[ialloc_prod + j] |= allocnos_live[j];
}
}
+
+ /* If CONFLICTP (i, j) is true, make sure CONFLICTP (j, i) is also true. */
+ static void
+ mirror_conflicts ()
+ {
+ register int i, j;
+ int rw = allocno_row_words;
+ int rwb = rw * INT_BITS;
+ INT_TYPE *p = conflicts;
+ INT_TYPE *q0 = conflicts, *q1, *q2;
+ unsigned INT_TYPE mask;
+
+ for (i = max_allocno - 1, mask = 1; i >= 0; i--, mask <<= 1)
+ {
+ if (! mask)
+ {
+ mask = 1;
+ q0++;
+ }
+ for (j = allocno_row_words - 1, q1 = q0; j >= 0; j--, q1 += rwb)
+ {
+ unsigned INT_TYPE word;
+
+ for (word = (unsigned INT_TYPE) *p++, q2 = q1; word;
+ word >>= 1, q2 += rw)
+ {
+ if (word & 1)
+ *q2 |= mask;
+ }
+ }
+ }
+ }
/* Handle the case where REG is set by the insn being scanned,
during the forward scan to accumulate conflicts.
*************** dump_conflicts (file)
*** 1805,1811 ****
register int j;
fprintf (file, ";; %d conflicts:", allocno_reg[i]);
for (j = 0; j < max_allocno; j++)
! if (CONFLICTP (i, j) || CONFLICTP (j, i))
fprintf (file, " %d", allocno_reg[j]);
for (j = 0; j < FIRST_PSEUDO_REGISTER; j++)
if (TEST_HARD_REG_BIT (hard_reg_conflicts[i], j))
--- 1860,1866 ----
register int j;
fprintf (file, ";; %d conflicts:", allocno_reg[i]);
for (j = 0; j < max_allocno; j++)
! if (CONFLICTP (j, i))
fprintf (file, " %d", allocno_reg[j]);
for (j = 0; j < FIRST_PSEUDO_REGISTER; j++)
if (TEST_HARD_REG_BIT (hard_reg_conflicts[i], j))