This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Optimize bitmap_ior_into and friends
- From: Jan Hubicka <hubicka at ucw dot cz>
- To: gcc-patches at gcc dot gnu dot org
- Date: Wed, 9 Jun 2010 12:04:53 +0200
- Subject: Optimize bitmap_ior_into and friends
Hi,
in the profiles, bitmap_ior_into shows as second most expensive function. It
is mostly because it is collecting cache misses from dataflow that I hope to
improve a bit somewhat by adding more obstacks still.
Looking at mispredict profiles I however noticed that we have there difficult
to predict branch in internal loop while looking for changes. The return value
of bitmap_ior_into is ignored in all hot calls, but it is possible to manually
ifconvert the loop (we won't do it as it introduce extra store) to get reduce
the problem. I will later do some measurements if it makes sense to have two
variants - one tracking changes and one not, but I want to be finished with
flattening first.
I also noticed that bitmap.c code walks the array backwards. We won't reverse
it at -O3 since we unroll first. It should be better on every modern CPU to
reverse the direction.
The patch seems to cut about 5-15% off the profile of bitmap_ior_into, but still
keeps function second most hot.
Bootstrapped/regtested x86_64-linux, OK?
Honza
* bitmap.c (bitmap_and): Walk array forward.
(bitmap_and_compl_into): Likewise.
(bitmap_xor): Likewise.
(bitmap_xor_into): Likewise.
(bitmap_equal_p): Likewise.
(bitmap_intersect_p): Likewise.
(bitmap_intersect_compl_p): Likewise.
(bitmap_ior_and_into): Likewise.
(bitmap_elt_copy): Optimize test for changes; walk forwards.
(bitmap_and_compl): Likewise.
(bitmap_elt_ior): Likewise.
Index: bitmap.c
===================================================================
--- bitmap.c (revision 160447)
+++ bitmap.c (working copy)
@@ -908,7 +908,7 @@ bitmap_and (bitmap dst, const_bitmap a,
dst_elt = bitmap_elt_insert_after (dst, dst_prev, a_elt->indx);
else
dst_elt->indx = a_elt->indx;
- for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+ for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
{
BITMAP_WORD r = a_elt->bits[ix] & b_elt->bits[ix];
@@ -960,7 +960,7 @@ bitmap_and_into (bitmap a, const_bitmap
unsigned ix;
BITMAP_WORD ior = 0;
- for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+ for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
{
BITMAP_WORD r = a_elt->bits[ix] & b_elt->bits[ix];
@@ -991,13 +991,14 @@ bitmap_elt_copy (bitmap dst, bitmap_elem
if (!changed && dst_elt && dst_elt->indx == src_elt->indx)
{
unsigned ix;
+ BITMAP_WORD changed_here = 0;
- for (ix = BITMAP_ELEMENT_WORDS; ix--;)
- if (src_elt->bits[ix] != dst_elt->bits[ix])
- {
- dst_elt->bits[ix] = src_elt->bits[ix];
- changed = true;
- }
+ for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
+ {
+ changed_here |= src_elt->bits[ix] ^ dst_elt->bits[ix];
+ dst_elt->bits[ix] = src_elt->bits[ix];
+ }
+ changed = changed_here != 0;
}
else
{
@@ -1056,17 +1057,16 @@ bitmap_and_compl (bitmap dst, const_bitm
if (!changed && dst_elt && dst_elt->indx == a_elt->indx)
{
- for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+ BITMAP_WORD changed_here = 0;
+ for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
{
BITMAP_WORD r = a_elt->bits[ix] & ~b_elt->bits[ix];
- if (dst_elt->bits[ix] != r)
- {
- changed = true;
- dst_elt->bits[ix] = r;
- }
+ changed_here |= dst_elt->bits[ix] ^ r;
+ dst_elt->bits[ix] = r;
ior |= r;
}
+ changed = changed_here != 0;
}
else
{
@@ -1082,7 +1082,7 @@ bitmap_and_compl (bitmap dst, const_bitm
new_element = false;
}
- for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+ for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
{
BITMAP_WORD r = a_elt->bits[ix] & ~b_elt->bits[ix];
@@ -1158,16 +1158,18 @@ bitmap_and_compl_into (bitmap a, const_b
/* Matching elts, generate A &= ~B. */
unsigned ix;
BITMAP_WORD ior = 0;
+ BITMAP_WORD changed_here = 0;
- for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+ for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
{
BITMAP_WORD cleared = a_elt->bits[ix] & b_elt->bits[ix];
BITMAP_WORD r = a_elt->bits[ix] ^ cleared;
a_elt->bits[ix] = r;
- changed |= cleared;
+ changed_here |= cleared;
ior |= r;
}
+ changed = changed_here != 0;
next = a_elt->next;
if (!ior)
bitmap_element_free (a, a_elt);
@@ -1453,7 +1455,7 @@ bitmap_compl_and_into (bitmap a, const_b
unsigned ix;
BITMAP_WORD ior = 0;
- for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+ for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
{
BITMAP_WORD cleared = a_elt->bits[ix] & b_elt->bits[ix];
BITMAP_WORD r = b_elt->bits[ix] ^ cleared;
@@ -1494,15 +1496,14 @@ bitmap_elt_ior (bitmap dst, bitmap_eleme
if (!changed && dst_elt && dst_elt->indx == a_elt->indx)
{
- for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+ BITMAP_WORD changed_here = 0;
+ for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
{
BITMAP_WORD r = a_elt->bits[ix] | b_elt->bits[ix];
- if (r != dst_elt->bits[ix])
- {
- dst_elt->bits[ix] = r;
- changed = true;
- }
+ changed_here |= r ^ dst_elt->bits[ix];
+ dst_elt->bits[ix] = r;
}
+ changed = changed_here != 0;
}
else
{
@@ -1511,7 +1512,7 @@ bitmap_elt_ior (bitmap dst, bitmap_eleme
dst_elt = bitmap_elt_insert_after (dst, dst_prev, a_elt->indx);
else
dst_elt->indx = a_elt->indx;
- for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+ for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
{
BITMAP_WORD r = a_elt->bits[ix] | b_elt->bits[ix];
dst_elt->bits[ix] = r;
@@ -1650,7 +1651,7 @@ bitmap_xor (bitmap dst, const_bitmap a,
dst_elt = bitmap_elt_insert_after (dst, dst_prev, a_elt->indx);
else
dst_elt->indx = a_elt->indx;
- for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+ for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
{
BITMAP_WORD r = a_elt->bits[ix] ^ b_elt->bits[ix];
@@ -1735,7 +1736,7 @@ bitmap_xor_into (bitmap a, const_bitmap
BITMAP_WORD ior = 0;
bitmap_element *next = a_elt->next;
- for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+ for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
{
BITMAP_WORD r = a_elt->bits[ix] ^ b_elt->bits[ix];
@@ -1772,7 +1773,7 @@ bitmap_equal_p (const_bitmap a, const_bi
{
if (a_elt->indx != b_elt->indx)
return false;
- for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+ for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
if (a_elt->bits[ix] != b_elt->bits[ix])
return false;
}
@@ -1797,7 +1798,7 @@ bitmap_intersect_p (const_bitmap a, cons
b_elt = b_elt->next;
else
{
- for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+ for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
if (a_elt->bits[ix] & b_elt->bits[ix])
return true;
a_elt = a_elt->next;
@@ -1824,7 +1825,7 @@ bitmap_intersect_compl_p (const_bitmap a
b_elt = b_elt->next;
else
{
- for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+ for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
if (a_elt->bits[ix] & ~b_elt->bits[ix])
return true;
a_elt = a_elt->next;
@@ -1880,7 +1881,7 @@ bitmap_ior_and_compl (bitmap dst, const_
BITMAP_WORD ior = 0;
tmp_elt.indx = b_elt->indx;
- for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+ for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
{
BITMAP_WORD r = b_elt->bits[ix] & ~kill_elt->bits[ix];
ior |= r;
@@ -1998,7 +1999,7 @@ bitmap_ior_and_into (bitmap a, const_bit
overall = 0;
and_elt.indx = b_elt->indx;
- for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+ for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
{
and_elt.bits[ix] = b_elt->bits[ix] & c_elt->bits[ix];
overall |= and_elt.bits[ix];