This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Optimize bitmap_ior_into and friends


Hi,
in the profiles, bitmap_ior_into shows as second most expensive function.  It
is mostly because it is collecting cache misses from dataflow that I hope to
improve a bit somewhat by adding more obstacks still.

Looking at mispredict profiles I however noticed that we have there difficult
to predict branch in internal loop while looking for changes.  The return value
of bitmap_ior_into is ignored in all hot calls, but it is possible to manually
ifconvert the loop (we won't do it as it introduce extra store) to get reduce
the problem.  I will later do some measurements if it makes sense to have two
variants - one tracking changes and one not, but I want to be finished with
flattening first.

I also noticed that bitmap.c code walks the array backwards.  We won't reverse
it at -O3 since we unroll first.  It should be better on every modern CPU to
reverse the direction.

The patch seems to cut about 5-15% off the profile of bitmap_ior_into, but still
keeps function second most hot.

Bootstrapped/regtested x86_64-linux, OK?

Honza

	* bitmap.c (bitmap_and): Walk array forward.
	(bitmap_and_compl_into): Likewise.
	(bitmap_xor): Likewise.
	(bitmap_xor_into):  Likewise.
	(bitmap_equal_p): Likewise.
	(bitmap_intersect_p): Likewise.
	(bitmap_intersect_compl_p): Likewise.
	(bitmap_ior_and_into): Likewise.
	(bitmap_elt_copy): Optimize test for changes; walk forwards.
	(bitmap_and_compl): Likewise.
	(bitmap_elt_ior): Likewise.
Index: bitmap.c
===================================================================
--- bitmap.c	(revision 160447)
+++ bitmap.c	(working copy)
@@ -908,7 +908,7 @@ bitmap_and (bitmap dst, const_bitmap a,
 	    dst_elt = bitmap_elt_insert_after (dst, dst_prev, a_elt->indx);
 	  else
 	    dst_elt->indx = a_elt->indx;
-	  for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+	  for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
 	    {
 	      BITMAP_WORD r = a_elt->bits[ix] & b_elt->bits[ix];
 
@@ -960,7 +960,7 @@ bitmap_and_into (bitmap a, const_bitmap
 	  unsigned ix;
 	  BITMAP_WORD ior = 0;
 
-	  for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+	  for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
 	    {
 	      BITMAP_WORD r = a_elt->bits[ix] & b_elt->bits[ix];
 
@@ -991,13 +991,14 @@ bitmap_elt_copy (bitmap dst, bitmap_elem
   if (!changed && dst_elt && dst_elt->indx == src_elt->indx)
     {
       unsigned ix;
+      BITMAP_WORD changed_here = 0;
 
-      for (ix = BITMAP_ELEMENT_WORDS; ix--;)
-	if (src_elt->bits[ix] != dst_elt->bits[ix])
-	  {
-	    dst_elt->bits[ix] = src_elt->bits[ix];
-	    changed = true;
-	  }
+      for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
+	{
+	  changed_here |= src_elt->bits[ix] ^ dst_elt->bits[ix];
+	  dst_elt->bits[ix] = src_elt->bits[ix];
+	}
+      changed = changed_here != 0;
     }
   else
     {
@@ -1056,17 +1057,16 @@ bitmap_and_compl (bitmap dst, const_bitm
 
 	  if (!changed && dst_elt && dst_elt->indx == a_elt->indx)
 	    {
-	      for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+	      BITMAP_WORD changed_here = 0;
+	      for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
 		{
 		  BITMAP_WORD r = a_elt->bits[ix] & ~b_elt->bits[ix];
 
-		  if (dst_elt->bits[ix] != r)
-		    {
-		      changed = true;
-		      dst_elt->bits[ix] = r;
-		    }
+		  changed_here |= dst_elt->bits[ix] ^ r;
+		  dst_elt->bits[ix] = r;
 		  ior |= r;
 		}
+	      changed = changed_here != 0;
 	    }
 	  else
 	    {
@@ -1082,7 +1082,7 @@ bitmap_and_compl (bitmap dst, const_bitm
 		  new_element = false;
 		}
 
-	      for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+	      for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
 		{
 		  BITMAP_WORD r = a_elt->bits[ix] & ~b_elt->bits[ix];
 
@@ -1158,16 +1158,18 @@ bitmap_and_compl_into (bitmap a, const_b
 	  /* Matching elts, generate A &= ~B.  */
 	  unsigned ix;
 	  BITMAP_WORD ior = 0;
+	  BITMAP_WORD changed_here = 0;
 
-	  for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+	  for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
 	    {
 	      BITMAP_WORD cleared = a_elt->bits[ix] & b_elt->bits[ix];
 	      BITMAP_WORD r = a_elt->bits[ix] ^ cleared;
 
 	      a_elt->bits[ix] = r;
-	      changed |= cleared;
+	      changed_here |= cleared;
 	      ior |= r;
 	    }
+	  changed = changed_here != 0;
 	  next = a_elt->next;
 	  if (!ior)
 	    bitmap_element_free (a, a_elt);
@@ -1453,7 +1455,7 @@ bitmap_compl_and_into (bitmap a, const_b
 	  unsigned ix;
 	  BITMAP_WORD ior = 0;
 
-	  for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+	  for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
 	    {
 	      BITMAP_WORD cleared = a_elt->bits[ix] & b_elt->bits[ix];
 	      BITMAP_WORD r = b_elt->bits[ix] ^ cleared;
@@ -1494,15 +1496,14 @@ bitmap_elt_ior (bitmap dst, bitmap_eleme
 
       if (!changed && dst_elt && dst_elt->indx == a_elt->indx)
 	{
-	  for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+          BITMAP_WORD changed_here = 0;
+	  for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
 	    {
 	      BITMAP_WORD r = a_elt->bits[ix] | b_elt->bits[ix];
-	      if (r != dst_elt->bits[ix])
-		{
-		  dst_elt->bits[ix] = r;
-		  changed = true;
-		}
+	      changed_here |= r ^ dst_elt->bits[ix];
+	      dst_elt->bits[ix] = r;
 	    }
+	  changed = changed_here != 0;
 	}
       else
 	{
@@ -1511,7 +1512,7 @@ bitmap_elt_ior (bitmap dst, bitmap_eleme
 	    dst_elt = bitmap_elt_insert_after (dst, dst_prev, a_elt->indx);
 	  else
 	    dst_elt->indx = a_elt->indx;
-	  for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+	  for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
 	    {
 	      BITMAP_WORD r = a_elt->bits[ix] | b_elt->bits[ix];
 	      dst_elt->bits[ix] = r;
@@ -1650,7 +1651,7 @@ bitmap_xor (bitmap dst, const_bitmap a,
 	    dst_elt = bitmap_elt_insert_after (dst, dst_prev, a_elt->indx);
 	  else
 	    dst_elt->indx = a_elt->indx;
-	  for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+	  for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
 	    {
 	      BITMAP_WORD r = a_elt->bits[ix] ^ b_elt->bits[ix];
 
@@ -1735,7 +1736,7 @@ bitmap_xor_into (bitmap a, const_bitmap
 	  BITMAP_WORD ior = 0;
 	  bitmap_element *next = a_elt->next;
 
-	  for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+	  for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
 	    {
 	      BITMAP_WORD r = a_elt->bits[ix] ^ b_elt->bits[ix];
 
@@ -1772,7 +1773,7 @@ bitmap_equal_p (const_bitmap a, const_bi
     {
       if (a_elt->indx != b_elt->indx)
 	return false;
-      for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+      for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
 	if (a_elt->bits[ix] != b_elt->bits[ix])
 	  return false;
     }
@@ -1797,7 +1798,7 @@ bitmap_intersect_p (const_bitmap a, cons
 	b_elt = b_elt->next;
       else
 	{
-	  for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+	  for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
 	    if (a_elt->bits[ix] & b_elt->bits[ix])
 	      return true;
 	  a_elt = a_elt->next;
@@ -1824,7 +1825,7 @@ bitmap_intersect_compl_p (const_bitmap a
 	b_elt = b_elt->next;
       else
 	{
-	  for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+	  for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
 	    if (a_elt->bits[ix] & ~b_elt->bits[ix])
 	      return true;
 	  a_elt = a_elt->next;
@@ -1880,7 +1881,7 @@ bitmap_ior_and_compl (bitmap dst, const_
 
 	  BITMAP_WORD ior = 0;
 	  tmp_elt.indx = b_elt->indx;
-          for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+	  for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
             {
               BITMAP_WORD r = b_elt->bits[ix] & ~kill_elt->bits[ix];
               ior |= r;
@@ -1998,7 +1999,7 @@ bitmap_ior_and_into (bitmap a, const_bit
 
       overall = 0;
       and_elt.indx = b_elt->indx;
-      for (ix = BITMAP_ELEMENT_WORDS; ix--;)
+      for (ix = 0; ix < BITMAP_ELEMENT_WORDS; ix++)
 	{
 	  and_elt.bits[ix] = b_elt->bits[ix] & c_elt->bits[ix];
 	  overall |= and_elt.bits[ix];


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]