This is the mail archive of the libstdc++@gcc.gnu.org mailing list for the libstdc++ project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[patch] Use bit scanning and counting builtins for std::bitset


Hi,

this patch utilizes gcc's builtin functions for bit counting and
scanning.  With it, I can achieve a speedup of 32% for a toy program
of mine (by using the Alpha "ctpop" instruction).

I didn't rebuild Makefile.in because I don't have a proper version,
and I didn't want the patch to get too big...

2003-02-02  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

	* include/std/std_bitset.h (_Base_bitset::_M_do_count): Use
	__builtin_popcountl.
	(_Base_bitset<1>::_M_do_count): Likewise.
	(_Base_bitset::_M_do_find_first): Use __builtin_ctzl.
	(_Base_bitset::_M_do_find_next): Likewise.
	(_Base_bitset<1>::_M_do_find_first): Likewise
	(_Base_bitset<1>::_M_do_find_next): Likewise.
	* src/bitset.cc: Delete.
	* src/Makefile.am (sources): Remove bitset.cc.

-- 
	Falk
Index: include/std/std_bitset.h
===================================================================
RCS file: /cvs/gcc/gcc/libstdc++-v3/include/std/std_bitset.h,v
retrieving revision 1.11
diff -u -r1.11 std_bitset.h
--- include/std/std_bitset.h	30 Dec 2002 03:58:46 -0000	1.11
+++ include/std/std_bitset.h	2 Feb 2003 02:30:48 -0000
@@ -65,9 +65,6 @@
 
 namespace std
 {
-  extern unsigned char 	_S_bit_count[256];
-  extern unsigned char 	_S_first_one[256];
-
   /**
    *  @if maint
    *  Base class, general case.  It is a class inveriant that _Nw will be
@@ -191,14 +188,8 @@
       _M_do_count() const
       {
 	size_t __result = 0;
-	const unsigned char* __byte_ptr = (const unsigned char*)_M_w;
-	const unsigned char* __end_ptr = (const unsigned char*)(_M_w + _Nw);
-
-	while ( __byte_ptr < __end_ptr )
-	  {
-	    __result += _S_bit_count[*__byte_ptr];
-	    __byte_ptr++;
-	  }
+	for (size_t __i = 0; __i < _Nw; __i++)
+	  __result += __builtin_popcountl(_M_w[__i]);
 	return __result;
       }
 
@@ -284,19 +275,8 @@
 	{
 	  _WordT __thisword = _M_w[__i];
 	  if ( __thisword != static_cast<_WordT>(0) )
-	    {
-	      // find byte within word
-	      for (size_t __j = 0; __j < sizeof(_WordT); __j++ )
-		{
-		  unsigned char __this_byte
-		    = static_cast<unsigned char>(__thisword & (~(unsigned char)0));
-		  if (__this_byte)
-		    return __i*_GLIBCPP_BITSET_BITS_PER_WORD + __j*CHAR_BIT +
-		      _S_first_one[__this_byte];
-
-		  __thisword >>= CHAR_BIT;
-		}
-	    }
+	    return __i * _GLIBCPP_BITSET_BITS_PER_WORD
+	      + __builtin_ctzl(__thisword);
 	}
       // not found, so return an indication of failure.
       return __not_found;
@@ -318,24 +298,11 @@
       _WordT __thisword = _M_w[__i];
 
       // mask off bits below bound
-      __thisword &= (~static_cast<_WordT>(0)) << _S_whichbit(__prev);
+      __thisword >>= __prev + 1;
 
       if ( __thisword != static_cast<_WordT>(0) )
-	{
-	  // find byte within word
-	  // get first byte into place
-	  __thisword >>= _S_whichbyte(__prev) * CHAR_BIT;
-	  for (size_t __j = _S_whichbyte(__prev); __j < sizeof(_WordT); __j++)
-	    {
-	      unsigned char __this_byte
-		= static_cast<unsigned char>(__thisword & (~(unsigned char)0));
-	      if ( __this_byte )
-		return __i*_GLIBCPP_BITSET_BITS_PER_WORD + __j*CHAR_BIT +
-		  _S_first_one[__this_byte];
-
-	      __thisword >>= CHAR_BIT;
-	    }
-	}
+	return __i * _GLIBCPP_BITSET_BITS_PER_WORD
+	  + __builtin_ctzl(__thisword);
 
       // check subsequent words
       __i++;
@@ -343,19 +310,8 @@
 	{
 	  __thisword = _M_w[__i];
 	  if ( __thisword != static_cast<_WordT>(0) )
-	    {
-	      // find byte within word
-	      for (size_t __j = 0; __j < sizeof(_WordT); __j++ )
-		{
-		  unsigned char __this_byte
-		    = static_cast<unsigned char>(__thisword & (~(unsigned char)0));
-		  if ( __this_byte )
-		    return __i*_GLIBCPP_BITSET_BITS_PER_WORD + __j*CHAR_BIT +
-		      _S_first_one[__this_byte];
-
-		  __thisword >>= CHAR_BIT;
-		}
-	    }
+	    return __i * _GLIBCPP_BITSET_BITS_PER_WORD
+	      + __builtin_ctzl(__thisword);
 	}
       // not found, so return an indication of failure.
       return __not_found;
@@ -438,29 +394,34 @@
       _M_is_any() const { return _M_w != 0; }
 
       size_t
-      _M_do_count() const
-      {
-	size_t __result = 0;
-	const unsigned char* __byte_ptr = (const unsigned char*)&_M_w;
-	const unsigned char* __end_ptr
-	  = ((const unsigned char*)&_M_w)+sizeof(_M_w);
-	while ( __byte_ptr < __end_ptr )
-	  {
-	    __result += _S_bit_count[*__byte_ptr];
-	    __byte_ptr++;
-	  }
-	return __result;
-      }
+      _M_do_count() const { return __builtin_popcountl(_M_w); }
 
       unsigned long
       _M_do_to_ulong() const { return _M_w; }
 
       size_t
-      _M_do_find_first(size_t __not_found) const;
+      _M_do_find_first(size_t __not_found) const
+      {
+        if (_M_w != 0)
+          return __builtin_ctzl(_M_w);
+        else
+          return __not_found;
+      }
 
       // find the next "on" bit that follows "prev"
       size_t
-      _M_do_find_next(size_t __prev, size_t __not_found) const;
+      _M_do_find_next(size_t __prev, size_t __not_found) const
+      {
+	++__prev;
+	if (__prev >= _GLIBCPP_BITSET_BITS_PER_WORD)
+	  return __not_found;
+
+	_WordT __x = _M_w >> __prev;
+	if (__x != 0)
+	  return __builtin_ctzl(__x) + __prev;
+	else
+	  return __not_found;
+      }	
     };
 
 
Index: src/Makefile.am
===================================================================
RCS file: /cvs/gcc/gcc/libstdc++-v3/src/Makefile.am,v
retrieving revision 1.118
diff -u -r1.118 Makefile.am
--- src/Makefile.am	28 Jan 2003 01:17:36 -0000	1.118
+++ src/Makefile.am	2 Feb 2003 02:31:08 -0000
@@ -117,7 +117,6 @@
 
 # Sources present in the src directory.
 sources = \
-	bitset.cc \
 	codecvt.cc \
 	complex_io.cc \
 	concept-inst.cc \
Index: src/bitset.cc
===================================================================
RCS file: /cvs/gcc/gcc/libstdc++-v3/src/bitset.cc,v
retrieving revision 1.10
diff -u -r1.10 bitset.cc
--- src/bitset.cc	11 Sep 2002 03:36:45 -0000	1.10
+++ src/bitset.cc	2 Feb 2003 02:31:08 -0000
@@ -1,219 +0,0 @@
-// Bitset definitions -*- C++ -*-
-
-// Copyright (C) 2001, 2002 Free Software Foundation
-//
-// This file is part of the GNU ISO C++ Library.  This library is free
-// software; you can redistribute it and/or modify it under the
-// terms of the GNU General Public License as published by the
-// Free Software Foundation; either version 2, or (at your option)
-// any later version.
-
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-
-// You should have received a copy of the GNU General Public License along
-// with this library; see the file COPYING.  If not, write to the Free
-// Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-// USA.
-
-// As a special exception, you may use this file as part of a free software
-// library without restriction.  Specifically, if other files instantiate
-// templates or use macros or inline functions from this file, or you compile
-// this file and link it with other files to produce an executable, this
-// file does not by itself cause the resulting executable to be covered by
-// the GNU General Public License.  This exception does not however
-// invalidate any other reasons why the executable file might be covered by
-// the GNU General Public License.
-
-/*
- * Copyright (c) 1998
- * Silicon Graphics Computer Systems, Inc.
- *
- * Permission to use, copy, modify, distribute and sell this software
- * and its documentation for any purpose is hereby granted without fee,
- * provided that the above copyright notice appear in all copies and
- * that both that copyright notice and this permission notice appear
- * in supporting documentation.  Silicon Graphics makes no
- * representations about the suitability of this software for any
- * purpose.  It is provided "as is" without express or implied warranty.
- */
-
-#include <bitset>
-
-//
-// Definitions of non-inline functions from the single-word version of
-//  _Base_bitset.
-//
-
-std::size_t
-std::_Base_bitset<1>::_M_do_find_first(std::size_t __not_found) const
-{
-  _WordT __thisword = _M_w;
-
-  if ( __thisword != static_cast<_WordT>(0) ) {
-    // find byte within word
-    for (std::size_t __j = 0; __j < sizeof(_WordT); __j++ ) {
-      unsigned char __this_byte
-        = static_cast<unsigned char>(__thisword & (~(unsigned char)0));
-      if ( __this_byte )
-        return __j * CHAR_BIT + _S_first_one[__this_byte];
-
-      __thisword >>= CHAR_BIT;
-    }
-  }
-  // not found, so return a value that indicates failure.
-  return __not_found;
-}
-
-std::size_t
-std::_Base_bitset<1>::_M_do_find_next(std::size_t __prev,
-				      std::size_t __not_found) const
-{
-  // make bound inclusive
-  ++__prev;
-
-  // check out of bounds
-  if ( __prev >= _GLIBCPP_BITSET_BITS_PER_WORD )
-    return __not_found;
-
-    // search first (and only) word
-  _WordT __thisword = _M_w;
-
-  // mask off bits below bound
-  __thisword &= (~static_cast<_WordT>(0)) << _S_whichbit(__prev);
-
-  if ( __thisword != static_cast<_WordT>(0) ) {
-    // find byte within word
-    // get first byte into place
-    __thisword >>= _S_whichbyte(__prev) * CHAR_BIT;
-    for ( std::size_t __j = _S_whichbyte(__prev); __j < sizeof(_WordT); __j++ ) {
-      unsigned char __this_byte
-        = static_cast<unsigned char>(__thisword & (~(unsigned char)0));
-      if ( __this_byte )
-        return __j * CHAR_BIT + _S_first_one[__this_byte];
-
-      __thisword >>= CHAR_BIT;
-    }
-  }
-
-  // not found, so return a value that indicates failure.
-  return __not_found;
-} // end _M_do_find_next
-
-
-// Lookup tables for find and count operations.  In _S_bit_count, the value
-// *at* an index is the number of bits set *in* that index.
-unsigned char std::_S_bit_count[256] =
-{
-  0, /*   0 */ 1, /*   1 */ 1, /*   2 */ 2, /*   3 */ 1, /*   4 */
-  2, /*   5 */ 2, /*   6 */ 3, /*   7 */ 1, /*   8 */ 2, /*   9 */
-  2, /*  10 */ 3, /*  11 */ 2, /*  12 */ 3, /*  13 */ 3, /*  14 */
-  4, /*  15 */ 1, /*  16 */ 2, /*  17 */ 2, /*  18 */ 3, /*  19 */
-  2, /*  20 */ 3, /*  21 */ 3, /*  22 */ 4, /*  23 */ 2, /*  24 */
-  3, /*  25 */ 3, /*  26 */ 4, /*  27 */ 3, /*  28 */ 4, /*  29 */
-  4, /*  30 */ 5, /*  31 */ 1, /*  32 */ 2, /*  33 */ 2, /*  34 */
-  3, /*  35 */ 2, /*  36 */ 3, /*  37 */ 3, /*  38 */ 4, /*  39 */
-  2, /*  40 */ 3, /*  41 */ 3, /*  42 */ 4, /*  43 */ 3, /*  44 */
-  4, /*  45 */ 4, /*  46 */ 5, /*  47 */ 2, /*  48 */ 3, /*  49 */
-  3, /*  50 */ 4, /*  51 */ 3, /*  52 */ 4, /*  53 */ 4, /*  54 */
-  5, /*  55 */ 3, /*  56 */ 4, /*  57 */ 4, /*  58 */ 5, /*  59 */
-  4, /*  60 */ 5, /*  61 */ 5, /*  62 */ 6, /*  63 */ 1, /*  64 */
-  2, /*  65 */ 2, /*  66 */ 3, /*  67 */ 2, /*  68 */ 3, /*  69 */
-  3, /*  70 */ 4, /*  71 */ 2, /*  72 */ 3, /*  73 */ 3, /*  74 */
-  4, /*  75 */ 3, /*  76 */ 4, /*  77 */ 4, /*  78 */ 5, /*  79 */
-  2, /*  80 */ 3, /*  81 */ 3, /*  82 */ 4, /*  83 */ 3, /*  84 */
-  4, /*  85 */ 4, /*  86 */ 5, /*  87 */ 3, /*  88 */ 4, /*  89 */
-  4, /*  90 */ 5, /*  91 */ 4, /*  92 */ 5, /*  93 */ 5, /*  94 */
-  6, /*  95 */ 2, /*  96 */ 3, /*  97 */ 3, /*  98 */ 4, /*  99 */
-  3, /* 100 */ 4, /* 101 */ 4, /* 102 */ 5, /* 103 */ 3, /* 104 */
-  4, /* 105 */ 4, /* 106 */ 5, /* 107 */ 4, /* 108 */ 5, /* 109 */
-  5, /* 110 */ 6, /* 111 */ 3, /* 112 */ 4, /* 113 */ 4, /* 114 */
-  5, /* 115 */ 4, /* 116 */ 5, /* 117 */ 5, /* 118 */ 6, /* 119 */
-  4, /* 120 */ 5, /* 121 */ 5, /* 122 */ 6, /* 123 */ 5, /* 124 */
-  6, /* 125 */ 6, /* 126 */ 7, /* 127 */ 1, /* 128 */ 2, /* 129 */
-  2, /* 130 */ 3, /* 131 */ 2, /* 132 */ 3, /* 133 */ 3, /* 134 */
-  4, /* 135 */ 2, /* 136 */ 3, /* 137 */ 3, /* 138 */ 4, /* 139 */
-  3, /* 140 */ 4, /* 141 */ 4, /* 142 */ 5, /* 143 */ 2, /* 144 */
-  3, /* 145 */ 3, /* 146 */ 4, /* 147 */ 3, /* 148 */ 4, /* 149 */
-  4, /* 150 */ 5, /* 151 */ 3, /* 152 */ 4, /* 153 */ 4, /* 154 */
-  5, /* 155 */ 4, /* 156 */ 5, /* 157 */ 5, /* 158 */ 6, /* 159 */
-  2, /* 160 */ 3, /* 161 */ 3, /* 162 */ 4, /* 163 */ 3, /* 164 */
-  4, /* 165 */ 4, /* 166 */ 5, /* 167 */ 3, /* 168 */ 4, /* 169 */
-  4, /* 170 */ 5, /* 171 */ 4, /* 172 */ 5, /* 173 */ 5, /* 174 */
-  6, /* 175 */ 3, /* 176 */ 4, /* 177 */ 4, /* 178 */ 5, /* 179 */
-  4, /* 180 */ 5, /* 181 */ 5, /* 182 */ 6, /* 183 */ 4, /* 184 */
-  5, /* 185 */ 5, /* 186 */ 6, /* 187 */ 5, /* 188 */ 6, /* 189 */
-  6, /* 190 */ 7, /* 191 */ 2, /* 192 */ 3, /* 193 */ 3, /* 194 */
-  4, /* 195 */ 3, /* 196 */ 4, /* 197 */ 4, /* 198 */ 5, /* 199 */
-  3, /* 200 */ 4, /* 201 */ 4, /* 202 */ 5, /* 203 */ 4, /* 204 */
-  5, /* 205 */ 5, /* 206 */ 6, /* 207 */ 3, /* 208 */ 4, /* 209 */
-  4, /* 210 */ 5, /* 211 */ 4, /* 212 */ 5, /* 213 */ 5, /* 214 */
-  6, /* 215 */ 4, /* 216 */ 5, /* 217 */ 5, /* 218 */ 6, /* 219 */
-  5, /* 220 */ 6, /* 221 */ 6, /* 222 */ 7, /* 223 */ 3, /* 224 */
-  4, /* 225 */ 4, /* 226 */ 5, /* 227 */ 4, /* 228 */ 5, /* 229 */
-  5, /* 230 */ 6, /* 231 */ 4, /* 232 */ 5, /* 233 */ 5, /* 234 */
-  6, /* 235 */ 5, /* 236 */ 6, /* 237 */ 6, /* 238 */ 7, /* 239 */
-  4, /* 240 */ 5, /* 241 */ 5, /* 242 */ 6, /* 243 */ 5, /* 244 */
-  6, /* 245 */ 6, /* 246 */ 7, /* 247 */ 5, /* 248 */ 6, /* 249 */
-  6, /* 250 */ 7, /* 251 */ 6, /* 252 */ 7, /* 253 */ 7, /* 254 */
-  8  /* 255 */
-}; // end _S_bit_count
-
-unsigned char std::_S_first_one[256] =
-{
-  0, /*   0 */ 0, /*   1 */ 1, /*   2 */ 0, /*   3 */ 2, /*   4 */
-  0, /*   5 */ 1, /*   6 */ 0, /*   7 */ 3, /*   8 */ 0, /*   9 */
-  1, /*  10 */ 0, /*  11 */ 2, /*  12 */ 0, /*  13 */ 1, /*  14 */
-  0, /*  15 */ 4, /*  16 */ 0, /*  17 */ 1, /*  18 */ 0, /*  19 */
-  2, /*  20 */ 0, /*  21 */ 1, /*  22 */ 0, /*  23 */ 3, /*  24 */
-  0, /*  25 */ 1, /*  26 */ 0, /*  27 */ 2, /*  28 */ 0, /*  29 */
-  1, /*  30 */ 0, /*  31 */ 5, /*  32 */ 0, /*  33 */ 1, /*  34 */
-  0, /*  35 */ 2, /*  36 */ 0, /*  37 */ 1, /*  38 */ 0, /*  39 */
-  3, /*  40 */ 0, /*  41 */ 1, /*  42 */ 0, /*  43 */ 2, /*  44 */
-  0, /*  45 */ 1, /*  46 */ 0, /*  47 */ 4, /*  48 */ 0, /*  49 */
-  1, /*  50 */ 0, /*  51 */ 2, /*  52 */ 0, /*  53 */ 1, /*  54 */
-  0, /*  55 */ 3, /*  56 */ 0, /*  57 */ 1, /*  58 */ 0, /*  59 */
-  2, /*  60 */ 0, /*  61 */ 1, /*  62 */ 0, /*  63 */ 6, /*  64 */
-  0, /*  65 */ 1, /*  66 */ 0, /*  67 */ 2, /*  68 */ 0, /*  69 */
-  1, /*  70 */ 0, /*  71 */ 3, /*  72 */ 0, /*  73 */ 1, /*  74 */
-  0, /*  75 */ 2, /*  76 */ 0, /*  77 */ 1, /*  78 */ 0, /*  79 */
-  4, /*  80 */ 0, /*  81 */ 1, /*  82 */ 0, /*  83 */ 2, /*  84 */
-  0, /*  85 */ 1, /*  86 */ 0, /*  87 */ 3, /*  88 */ 0, /*  89 */
-  1, /*  90 */ 0, /*  91 */ 2, /*  92 */ 0, /*  93 */ 1, /*  94 */
-  0, /*  95 */ 5, /*  96 */ 0, /*  97 */ 1, /*  98 */ 0, /*  99 */
-  2, /* 100 */ 0, /* 101 */ 1, /* 102 */ 0, /* 103 */ 3, /* 104 */
-  0, /* 105 */ 1, /* 106 */ 0, /* 107 */ 2, /* 108 */ 0, /* 109 */
-  1, /* 110 */ 0, /* 111 */ 4, /* 112 */ 0, /* 113 */ 1, /* 114 */
-  0, /* 115 */ 2, /* 116 */ 0, /* 117 */ 1, /* 118 */ 0, /* 119 */
-  3, /* 120 */ 0, /* 121 */ 1, /* 122 */ 0, /* 123 */ 2, /* 124 */
-  0, /* 125 */ 1, /* 126 */ 0, /* 127 */ 7, /* 128 */ 0, /* 129 */
-  1, /* 130 */ 0, /* 131 */ 2, /* 132 */ 0, /* 133 */ 1, /* 134 */
-  0, /* 135 */ 3, /* 136 */ 0, /* 137 */ 1, /* 138 */ 0, /* 139 */
-  2, /* 140 */ 0, /* 141 */ 1, /* 142 */ 0, /* 143 */ 4, /* 144 */
-  0, /* 145 */ 1, /* 146 */ 0, /* 147 */ 2, /* 148 */ 0, /* 149 */
-  1, /* 150 */ 0, /* 151 */ 3, /* 152 */ 0, /* 153 */ 1, /* 154 */
-  0, /* 155 */ 2, /* 156 */ 0, /* 157 */ 1, /* 158 */ 0, /* 159 */
-  5, /* 160 */ 0, /* 161 */ 1, /* 162 */ 0, /* 163 */ 2, /* 164 */
-  0, /* 165 */ 1, /* 166 */ 0, /* 167 */ 3, /* 168 */ 0, /* 169 */
-  1, /* 170 */ 0, /* 171 */ 2, /* 172 */ 0, /* 173 */ 1, /* 174 */
-  0, /* 175 */ 4, /* 176 */ 0, /* 177 */ 1, /* 178 */ 0, /* 179 */
-  2, /* 180 */ 0, /* 181 */ 1, /* 182 */ 0, /* 183 */ 3, /* 184 */
-  0, /* 185 */ 1, /* 186 */ 0, /* 187 */ 2, /* 188 */ 0, /* 189 */
-  1, /* 190 */ 0, /* 191 */ 6, /* 192 */ 0, /* 193 */ 1, /* 194 */
-  0, /* 195 */ 2, /* 196 */ 0, /* 197 */ 1, /* 198 */ 0, /* 199 */
-  3, /* 200 */ 0, /* 201 */ 1, /* 202 */ 0, /* 203 */ 2, /* 204 */
-  0, /* 205 */ 1, /* 206 */ 0, /* 207 */ 4, /* 208 */ 0, /* 209 */
-  1, /* 210 */ 0, /* 211 */ 2, /* 212 */ 0, /* 213 */ 1, /* 214 */
-  0, /* 215 */ 3, /* 216 */ 0, /* 217 */ 1, /* 218 */ 0, /* 219 */
-  2, /* 220 */ 0, /* 221 */ 1, /* 222 */ 0, /* 223 */ 5, /* 224 */
-  0, /* 225 */ 1, /* 226 */ 0, /* 227 */ 2, /* 228 */ 0, /* 229 */
-  1, /* 230 */ 0, /* 231 */ 3, /* 232 */ 0, /* 233 */ 1, /* 234 */
-  0, /* 235 */ 2, /* 236 */ 0, /* 237 */ 1, /* 238 */ 0, /* 239 */
-  4, /* 240 */ 0, /* 241 */ 1, /* 242 */ 0, /* 243 */ 2, /* 244 */
-  0, /* 245 */ 1, /* 246 */ 0, /* 247 */ 3, /* 248 */ 0, /* 249 */
-  1, /* 250 */ 0, /* 251 */ 2, /* 252 */ 0, /* 253 */ 1, /* 254 */
-  0, /* 255 */
-}; // end _S_first_one
-

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]