This is the mail archive of the libstdc++@gcc.gnu.org mailing list for the libstdc++ project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

ext/mt_alloc.h


Hi everyone!

My and Ola's FSF assignments are now near completion (we have sent back signed assignment papers last week). As per the previous discussion here on the list we have prepared a new version of the __mt_alloc allocator in a separate file ext/mt_alloc.h as well as taken care of a bug (that showed up in consumer/producer environment), optimized bin lookup and added some rough out-of-memory handling.

Attached you will also find the revised testprograms.

There are a couple of "unknowns" for us:
- There are quite a few different #ifndef naming conventions in use - which one should this file use (we chose __GLIBCPP_MT_ALLOC_H)?
- Currently we include stl_alloc.h in this file since all the other "SGI-style" allocators have defined a set of alloc_traits thingies - is this needed? If not it would suffice with stl_threads.h and functexcept.h?
- Which namespace should this code live in?


Also note that (as mentioned in the original posting http://gcc.gnu.org/ml/libstdc++/2003-03/msg00033.html) this allocator will currently only work on platforms using the pthread_* since there is no __gthread_mutex_init () call in gthr.h (we use pthread_mutex_init () directly!).

Brgds

/Stefan & Ola

--
A real person has two reasons for doing anything... ...a good reason and the real reason.


// Copyright (C) 2002, 2003 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 2, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING.  If not, write to the Free
// Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
// USA.

// As a special exception, you may use this file as part of a free software
// library without restriction.  Specifically, if other files instantiate
// templates or use macros or inline functions from this file, or you compile
// this file and link it with other files to produce an executable, this
// file does not by itself cause the resulting executable to be covered by
// the GNU General Public License.  This exception does not however
// invalidate any other reasons why the executable file might be covered by
// the GNU General Public License.

/** @file ext/mt_alloc.h
 *  This file is a GNU extension to the Standard C++ Library. 
 *  You should only include this header if you are using GCC 3 or later.
 */

#ifndef __GLIBCPP_MT_ALLOC_H
#define __GLIBCPP_MT_ALLOC_H

/**
 *  This is a fixed size (power of 2) allocator which - when compiled
 *  with thread support - will maintain one freelist per size per thread
 *  plus a "global" one. Steps are taken to limit the per thread freelist
 *  sizes (by returning excess back to "global").
 *
 *  Usage examples:
 *    vector< int, __mt_alloc<0> > v1;
 *
 *    typedef std::__allocator< char, __mt_alloc<0> > string_alloc;
 *    std::basic_string< char, std::char_traits<char>, string_alloc > s1;
 */

#include <bits/stl_alloc.h>

namespace std
{
  template<int __inst>
    class __mt_alloc
    {
    private:
      /*
       * We need to create the initial lists and set up some variables
       * before we can answer to the first request for memory. 
       * The initialization of these variables is done at file scope 
       * below class declaration.
       */
#ifdef __GTHREADS
      static __gthread_once_t _S_once_mt;
#endif
      static bool _S_initialized;

      /*
       * Using short int as type for the binmap implies we are never caching
       * blocks larger than 65535 with this allocator
       */
      typedef unsigned short int binmap_type;
      static binmap_type* _S_binmap;

      static void _S_init ();

      /*
       * Variables used to "tune" the behavior of the allocator, assigned
       * and explained in detail below.
       */
      static size_t _S_max_bytes;
      static size_t _S_chunk_size;
      static size_t _S_max_threads;
      static size_t _S_no_of_bins;
      static size_t _S_freelist_headroom;

      /*
       * Each requesting thread is assigned an id ranging from 1 to 
       * _S_max_threads. Thread id 0 is used as a global memory pool.
       * In order to get constant performance on the thread assignment
       * routine, we keep a list of free ids. When a thread first requests
       * memory we remove the first record in this list and stores the address
       * in a __gthread_key. When initializing the __gthread_key
       * we specify a destructor. When this destructor (i.e. the thread dies)
       * is called, we return the thread id to the back of this list.
       */
#ifdef __GTHREADS
      struct thread_record
      {
        /*
         * Points to next free thread id record. NULL if last record in list.
         */
        thread_record* next;

        /*
         * Thread id ranging from 1 to _S_max_threads.
         */
        size_t id;
      };

      static thread_record* _S_thread_freelist_first;
      static thread_record* _S_thread_freelist_last;
      static __gthread_mutex_t _S_thread_freelist_mutex;
      static void _S_thread_key_destr (void* freelist_pos);
      static __gthread_key_t _S_thread_key;
      static size_t _S_get_thread_id ();
#endif

      struct block_record
      {
        /*
         * Points to the next block_record for its thread_id.
         */
        block_record* next;

        /*
         * The thread id of the thread which has requested this block.
         * All blocks are initially "owned" by global pool thread id 0.
         */
        size_t thread_id;
      };

      struct bin_record
      {
        /*
         * An "array" of pointers to the first/last free block for each 
         * thread id. Memory to these "arrays" is allocated in _S_init () 
         * for _S_max_threads + global pool 0.
         */
        block_record** first;
        block_record** last;

        /*
         * An "array" of counters used to keep track of the amount of blocks
         * that are on the freelist/used for each thread id.
         * Memory to these "arrays" is allocated in _S_init () 
         * for _S_max_threads + global pool 0.
         */
        size_t* free;
        size_t* used;

        /*
         * Each bin has its own mutex which is used to ensure data integrity 
         * while changing "ownership" on a block.
         * The mutex is initialized in _S_init ().
         */
#ifdef __GTHREADS
        __gthread_mutex_t* mutex; 
#endif
      };

      /*
       * An "array" of bin_records each of which represents a specific 
       * power of 2 size. Memory to this "array" is allocated in _S_init ().
       */
      static bin_record* _S_bin;

    public:
      static void*
      allocate (size_t __n)
      {
        /*
         * Requests larger than _S_max_bytes are handled by malloc/free directly
         */
        if (__n > _S_max_bytes)
          {
            void* __ret = malloc (__n);
            if (!__ret) 
              __throw_bad_alloc ();

            return __ret;
          }

        /*
         * Although the test in __gthread_once () would suffice, we wrap test of
         * the once condition in our own unlocked check. This saves one function
         * call to pthread_once() (which itself only tests for the once value
         * unlocked anyway and immediately returns if set)
         */
        if (!_S_initialized)
          {
#ifdef __GTHREADS
            if (__gthread_active_p ())
              __gthread_once (&_S_once_mt, _S_init);
            else
#endif
              {
                _S_max_threads = 0;
                _S_init ();
              }
          }

        /*
         * Round up to power of 2 and figure out which bin to use
         */
        size_t bin = _S_binmap[__n];

#ifdef __GTHREADS
        size_t thread_id = _S_get_thread_id ();
#else
        size_t thread_id = 0;
#endif

        block_record* block;

        /*
         * Find out if we have blocks on our freelist.
         * If so, go ahead and use them directly without
         * having to lock anything.
         */
        if (_S_bin[bin].first[thread_id] == NULL)
          {
            /*
             * Are we using threads?
             * - Yes, lock and check if there are free blocks on the global 
             *   list (and if not add new ones), get the first one 
             *   and change owner.
             * - No, all operations are made directly to global pool 0 
             *   no need to lock or change ownership but check for free 
             *   blocks on global list (and if not add new ones) and 
             *   get the first one.
             */
#ifdef __GTHREADS
            if (__gthread_active_p ())
              {
                __gthread_mutex_lock (_S_bin[bin].mutex);

                if (_S_bin[bin].first[0] == NULL)
                  {
                    _S_bin[bin].first[0] = 
                      (block_record*)malloc (_S_chunk_size);

                    if (!_S_bin[bin].first[0])
                      {
                        __gthread_mutex_unlock (_S_bin[bin].mutex);
                        __throw_bad_alloc ();
                      }

                    size_t bin_t = 1 << bin;
                    size_t block_count = 
                      _S_chunk_size / (bin_t + sizeof (block_record));

                    _S_bin[bin].free[0] = block_count;

                    block_count--;
                    block = _S_bin[bin].first[0];

                    while (block_count > 0)
                      {
                        block->next = (block_record*)((char*)block + 
                                      (bin_t + sizeof (block_record)));

                        block = block->next;

                        block_count--;
                      }

                    block->next = NULL;
                    _S_bin[bin].last[0] = block;
                  }

                block = _S_bin[bin].first[0];

                /*
                 * Remove from list and count down the available counter on
                 * global pool 0.
                 */
                _S_bin[bin].first[0] = _S_bin[bin].first[0]->next;
                _S_bin[bin].free[0]--;

                __gthread_mutex_unlock (_S_bin[bin].mutex);

                /*
                 * Now that we have removed the block from the global
                 * freelist we can change owner and update the used counter
                 * for this thread without locking.
                 */
                block->thread_id = thread_id;
                _S_bin[bin].used[thread_id]++;
              }
            else
#endif
              {
                _S_bin[bin].first[0] = (block_record*)malloc (_S_chunk_size);

                if (!_S_bin[bin].first[0]) 
                  __throw_bad_alloc ();

                size_t bin_t = 1 << bin;
                size_t block_count = 
                  _S_chunk_size / (bin_t + sizeof (block_record));

                _S_bin[bin].free[0] = block_count;

                block_count--;
                block = _S_bin[bin].first[0];

                while (block_count > 0)
                  {
                    block->next = (block_record*)((char*)block + 
                                  (bin_t + sizeof (block_record)));

                    block = block->next;

                    block_count--;
                  }

                block->next = NULL;
                _S_bin[bin].last[0] = block;

                block = _S_bin[bin].first[0];

                /*
                 * Remove from list and count down the available counter on
                 * global pool 0 and increase it's used counter.
                 */
                _S_bin[bin].first[0] = _S_bin[bin].first[0]->next;
                _S_bin[bin].free[0]--;
                _S_bin[bin].used[0]++;
              }
          }  
        else
          {
            /*
             * "Default" operation - we have blocks on our own freelist
             * grab the first record and update the counters.
             */
            block = _S_bin[bin].first[thread_id];

            _S_bin[bin].first[thread_id] = _S_bin[bin].first[thread_id]->next;
            _S_bin[bin].free[thread_id]--;
            _S_bin[bin].used[thread_id]++;
          }

        return (void*)((char*)block + sizeof (block_record));
      }

      static void
      deallocate (void* __p, size_t __n)
      {
        /*
         * Requests larger than _S_max_bytes are handled by malloc/free directly
         */
        if (__n > _S_max_bytes)
          {
            free (__p);
            return;
          }

        /*
         * Round up to power of 2 and figure out which bin to use
         */
        size_t bin = _S_binmap[__n];

#ifdef __GTHREADS
        size_t thread_id = _S_get_thread_id ();
#else
        size_t thread_id = 0;
#endif

        block_record* block = (block_record*)((char*)__p - 
                               sizeof (block_record));

        /*
         * This block will always be at the back of a list and thus
         * we set its next pointer to NULL.
         */
        block->next = NULL;

#ifdef __GTHREADS
        if (__gthread_active_p ())
          {
            /*
             * Calculate the number of records to remove from our freelist
             */
            int remove = _S_bin[bin].free[thread_id] - 
                         (_S_bin[bin].used[thread_id] / _S_freelist_headroom);

            /*
             * The calculation above will almost always tell us to remove one
             * or two records at a time, but this creates too much contentation
             * when locking and therefore we wait until the number of records
             * is "high enough".
             */
            if (remove > (int)(100 * (_S_no_of_bins - bin)) && 
                remove > (int)(_S_bin[bin].free[thread_id] / 
                               _S_freelist_headroom))
              {
                __gthread_mutex_lock (_S_bin[bin].mutex);

                while (remove > 0)
                  {
                    if (_S_bin[bin].first[0] == NULL)
                      _S_bin[bin].first[0] = _S_bin[bin].first[thread_id];
                    else
                      _S_bin[bin].last[0]->next = _S_bin[bin].first[thread_id];

                    _S_bin[bin].last[0] = _S_bin[bin].first[thread_id];

                    _S_bin[bin].first[thread_id] = 
                      _S_bin[bin].first[thread_id]->next;

                    _S_bin[bin].free[0]++;
                    _S_bin[bin].free[thread_id]--;

                    remove--;
                  }

                _S_bin[bin].last[0]->next = NULL;

                __gthread_mutex_unlock (_S_bin[bin].mutex);
              }

            /*
             * Did we allocate this block?
             * - Yes, return it to our freelist
             * - No, return it to global pool
             */
            if (thread_id == block->thread_id)
              {
                if (_S_bin[bin].first[thread_id] == NULL)
                  _S_bin[bin].first[thread_id] = block;
                else
                  _S_bin[bin].last[thread_id]->next = block;

                _S_bin[bin].last[thread_id] = block;

                _S_bin[bin].free[thread_id]++;
                _S_bin[bin].used[thread_id]--;
              }
            else
              {
                __gthread_mutex_lock (_S_bin[bin].mutex);

                if (_S_bin[bin].first[0] == NULL)
                  _S_bin[bin].first[0] = block;
                else
                  _S_bin[bin].last[0]->next = block;

                _S_bin[bin].last[0] = block;

                _S_bin[bin].free[0]++;
                _S_bin[bin].used[block->thread_id]--;

                __gthread_mutex_unlock (_S_bin[bin].mutex);
              }
          }
        else
#endif
          {
            /*
             * Single threaded application - return to global pool
             */
            if (_S_bin[bin].first[0] == NULL)
              _S_bin[bin].first[0] = block;
            else
              _S_bin[bin].last[0]->next = block;

            _S_bin[bin].last[0] = block;

            _S_bin[bin].free[0]++;
            _S_bin[bin].used[0]--;
          }
      }
    };

  template<int __inst>
    void
    __mt_alloc<__inst>::
    _S_init ()
    {
      /*
       * Calculate the number of bins required based on _S_max_bytes,
       * _S_no_of_bins is initialized to 1 below.
       */
      {
        size_t bin_t = 1;
        while (_S_max_bytes > bin_t)
          {
            bin_t = bin_t << 1;
            _S_no_of_bins++;
          }
      }

      /*
       * Setup the bin map for quick lookup of the relevant bin
       */
      _S_binmap = (binmap_type*)
        malloc ((_S_max_bytes + 1) * sizeof (binmap_type));

      if (!_S_binmap) 
        __throw_bad_alloc ();

      binmap_type* bp_t = _S_binmap;
      binmap_type bin_max_t = 1;
      binmap_type bin_t = 0;
      for (binmap_type ct = 0; ct <= _S_max_bytes; ct++)
        {
          if (ct > bin_max_t)
            {
              bin_max_t <<= 1;
              bin_t++;
            }
          *bp_t++ = bin_t;
        }

      /*
       * If __gthread_active_p () create and initialize the list of free 
       * thread ids. Single threaded applications use thread id 0 directly 
       * and have no need for this. 
       */
#ifdef __GTHREADS
      if (__gthread_active_p ())
        {
          _S_thread_freelist_first = 
            (thread_record*)malloc (sizeof (thread_record) * _S_max_threads);

          if (!_S_thread_freelist_first) 
            __throw_bad_alloc ();

          /*
           * NOTE! The first assignable thread id is 1 since the global 
           * pool uses id 0
           */
          size_t i;
          for (i = 1; i < _S_max_threads; i++)
            {
              _S_thread_freelist_first[i - 1].next = 
                &_S_thread_freelist_first[i];

              _S_thread_freelist_first[i - 1].id = i;
            }

          /*
           * Set last record and pointer to this
           */
          _S_thread_freelist_first[i - 1].next = NULL;
          _S_thread_freelist_first[i - 1].id = i;
          _S_thread_freelist_last = &_S_thread_freelist_first[i - 1];

          /*
           * Initialize per thread key to hold pointer to _S_thread_freelist
           * NOTE! Here's an ugly workaround - if _S_thread_key_destr is 
           * not explicitly called at least once it won't be linked into 
           * the application. This is the behavior of template methods and
           * __gthread_key_create () takes only a pointer to the function
           * and does not cause the compiler to create an instance.
           */
          _S_thread_key_destr( NULL );
          __gthread_key_create (&_S_thread_key, _S_thread_key_destr);
        }
#endif

      /*
       * Initialize _S_bin and its members
       */
      _S_bin = (bin_record*)malloc (sizeof (bin_record) * _S_no_of_bins);

      if (!_S_bin) 
        __throw_bad_alloc ();

      for (size_t bin = 0; bin < _S_no_of_bins; bin++)
        {
          _S_bin[bin].first = (block_record**)
            malloc (sizeof (block_record*) * (_S_max_threads + 1));

          if (!_S_bin[bin].first) 
            __throw_bad_alloc ();

          _S_bin[bin].last = (block_record**)
            malloc (sizeof (block_record*) * (_S_max_threads + 1));

          if (!_S_bin[bin].last) 
            __throw_bad_alloc ();

          _S_bin[bin].free = (size_t*)
            malloc (sizeof (size_t) * (_S_max_threads + 1));

          if (!_S_bin[bin].free) 
            __throw_bad_alloc ();

          _S_bin[bin].used = (size_t*)
            malloc (sizeof (size_t) * (_S_max_threads + 1));

          if (!_S_bin[bin].used) 
            __throw_bad_alloc ();

          /*
           * Ugly workaround of what at the time of writing seems to be
           * a parser problem - see PR c++/9779 for more info.
           */
#ifdef __GTHREADS
          size_t s = sizeof (__gthread_mutex_t); 
          _S_bin[bin].mutex = (__gthread_mutex_t*)malloc (s);

          if (!_S_bin[bin].mutex) 
            __throw_bad_alloc ();

          /*
           * This is not only ugly - it's extremly non-portable!
           * However gthr.h does not currently provide a __gthread_mutex_init ()
           * call. The correct solution to this problem needs to be discussed.
           */
          pthread_mutex_init (_S_bin[bin].mutex, NULL);
#endif

          for (size_t thread = 0; thread <= _S_max_threads; thread++)
            {
              _S_bin[bin].first[thread] = NULL;
              _S_bin[bin].last[thread] = NULL;
              _S_bin[bin].free[thread] = 0;
              _S_bin[bin].used[thread] = 0;
            }
        }

        _S_initialized = true;
    }

#ifdef __GTHREADS
  template<int __inst>
    void
    __mt_alloc<__inst>::
    _S_thread_key_destr (void* freelist_pos)
    {
      /*
       * This is due to the ugly workaround mentioned in _S_init ()
       */
      if (freelist_pos == NULL)
        return;

      /*
       * If the thread - when it dies - still have records on its freelist
       * we return them to the global pool here.
       */
      for (size_t bin = 0; bin < _S_no_of_bins; bin++)
        {
          block_record* block = 
            _S_bin[bin].first[((thread_record*)freelist_pos)->id];

          if (block != NULL)
            {
              __gthread_mutex_lock (_S_bin[bin].mutex);

              while (block != NULL)
                {
                  if (_S_bin[bin].first[0] == NULL)
                    _S_bin[bin].first[0] = block;
                  else
                    _S_bin[bin].last[0]->next = block;

                  _S_bin[bin].last[0] = block;

                  block = block->next;

                  _S_bin[bin].free[0]++;
                }

              _S_bin[bin].last[0]->next = NULL;

              __gthread_mutex_unlock (_S_bin[bin].mutex);
            }
        }

      /*
       * Return this thread id record to thread_freelist
       */
      __gthread_mutex_lock (&_S_thread_freelist_mutex);

      _S_thread_freelist_last->next = (thread_record*)freelist_pos;
      _S_thread_freelist_last = (thread_record*)freelist_pos;
      _S_thread_freelist_last->next = NULL;

      __gthread_mutex_unlock (&_S_thread_freelist_mutex);

    }

  template<int __inst>
    size_t
    __mt_alloc<__inst>::
    _S_get_thread_id ()
    {
      /*
       * If we have thread support and it's active we check the thread key
       * value and return it's id or if it's not set we take the first record
       * from _S_thread_freelist and sets the key and returns it's id.
       */
      if (__gthread_active_p ())
        {
          thread_record* freelist_pos;

          if ((freelist_pos = 
              (thread_record*)__gthread_getspecific (_S_thread_key)) == NULL)
            {
              __gthread_mutex_lock (&_S_thread_freelist_mutex);

              /*
               * Since _S_max_threads must be larger than the theoretical 
               * max number of threads of the OS the list can never be empty.
               */
              freelist_pos = _S_thread_freelist_first;
              _S_thread_freelist_first = _S_thread_freelist_first->next;

              __gthread_mutex_unlock (&_S_thread_freelist_mutex);

              __gthread_setspecific (_S_thread_key, (void*)freelist_pos);

              /*
               * Since thread_ids may/will be reused (espcially in
               * producer/consumer applications) we make sure that
               * the list pointers and free counter is reset BUT
               * as the "old" thread may still be owner of some
               * memory (which is referred to by other threads and 
               * thus not freed) we don't reset the used counter.
               */
              for (size_t bin = 0; bin < _S_no_of_bins; bin++)
                {
                  _S_bin[bin].first[freelist_pos->id] = NULL;
                  _S_bin[bin].last[freelist_pos->id] = NULL;
                  _S_bin[bin].free[freelist_pos->id] = 0;
                }
            } 

          return freelist_pos->id;
        }

      /*
       * Otherwise (no thread support or inactive) all requests are served from
       * the global pool 0.
       */
      return 0;
    }

  template<int __inst> __gthread_once_t
  __mt_alloc<__inst>::_S_once_mt = __GTHREAD_ONCE_INIT;
#endif

  template<int __inst> bool
  __mt_alloc<__inst>::_S_initialized = false;

  template<int __inst> typename __mt_alloc<__inst>::binmap_type*
  __mt_alloc<__inst>::_S_binmap = NULL;

  /*
   * Allocation requests (after round-up to power of 2) below this value 
   * will be handled by the allocator. A raw malloc/free () call will be used
   * for requests larger than this value. 
   */
  template<int __inst> size_t
  __mt_alloc<__inst>::_S_max_bytes = 128;

  /*
   * In order to avoid fragmenting and minimize the number of malloc ()
   * calls we always request new memory using this value. Based on 
   * previous discussions on the libstdc++ mailing list we have choosen 
   * the value below. See http://gcc.gnu.org/ml/libstdc++/2001-07/msg00077.html
   */
  template<int __inst> size_t
  __mt_alloc<__inst>::_S_chunk_size = 4096 - 4 * sizeof (void*);

  /*
   * The maximum number of supported threads. Our Linux 2.4.18 reports
   * 4070 in /proc/sys/kernel/threads-max
   */
  template<int __inst> size_t
  __mt_alloc<__inst>::_S_max_threads = 4096;

  /*
   * Actual value calculated in _S_init ()
   */
  template<int __inst> size_t
  __mt_alloc<__inst>::_S_no_of_bins = 1;

  /*
   * Each time a deallocation occurs in a threaded application we make sure
   * that there are no more than _S_freelist_headroom % of used memory on 
   * the freelist. If the number of additional records is more than
   * _S_freelist_headroom % of the freelist, we move these records back to 
   * the global pool.
   */
  template<int __inst> size_t
  __mt_alloc<__inst>::_S_freelist_headroom = 10;

  /*
   * Actual initialization in _S_init ()
   */
#ifdef __GTHREADS
  template<int __inst> typename __mt_alloc<__inst>::thread_record*
  __mt_alloc<__inst>::_S_thread_freelist_first = NULL;

  template<int __inst> typename __mt_alloc<__inst>::thread_record*
  __mt_alloc<__inst>::_S_thread_freelist_last = NULL;

  template<int __inst> __gthread_mutex_t
  __mt_alloc<__inst>::_S_thread_freelist_mutex = __GTHREAD_MUTEX_INIT;

  /*
   * Actual initialization in _S_init ()
   */
  template<int __inst> __gthread_key_t
  __mt_alloc<__inst>::_S_thread_key;
#endif

  template<int __inst> typename __mt_alloc<__inst>::bin_record*
  __mt_alloc<__inst>::_S_bin = NULL;

  template<int __inst>
    inline bool
    operator==(const __mt_alloc<__inst>&,
               const __mt_alloc<__inst>&)
    { return true; }

  template<int __inst>
    inline bool
    operator!=(const __mt_alloc<__inst>&,
               const __mt_alloc<__inst>&)
    { return false; }

  template<typename _Tp, int __inst>
    struct _Alloc_traits<_Tp, __mt_alloc<__inst> >
    {
      static const bool _S_instanceless = true;
      typedef __simple_alloc<_Tp, __mt_alloc<__inst> > _Alloc_type;
      typedef __allocator<_Tp, __mt_alloc<__inst> > allocator_type;
    };

  template<typename _Tp, typename _Tp1, int __inst>
    struct _Alloc_traits<_Tp,
                         __allocator<_Tp1, __mt_alloc<__inst> > >
    {
      static const bool _S_instanceless = true;
      typedef __simple_alloc<_Tp, __mt_alloc<__inst> > _Alloc_type;
      typedef __allocator<_Tp, __mt_alloc<__inst> > allocator_type;
    };
} // namespace std

#endif
/*
 * 2003-02-05 Stefan Olsson <stefan@snon.net>
 *
 * The goal with this application is to compare the performance
 * between different STL allocators relative to the default
 * __pool_alloc.
 *
 * The container used for the tests is vector, which as stated by
 * SGI "Vector is the simplest of the STL container classes, and in 
 * many cases the most efficient.".
 *
 * NOTE! The vector<> container does some "caching" of it's own and
 * therefore we redeclare the variable in each iteration (forcing the
 * const/destr to be called and thus free memory).
 *
 * NOTE! The usage of gettimeofday is unwanted since it's not POSIX,
 * however I have not found a more generic system call to use - 
 * ideas are greatly appriciated!
 *
 * NOTE! This version only does operations on vector<int>. More/other
 * data types should maybe also be tested - ideas are always welcome!
 *
 * I assume that glibc/underlying malloc() implementation has been
 * compiled with -O2 thus should this application also be compiled
 * with -O2 in order to get relevant results.
 */

#include <vector>
#include <sys/time.h>
#include <ext/mt_alloc.h>

/*
 * Make sure that the compiler has thread support...
 */
#if __GTHREADS

using namespace std;

/*
 * In order to avoid that the time it takes for the application to 
 * startup/shutdown affect the end result, we define a target 
 * duration (in seconds) for which all tests should run.
 * Each test is responsible for "calibrating" their # of iterations 
 * to come as close as possible to this target based on the time
 * it takes to complete the test using the default __pool_alloc.
 *
 * NOTE! In an ideal world where there is enough CPU and no
 * contention the application would complete on time, however
 * the design of the __pool_alloc allocator and the fact that we
 * don't have enough CPU available (normally atleast :) will make the
 * actual runtime much longer (at least # of threads * this value)
 */
int target_run_time = 10;

/*
 * The number of iterations to be performed in order to figure out
 * the "speed" of this computer and adjust the number of iterations
 * needed to come close to target_run_time.
 */
int calibrate_iterations = 100000;

/*
 * The number of values to insert in the vector, 32 will cause 
 * 5 (re)allocations to be performed (sizes 4, 8, 16, 32 and 64)
 * This means that all allocations are within _MAX_BYTES = 128
 * as defined in stl_alloc.h for __pool_alloc.
 * Whether or not this value is relevant in "the real world" 
 * or not I don't know and should probably be investigated in 
 * more detail.
 */
int insert_values = 32;

/*
 * Number of threads to create
 */
int threads = 8;

static struct timeval _tstart, _tend;
static struct timezone tz;

void
tstart (void)
{
  gettimeofday (&_tstart, &tz);
}
void
tend (void)
{
  gettimeofday (&_tend, &tz);
}

double
tval()
{
  double t1, t2;

  t1 = (double)_tstart.tv_sec + (double)_tstart.tv_usec/(1000*1000);
  t2 = (double)_tend.tv_sec + (double)_tend.tv_usec/(1000*1000);
  return t2 - t1;
}

int
calibrate_test_ints (void)
{
  tstart ();
  for (int i = 0; i < calibrate_iterations; i++)
  {
    vector<int> v1;

    for (int j = 0; j < insert_values; j++)
    {
      v1.push_back (1);
    }
  }
  tend ();

  return (int)((double)target_run_time / tval ()) * calibrate_iterations;
}

void*
test_ints_pool_alloc (void* it)
{
  int iterations = *(int*)it;

  for (int i = 0; i < iterations; i++)
  {
    vector<int> v1;

    for (int j = 0; j < insert_values; j++)
    {
      v1.push_back (1);
    }
  }

  return NULL;
}

void*
test_ints_malloc_alloc (void* it)
{
  int iterations = *(int*)it;

  for (int i = 0; i < iterations; i++)
  {
    vector<int, __malloc_alloc<0> > v1;

    for (int j = 0; j < insert_values; j++)
    {
      v1.push_back (1);
    }
  }

  return NULL;
}

void*
test_ints_mt_alloc (void* it)
{
  int iterations = *(int*)it;

  for (int i = 0; i < iterations; i++)
  {
    vector<int, __mt_alloc<0> > v1;

    for (int j = 0; j < insert_values; j++)
    {
      v1.push_back (1);
    }
  }

  return NULL;
}

int
main (void)
{
  printf ("Target run-time: %ds\n", target_run_time);

  printf ("Calibrating test_ints... ");
  int iterations = calibrate_test_ints ();
  printf ("DONE! Will do %d iterations.\n", iterations);

  double results[3];
  pthread_t workers[threads];

  printf ("__pool_alloc... ");
  tstart ();
  for( int i = 0; i < threads; i++ )
  {
    pthread_create (&workers[i], NULL, test_ints_pool_alloc, (void*)&iterations);
  }

  for( int i = 0; i < threads; i++ )
  {
    pthread_join (workers[i], NULL);
  }
  tend ();
  results[0] = tval ();
  printf ("DONE! Time %f\n", results[0]);

  printf ("__malloc_alloc... ");
  tstart ();
  for( int i = 0; i < threads; i++ )
  {
    pthread_create (&workers[i], NULL, test_ints_malloc_alloc, (void*)&iterations);
  }

  for( int i = 0; i < threads; i++ )
  {
    pthread_join (workers[i], NULL);
  }
  tend ();
  results[1] = tval ();
  printf ("DONE! Time %f\n", results[1]);

  printf ("__mt_alloc... ");
  tstart ();
  for( int i = 0; i < threads; i++ )
  {
    pthread_create (&workers[i], NULL, test_ints_mt_alloc, (void*)&iterations);
  }

  for( int i = 0; i < threads; i++ )
  {
    pthread_join (workers[i], NULL);
  }
  tend ();
  results[2] = tval ();
  printf ("DONE! Time %f\n", results[2]);

  printf ("Relative performance to __pool_alloc:\n");
  printf ("__malloc_alloc %f %%\n", (results[0] / results[1])*100);
  printf ("__mt_alloc %f %%\n", (results[0] / results[2])*100);

  return 0;
}

#else

int 
main (void) 
{
  printf ("No thread support, exiting!\n");

  return 0;
}

#endif

/*
 * 2003-02-05 Stefan Olsson <stefan@snon.net>
 *
 * The goal with this application is to compare the performance
 * between different STL allocators relative to the default
 * __pool_alloc.
 *
 * The container used for the tests is vector, which as stated by
 * SGI "Vector is the simplest of the STL container classes, and in 
 * many cases the most efficient.".
 *
 * NOTE! The vector<> container does some "caching" of it's own and
 * therefore we redeclare the variable in each iteration (forcing the
 * const/destr to be called and thus free memory).
 *
 * NOTE! The usage of gettimeofday is unwanted since it's not POSIX,
 * however I have not found a more generic system call to use - 
 * ideas are greatly appriciated!
 *
 * NOTE! This version only does operations on vector<int>. More/other
 * data types should maybe also be tested - ideas are always welcome!
 *
 * I assume that glibc/underlying malloc() implementation has been
 * compiled with -O2 thus should this application also be compiled
 * with -O2 in order to get relevant results.
 */

#include <vector>
#include <sys/time.h>
#include <ext/mt_alloc.h>

using namespace std;

/*
 * In order to avoid that the time it takes for the application to 
 * startup/shutdown affect the end result, we define a target 
 * duration (in seconds) for which all tests should run.
 * Each test is responsible for "calibrating" their # of iterations 
 * to come as close as possible to this target based on the time
 * it takes to complete the test using the default __pool_alloc.
 */
int target_run_time = 10;

/*
 * The number of iterations to be performed in order to figure out
 * the "speed" of this computer and adjust the number of iterations
 * needed to come close to target_run_time.
 */
int calibrate_iterations = 100000;

/*
 * The number of values to insert in the vector, 32 will cause 
 * 5 (re)allocations to be performed (sizes 4, 8, 16, 32 and 64)
 * This means that all allocations are within _MAX_BYTES = 128
 * as defined in stl_alloc.h for __pool_alloc.
 * Whether or not this value is relevant in "the real world" 
 * or not I don't know and should probably be investigated in 
 * more detail.
 */
int insert_values = 32;

static struct timeval _tstart, _tend;
static struct timezone tz;

void
tstart (void)
{
  gettimeofday (&_tstart, &tz);
}
void
tend (void)
{
  gettimeofday (&_tend, &tz);
}

double
tval()
{
  double t1, t2;

  t1 = (double)_tstart.tv_sec + (double)_tstart.tv_usec/(1000*1000);
  t2 = (double)_tend.tv_sec + (double)_tend.tv_usec/(1000*1000);
  return t2 - t1;
}

int
calibrate_test_ints (void)
{
  tstart ();
  for (int i = 0; i < calibrate_iterations; i++)
  {
    vector<int> v1;

    for (int j = 0; j < insert_values; j++)
    {
      v1.push_back (1);
    }
  }
  tend ();

  return (int)((double)target_run_time / tval ()) * calibrate_iterations;
}

double
test_ints_pool_alloc (int iterations)
{
  tstart ();
  for (int i = 0; i < iterations; i++)
  {
    vector<int> v1;

    for (int j = 0; j < insert_values; j++)
    {
      v1.push_back (1);
    }
  }
  tend ();

  return tval ();
}

double
test_ints_malloc_alloc (int iterations)
{
  tstart ();
  for (int i = 0; i < iterations; i++)
  {
    vector<int, __malloc_alloc<0> > v1;

    for (int j = 0; j < insert_values; j++)
    {
      v1.push_back (1);
    }
  }
  tend ();

  return tval ();
}

double
test_ints_mt_alloc (int iterations)
{
  tstart ();
  for (int i = 0; i < iterations; i++)
  {
    vector<int, __mt_alloc<0> > v1;

    for (int j = 0; j < insert_values; j++)
    {
      v1.push_back (1);
    }
  }
  tend ();

  return tval ();
}

int
main (void)
{
  printf ("Target run-time: %ds\n", target_run_time);

  printf ("Calibrating test_ints... ");
//int iterations = 1000000;

  int iterations = calibrate_test_ints ();
  printf ("DONE! Will do %d iterations.\n", iterations);

  double results[3];

  printf ("__pool_alloc... ");
  results[0] = test_ints_pool_alloc (iterations);
  printf ("DONE! Time %f\n", results[0]);

  printf ("__malloc_alloc... ");
  results[1] = test_ints_malloc_alloc (iterations);
  printf ("DONE! Time %f\n", results[1]);

  printf ("__mt_alloc... ");
  results[2] = test_ints_mt_alloc (iterations);
  printf ("DONE! Time %f\n", results[2]);

  printf ("Relative performance to __pool_alloc:\n");
  printf ("__malloc_alloc %f %%\n", (results[0] / results[1])*100);
  printf ("__mt_alloc %f %%\n", (results[0] / results[2])*100);

  return 0;
}

Attachment: smime.p7s
Description: S/MIME Cryptographic Signature


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]