This is the mail archive of the libstdc++@gcc.gnu.org mailing list for the libstdc++ project.
| Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
|---|---|---|
| Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
| Other format: | [Raw text] | |
// Copyright (C) 2002, 2003 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 2, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING. If not, write to the Free
// Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
// USA.
// As a special exception, you may use this file as part of a free software
// library without restriction. Specifically, if other files instantiate
// templates or use macros or inline functions from this file, or you compile
// this file and link it with other files to produce an executable, this
// file does not by itself cause the resulting executable to be covered by
// the GNU General Public License. This exception does not however
// invalidate any other reasons why the executable file might be covered by
// the GNU General Public License.
/** @file ext/mt_alloc.h
* This file is a GNU extension to the Standard C++ Library.
* You should only include this header if you are using GCC 3 or later.
*/
#ifndef __GLIBCPP_MT_ALLOC_H
#define __GLIBCPP_MT_ALLOC_H
/**
* This is a fixed size (power of 2) allocator which - when compiled
* with thread support - will maintain one freelist per size per thread
* plus a "global" one. Steps are taken to limit the per thread freelist
* sizes (by returning excess back to "global").
*
* Usage examples:
* vector< int, __mt_alloc<0> > v1;
*
* typedef std::__allocator< char, __mt_alloc<0> > string_alloc;
* std::basic_string< char, std::char_traits<char>, string_alloc > s1;
*/
#include <bits/stl_alloc.h>
namespace std
{
template<int __inst>
class __mt_alloc
{
private:
/*
* We need to create the initial lists and set up some variables
* before we can answer to the first request for memory.
* The initialization of these variables is done at file scope
* below class declaration.
*/
#ifdef __GTHREADS
static __gthread_once_t _S_once_mt;
#endif
static bool _S_initialized;
/*
* Using short int as type for the binmap implies we are never caching
* blocks larger than 65535 with this allocator
*/
typedef unsigned short int binmap_type;
static binmap_type* _S_binmap;
static void _S_init ();
/*
* Variables used to "tune" the behavior of the allocator, assigned
* and explained in detail below.
*/
static size_t _S_max_bytes;
static size_t _S_chunk_size;
static size_t _S_max_threads;
static size_t _S_no_of_bins;
static size_t _S_freelist_headroom;
/*
* Each requesting thread is assigned an id ranging from 1 to
* _S_max_threads. Thread id 0 is used as a global memory pool.
* In order to get constant performance on the thread assignment
* routine, we keep a list of free ids. When a thread first requests
* memory we remove the first record in this list and stores the address
* in a __gthread_key. When initializing the __gthread_key
* we specify a destructor. When this destructor (i.e. the thread dies)
* is called, we return the thread id to the back of this list.
*/
#ifdef __GTHREADS
struct thread_record
{
/*
* Points to next free thread id record. NULL if last record in list.
*/
thread_record* next;
/*
* Thread id ranging from 1 to _S_max_threads.
*/
size_t id;
};
static thread_record* _S_thread_freelist_first;
static thread_record* _S_thread_freelist_last;
static __gthread_mutex_t _S_thread_freelist_mutex;
static void _S_thread_key_destr (void* freelist_pos);
static __gthread_key_t _S_thread_key;
static size_t _S_get_thread_id ();
#endif
struct block_record
{
/*
* Points to the next block_record for its thread_id.
*/
block_record* next;
/*
* The thread id of the thread which has requested this block.
* All blocks are initially "owned" by global pool thread id 0.
*/
size_t thread_id;
};
struct bin_record
{
/*
* An "array" of pointers to the first/last free block for each
* thread id. Memory to these "arrays" is allocated in _S_init ()
* for _S_max_threads + global pool 0.
*/
block_record** first;
block_record** last;
/*
* An "array" of counters used to keep track of the amount of blocks
* that are on the freelist/used for each thread id.
* Memory to these "arrays" is allocated in _S_init ()
* for _S_max_threads + global pool 0.
*/
size_t* free;
size_t* used;
/*
* Each bin has its own mutex which is used to ensure data integrity
* while changing "ownership" on a block.
* The mutex is initialized in _S_init ().
*/
#ifdef __GTHREADS
__gthread_mutex_t* mutex;
#endif
};
/*
* An "array" of bin_records each of which represents a specific
* power of 2 size. Memory to this "array" is allocated in _S_init ().
*/
static bin_record* _S_bin;
public:
static void*
allocate (size_t __n)
{
/*
* Requests larger than _S_max_bytes are handled by malloc/free directly
*/
if (__n > _S_max_bytes)
{
void* __ret = malloc (__n);
if (!__ret)
__throw_bad_alloc ();
return __ret;
}
/*
* Although the test in __gthread_once () would suffice, we wrap test of
* the once condition in our own unlocked check. This saves one function
* call to pthread_once() (which itself only tests for the once value
* unlocked anyway and immediately returns if set)
*/
if (!_S_initialized)
{
#ifdef __GTHREADS
if (__gthread_active_p ())
__gthread_once (&_S_once_mt, _S_init);
else
#endif
{
_S_max_threads = 0;
_S_init ();
}
}
/*
* Round up to power of 2 and figure out which bin to use
*/
size_t bin = _S_binmap[__n];
#ifdef __GTHREADS
size_t thread_id = _S_get_thread_id ();
#else
size_t thread_id = 0;
#endif
block_record* block;
/*
* Find out if we have blocks on our freelist.
* If so, go ahead and use them directly without
* having to lock anything.
*/
if (_S_bin[bin].first[thread_id] == NULL)
{
/*
* Are we using threads?
* - Yes, lock and check if there are free blocks on the global
* list (and if not add new ones), get the first one
* and change owner.
* - No, all operations are made directly to global pool 0
* no need to lock or change ownership but check for free
* blocks on global list (and if not add new ones) and
* get the first one.
*/
#ifdef __GTHREADS
if (__gthread_active_p ())
{
__gthread_mutex_lock (_S_bin[bin].mutex);
if (_S_bin[bin].first[0] == NULL)
{
_S_bin[bin].first[0] =
(block_record*)malloc (_S_chunk_size);
if (!_S_bin[bin].first[0])
{
__gthread_mutex_unlock (_S_bin[bin].mutex);
__throw_bad_alloc ();
}
size_t bin_t = 1 << bin;
size_t block_count =
_S_chunk_size / (bin_t + sizeof (block_record));
_S_bin[bin].free[0] = block_count;
block_count--;
block = _S_bin[bin].first[0];
while (block_count > 0)
{
block->next = (block_record*)((char*)block +
(bin_t + sizeof (block_record)));
block = block->next;
block_count--;
}
block->next = NULL;
_S_bin[bin].last[0] = block;
}
block = _S_bin[bin].first[0];
/*
* Remove from list and count down the available counter on
* global pool 0.
*/
_S_bin[bin].first[0] = _S_bin[bin].first[0]->next;
_S_bin[bin].free[0]--;
__gthread_mutex_unlock (_S_bin[bin].mutex);
/*
* Now that we have removed the block from the global
* freelist we can change owner and update the used counter
* for this thread without locking.
*/
block->thread_id = thread_id;
_S_bin[bin].used[thread_id]++;
}
else
#endif
{
_S_bin[bin].first[0] = (block_record*)malloc (_S_chunk_size);
if (!_S_bin[bin].first[0])
__throw_bad_alloc ();
size_t bin_t = 1 << bin;
size_t block_count =
_S_chunk_size / (bin_t + sizeof (block_record));
_S_bin[bin].free[0] = block_count;
block_count--;
block = _S_bin[bin].first[0];
while (block_count > 0)
{
block->next = (block_record*)((char*)block +
(bin_t + sizeof (block_record)));
block = block->next;
block_count--;
}
block->next = NULL;
_S_bin[bin].last[0] = block;
block = _S_bin[bin].first[0];
/*
* Remove from list and count down the available counter on
* global pool 0 and increase it's used counter.
*/
_S_bin[bin].first[0] = _S_bin[bin].first[0]->next;
_S_bin[bin].free[0]--;
_S_bin[bin].used[0]++;
}
}
else
{
/*
* "Default" operation - we have blocks on our own freelist
* grab the first record and update the counters.
*/
block = _S_bin[bin].first[thread_id];
_S_bin[bin].first[thread_id] = _S_bin[bin].first[thread_id]->next;
_S_bin[bin].free[thread_id]--;
_S_bin[bin].used[thread_id]++;
}
return (void*)((char*)block + sizeof (block_record));
}
static void
deallocate (void* __p, size_t __n)
{
/*
* Requests larger than _S_max_bytes are handled by malloc/free directly
*/
if (__n > _S_max_bytes)
{
free (__p);
return;
}
/*
* Round up to power of 2 and figure out which bin to use
*/
size_t bin = _S_binmap[__n];
#ifdef __GTHREADS
size_t thread_id = _S_get_thread_id ();
#else
size_t thread_id = 0;
#endif
block_record* block = (block_record*)((char*)__p -
sizeof (block_record));
/*
* This block will always be at the back of a list and thus
* we set its next pointer to NULL.
*/
block->next = NULL;
#ifdef __GTHREADS
if (__gthread_active_p ())
{
/*
* Calculate the number of records to remove from our freelist
*/
int remove = _S_bin[bin].free[thread_id] -
(_S_bin[bin].used[thread_id] / _S_freelist_headroom);
/*
* The calculation above will almost always tell us to remove one
* or two records at a time, but this creates too much contentation
* when locking and therefore we wait until the number of records
* is "high enough".
*/
if (remove > (int)(100 * (_S_no_of_bins - bin)) &&
remove > (int)(_S_bin[bin].free[thread_id] /
_S_freelist_headroom))
{
__gthread_mutex_lock (_S_bin[bin].mutex);
while (remove > 0)
{
if (_S_bin[bin].first[0] == NULL)
_S_bin[bin].first[0] = _S_bin[bin].first[thread_id];
else
_S_bin[bin].last[0]->next = _S_bin[bin].first[thread_id];
_S_bin[bin].last[0] = _S_bin[bin].first[thread_id];
_S_bin[bin].first[thread_id] =
_S_bin[bin].first[thread_id]->next;
_S_bin[bin].free[0]++;
_S_bin[bin].free[thread_id]--;
remove--;
}
_S_bin[bin].last[0]->next = NULL;
__gthread_mutex_unlock (_S_bin[bin].mutex);
}
/*
* Did we allocate this block?
* - Yes, return it to our freelist
* - No, return it to global pool
*/
if (thread_id == block->thread_id)
{
if (_S_bin[bin].first[thread_id] == NULL)
_S_bin[bin].first[thread_id] = block;
else
_S_bin[bin].last[thread_id]->next = block;
_S_bin[bin].last[thread_id] = block;
_S_bin[bin].free[thread_id]++;
_S_bin[bin].used[thread_id]--;
}
else
{
__gthread_mutex_lock (_S_bin[bin].mutex);
if (_S_bin[bin].first[0] == NULL)
_S_bin[bin].first[0] = block;
else
_S_bin[bin].last[0]->next = block;
_S_bin[bin].last[0] = block;
_S_bin[bin].free[0]++;
_S_bin[bin].used[block->thread_id]--;
__gthread_mutex_unlock (_S_bin[bin].mutex);
}
}
else
#endif
{
/*
* Single threaded application - return to global pool
*/
if (_S_bin[bin].first[0] == NULL)
_S_bin[bin].first[0] = block;
else
_S_bin[bin].last[0]->next = block;
_S_bin[bin].last[0] = block;
_S_bin[bin].free[0]++;
_S_bin[bin].used[0]--;
}
}
};
template<int __inst>
void
__mt_alloc<__inst>::
_S_init ()
{
/*
* Calculate the number of bins required based on _S_max_bytes,
* _S_no_of_bins is initialized to 1 below.
*/
{
size_t bin_t = 1;
while (_S_max_bytes > bin_t)
{
bin_t = bin_t << 1;
_S_no_of_bins++;
}
}
/*
* Setup the bin map for quick lookup of the relevant bin
*/
_S_binmap = (binmap_type*)
malloc ((_S_max_bytes + 1) * sizeof (binmap_type));
if (!_S_binmap)
__throw_bad_alloc ();
binmap_type* bp_t = _S_binmap;
binmap_type bin_max_t = 1;
binmap_type bin_t = 0;
for (binmap_type ct = 0; ct <= _S_max_bytes; ct++)
{
if (ct > bin_max_t)
{
bin_max_t <<= 1;
bin_t++;
}
*bp_t++ = bin_t;
}
/*
* If __gthread_active_p () create and initialize the list of free
* thread ids. Single threaded applications use thread id 0 directly
* and have no need for this.
*/
#ifdef __GTHREADS
if (__gthread_active_p ())
{
_S_thread_freelist_first =
(thread_record*)malloc (sizeof (thread_record) * _S_max_threads);
if (!_S_thread_freelist_first)
__throw_bad_alloc ();
/*
* NOTE! The first assignable thread id is 1 since the global
* pool uses id 0
*/
size_t i;
for (i = 1; i < _S_max_threads; i++)
{
_S_thread_freelist_first[i - 1].next =
&_S_thread_freelist_first[i];
_S_thread_freelist_first[i - 1].id = i;
}
/*
* Set last record and pointer to this
*/
_S_thread_freelist_first[i - 1].next = NULL;
_S_thread_freelist_first[i - 1].id = i;
_S_thread_freelist_last = &_S_thread_freelist_first[i - 1];
/*
* Initialize per thread key to hold pointer to _S_thread_freelist
* NOTE! Here's an ugly workaround - if _S_thread_key_destr is
* not explicitly called at least once it won't be linked into
* the application. This is the behavior of template methods and
* __gthread_key_create () takes only a pointer to the function
* and does not cause the compiler to create an instance.
*/
_S_thread_key_destr( NULL );
__gthread_key_create (&_S_thread_key, _S_thread_key_destr);
}
#endif
/*
* Initialize _S_bin and its members
*/
_S_bin = (bin_record*)malloc (sizeof (bin_record) * _S_no_of_bins);
if (!_S_bin)
__throw_bad_alloc ();
for (size_t bin = 0; bin < _S_no_of_bins; bin++)
{
_S_bin[bin].first = (block_record**)
malloc (sizeof (block_record*) * (_S_max_threads + 1));
if (!_S_bin[bin].first)
__throw_bad_alloc ();
_S_bin[bin].last = (block_record**)
malloc (sizeof (block_record*) * (_S_max_threads + 1));
if (!_S_bin[bin].last)
__throw_bad_alloc ();
_S_bin[bin].free = (size_t*)
malloc (sizeof (size_t) * (_S_max_threads + 1));
if (!_S_bin[bin].free)
__throw_bad_alloc ();
_S_bin[bin].used = (size_t*)
malloc (sizeof (size_t) * (_S_max_threads + 1));
if (!_S_bin[bin].used)
__throw_bad_alloc ();
/*
* Ugly workaround of what at the time of writing seems to be
* a parser problem - see PR c++/9779 for more info.
*/
#ifdef __GTHREADS
size_t s = sizeof (__gthread_mutex_t);
_S_bin[bin].mutex = (__gthread_mutex_t*)malloc (s);
if (!_S_bin[bin].mutex)
__throw_bad_alloc ();
/*
* This is not only ugly - it's extremly non-portable!
* However gthr.h does not currently provide a __gthread_mutex_init ()
* call. The correct solution to this problem needs to be discussed.
*/
pthread_mutex_init (_S_bin[bin].mutex, NULL);
#endif
for (size_t thread = 0; thread <= _S_max_threads; thread++)
{
_S_bin[bin].first[thread] = NULL;
_S_bin[bin].last[thread] = NULL;
_S_bin[bin].free[thread] = 0;
_S_bin[bin].used[thread] = 0;
}
}
_S_initialized = true;
}
#ifdef __GTHREADS
template<int __inst>
void
__mt_alloc<__inst>::
_S_thread_key_destr (void* freelist_pos)
{
/*
* This is due to the ugly workaround mentioned in _S_init ()
*/
if (freelist_pos == NULL)
return;
/*
* If the thread - when it dies - still have records on its freelist
* we return them to the global pool here.
*/
for (size_t bin = 0; bin < _S_no_of_bins; bin++)
{
block_record* block =
_S_bin[bin].first[((thread_record*)freelist_pos)->id];
if (block != NULL)
{
__gthread_mutex_lock (_S_bin[bin].mutex);
while (block != NULL)
{
if (_S_bin[bin].first[0] == NULL)
_S_bin[bin].first[0] = block;
else
_S_bin[bin].last[0]->next = block;
_S_bin[bin].last[0] = block;
block = block->next;
_S_bin[bin].free[0]++;
}
_S_bin[bin].last[0]->next = NULL;
__gthread_mutex_unlock (_S_bin[bin].mutex);
}
}
/*
* Return this thread id record to thread_freelist
*/
__gthread_mutex_lock (&_S_thread_freelist_mutex);
_S_thread_freelist_last->next = (thread_record*)freelist_pos;
_S_thread_freelist_last = (thread_record*)freelist_pos;
_S_thread_freelist_last->next = NULL;
__gthread_mutex_unlock (&_S_thread_freelist_mutex);
}
template<int __inst>
size_t
__mt_alloc<__inst>::
_S_get_thread_id ()
{
/*
* If we have thread support and it's active we check the thread key
* value and return it's id or if it's not set we take the first record
* from _S_thread_freelist and sets the key and returns it's id.
*/
if (__gthread_active_p ())
{
thread_record* freelist_pos;
if ((freelist_pos =
(thread_record*)__gthread_getspecific (_S_thread_key)) == NULL)
{
__gthread_mutex_lock (&_S_thread_freelist_mutex);
/*
* Since _S_max_threads must be larger than the theoretical
* max number of threads of the OS the list can never be empty.
*/
freelist_pos = _S_thread_freelist_first;
_S_thread_freelist_first = _S_thread_freelist_first->next;
__gthread_mutex_unlock (&_S_thread_freelist_mutex);
__gthread_setspecific (_S_thread_key, (void*)freelist_pos);
/*
* Since thread_ids may/will be reused (espcially in
* producer/consumer applications) we make sure that
* the list pointers and free counter is reset BUT
* as the "old" thread may still be owner of some
* memory (which is referred to by other threads and
* thus not freed) we don't reset the used counter.
*/
for (size_t bin = 0; bin < _S_no_of_bins; bin++)
{
_S_bin[bin].first[freelist_pos->id] = NULL;
_S_bin[bin].last[freelist_pos->id] = NULL;
_S_bin[bin].free[freelist_pos->id] = 0;
}
}
return freelist_pos->id;
}
/*
* Otherwise (no thread support or inactive) all requests are served from
* the global pool 0.
*/
return 0;
}
template<int __inst> __gthread_once_t
__mt_alloc<__inst>::_S_once_mt = __GTHREAD_ONCE_INIT;
#endif
template<int __inst> bool
__mt_alloc<__inst>::_S_initialized = false;
template<int __inst> typename __mt_alloc<__inst>::binmap_type*
__mt_alloc<__inst>::_S_binmap = NULL;
/*
* Allocation requests (after round-up to power of 2) below this value
* will be handled by the allocator. A raw malloc/free () call will be used
* for requests larger than this value.
*/
template<int __inst> size_t
__mt_alloc<__inst>::_S_max_bytes = 128;
/*
* In order to avoid fragmenting and minimize the number of malloc ()
* calls we always request new memory using this value. Based on
* previous discussions on the libstdc++ mailing list we have choosen
* the value below. See http://gcc.gnu.org/ml/libstdc++/2001-07/msg00077.html
*/
template<int __inst> size_t
__mt_alloc<__inst>::_S_chunk_size = 4096 - 4 * sizeof (void*);
/*
* The maximum number of supported threads. Our Linux 2.4.18 reports
* 4070 in /proc/sys/kernel/threads-max
*/
template<int __inst> size_t
__mt_alloc<__inst>::_S_max_threads = 4096;
/*
* Actual value calculated in _S_init ()
*/
template<int __inst> size_t
__mt_alloc<__inst>::_S_no_of_bins = 1;
/*
* Each time a deallocation occurs in a threaded application we make sure
* that there are no more than _S_freelist_headroom % of used memory on
* the freelist. If the number of additional records is more than
* _S_freelist_headroom % of the freelist, we move these records back to
* the global pool.
*/
template<int __inst> size_t
__mt_alloc<__inst>::_S_freelist_headroom = 10;
/*
* Actual initialization in _S_init ()
*/
#ifdef __GTHREADS
template<int __inst> typename __mt_alloc<__inst>::thread_record*
__mt_alloc<__inst>::_S_thread_freelist_first = NULL;
template<int __inst> typename __mt_alloc<__inst>::thread_record*
__mt_alloc<__inst>::_S_thread_freelist_last = NULL;
template<int __inst> __gthread_mutex_t
__mt_alloc<__inst>::_S_thread_freelist_mutex = __GTHREAD_MUTEX_INIT;
/*
* Actual initialization in _S_init ()
*/
template<int __inst> __gthread_key_t
__mt_alloc<__inst>::_S_thread_key;
#endif
template<int __inst> typename __mt_alloc<__inst>::bin_record*
__mt_alloc<__inst>::_S_bin = NULL;
template<int __inst>
inline bool
operator==(const __mt_alloc<__inst>&,
const __mt_alloc<__inst>&)
{ return true; }
template<int __inst>
inline bool
operator!=(const __mt_alloc<__inst>&,
const __mt_alloc<__inst>&)
{ return false; }
template<typename _Tp, int __inst>
struct _Alloc_traits<_Tp, __mt_alloc<__inst> >
{
static const bool _S_instanceless = true;
typedef __simple_alloc<_Tp, __mt_alloc<__inst> > _Alloc_type;
typedef __allocator<_Tp, __mt_alloc<__inst> > allocator_type;
};
template<typename _Tp, typename _Tp1, int __inst>
struct _Alloc_traits<_Tp,
__allocator<_Tp1, __mt_alloc<__inst> > >
{
static const bool _S_instanceless = true;
typedef __simple_alloc<_Tp, __mt_alloc<__inst> > _Alloc_type;
typedef __allocator<_Tp, __mt_alloc<__inst> > allocator_type;
};
} // namespace std
#endif
/*
* 2003-02-05 Stefan Olsson <stefan@snon.net>
*
* The goal with this application is to compare the performance
* between different STL allocators relative to the default
* __pool_alloc.
*
* The container used for the tests is vector, which as stated by
* SGI "Vector is the simplest of the STL container classes, and in
* many cases the most efficient.".
*
* NOTE! The vector<> container does some "caching" of it's own and
* therefore we redeclare the variable in each iteration (forcing the
* const/destr to be called and thus free memory).
*
* NOTE! The usage of gettimeofday is unwanted since it's not POSIX,
* however I have not found a more generic system call to use -
* ideas are greatly appriciated!
*
* NOTE! This version only does operations on vector<int>. More/other
* data types should maybe also be tested - ideas are always welcome!
*
* I assume that glibc/underlying malloc() implementation has been
* compiled with -O2 thus should this application also be compiled
* with -O2 in order to get relevant results.
*/
#include <vector>
#include <sys/time.h>
#include <ext/mt_alloc.h>
/*
* Make sure that the compiler has thread support...
*/
#if __GTHREADS
using namespace std;
/*
* In order to avoid that the time it takes for the application to
* startup/shutdown affect the end result, we define a target
* duration (in seconds) for which all tests should run.
* Each test is responsible for "calibrating" their # of iterations
* to come as close as possible to this target based on the time
* it takes to complete the test using the default __pool_alloc.
*
* NOTE! In an ideal world where there is enough CPU and no
* contention the application would complete on time, however
* the design of the __pool_alloc allocator and the fact that we
* don't have enough CPU available (normally atleast :) will make the
* actual runtime much longer (at least # of threads * this value)
*/
int target_run_time = 10;
/*
* The number of iterations to be performed in order to figure out
* the "speed" of this computer and adjust the number of iterations
* needed to come close to target_run_time.
*/
int calibrate_iterations = 100000;
/*
* The number of values to insert in the vector, 32 will cause
* 5 (re)allocations to be performed (sizes 4, 8, 16, 32 and 64)
* This means that all allocations are within _MAX_BYTES = 128
* as defined in stl_alloc.h for __pool_alloc.
* Whether or not this value is relevant in "the real world"
* or not I don't know and should probably be investigated in
* more detail.
*/
int insert_values = 32;
/*
* Number of threads to create
*/
int threads = 8;
static struct timeval _tstart, _tend;
static struct timezone tz;
void
tstart (void)
{
gettimeofday (&_tstart, &tz);
}
void
tend (void)
{
gettimeofday (&_tend, &tz);
}
double
tval()
{
double t1, t2;
t1 = (double)_tstart.tv_sec + (double)_tstart.tv_usec/(1000*1000);
t2 = (double)_tend.tv_sec + (double)_tend.tv_usec/(1000*1000);
return t2 - t1;
}
int
calibrate_test_ints (void)
{
tstart ();
for (int i = 0; i < calibrate_iterations; i++)
{
vector<int> v1;
for (int j = 0; j < insert_values; j++)
{
v1.push_back (1);
}
}
tend ();
return (int)((double)target_run_time / tval ()) * calibrate_iterations;
}
void*
test_ints_pool_alloc (void* it)
{
int iterations = *(int*)it;
for (int i = 0; i < iterations; i++)
{
vector<int> v1;
for (int j = 0; j < insert_values; j++)
{
v1.push_back (1);
}
}
return NULL;
}
void*
test_ints_malloc_alloc (void* it)
{
int iterations = *(int*)it;
for (int i = 0; i < iterations; i++)
{
vector<int, __malloc_alloc<0> > v1;
for (int j = 0; j < insert_values; j++)
{
v1.push_back (1);
}
}
return NULL;
}
void*
test_ints_mt_alloc (void* it)
{
int iterations = *(int*)it;
for (int i = 0; i < iterations; i++)
{
vector<int, __mt_alloc<0> > v1;
for (int j = 0; j < insert_values; j++)
{
v1.push_back (1);
}
}
return NULL;
}
int
main (void)
{
printf ("Target run-time: %ds\n", target_run_time);
printf ("Calibrating test_ints... ");
int iterations = calibrate_test_ints ();
printf ("DONE! Will do %d iterations.\n", iterations);
double results[3];
pthread_t workers[threads];
printf ("__pool_alloc... ");
tstart ();
for( int i = 0; i < threads; i++ )
{
pthread_create (&workers[i], NULL, test_ints_pool_alloc, (void*)&iterations);
}
for( int i = 0; i < threads; i++ )
{
pthread_join (workers[i], NULL);
}
tend ();
results[0] = tval ();
printf ("DONE! Time %f\n", results[0]);
printf ("__malloc_alloc... ");
tstart ();
for( int i = 0; i < threads; i++ )
{
pthread_create (&workers[i], NULL, test_ints_malloc_alloc, (void*)&iterations);
}
for( int i = 0; i < threads; i++ )
{
pthread_join (workers[i], NULL);
}
tend ();
results[1] = tval ();
printf ("DONE! Time %f\n", results[1]);
printf ("__mt_alloc... ");
tstart ();
for( int i = 0; i < threads; i++ )
{
pthread_create (&workers[i], NULL, test_ints_mt_alloc, (void*)&iterations);
}
for( int i = 0; i < threads; i++ )
{
pthread_join (workers[i], NULL);
}
tend ();
results[2] = tval ();
printf ("DONE! Time %f\n", results[2]);
printf ("Relative performance to __pool_alloc:\n");
printf ("__malloc_alloc %f %%\n", (results[0] / results[1])*100);
printf ("__mt_alloc %f %%\n", (results[0] / results[2])*100);
return 0;
}
#else
int
main (void)
{
printf ("No thread support, exiting!\n");
return 0;
}
#endif
/*
* 2003-02-05 Stefan Olsson <stefan@snon.net>
*
* The goal with this application is to compare the performance
* between different STL allocators relative to the default
* __pool_alloc.
*
* The container used for the tests is vector, which as stated by
* SGI "Vector is the simplest of the STL container classes, and in
* many cases the most efficient.".
*
* NOTE! The vector<> container does some "caching" of it's own and
* therefore we redeclare the variable in each iteration (forcing the
* const/destr to be called and thus free memory).
*
* NOTE! The usage of gettimeofday is unwanted since it's not POSIX,
* however I have not found a more generic system call to use -
* ideas are greatly appriciated!
*
* NOTE! This version only does operations on vector<int>. More/other
* data types should maybe also be tested - ideas are always welcome!
*
* I assume that glibc/underlying malloc() implementation has been
* compiled with -O2 thus should this application also be compiled
* with -O2 in order to get relevant results.
*/
#include <vector>
#include <sys/time.h>
#include <ext/mt_alloc.h>
using namespace std;
/*
* In order to avoid that the time it takes for the application to
* startup/shutdown affect the end result, we define a target
* duration (in seconds) for which all tests should run.
* Each test is responsible for "calibrating" their # of iterations
* to come as close as possible to this target based on the time
* it takes to complete the test using the default __pool_alloc.
*/
int target_run_time = 10;
/*
* The number of iterations to be performed in order to figure out
* the "speed" of this computer and adjust the number of iterations
* needed to come close to target_run_time.
*/
int calibrate_iterations = 100000;
/*
* The number of values to insert in the vector, 32 will cause
* 5 (re)allocations to be performed (sizes 4, 8, 16, 32 and 64)
* This means that all allocations are within _MAX_BYTES = 128
* as defined in stl_alloc.h for __pool_alloc.
* Whether or not this value is relevant in "the real world"
* or not I don't know and should probably be investigated in
* more detail.
*/
int insert_values = 32;
static struct timeval _tstart, _tend;
static struct timezone tz;
void
tstart (void)
{
gettimeofday (&_tstart, &tz);
}
void
tend (void)
{
gettimeofday (&_tend, &tz);
}
double
tval()
{
double t1, t2;
t1 = (double)_tstart.tv_sec + (double)_tstart.tv_usec/(1000*1000);
t2 = (double)_tend.tv_sec + (double)_tend.tv_usec/(1000*1000);
return t2 - t1;
}
int
calibrate_test_ints (void)
{
tstart ();
for (int i = 0; i < calibrate_iterations; i++)
{
vector<int> v1;
for (int j = 0; j < insert_values; j++)
{
v1.push_back (1);
}
}
tend ();
return (int)((double)target_run_time / tval ()) * calibrate_iterations;
}
double
test_ints_pool_alloc (int iterations)
{
tstart ();
for (int i = 0; i < iterations; i++)
{
vector<int> v1;
for (int j = 0; j < insert_values; j++)
{
v1.push_back (1);
}
}
tend ();
return tval ();
}
double
test_ints_malloc_alloc (int iterations)
{
tstart ();
for (int i = 0; i < iterations; i++)
{
vector<int, __malloc_alloc<0> > v1;
for (int j = 0; j < insert_values; j++)
{
v1.push_back (1);
}
}
tend ();
return tval ();
}
double
test_ints_mt_alloc (int iterations)
{
tstart ();
for (int i = 0; i < iterations; i++)
{
vector<int, __mt_alloc<0> > v1;
for (int j = 0; j < insert_values; j++)
{
v1.push_back (1);
}
}
tend ();
return tval ();
}
int
main (void)
{
printf ("Target run-time: %ds\n", target_run_time);
printf ("Calibrating test_ints... ");
//int iterations = 1000000;
int iterations = calibrate_test_ints ();
printf ("DONE! Will do %d iterations.\n", iterations);
double results[3];
printf ("__pool_alloc... ");
results[0] = test_ints_pool_alloc (iterations);
printf ("DONE! Time %f\n", results[0]);
printf ("__malloc_alloc... ");
results[1] = test_ints_malloc_alloc (iterations);
printf ("DONE! Time %f\n", results[1]);
printf ("__mt_alloc... ");
results[2] = test_ints_mt_alloc (iterations);
printf ("DONE! Time %f\n", results[2]);
printf ("Relative performance to __pool_alloc:\n");
printf ("__malloc_alloc %f %%\n", (results[0] / results[1])*100);
printf ("__mt_alloc %f %%\n", (results[0] / results[2])*100);
return 0;
}
Attachment:
smime.p7s
Description: S/MIME Cryptographic Signature
| Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
|---|---|---|
| Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |