/*
 * Copyright (c) 1996
 * Silicon Graphics Computer Systems, Inc.
 *
 * Permission to use, copy, modify, distribute and sell this software
 * and its documentation for any purpose is hereby granted without fee,
 * provided that the above copyright notice appear in all copies and
 * that both that copyright notice and this permission notice appear
 * in supporting documentation.  Silicon Graphics makes no
 * representations about the suitability of this software for any
 * purpose.  It is provided "as is" without express or implied warranty.
 */
#ifndef __PTHREAD_ALLOC_H
#define __PTHREAD_ALLOC_H
// Pthread-specific node allocator.
// This is similar to the default allocator, except that free-list
// information is kept separately for each thread, avoiding locking.
// This should be reasonably fast even in the presence of threads.
// The down side is that storage may not be well-utilized.
// It is not an error to allocate memory in thread A and deallocate
// it in thread B.  But this effectively transfers ownership of the memory,
// so that it can only be reallocated by thread B.  Thus this can effectively
// result in a storage leak if it's done on a regular basis.
// It can also result in frequent sharing of
// cache lines among processors, with potentially serious performance
// consequences.
40 // Note that this class has nonstatic members. We instantiate it once
43 class __pthread_alloc_template
{
47 enum {MAX_BYTES
= 128}; // power of 2
48 enum {NFREELISTS
= MAX_BYTES
/ALIGN
};
51 union obj
* free_list_link
;
52 char client_data
[ALIGN
]; /* The client sees this. */
56 obj
* volatile free_list
[NFREELISTS
];
57 __pthread_alloc_template
<dummy
>* next
; // Free list link
59 static size_t ROUND_UP(size_t bytes
) {
60 return (((bytes
) + ALIGN
-1) & ~(ALIGN
- 1));
62 static size_t FREELIST_INDEX(size_t bytes
) {
63 return (((bytes
) + ALIGN
-1)/ALIGN
- 1);
66 // Returns an object of size n, and optionally adds to size n free list.
67 void *refill(size_t n
);
68 // Allocates a chunk for nobjs of size size. nobjs may be reduced
69 // if it is inconvenient to allocate the requested number.
70 static char *chunk_alloc(size_t size
, int &nobjs
);
72 // Chunk allocation state. And other shared state.
73 // Protected by chunk_allocator_lock.
74 static pthread_mutex_t chunk_allocator_lock
;
75 static char *start_free
;
76 static char *end_free
;
77 static size_t heap_size
;
78 static __pthread_alloc_template
<dummy
>* free_allocators
;
79 static pthread_key_t key
;
80 static bool key_initialized
;
81 // Pthread key under which allocator is stored.
82 // Allocator instances that are currently unclaimed by any thread.
83 static void destructor(void *instance
);
84 // Function to be called on thread exit to reclaim allocator
86 static __pthread_alloc_template
<dummy
> *new_allocator();
87 // Return a recycled or new allocator instance.
88 static __pthread_alloc_template
<dummy
> *get_allocator_instance();
89 // ensure that the current thread has an associated
90 // allocator instance.
93 lock () { pthread_mutex_lock(&chunk_allocator_lock
); }
94 ~lock () { pthread_mutex_unlock(&chunk_allocator_lock
); }
101 __pthread_alloc_template() : next(0)
103 memset((void *)free_list
, 0, NFREELISTS
* sizeof(obj
*));
107 static void * allocate(size_t n
)
109 obj
* volatile * my_free_list
;
110 obj
* __RESTRICT result
;
111 __pthread_alloc_template
<dummy
>* a
;
116 if (!key_initialized
||
117 !(a
= (__pthread_alloc_template
<dummy
>*)
118 pthread_getspecific(key
))) {
119 a
= get_allocator_instance();
121 my_free_list
= a
-> free_list
+ FREELIST_INDEX(n
);
122 result
= *my_free_list
;
124 void *r
= a
-> refill(ROUND_UP(n
));
127 *my_free_list
= result
-> free_list_link
;
132 static void deallocate(void *p
, size_t n
)
135 obj
* volatile * my_free_list
;
136 __pthread_alloc_template
<dummy
>* a
;
142 if (!key_initialized
||
143 !(a
= (__pthread_alloc_template
<dummy
>*)
144 pthread_getspecific(key
))) {
145 a
= get_allocator_instance();
147 my_free_list
= a
->free_list
+ FREELIST_INDEX(n
);
148 q
-> free_list_link
= *my_free_list
;
152 static void * reallocate(void *p
, size_t old_sz
, size_t new_sz
);
156 typedef __pthread_alloc_template
<false> pthread_alloc
;
159 template <bool dummy
>
160 void __pthread_alloc_template
<dummy
>::destructor(void * instance
)
162 __pthread_alloc_template
<dummy
>* a
=
163 (__pthread_alloc_template
<dummy
>*)instance
;
164 a
-> next
= free_allocators
;
168 template <bool dummy
>
169 __pthread_alloc_template
<dummy
>*
170 __pthread_alloc_template
<dummy
>::new_allocator()
172 if (0 != free_allocators
) {
173 __pthread_alloc_template
<dummy
>* result
= free_allocators
;
174 free_allocators
= free_allocators
-> next
;
177 return new __pthread_alloc_template
<dummy
>;
181 template <bool dummy
>
182 __pthread_alloc_template
<dummy
>*
183 __pthread_alloc_template
<dummy
>::get_allocator_instance()
185 __pthread_alloc_template
<dummy
>* result
;
186 if (!key_initialized
) {
189 if (!key_initialized
) {
190 if (pthread_key_create(&key
, destructor
)) {
193 key_initialized
= true;
196 result
= new_allocator();
197 if (pthread_setspecific(key
, result
)) abort();
201 /* We allocate memory in large chunks in order to avoid fragmenting */
202 /* the malloc heap too much. */
203 /* We assume that size is properly aligned. */
204 template <bool dummy
>
205 char *__pthread_alloc_template
<dummy
>
206 ::chunk_alloc(size_t size
, int &nobjs
)
213 lock lock_instance
; // Acquire lock for this routine
215 total_bytes
= size
* nobjs
;
216 bytes_left
= end_free
- start_free
;
217 if (bytes_left
>= total_bytes
) {
219 start_free
+= total_bytes
;
221 } else if (bytes_left
>= size
) {
222 nobjs
= bytes_left
/size
;
223 total_bytes
= size
* nobjs
;
225 start_free
+= total_bytes
;
228 size_t bytes_to_get
= 2 * total_bytes
+ ROUND_UP(heap_size
>> 4);
229 // Try to make use of the left-over piece.
230 if (bytes_left
> 0) {
231 __pthread_alloc_template
<dummy
>* a
=
232 (__pthread_alloc_template
<dummy
>*)pthread_getspecific(key
);
233 obj
* volatile * my_free_list
=
234 a
->free_list
+ FREELIST_INDEX(bytes_left
);
236 ((obj
*)start_free
) -> free_list_link
= *my_free_list
;
237 *my_free_list
= (obj
*)start_free
;
240 // Try to get memory that's aligned on something like a
241 // cache line boundary, so as to avoid parceling out
242 // parts of the same line to different threads and thus
243 // possibly different processors.
245 const int cache_line_size
= 128; // probable upper bound
246 bytes_to_get
&= ~(cache_line_size
-1);
247 start_free
= (char *)memalign(cache_line_size
, bytes_to_get
);
248 if (0 == start_free
) {
249 start_free
= (char *)malloc_alloc::allocate(bytes_to_get
);
252 # else /* !SGI_SOURCE */
253 start_free
= (char *)malloc_alloc::allocate(bytes_to_get
);
255 heap_size
+= bytes_to_get
;
256 end_free
= start_free
+ bytes_to_get
;
259 // lock is released here
260 return(chunk_alloc(size
, nobjs
));
264 /* Returns an object of size n, and optionally adds to size n free list.*/
265 /* We assume that n is properly aligned. */
266 /* We hold the allocation lock. */
267 template <bool dummy
>
268 void *__pthread_alloc_template
<dummy
>
272 char * chunk
= chunk_alloc(n
, nobjs
);
273 obj
* volatile * my_free_list
;
275 obj
* current_obj
, * next_obj
;
281 my_free_list
= free_list
+ FREELIST_INDEX(n
);
283 /* Build free list in chunk */
284 result
= (obj
*)chunk
;
285 *my_free_list
= next_obj
= (obj
*)(chunk
+ n
);
287 current_obj
= next_obj
;
288 next_obj
= (obj
*)((char *)next_obj
+ n
);
289 if (nobjs
- 1 == i
) {
290 current_obj
-> free_list_link
= 0;
293 current_obj
-> free_list_link
= next_obj
;
299 template <bool dummy
>
300 void *__pthread_alloc_template
<dummy
>
301 ::reallocate(void *p
, size_t old_sz
, size_t new_sz
)
306 if (old_sz
> MAX_BYTES
&& new_sz
> MAX_BYTES
) {
307 return(realloc(p
, new_sz
));
309 if (ROUND_UP(old_sz
) == ROUND_UP(new_sz
)) return(p
);
310 result
= allocate(new_sz
);
311 copy_sz
= new_sz
> old_sz
? old_sz
: new_sz
;
312 memcpy(result
, p
, copy_sz
);
313 deallocate(p
, old_sz
);
317 template <bool dummy
>
318 __pthread_alloc_template
<dummy
> *
319 __pthread_alloc_template
<dummy
>::free_allocators
= 0;
321 template <bool dummy
>
322 pthread_key_t __pthread_alloc_template
<dummy
>::key
;
324 template <bool dummy
>
325 bool __pthread_alloc_template
<dummy
>::key_initialized
= false;
327 template <bool dummy
>
328 pthread_mutex_t __pthread_alloc_template
<dummy
>::chunk_allocator_lock
329 = PTHREAD_MUTEX_INITIALIZER
;
331 template <bool dummy
>
332 char *__pthread_alloc_template
<dummy
>
335 template <bool dummy
>
336 char *__pthread_alloc_template
<dummy
>
339 template <bool dummy
>
340 size_t __pthread_alloc_template
<dummy
>
#endif /* __PTHREAD_ALLOC_H */