This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [RFC PATCH] Optimized __cxa_guard_{acquire,release,abort} for Linux


On Dec 31, 2007 2:39 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> While backporting the guard deadlock fix to 4.1, I have noticed that
> on Linux it IMHO uses global mutex (and conditional variable) completely
> unnecessarily, which hurts parallelism if multiple threads initialize
> different static local vars which need guards.
> As shown below, all we IMHO need is futex syscall (just
> FUTEX_WAIT/FUTEX_WAKE, so any Linux kernel with futex syscall will do)
> and supported __sync_val_compare_and_swap.
>
> __guard is 64-bit var, where we only use first byte (0 uninitialized,
> 1 initialized) and second byte (0 init not pending, 1 pending) if __GTHREAD
> (well, on arm eabi first byte is actually 4th byte if big endian).
> With atomic builtins we can update both these flags together atomically.  The patch
> below adds (as pure optimization) another bit to the second byte, to avoid
> any syscalls if no other thread calls __cxa_guard_acquire before the first
> thread that called it for a particular guard var calls __cxa_guard_release.
> We then have 4 valid states.  __cxa_guard_acquire can atomically transition
> from 0 to pending (in this case it won the race and the calling thread
> will do the initialization) or from pending to pending|waiting (in this
> case it will FUTEX_WAIT until guard is set, or it is aborted back to 0).
> __cxa_guard_release will atomically transition from pending to guard
> or pending|waiting to guard (in the latter case FUTEX_WAIT is used to wake all
> waiters).  __cxa_guard_abort will transition pending to 0 or pending|waiting
> to guard (in the latter case FUTEX_WAIT is used to wake all waiters).
>
> On x86_64-linux, if _GLIBCXX_HAVE_FUTEX is defined .text section decreases
> by 1168 bytes and .bss by 160 bytes, so it also decreases libstdc++.so or
> libsupc++.a footprint.
>
> The patch below doesn't include the configury bits yet (they might be very
> similar to enable_linux_futex stuff in libgomp configury).
> Would this be acceptable with the configury bits in even for 4.3 (could be
> e.g. only enabled if explicitly requested with --enable-linux-futex there),
> or just 4.4+?

This would also "fix" PR33960, right?  So at least for me it would be
worthwhile to
do for 4.3 ;)

Thanks,
Richard.

> 2007-12-31  Jakub Jelinek  <jakub@redhat.com>
>
>         * config/cpu/generic/cxxabi_tweaks.h (_GLIBCXX_GUARD_BIT,
>         _GLIBCXX_GUARD_PENDING_BIT, _GLIBCXX_GUARD_WAITING_BIT): Define.
>         * config/cpu/arm/cxxabi_tweaks.h (_GLIBCXX_GUARD_BIT,
>         _GLIBCXX_GUARD_PENDING_BIT, _GLIBCXX_GUARD_WAITING_BIT): Define.
>         * libsupc++/guard.cc: Include climits and syscall.h.
>         (_GLIBCXX_USE_FUTEX): Define if futex syscall and atomic builtins
>         are supported.
>         (_GLIBCXX_FUTEX_WAIT, _GLIBCXX_FUTEX_WAKE): Likewise.
>         (__guard_test_bit): New static inline.
>         (__cxa_guard_acquire, __cxa_guard_release, __cxa_guard_abort): Use
>         atomic builtins and futex syscall if _GLIBCXX_USE_FUTEX.
>
> --- libstdc++-v3/config/cpu/arm/cxxabi_tweaks.h.jj      2006-12-08 15:58:16.000000000 +0100
> +++ libstdc++-v3/config/cpu/arm/cxxabi_tweaks.h 2007-12-31 11:45:37.000000000 +0100
> @@ -1,6 +1,6 @@
>  // Control various target specific ABI tweaks.  ARM version.
>
> -// Copyright (C) 2004, 2006 Free Software Foundation, Inc.
> +// Copyright (C) 2004, 2006, 2007 Free Software Foundation, Inc.
>  //
>  // This file is part of the GNU ISO C++ Library.  This library is free
>  // software; you can redistribute it and/or modify it under the
> @@ -46,6 +46,9 @@ namespace __cxxabiv1
>    // guard variable.  */
>  #define _GLIBCXX_GUARD_TEST(x) ((*(x) & 1) != 0)
>  #define _GLIBCXX_GUARD_SET(x) *(x) = 1
> +#define _GLIBCXX_GUARD_BIT 1
> +#define _GLIBCXX_GUARD_PENDING_BIT __guard_test_bit (1, 1)
> +#define _GLIBCXX_GUARD_WAITING_BIT __guard_test_bit (1, 2)
>    typedef int __guard;
>
>    // We also want the element size in array cookies.
> @@ -62,6 +65,9 @@ namespace __cxxabiv1
>    // The generic ABI uses the first byte of a 64-bit guard variable.
>  #define _GLIBCXX_GUARD_TEST(x) (*(char *) (x) != 0)
>  #define _GLIBCXX_GUARD_SET(x) *(char *) (x) = 1
> +#define _GLIBCXX_GUARD_BIT __guard_test_bit (0, 1)
> +#define _GLIBCXX_GUARD_PENDING_BIT __guard_test_bit (1, 1)
> +#define _GLIBCXX_GUARD_WAITING_BIT __guard_test_bit (1, 2)
>    __extension__ typedef int __guard __attribute__((mode (__DI__)));
>
>    // __cxa_vec_ctor has void return type.
> --- libstdc++-v3/config/cpu/generic/cxxabi_tweaks.h.jj  2006-12-08 15:58:16.000000000 +0100
> +++ libstdc++-v3/config/cpu/generic/cxxabi_tweaks.h     2007-12-31 11:45:44.000000000 +0100
> @@ -1,6 +1,6 @@
>  // Control various target specific ABI tweaks.  Generic version.
>
> -// Copyright (C) 2004, 2006 Free Software Foundation, Inc.
> +// Copyright (C) 2004, 2006, 2007 Free Software Foundation, Inc.
>  //
>  // This file is part of the GNU ISO C++ Library.  This library is free
>  // software; you can redistribute it and/or modify it under the
> @@ -44,6 +44,9 @@ namespace __cxxabiv1
>    // The generic ABI uses the first byte of a 64-bit guard variable.
>  #define _GLIBCXX_GUARD_TEST(x) (*(char *) (x) != 0)
>  #define _GLIBCXX_GUARD_SET(x) *(char *) (x) = 1
> +#define _GLIBCXX_GUARD_BIT __guard_test_bit (0, 1)
> +#define _GLIBCXX_GUARD_PENDING_BIT __guard_test_bit (1, 1)
> +#define _GLIBCXX_GUARD_WAITING_BIT __guard_test_bit (1, 2)
>    __extension__ typedef int __guard __attribute__((mode (__DI__)));
>
>    // __cxa_vec_ctor has void return type.
> --- libstdc++-v3/libsupc++/guard.cc.jj  2007-10-11 10:54:25.000000000 +0200
> +++ libstdc++-v3/libsupc++/guard.cc     2007-12-31 12:54:24.000000000 +0100
> @@ -35,12 +35,21 @@
>  #include <new>
>  #include <ext/atomicity.h>
>  #include <ext/concurrence.h>
> +#if defined(__GTHREADS) && defined(__GTHREAD_HAS_COND) \
> +    && defined(_GLIBCXX_ATOMIC_BUILTINS) && defined(_GLIBCXX_HAVE_FUTEX)
> +# include <climits>
> +# include <syscall.h>
> +# define _GLIBCXX_USE_FUTEX
> +# define _GLIBCXX_FUTEX_WAIT 0
> +# define _GLIBCXX_FUTEX_WAKE 1
> +#endif
>
>  // The IA64/generic ABI uses the first byte of the guard variable.
>  // The ARM EABI uses the least significant bit.
>
>  // Thread-safe static local initialization support.
>  #ifdef __GTHREADS
> +# ifndef _GLIBCXX_USE_FUTEX
>  namespace
>  {
>    // A single mutex controlling all static initializations.
> @@ -75,8 +84,9 @@ namespace
>      }
>    };
>  }
> +# endif
>
> -#ifdef __GTHREAD_HAS_COND
> +# if defined(__GTHREAD_HAS_COND) && !defined(_GLIBCXX_USE_FUTEX)
>  namespace
>  {
>    // A single conditional variable controlling all static initializations.
> @@ -98,9 +108,9 @@ namespace
>      return *static_cond;
>    }
>  }
> -#endif
> +# endif
>
> -#ifndef _GLIBCXX_GUARD_TEST_AND_ACQUIRE
> +# ifndef _GLIBCXX_GUARD_TEST_AND_ACQUIRE
>  inline bool
>  __test_and_acquire (__cxxabiv1::__guard *g)
>  {
> @@ -108,24 +118,24 @@ __test_and_acquire (__cxxabiv1::__guard
>    _GLIBCXX_READ_MEM_BARRIER;
>    return b;
>  }
> -#define _GLIBCXX_GUARD_TEST_AND_ACQUIRE(G) __test_and_acquire (G)
> -#endif
> +#  define _GLIBCXX_GUARD_TEST_AND_ACQUIRE(G) __test_and_acquire (G)
> +# endif
>
> -#ifndef _GLIBCXX_GUARD_SET_AND_RELEASE
> +# ifndef _GLIBCXX_GUARD_SET_AND_RELEASE
>  inline void
>  __set_and_release (__cxxabiv1::__guard *g)
>  {
>    _GLIBCXX_WRITE_MEM_BARRIER;
>    _GLIBCXX_GUARD_SET (g);
>  }
> -#define _GLIBCXX_GUARD_SET_AND_RELEASE(G) __set_and_release (G)
> -#endif
> +#  define _GLIBCXX_GUARD_SET_AND_RELEASE(G) __set_and_release (G)
> +# endif
>
>  #else /* !__GTHREADS */
>
> -#undef _GLIBCXX_GUARD_TEST_AND_ACQUIRE
> -#undef _GLIBCXX_GUARD_SET_AND_RELEASE
> -#define _GLIBCXX_GUARD_SET_AND_RELEASE(G) _GLIBCXX_GUARD_SET (G)
> +# undef _GLIBCXX_GUARD_TEST_AND_ACQUIRE
> +# undef _GLIBCXX_GUARD_SET_AND_RELEASE
> +# define _GLIBCXX_GUARD_SET_AND_RELEASE(G) _GLIBCXX_GUARD_SET (G)
>
>  #endif /* __GTHREADS */
>
> @@ -176,8 +186,35 @@ namespace __gnu_cxx
>  // headers define a symbol __GTHREAD_HAS_COND for platforms that support POSIX
>  // like conditional variables. For platforms that do not support conditional
>  // variables, we need to fall back to the old code.
> +
> +// If _GLIBCXX_USE_FUTEX, no global mutex or conditional variable is used,
> +// only atomic operations are used together with futex syscall.
> +// Valid values of the first integer in guard are:
> +// 0                             No thread encountered the guarded init
> +//                               yet or it has been aborted.
> +// _GLIBCXX_GUARD_BIT            The guarded static var has been successfully
> +//                               initialized.
> +// _GLIBCXX_GUARD_PENDING_BIT    The guarded static var is being initialized
> +//                               and no other thread is waiting for its
> +//                               initialization.
> +// (_GLIBCXX_GUARD_PENDING_BIT    The guarded static var is being initialized
> +//  | _GLIBCXX_GUARD_WAITING_BIT) and some other threads are waiting until
> +//                               it is initialized.
> +
>  namespace __cxxabiv1
>  {
> +#ifdef _GLIBCXX_USE_FUTEX
> +  namespace
> +  {
> +    static inline int __guard_test_bit (const int __byte, const int __val)
> +    {
> +      union { int __i; char __c[sizeof (int)]; } __u = { 0 };
> +      __u.__c[__byte] = __val;
> +      return __u.__i;
> +    }
> +  }
> +#endif
> +
>    static inline int
>    init_in_progress_flag(__guard* g)
>    { return ((char *)g)[1]; }
> @@ -207,7 +244,7 @@ namespace __cxxabiv1
>        return 0;
>
>      if (init_in_progress_flag(g))
> -       throw_recursive_init_exception();
> +      throw_recursive_init_exception();
>
>      set_init_in_progress_flag(g, 1);
>      return 1;
> @@ -223,14 +260,46 @@ namespace __cxxabiv1
>      if (_GLIBCXX_GUARD_TEST_AND_ACQUIRE (g))
>        return 0;
>
> +# ifdef _GLIBCXX_USE_FUTEX
> +    // If __sync_* and futex syscall are supported, don't use any global
> +    // mutex.
> +    if (__gthread_active_p ())
> +      {
> +       int *gi = (int *) (void *) g;
> +       const int guard_bit = _GLIBCXX_GUARD_BIT;
> +       const int pending_bit = _GLIBCXX_GUARD_PENDING_BIT;
> +       const int waiting_bit = _GLIBCXX_GUARD_WAITING_BIT;
> +
> +       while (1)
> +         {
> +           int old = __sync_val_compare_and_swap (gi, 0, pending_bit);
> +           if (old == 0)
> +             return 1; // This thread should do the initialization.
> +
> +           if (old == guard_bit)
> +             return 0; // Already initialized.
> +
> +           if (old == pending_bit)
> +             {
> +               int newv = old | waiting_bit;
> +               if (__sync_val_compare_and_swap (gi, old, newv) != old)
> +                 continue;
> +
> +               old = newv;
> +             }
> +
> +           syscall (SYS_futex, gi, _GLIBCXX_FUTEX_WAIT, old, 0);
> +         }
> +      }
> +# else
>      if (__gthread_active_p ())
>        {
>         mutex_wrapper mw;
>
>         while (1)       // When this loop is executing, mutex is locked.
>           {
> -#ifdef __GTHREAD_HAS_COND
> -           // The static is allready initialized.
> +#  ifdef __GTHREAD_HAS_COND
> +           // The static is already initialized.
>             if (_GLIBCXX_GUARD_TEST(g))
>               return 0; // The mutex will be unlocked via wrapper
>
> @@ -247,7 +316,7 @@ namespace __cxxabiv1
>                 set_init_in_progress_flag(g, 1);
>                 return 1; // The mutex will be unlocked via wrapper.
>               }
> -#else
> +#  else
>             // This provides compatibility with older systems not supporting
>             // POSIX like conditional variables.
>             if (acquire(g))
> @@ -256,9 +325,10 @@ namespace __cxxabiv1
>                 return 1; // The mutex still locked.
>               }
>             return 0; // The mutex will be unlocked via wrapper.
> -#endif
> +#  endif
>           }
>        }
> +# endif
>  #endif
>
>      return acquire (g);
> @@ -267,7 +337,35 @@ namespace __cxxabiv1
>    extern "C"
>    void __cxa_guard_abort (__guard *g)
>    {
> -#ifdef __GTHREAD_HAS_COND
> +#ifdef _GLIBCXX_USE_FUTEX
> +    // If __sync_* and futex syscall are supported, don't use any global
> +    // mutex.
> +    if (__gthread_active_p ())
> +      {
> +       int *gi = (int *) (void *) g;
> +       const int pending_bit = _GLIBCXX_GUARD_PENDING_BIT;
> +       const int waiting_bit = _GLIBCXX_GUARD_WAITING_BIT;
> +       int old;
> +
> +       old = __sync_val_compare_and_swap (gi, pending_bit, 0);
> +       if (old == (pending_bit | waiting_bit))
> +         {
> +           while (1)
> +             {
> +               int newv = old & ~(pending_bit | waiting_bit);
> +               int oldv = __sync_val_compare_and_swap (gi, old, newv);
> +
> +               if (oldv == old)
> +                 break;
> +               if ((oldv & pending_bit) == 0)
> +                 break;
> +               old = oldv;
> +             }
> +           syscall (SYS_futex, gi, _GLIBCXX_FUTEX_WAKE, INT_MAX);
> +         }
> +       return;
> +      }
> +#elif defined(__GTHREAD_HAS_COND)
>      if (__gthread_active_p())
>        {
>         mutex_wrapper mw;
> @@ -293,7 +391,33 @@ namespace __cxxabiv1
>    extern "C"
>    void __cxa_guard_release (__guard *g)
>    {
> -#ifdef __GTHREAD_HAS_COND
> +#ifdef _GLIBCXX_USE_FUTEX
> +    // If __sync_* and futex syscall are supported, don't use any global
> +    // mutex.
> +    if (__gthread_active_p ())
> +      {
> +       int *gi = (int *) (void *) g;
> +       const int guard_bit = _GLIBCXX_GUARD_BIT;
> +       const int pending_bit = _GLIBCXX_GUARD_PENDING_BIT;
> +       const int waiting_bit = _GLIBCXX_GUARD_WAITING_BIT;
> +       int old;
> +
> +       old = __sync_val_compare_and_swap (gi, pending_bit, guard_bit);
> +       if (old == (pending_bit | waiting_bit))
> +         {
> +           while (1)
> +             {
> +               int oldv = __sync_val_compare_and_swap (gi, old, guard_bit);
> +
> +               if (oldv == old || oldv == guard_bit)
> +                 break;
> +               old = oldv;
> +             }
> +           syscall (SYS_futex, gi, _GLIBCXX_FUTEX_WAKE, INT_MAX);
> +         }
> +       return;
> +      }
> +#elif defined(__GTHREAD_HAS_COND)
>      if (__gthread_active_p())
>        {
>         mutex_wrapper mw;
>
>         Jakub
>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]