This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug target/81501] New: Unnecessary calls to __tls_get_addr() in simple thread-singleton pattern


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81501

            Bug ID: 81501
           Summary: Unnecessary calls to __tls_get_addr() in simple
                    thread-singleton pattern
           Product: gcc
           Version: 7.1.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: jak@jak-linux.org
  Target Milestone: ---

I only tested this on amd64, but see for yourself:

+ cat t.cc
struct foo {
    foo();
    ~foo();
};

foo *test() {
    static thread_local foo foo_tls;
    return &foo_tls;
}
+ g++-7 -std=c++14 -v -pthread -fPIC -shared -O2 -o gcc.so t.cc
Using built-in specs.
COLLECT_GCC=/usr/bin/g++-7
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/7/lto-wrapper
OFFLOAD_TARGET_NAMES=nvptx-none
OFFLOAD_TARGET_DEFAULT=1
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Debian 7.1.0-9'
--with-bugurl=file:///usr/share/doc/gcc-7/README.Bugs
--enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++ --prefix=/usr
--with-gcc-major-version-only --program-suffix=-7
--program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id
--libexecdir=/usr/lib --without-included-gettext --enable-threads=posix
--libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu
--enable-libstdcxx-debug --enable-libstdcxx-time=yes
--with-default-libstdcxx-abi=new --enable-gnu-unique-object
--disable-vtable-verify --enable-libmpx --enable-plugin --enable-default-pie
--with-system-zlib --with-target-system-zlib --enable-objc-gc=auto
--enable-multiarch --disable-werror --with-arch-32=i686 --with-abi=m64
--with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic
--enable-offload-targets=nvptx-none --without-cuda-driver
--enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu
--target=x86_64-linux-gnu
Thread model: posix
gcc version 7.1.0 (Debian 7.1.0-9) 
COLLECT_GCC_OPTIONS='-std=c++14' '-v' '-pthread' '-fPIC' '-shared' '-O2' '-o'
'gcc.so' '-shared-libgcc' '-mtune=generic' '-march=x86-64'
 /usr/lib/gcc/x86_64-linux-gnu/7/cc1plus -quiet -v -imultiarch x86_64-linux-gnu
-D_GNU_SOURCE -D_REENTRANT t.cc -quiet -dumpbase t.cc -mtune=generic
-march=x86-64 -auxbase t -O2 -std=c++14 -version -fPIC -o /tmp/ccdUrCDS.s
GNU C++14 (Debian 7.1.0-9) version 7.1.0 (x86_64-linux-gnu)
        compiled by GNU C version 7.1.0, GMP version 6.1.2, MPFR version 3.1.5,
MPC version 1.0.3, isl version isl-0.18-GMP

GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
ignoring duplicate directory "/usr/include/x86_64-linux-gnu/c++/7"
ignoring nonexistent directory "/usr/local/include/x86_64-linux-gnu"
ignoring nonexistent directory
"/usr/lib/gcc/x86_64-linux-gnu/7/../../../../x86_64-linux-gnu/include"
#include "..." search starts here:
#include <...> search starts here:
 /usr/include/c++/7
 /usr/include/x86_64-linux-gnu/c++/7
 /usr/include/c++/7/backward
 /usr/lib/gcc/x86_64-linux-gnu/7/include
 /usr/local/include
 /usr/lib/gcc/x86_64-linux-gnu/7/include-fixed
 /usr/include/x86_64-linux-gnu
 /usr/include
End of search list.
GNU C++14 (Debian 7.1.0-9) version 7.1.0 (x86_64-linux-gnu)
        compiled by GNU C version 7.1.0, GMP version 6.1.2, MPFR version 3.1.5,
MPC version 1.0.3, isl version isl-0.18-GMP

GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
Compiler executable checksum: 3681302eda59faba4e53a905eca4bf72
COLLECT_GCC_OPTIONS='-std=c++14' '-v' '-pthread' '-fPIC' '-shared' '-O2' '-o'
'gcc.so' '-shared-libgcc' '-mtune=generic' '-march=x86-64'
 as -v --64 -o /tmp/ccI2B3TO.o /tmp/ccdUrCDS.s
GNU assembler version 2.28 (x86_64-linux-gnu) using BFD version (GNU Binutils
for Debian) 2.28
COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/7/:/usr/lib/gcc/x86_64-linux-gnu/7/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/7/:/usr/lib/gcc/x86_64-linux-gnu/
LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/7/:/usr/lib/gcc/x86_64-linux-gnu/7/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/7/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/7/../../../:/lib/:/usr/lib/
COLLECT_GCC_OPTIONS='-std=c++14' '-v' '-pthread' '-fPIC' '-shared' '-O2' '-o'
'gcc.so' '-shared-libgcc' '-mtune=generic' '-march=x86-64'
 /usr/lib/gcc/x86_64-linux-gnu/7/collect2 -plugin
/usr/lib/gcc/x86_64-linux-gnu/7/liblto_plugin.so
-plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/7/lto-wrapper
-plugin-opt=-fresolution=/tmp/cc9S0zbL.res -plugin-opt=-pass-through=-lgcc_s
-plugin-opt=-pass-through=-lpthread -plugin-opt=-pass-through=-lc
-plugin-opt=-pass-through=-lgcc_s --sysroot=/ --build-id --eh-frame-hdr -m
elf_x86_64 --hash-style=gnu -shared -o gcc.so
/usr/lib/gcc/x86_64-linux-gnu/7/../../../x86_64-linux-gnu/crti.o
/usr/lib/gcc/x86_64-linux-gnu/7/crtbeginS.o -L/usr/lib/gcc/x86_64-linux-gnu/7
-L/usr/lib/gcc/x86_64-linux-gnu/7/../../../x86_64-linux-gnu
-L/usr/lib/gcc/x86_64-linux-gnu/7/../../../../lib -L/lib/x86_64-linux-gnu
-L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib
-L/usr/lib/gcc/x86_64-linux-gnu/7/../../.. /tmp/ccI2B3TO.o -lstdc++ -lm -lgcc_s
-lpthread -lc -lgcc_s /usr/lib/gcc/x86_64-linux-gnu/7/crtendS.o
/usr/lib/gcc/x86_64-linux-gnu/7/../../../x86_64-linux-gnu/crtn.o
COLLECT_GCC_OPTIONS='-std=c++14' '-v' '-pthread' '-fPIC' '-shared' '-O2' '-o'
'gcc.so' '-shared-libgcc' '-mtune=generic' '-march=x86-64'
+ gdb -q -ex disassemble test -ex quit gcc.so
Reading symbols from gcc.so...(no debugging symbols found)...done.
Dump of assembler code for function _Z4testv:
   0x00000000000007f0 <+0>:     push   %rbx
   0x00000000000007f1 <+1>:     sub    $0x10,%rsp
   0x00000000000007f5 <+5>:     lea    0x2007cc(%rip),%rdi        # 0x200fc8
   0x00000000000007fc <+12>:    callq  0x6e0 <__tls_get_addr@plt>
   0x0000000000000801 <+17>:    cmpb   $0x0,0x0(%rax)
   0x0000000000000808 <+24>:    jne    0x840 <_Z4testv+80>
   0x000000000000080a <+26>:    lea    0x8(%rax),%rbx
   0x0000000000000811 <+33>:    mov    %rax,0x8(%rsp)
   0x0000000000000816 <+38>:    mov    %rbx,%rdi
   0x0000000000000819 <+41>:    callq  0x6d0 <_ZN3fooC1Ev@plt>
   0x000000000000081e <+46>:    mov    0x8(%rsp),%rax
   0x0000000000000823 <+51>:    mov    0x2007b6(%rip),%rdi        # 0x200fe0
   0x000000000000082a <+58>:    lea    0x2007ff(%rip),%rdx        # 0x201030
   0x0000000000000831 <+65>:    mov    %rbx,%rsi
   0x0000000000000834 <+68>:    movb   $0x1,0x0(%rax)
   0x000000000000083b <+75>:    callq  0x6f0 <__cxa_thread_atexit@plt>
   0x0000000000000840 <+80>:    lea    0x200781(%rip),%rdi        # 0x200fc8
   0x0000000000000847 <+87>:    callq  0x6e0 <__tls_get_addr@plt>
   0x000000000000084c <+92>:    add    $0x10,%rsp
   0x0000000000000850 <+96>:    add    $0x8,%rax
   0x0000000000000856 <+102>:   pop    %rbx
   0x0000000000000857 <+103>:   retq   
End of assembler dump.


As you can see, after the first call to __tls_get_addr(), both the jne at +24
and the fall-through path reach a second call to __tls_get_addr() at +80. The
address should only need to be fetched once here, as clang does:

+ clang++ -std=c++14 -pthread -fPIC -shared -O2 -o clang.so t.cc
+ gdb -q -ex disassemble test -ex quit clang.so
Reading symbols from clang.so...(no debugging symbols found)...done.
Dump of assembler code for function _Z4testv:
   0x00000000000007a0 <+0>:     push   %r14
   0x00000000000007a2 <+2>:     push   %rbx
   0x00000000000007a3 <+3>:     push   %rax
   0x00000000000007a4 <+4>:     lea    0x20081d(%rip),%rdi        # 0x200fc8
   0x00000000000007ab <+11>:    callq  0x690 <__tls_get_addr@plt>
   0x00000000000007b0 <+16>:    mov    %rax,%rbx
   0x00000000000007b3 <+19>:    mov    0x1(%rax),%al
   0x00000000000007b9 <+25>:    and    $0x1,%al
   0x00000000000007bb <+27>:    jne    0x7ef <_Z4testv+79>
   0x00000000000007bd <+29>:    mov    %rbx,%rax
   0x00000000000007c0 <+32>:    lea    0x0(%rax),%r14
   0x00000000000007c7 <+39>:    mov    %r14,%rdi
   0x00000000000007ca <+42>:    callq  0x680 <_ZN3fooC1Ev@plt>
   0x00000000000007cf <+47>:    mov    0x20080a(%rip),%rdi        # 0x200fe0
   0x00000000000007d6 <+54>:    lea    0x200853(%rip),%rdx        # 0x201030
   0x00000000000007dd <+61>:    mov    %r14,%rsi
   0x00000000000007e0 <+64>:    callq  0x6a0 <__cxa_thread_atexit@plt>
   0x00000000000007e5 <+69>:    mov    %rbx,%rax
   0x00000000000007e8 <+72>:    movb   $0x1,0x1(%rax)
   0x00000000000007ef <+79>:    mov    %rbx,%rax
   0x00000000000007f2 <+82>:    lea    0x0(%rax),%rax
   0x00000000000007f9 <+89>:    add    $0x8,%rsp
   0x00000000000007fd <+93>:    pop    %rbx
   0x00000000000007fe <+94>:    pop    %r14
   0x0000000000000800 <+96>:    retq   
End of assembler dump.

The redundant second call to __tls_get_addr() adds some performance overhead,
which I'd like to avoid.

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]