This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug c++/80792] New: worse code generated compared to clang when using std::tuple
- From: "dvd at gnx dot it" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: Tue, 16 May 2017 22:33:27 +0000
- Subject: [Bug c++/80792] New: worse code generated compared to clang when using std::tuple
- Auto-submitted: auto-generated
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80792
Bug ID: 80792
Summary: worse code generated compared to clang when using
std::tuple
Product: gcc
Version: 7.1.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: c++
Assignee: unassigned at gcc dot gnu.org
Reporter: dvd at gnx dot it
Target Milestone: ---
This code:
#include <tuple>
// Minimal stand-in for the std facility (std::index_sequence): an empty tag
// type whose only job is to carry a compile-time pack of size_t indices so
// that construct_tuple can expand them.
template <size_t...T>
struct index_sequence {};
// Inferior tuple, used to rule out std::tuple as the cause: a plain wrapper
// around a single float. Note that this is ::tuple, not std::tuple.
struct tuple {
// Converting (non-explicit) constructor: stores v in x.
tuple(float v) : x{v} {}
// The only stored value.
float x;
};
// Local counterpart of std::make_tuple that builds the hand-written ::tuple
// above. The arguments are perfectly forwarded to the ::tuple constructor, so
// in practice this only compiles for a single argument convertible to float.
// It is referenced only from the commented-out line in construct_tuple.
template <typename... T>
auto make_tuple(T&&... args) {
return tuple(std::forward<T>(args)...);
}
// Primary template: declared but never defined, so only the explicit
// specializations (here just converter<float>) can be used.
template <typename T>
struct converter;
// Specialization that converts a C string to float.
template <>
struct converter<float> {
// Parses t with std::atof, which returns double; the result is implicitly
// narrowed to float by the declared return type.
// NOTE(review): only <tuple> is included in this snippet; std::atof is
// declared in <cstdlib>, so this relies on a transitive include.
static float convert(const char* t) {
return std::atof(t);
}
};
// Converts one raw column value to type T by delegating to
// converter<T>::convert; the return type is whatever that function returns.
template <typename T>
auto execute_helper(char* value) {
return converter<T>::convert(value);
}
// Builds the result tuple from the raw column strings. Columns and Index are
// expanded together in a single pack expansion, so both packs must have the
// same length: element i is execute_helper<Columns[i]>(values[Index[i]]).
// The index_sequence argument is unnamed; it exists only so Index can be
// deduced.
template <typename... Columns, size_t... Index>
auto construct_tuple(char** values, index_sequence<Index...>) {
return std::make_tuple(execute_helper<Columns>(values[Index])...);
// Alternative using the local ::make_tuple instead of std::make_tuple:
// return make_tuple(execute_helper<Columns>(values[Index])...);
}
// Entry overload: supplies the index pack and forwards to the overload above.
// NOTE(review): the index sequence is hard-coded to <0>, so this only
// compiles when Columns contains exactly one type; only values[0] is read.
template <typename... Columns>
auto construct_tuple(char** values) {
return construct_tuple<Columns...>(values, index_sequence<0>());
}
// Thin wrapper that forwards values to construct_tuple<Columns...> and
// returns its result unchanged.
template <typename... Columns>
auto executex(char** values) {
return construct_tuple<Columns...>(values);
}
// Non-template entry point that instantiates the whole chain for a single
// float column; this is the function whose generated assembly is compared.
auto g(char** v) {
return executex<float>(v);
}
produces the following assembler (gcc 7.1):
g(char**):
mov rax, rsi
push rbx
mov rbx, rdi
mov rdi, QWORD PTR [rax]
xor esi, esi
call strtod
pxor xmm1, xmm1
mov rax, rbx
cvtsd2ss xmm1, xmm0
movss DWORD PTR [rbx], xmm1
pop rbx
ret
while the clang output is shorter (clang trunk):
g(char**):
push rax
mov rdi, qword ptr [rdi]
xor esi, esi
call strtod
cvtsd2ss xmm0, xmm0
pop rax
ret
you can see it live on godbolt.org:
https://godbolt.org/#z:OYLghAFBqd5QCxAYwPYBMCmBRdBLAF1QCcAaPECAKxAEZSBnVAV2OUxAHIBSAJgGY8AO2QAbZlgDU3fgGECzAA6jMM7NwAMAQU1aCmALbKAhvulyGeAF6YA%2BgQB0TgCprdDAsWbICk4VgAPWwZMAEdmTBFMaQB2ACFuGIARGQTtXX0jUVNomXkAT0VI4wNo1351bQ8vH0k0IQA3TGJ9YlTdd09vXwVlXPjdSUlelQgAM1FUU0kGgEpJEEkAxLiGxKTYhOTByQmp32X%2BNJ1to47tTJMzPIJC4tKnB0lyyq1jZiJJA2MAazsRzAQVy8ABsfBBj0kxmIwAY8xWO2ImAUxCEwyUKhWHnQIBAYxIAHdoeg8i8INDYbNHut2ultultENLtlrnI3FUurV6k0Ws08ntTGpNjsPKY8MhdpNptzmgQIPUPHUENCAFTDeEDRlDSRIlFo7G40yoMYQAizWlaIY0ukpM4MvSGK65OS3IpCEpldlvD6oSSYAKYZAfOwITCiIrEeXK4hqhrGcSYDXHIa61homW8tpyF64jNyuMJ812k4pe3MnLmApuj2Q2SocQGIQMUiSSw2eyQgCSQkCXvenwVnII9gxgOQ0ZVsfjEWbfh7/uCYQiUTy3cC1IqScRyLTrYIOJA3z%2BI76EH9geDtlD4b5cjrDabaggBZn3AArHE1/730kqU4i8mkgAPRAamqJfL8/yjmeAZBvoV5hhGeT3swjYME%2BL6YOhH5fssb6/o8AEdPSOgXI6LLOlW9yYLW9aoY%2BFS6P2vqDjUw4AlGqpTgmcLClqYHpqgTZDiemJ3nRaEbtgz7TlhLb%2BAuIThJE7B5BoT6zERNrnA6WQVjcdzug8TiSChaF9j6fqwcGAScTG3EzluWopju4Gsd0omqOJD4MFJMk8VpJY6cxkjAHZk4zE5lo6q5aLnnB35yAKBAYYF6ycLMpCiFwb6cKQQhcBoeWoFwsh8AkvBxK2LBsLkAi0HlBCFRlmU/CA/D8A4HXdT1vUgllXAACx5QVnBFaQJWcHlDAgBopBNWNGWkHAsBIGgRh4CoZAUPKqAbVtIDADE9BjJtrQzRAABGzWkOtpRCAQADyQiiPkN34EiPh4E0M2LeQD1hs1mWoIoBB4EJv0ALSPfwkiQ9iMhJMg5XlbQVjTTV7B0Jl2WcLl%2BU3ZNAQABwgpDIKDaFyASjEDi0JIEC4IQJDSPVLZ1vtzSs/wtCzI1QOte1nW9SL3X9bjw0E39k3TbN81A8tMCICge2KJtzTkJQ61qwdYjGEIwD2F4Qg/KQp2iOdlDXX9d2RE9L1vX9H2BmDP03cI%2Bi40VwOg%2BDTZcNDsPw/uiPI5VqPo4wmMcLzA14yNhNcCTZMU3U2QGwzXQm/MjP4EQxDc/Qpmq%2BrBd8PwvB8/Li2aaQbUdV1osi3HkujeNMuMHLC3ezjXC8An0tcPzNeZTylhCSAg1AA%3D%3D
If you comment out the call to std::make_tuple and use my make_tuple instead,
both compilers produce the same output.
Moreover, if you keep std::make_tuple but change execute_helper to call
`std::atof` directly:
// Variant of execute_helper that bypasses converter<T> and calls std::atof
// directly. T is not used in the body, and the return type deduces to double
// (the return type of std::atof) rather than float.
template <typename T>
auto execute_helper(char* value) {
return std::atof(value);
}
The differences between the two compilers are even greater.
gcc output:
g(char**):
mov rax, rsi
push rbx
mov rbx, rdi
mov rdi, QWORD PTR [rax]
xor esi, esi
call strtod
mov rax, rbx
movsd QWORD PTR [rbx], xmm0
pop rbx
ret
clang output:
g(char**): # @g(char**)
mov rdi, qword ptr [rdi]
xor esi, esi
jmp strtod # TAILCALL