Hi.
I'm working on a lib that uses policy templates quite extensively, in the vein
of the STL's template <class Compare> std::list::sort(Compare comp).
Compare can be a function or a struct with operator(). While the functionality
is equivalent, the compiler inlines the struct's operator() but not the
function. Changing to the struct implementation decreased runtime by more than
40% in an image filtering function.
Below you can find a tiny example. The commented-out code gives the alternative
implementation, which is perfectly inlined. See the attachments for the
assembly code.
Can somebody explain why lessFunc is not inlined?
/usr/i686-pc-linux-gnu/gcc-bin/4.0.1-beta20050507/gcc --version
gcc (GCC) 4.0.1-beta20050507 (Gentoo 4.0.1_beta20050507)
flags: -mtune=pentium4 -O3 -fomit-frame-pointer -S
gcc3.3.3 (SuSE9.1), gcc3.3.5 (Gentoo), gcc3.4.3 (Gentoo 3.4.3.20050110-r2)
perform much worse in both cases. Good point for gcc4.
icc8.0 produces roughly the same result as gcc4.0.1. Also very good.
I think the gcc4.x of gentoo is very close to or the same as the official
snapshot.
Thanks for any hints,
Peter
--------------------------------------------------------------------------------------------
// struct less {
// inline bool operator()(const int a, const int b) {
// return a<b;
// }
// };
// Free-function comparison policy: yields true exactly when a is strictly
// smaller than b.
inline bool lessFunc(const int a, const int b) {
    return b > a;
}
// Applies the comparison policy `comp` to (a, b) and returns its result as a
// bool. Comp is anything callable as comp(a, b), e.g. a function pointer or an
// object with operator().
template <class Comp>
bool foo(const int a, const int b, Comp comp) {
    const bool verdict = comp(a, b);
    return verdict;
}
// Test driver: runs foo with the free function lessFunc as the comparison
// policy and returns the comparison result as the exit status.
int main(int argc, char** argv) {
    const bool argcIsLess = foo(argc, 1, lessFunc);
    return argcIsLess;
    // Alternative using the functor policy instead of the function:
    // return foo(argc,3,less());
}
------------------------------------------------------------------------
// struct less {
// inline bool operator()(const int a, const int b) {
// return a<b;
// }
// };
// Free-function comparison policy: yields true exactly when a is strictly
// smaller than b.
inline bool lessFunc(const int a, const int b) {
    return b > a;
}
// Applies the comparison policy `comp` to (a, b) and returns its result as a
// bool. Comp is anything callable as comp(a, b), e.g. a function pointer or an
// object with operator().
template <class Comp>
bool foo(const int a, const int b, Comp comp) {
    const bool verdict = comp(a, b);
    return verdict;
}
// Test driver: runs foo with the free function lessFunc as the comparison
// policy and returns the comparison result as the exit status.
int main(int argc, char** argv) {
    const bool argcIsLess = foo(argc, 1, lessFunc);
    return argcIsLess;
    // Alternative using the functor policy instead of the function:
    // return foo(argc,3,less());
}
------------------------------------------------------------------------
.file "testTemplateInline.cpp"
# Out-of-line copy of lessFunc(int, int), emitted as a weak symbol in its own
# link-once text section.
.section .gnu.linkonce.t._Z8lessFuncii,"ax",@progbits
.align 2
.weak _Z8lessFuncii
.type _Z8lessFuncii, @function
_Z8lessFuncii:
.LFB2:
# eax = second stack argument (b)
movl 8(%esp), %eax
# compare first stack argument (a) against b
cmpl %eax, 4(%esp)
# al = 1 if a < b (signed comparison), otherwise 0
setl %al
# zero-extend the boolean in al to the full eax return register
movzbl %al, %eax
ret
.LFE2:
.size _Z8lessFuncii, .-_Z8lessFuncii
# main as generated for the function-pointer variant: there is no call to foo
# in this listing, but the comparison is still performed through a real call
# to _Z8lessFuncii.
.text
.align 2
.globl main
.type main, @function
main:
.LFB4:
# set up the frame pointer
pushl %ebp
.LCFI0:
movl %esp, %ebp
.LCFI1:
subl $8, %esp
.LCFI2:
# round the stack pointer down to a 16-byte boundary, then reserve 16 bytes
andl $-16, %esp
subl $16, %esp
# second call argument: the constant 1
movl $1, 4(%esp)
# first call argument: the value at 8(%ebp), main's first parameter (argc)
movl 8(%ebp), %eax
movl %eax, (%esp)
# the comparison is an actual call here, not inlined code
call _Z8lessFuncii
# widen the bool returned in al to main's int return value
movzbl %al, %eax
leave
ret
.LFE4:
.size main, .-main
.ident "GCC: (GNU) 4.0.1-beta20050507 (Gentoo 4.0.1_beta20050507)"
.section .note.GNU-stack,"",@progbits
------------------------------------------------------------------------
.file "testTemplateInline.cpp"
# main as generated for the functor variant: the listing contains no call
# instruction at all; the comparison appears directly in main.
.text
.align 2
.globl main
.type main, @function
main:
.LFB4:
# set up the frame pointer
pushl %ebp
.LCFI0:
movl %esp, %ebp
.LCFI1:
subl $8, %esp
.LCFI2:
# round the stack pointer down to a 16-byte boundary, then reserve 16 bytes
andl $-16, %esp
subl $16, %esp
# compare main's first parameter (argc, at 8(%ebp)) with 2
cmpl $2, 8(%ebp)
# al = 1 if argc <= 2, i.e. argc < 3 (the constant used in the functor call)
setle %al
# keep only bit 0 of eax as the int return value
andl $1, %eax
leave
ret
.LFE4:
.size main, .-main
.ident "GCC: (GNU) 4.0.1-beta20050507 (Gentoo 4.0.1_beta20050507)"
.section .note.GNU-stack,"",@progbits