struct A { typedef void ( A::* pmf )(); virtual ~A(); virtual void foo() = 0; }; void unoptimized_loop( A* a, A::pmf f ) { while ( 1 ) ( a->*f )(); } void optimized_loop( A* a, A::pmf f ) { typedef void (* pf)( A* ); pf __f = (pf)( a->*f ); while ( 1 ) __f( a ); } both loops do the same thing with different speed impact. $ g++ pmf_opt.cpp -Wall -c -O2 -Wno-pmf-conversions --save-temps -fverbose-asm _Z16unoptimized_loopP1AMS_FvvE: pushq %r12 # movq %rsi, %r12 # f, tmp68 andl $1, %r12d #, tmp68 pushq %rbp # leaq (%rdi,%rdx), %rbp #, tmp70 pushq %rbx # movq %rsi, %rbx # f, f subq $16, %rsp #, movq %rsi, (%rsp) # f, f movq %rdx, 8(%rsp) # f, f .L3: testq %r12, %r12 # tmp68 movq %rbx, %rax # f, f$__pfn je .L6 #, movq (%rbp), %rax #, tmp69 movq -1(%rax,%rbx), %rax #, f$__pfn .L6: movq %rbp, %rdi # tmp70, prephitmp.36 call *%rax # f$__pfn jmp .L3 _Z14optimized_loopP1AMS_FvvE: pushq %rbp # movq %rdi, %rbp # a, a pushq %rbx # movq %rsi, %rbx # f, f$__pfn subq $24, %rsp #, testb $1, %sil #, f$__pfn movq %rsi, 8(%rsp) # f, f movq %rdx, 16(%rsp) # f, f je .L14 #, movq (%rdi,%rdx), %rax #* f, tmp68 movq -1(%rax,%rsi), %rbx #, f$__pfn .L14: movq %rbp, %rdi # a, a call *%rbx # f$__pfn jmp .L14
-funswitch-loops fixes the loops. For the first function, we get: .L4: movl %ebx, (%esp) call *%esi jmp .L4 and: .L3: movl (%ebx), %eax movl %ebx, (%esp) call *-1(%eax,%edi) .p2align 4,,4 jmp .L3
So this is fixed either by passing -funswitch-loops explicitly or by using -O3, which enables -funswitch-loops. It has been fixed since 3.4.0, which added -funswitch-loops.
(In reply to comment #1) > -funswitch-loops fixes the loops. I don't think it is fixed. IMHO this is only a partial fix. > For the first function, we get: > .L4: > movl %ebx, (%esp) > call *%esi > jmp .L4 > > and: > .L3: > movl (%ebx), %eax <=== [1] > movl %ebx, (%esp) > call *-1(%eax,%edi) <=== [1] > .p2align 4,,4 > jmp .L3 The instructions marked [1] are still executed on every iteration, so this is still less efficient than optimized_loop, which loads the function address once, before the loop: movl 12(%ebp), %eax # f, f movl 8(%ebp), %esi # a, a movl 16(%ebp), %edx # f, f testb $1, %al #, f$__pfn movl %eax, %ebx # f, f$__pfn je .L13 movl (%esi,%edx), %eax #* f$__delta, tmp65 movl -1(%eax,%ebx), %ebx #, f$__pfn .L13: movl %esi, (%esp) # a, call *%ebx # f$__pfn jmp .L13
I don't think this is an invalid report. -funswitch-loops produces only slightly better results.