This is the mail archive of the
libstdc++@sourceware.cygnus.com
mailing list for the libstdc++ project.
Re: black_count
- To: libstdc++@sourceware.cygnus.com
- Subject: Re: black_count
- From: Benjamin Kosnik <bkoz@cygnus.com>
- Date: Fri, 9 Jul 1999 13:57:14 -0700 (PDT)
I'm in favor of this patch, and will check it in, with the added argument,
later today unless I hear strong objections.
-Benjamin
On Tue, 6 Jul 1999, Nick Rasmussen wrote:
> >
> > Nick, can you please elaborate a bit more on your patch? Codegen
> > snippets of this with and without the patch, with and without the arg
> > would be welcome.
> >
> > thanks,
> > benjamin
>
> Here is the code generated in the three cases (without the patch, with
> the patch using a default argument, with the patch using two functions).
>
> I also put the patch and some source files showing the three cases at
> http://jive.org/~nick/black_count.tar.gz
>
> The output here and on the website is from egcs-1.1.2 (my gcc-2.95 testing
> box is at home). Results with gcc-2.95 (from CVS two nights ago) were
> similar, probably the same.
>
> The real solution, of course, would be to improve the optimizer so that
> all three cases produce the same code, but given my scanty knowledge
> of the egcs codebase, it's a bit beyond my abilities.
>
> -nick
>
> -------------------------------------------------
>
> Without the patch generates:
>
> -- caller function stuff deleted --
> testl %edx,%edx
> jne .L17
> xorl %eax,%eax
> jmp .L18
> .p2align 4,,7
> .L17:
> cmpb $1,(%edx)
> sete %al
> movzbl %al,%ebx
> cmpl %ecx,%edx
> jne .L20
> movl %ebx,%eax
> jmp .L18
> .p2align 4,,7
> .L20:
> pushl %ecx
> pushl 4(%edx)
> call __black_count__FP18_Rb_tree_node_baseT0
> addl %ebx,%eax
> -- caller function stuff deleted --
>
> for the call, and
>
> .section .gnu.linkonce.t.__black_count__FP18_Rb_tree_node_baseT0,"ax",@progbits
> .align 4
> .weak __black_count__FP18_Rb_tree_node_baseT0
> .type __black_count__FP18_Rb_tree_node_baseT0,@function
> __black_count__FP18_Rb_tree_node_baseT0:
> pushl %ebp
> movl %esp,%ebp
> pushl %ebx
> movl 8(%ebp),%edx
> movl 12(%ebp),%ecx
> testl %edx,%edx
> jne .L12
> xorl %eax,%eax
> jmp .L11
> .p2align 4,,7
> .L12:
> cmpb $1,(%edx)
> sete %al
> movzbl %al,%ebx
> cmpl %ecx,%edx
> je .L14
> pushl %ecx
> pushl 4(%edx)
> call __black_count__FP18_Rb_tree_node_baseT0
> addl %ebx,%eax
> jmp .L23
> .p2align 4,,7
> .L14:
> movl %ebx,%eax
> .L23:
> .L11:
> movl -4(%ebp),%ebx
> leave
> ret
>
> for __black_count
>
> -------------------------------------------------
>
> With the patch, using the default argument generates:
>
> -- caller function stuff deleted --
> movl 8(%ebp),%edx
> xorl %eax,%eax
> .L19:
> testl %edx,%edx
> je .L21
> cmpb $1,(%edx)
> jne .L23
> incl %eax
> .L23:
> cmpl %ecx,%edx
> je .L21
> movl 4(%edx),%edx
> jmp .L19
> .p2align 4,,7
> .L21:
> -- caller function stuff deleted --
>
> -------------------------------------------------
>
> With the idea in the patch, using two functions rather than the default
> argument generates:
>
> -- caller function stuff deleted --
> testl %edx,%edx
> jne .L31
> xorl %eax,%eax
> jmp .L32
> .p2align 4,,7
> .L31:
> cmpb $1,(%edx)
> sete %al
> andl $255,%eax
> .L43:
> cmpl %ecx,%edx
> je .L32
> movl 4(%edx),%edx
> testl %edx,%edx
> je .L32
> cmpb $1,(%edx)
> jne .L43
> incl %eax
> jmp .L43
> .p2align 4,,7
> .L32:
> -- caller function stuff deleted --
>