This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: Need advice: x86 redundant compare to zero
- From: Steven Bosscher <stevenb at suse dot de>
- To: gcc at gcc dot gnu dot org
- Cc: Evan Cheng <evan dot cheng at apple dot com>
- Date: Fri, 14 Oct 2005 01:53:46 +0200
- Subject: Re: Need advice: x86 redundant compare to zero
- References: <ED2304D2-7D79-48B2-9B2F-CE06CFA18CB6@apple.com>
On Friday 14 October 2005 01:41, Evan Cheng wrote:
#(insn:TI 126 125 40 (parallel [
# (set (reg:SI 0 ax [71])
# (minus:SI (reg:SI 0 ax [71])
# (reg:SI 5 di)))
# (clobber (reg:CC 17 flags))
# ]) 242 {*subsi_1} (insn_list:REG_DEP_TRUE 125 (insn_list:REG_DEP_TRUE 37 (nil)))
# (expr_list:REG_DEAD (reg:SI 5 di)
# (expr_list:REG_UNUSED (reg:CC 17 flags)
# (nil))))
subl %edi, %eax # 126 *subsi_1/1 [length = 2]
#(insn:TI 40 126 41 (set (reg:CC 17 flags)
# (compare:CC (reg:SI 0 ax [71])
# (const_int 0 [0x0]))) 5 {*cmpsi_1_insn} (insn_list:REG_DEP_TRUE 126 (nil))
# (expr_list:REG_DEAD (reg:SI 0 ax [71])
# (nil)))
cmpl $0, %eax # 40 *cmpsi_1_insn/1 [length = 3]
>
> .text
> .globl _foo
> _foo:
> pushl %ebp
> movl %esp, %ebp
> pushl %edi
> pushl %esi
> subl $12, %esp
> movl 8(%ebp), %edx
> cmpl 12(%ebp), %edx
> jg L10
> movl 16(%ebp), %eax
> movl 20(%ebp), %ecx
> leal (%ecx,%eax,4), %eax
> movl %eax, -16(%ebp)
> jmp L4
> L12:
> movl -16(%ebp), %ecx
> movl (%ecx), %eax
> movl %eax, (%esi)
> movl -20(%ebp), %edi
> movl %edi, (%ecx)
> addl $4, %ecx
> movl %ecx, -16(%ebp)
> addl $1, %edx
> cmpl %edx, 12(%ebp)
> jl L10
> L13:
> movl 20(%ebp), %ecx
> L4:
> leal (%ecx,%edx,4), %esi
> movl (%esi), %edi
> movl %edi, -20(%ebp)
> movl 24(%ebp), %eax
> addl %edi, %eax
> movl 32(%ebp), %edi
> movsbl (%eax,%edi),%eax
> subl 28(%ebp), %eax
> cmpl $0, %eax <---- extra compare...
> je L12
> jg L10
> addl $1, %edx
> cmpl %edx, 12(%ebp)
> jge L13
> L10:
> addl $12, %esp
> popl %esi
> popl %edi
> popl %ebp
> ret
>
> The cmpl is not needed because subl has already set the flags.
>
> My question is: where and how would you suggest we do this
> optimization? With peephole2? Or in combine? In i386.md, I see that
> the pattern *subsi_2 looks like what I'd like to combine these two
> insns into:
>
> (define_insn "*subsi_2"
> [(set (reg FLAGS_REG)
> (compare
> (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
> (match_operand:SI 2 "general_operand" "ri,rm"))
> (const_int 0)))
> (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
> (minus:SI (match_dup 1) (match_dup 2)))]
> "ix86_match_ccmode (insn, CCGOCmode)
> && ix86_binary_operator_ok (MINUS, SImode, operands)"
> "sub{l}\t{%2, %0|%0, %2}"
> [(set_attr "type" "alu")
> (set_attr "mode" "SI")])
>
> But I do not see a peephole2 that would generate this insn. Does
> anyone know how this pattern is used?
>
> Suggestions are appreciated!
>
> Thanks,
>
> Evan Cheng
> Apple Computers, Inc.