[Bug target/79593] [6/7 Regression] Poor/Worse code generation for FPU on versions after 6

Tue Feb 21 11:04:00 GMT 2017

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79593

--- Comment #7 from Uroš Bizjak <ubizjak at gmail dot com> ---
(In reply to Jakub Jelinek from comment #2)
> That said, the reason why there is fld1 followed by fld %st(0) is that 1.0
> is used multiple times:

This one can be solved with the following patch:

--cut here--

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index cfbe0b0..23f2ea0 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -3660,7 +3660,7 @@

 (define_split
   [(set (match_operand 0 "any_fp_register_operand")
-       (match_operand 1 "memory_operand"))]
+       (match_operand 1 "nonimmediate_operand"))]
   "reload_completed
    && (GET_MODE (operands[0]) == TFmode
        || GET_MODE (operands[0]) == XFmode
@@ -3672,7 +3672,7 @@

 (define_split
   [(set (match_operand 0 "any_fp_register_operand")
-       (float_extend (match_operand 1 "memory_operand")))]
+       (float_extend (match_operand 1 "nonimmediate_operand")))]
   "reload_completed
    && (GET_MODE (operands[0]) == TFmode
        || GET_MODE (operands[0]) == XFmode
--cut here--

This patch allows a (reg->reg) move to be converted to a (const->reg) move when
an appropriate constant can be determined from a REG_EQUIV/REG_EQUAL note. It
also handles the float_extend cases. The patched gcc now generates:

        subl    $12, %esp
        fldz
        movl    16(%esp), %eax
        movl    20(%esp), %edx
        cmpl    %edx, (%eax)
        jbe     .L1
        flds    global_data
        movl    4(%eax), %eax
        fld     %st(0)
        flds    global_data+4
        fxch    %st(3)
        fcomip  %st(2), %st
        fstp    %st(1)
        ja      .L12
        movl    %eax, (%esp)
        flds    (%esp)
        fsubp   %st, %st(2)
.L5:
        fdivrp  %st, %st(1)
        fldz
        fxch    %st(1)
        fcomi   %st(1), %st
        fcmovb  %st(1), %st
        fstp    %st(1)
        fld1
        fcomip  %st(1), %st
        jnb     .L6
        fstp    %st(0)
        fld1
.L6:
.L1:
        addl    $12, %esp
        ret
        .p2align 4,,10
        .p2align 3
.L12:
        movl    %eax, (%esp)
        movl    $0, 4(%esp)
        fildq   (%esp)
        fsubrp  %st, %st(2)
        jmp     .L5