This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug target/70007] [4.9/5/6 Regression] wrong code with -mbmi2


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70007

--- Comment #1 from Uroš Bizjak <ubizjak at gmail dot com> ---
The PRE pass is moving (insn 7) out of the loop. Before the transformation, we
have:

(code_label 84 2 5 3 2 "" [1 uses])
(note 5 84 6 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
(insn 6 5 7 3 (set (reg:DI 117 [ v32u64_1+24 ])
        (mem/j/c:DI (plus:DI (reg/f:DI 16 argp)
                (const_int 88 [0x58])) [1 v32u64_1+24 S8 A64])) pr70007.c:10 85 {*movdi_internal}
     (nil))
(insn 7 6 8 3 (parallel [
            (set (reg:DI 88 [ _4 ])
                (rotatert:DI (reg:DI 117 [ v32u64_1+24 ])
                    (const_int 19 [0x13])))
            (clobber (reg:CC 17 flags))
        ]) pr70007.c:10 607 {*rotrdi3_1}
     (expr_list:REG_DEAD (reg:DI 117 [ v32u64_1+24 ])
        (expr_list:REG_UNUSED (reg:CC 17 flags)
            (expr_list:REG_EQUAL (rotatert:DI (mem/j/c:DI (plus:DI (reg/f:DI 16 argp)
                            (const_int 88 [0x58])) [1 v32u64_1+24 S8 A64])
                    (const_int 19 [0x13]))
                (nil)))))

But the PRE pass moves this insn out of the loop, even though the [argp+88]
memory location is updated inside the loop:

(note 3 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
(note 2 3 113 2 NOTE_INSN_FUNCTION_BEG)
(insn 113 2 116 2 (set (reg:DI 183 [ v32u64_1+24 ])
        (mem/j/c:DI (plus:DI (reg/f:DI 16 argp)
                (const_int 88 [0x58])) [1 v32u64_1+24 S8 A64])) -1
     (nil))
(insn 116 113 117 2 (set (reg:DI 178 [ _4 ])
        (rotatert:DI (mem/j/c:DI (plus:DI (reg/f:DI 16 argp)
                    (const_int 88 [0x58])) [1 v32u64_1+24 S8 A64])
            (const_int 19 [0x13]))) 603 {*bmi2_rorxdi3_1}
     (nil))
...
(code_label 84 125 5 3 2 "" [1 uses])
(note 5 84 110 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
(insn 110 5 104 3 (set (reg:DI 117 [ v32u64_1+24 ])
        (reg:DI 183 [ v32u64_1+24 ])) pr70007.c:10 -1
     (expr_list:REG_EQUAL (mem/j/c:DI (plus:DI (reg/f:DI 16 argp)
                (const_int 88 [0x58])) [1 v32u64_1+24 S8 A64])
        (nil)))
(insn 104 110 115 3 (set (reg:DI 88 [ _4 ])
        (reg:DI 178 [ _4 ])) pr70007.c:10 -1
     (expr_list:REG_EQUAL (rotatert:DI (mem/j/c:DI (plus:DI (reg/f:DI 16 argp)
                    (const_int 88 [0x58])) [1 v32u64_1+24 S8 A64])
            (const_int 19 [0x13]))
        (nil)))
(insn 8 115 9 3 (set (mem/j/c:DI (plus:DI (reg/f:DI 16 argp)
                (const_int 88 [0x58])) [1 v32u64_1+24 S8 A64])
        (reg:DI 183 [ v32u64_1+24 ])) pr70007.c:10 -1
     (nil))
...
(insn 45 114 107 3 (set (mem/c:DI (plus:DI (reg/f:DI 16 argp)
                (const_int 88 [0x58])) [1 v32u64_1+24 S8 A64])
        (reg:DI 183 [ v32u64_1+24 ])) pr70007.c:11 -1
     (expr_list:REG_DEAD (reg:DI 142)
        (nil)))
...
(jump_insn 87 86 88 3 (set (pc)
        (if_then_else (ne (reg:CCZ 17 flags)
                (const_int 0 [0]))
            (label_ref 84)
            (pc))) pr70007.c:13 635 {*jcc_1}
     (expr_list:REG_DEAD (reg:CCZ 17 flags)
        (int_list:REG_BR_PROB 9100 (nil)))
 -> 84)
...
(note 88 87 109 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
(insn 109 88 101 4 (set (reg:DI 173 [ v32u64_1+24 ])
        (reg:DI 183 [ v32u64_1+24 ])) pr70007.c:14 -1
     (expr_list:REG_EQUAL (mem/j/c:DI (plus:DI (reg/f:DI 16 argp)
                (const_int 88 [0x58])) [1 v32u64_1+24 S8 A64])
        (nil)))
...

Moving the rorx insn after the .L2 label, in other words changing:

        rorx    $19, 160(%rsp), %r8
        movq    136(%rsp), %rax
        movq    144(%rsp), %rdi
        movq    152(%rsp), %rsi
        movq    72(%rsp), %r12
        movq    80(%rsp), %rbp
        movq    88(%rsp), %rbx
        pcmpeqd %xmm6, %xmm6
.L2:
        orw     %r8w, 98(%rsp)
        ...
        movq    %rdx, 160(%rsp)
        ...
to:

        movq    136(%rsp), %rax
        movq    144(%rsp), %rdi
        movq    152(%rsp), %rsi
        movq    72(%rsp), %r12
        movq    80(%rsp), %rbp
        movq    88(%rsp), %rbx
        pcmpeqd %xmm6, %xmm6
.L2:
        rorx    $19, 160(%rsp), %r8
        orw     %r8w, 98(%rsp)
        ...
        movq    %rdx, 160(%rsp)
        ...

fixes the testcase.

(BTW:
        rorx    $19, 160(%rsp), %r8

can be substituted with

        movq    160(%rsp), %r8
        rorq    $19, %r8

in the final assembly to "simulate" the BMI2 insn on a target without BMI2.)

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]