This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug rtl-optimization/84753] GCC does not fold xxswapd followed by vperm


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84753

--- Comment #3 from Jeffrey Walton <noloader at gmail dot com> ---
(In reply to Jeffrey Walton from comment #2)
> (In reply to Bill Schmidt from comment #1)
> > GCC 4.8.5 is out of service.  This is fixed in all in-service versions of
> > GCC (6.4 and later).
> 
> Interesting. I'm seeing it in GCC 7.2.0. Are you certain of this?

Here's an example to make sure we are on the same page.

$ /opt/cfarm/gcc-latest/bin/g++ --version
g++ (GCC) 7.2.0

$ /opt/cfarm/gcc-latest/bin/g++ -g3 -O3 -Wall -DTEST_MAIN -mcpu=power8 sha256-p8.cxx -o sha256-p8.exe

$ objdump --disassemble sha256-p8.exe | c++filt

0000000010000880 <SHA256_SCHEDULE(unsigned int*, unsigned char const*)>:
    10000880:   03 10 40 3c     lis     r2,4099
    10000884:   00 81 42 38     addi    r2,r2,-32512
    10000888:   f0 ff c1 fb     std     r30,-16(r1)
    1000088c:   f8 ff e1 fb     std     r31,-8(r1)
    10000890:   fe ff 22 3d     addis   r9,r2,-2
    10000894:   10 00 c4 3b     addi    r30,r4,16
    10000898:   70 8e 29 39     addi    r9,r9,-29072
    1000089c:   10 00 e3 3b     addi    r31,r3,16
    100008a0:   20 00 84 39     addi    r12,r4,32
    100008a4:   20 00 63 39     addi    r11,r3,32
    100008a8:   99 4e 00 7c     lxvd2x  vs32,0,r9
    100008ac:   30 00 a3 38     addi    r5,r3,48
    100008b0:   40 00 23 39     addi    r9,r3,64
    100008b4:   c4 ff c0 38     li      r6,-60
    100008b8:   c0 ff e0 38     li      r7,-64
    100008bc:   99 26 20 7c     lxvd2x  vs33,0,r4
    100008c0:   30 00 84 38     addi    r4,r4,48
    100008c4:   f8 ff 00 39     li      r8,-8
    100008c8:   e4 ff 40 39     li      r10,-28
    100008cc:   57 02 00 f0     xxswapd vs32,vs32
    100008d0:   57 0a 21 f0     xxswapd vs33,vs33
    100008d4:   97 05 00 f0     xxlnand vs32,vs32,vs32
    100008d8:   2b 08 21 10     vperm   v1,v1,v1,v0
    100008dc:   57 0a 21 f0     xxswapd vs33,vs33
    100008e0:   99 1f 20 7c     stxvd2x vs33,0,r3
    100008e4:   18 00 60 38     li      r3,24
    100008e8:   a6 03 69 7c     mtctr   r3
    100008ec:   99 f6 20 7c     lxvd2x  vs33,0,r30
    100008f0:   57 0a 21 f0     xxswapd vs33,vs33
    100008f4:   2b 08 21 10     vperm   v1,v1,v1,v0
    100008f8:   57 0a 21 f0     xxswapd vs33,vs33
    100008fc:   99 ff 20 7c     stxvd2x vs33,0,r31
    10000900:   99 66 20 7c     lxvd2x  vs33,0,r12
    10000904:   57 0a 21 f0     xxswapd vs33,vs33
    10000908:   2b 08 21 10     vperm   v1,v1,v1,v0
    1000090c:   57 0a 21 f0     xxswapd vs33,vs33
    10000910:   99 5f 20 7c     stxvd2x vs33,0,r11
    10000914:   99 26 20 7c     lxvd2x  vs33,0,r4
    10000918:   57 0a 21 f0     xxswapd vs33,vs33
    1000091c:   2b 08 01 10     vperm   v0,v1,v1,v0
    10000920:   57 02 00 f0     xxswapd vs32,vs32
    10000924:   99 2f 00 7c     stxvd2x vs32,0,r5
    10000928:   00 00 00 60     nop
    1000092c:   00 00 42 60     ori     r2,r2,0
    10000930:   99 36 09 7c     lxvd2x  vs32,r9,r6
    10000934:   99 3e 89 7d     lxvd2x  vs44,r9,r7
    10000938:   99 56 a9 7d     lxvd2x  vs45,r9,r10
    1000093c:   99 46 29 7c     lxvd2x  vs33,r9,r8
    10000940:   82 06 00 10     vshasigmaw v0,v0,0,0
    10000944:   82 7e 21 10     vshasigmaw v1,v1,0,15
    10000948:   80 60 00 10     vadduwm v0,v0,v12
    1000094c:   80 68 00 10     vadduwm v0,v0,v13
    10000950:   80 08 00 10     vadduwm v0,v0,v1
    10000954:   99 4f 00 7c     stxvd2x vs32,0,r9
    10000958:   08 00 29 39     addi    r9,r9,8
    1000095c:   d4 ff 00 42     bdnz    10000930 <SHA256_SCHEDULE(unsigned int*, unsigned char const*)+0xb0>
    10000960:   f0 ff c1 eb     ld      r30,-16(r1)
    10000964:   f8 ff e1 eb     ld      r31,-8(r1)
    10000968:   20 00 80 4e     blr
    1000096c:   00 00 00 00     .long 0x0
    10000970:   00 09 00 00     .long 0x900
    10000974:   00 02 00 00     attn
    10000978:   00 00 00 60     nop
    1000097c:   00 00 42 60     ori     r2,r2,0

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]