[Bug target/95251] New: x86 code size expansion inserting field into a union

michaeljclark at mac dot com gcc-bugzilla@gcc.gnu.org
Thu May 21 03:07:11 GMT 2020


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95251

            Bug ID: 95251
           Summary: x86 code size expansion inserting field into a union
           Product: gcc
           Version: 10.1.1
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: michaeljclark at mac dot com
  Target Milestone: ---

While testing code on Godbolt, I came across some pathological code
amplification when SSE is enabled for field insertion into a structure
containing a union.

Here is the Godbolt link: https://godbolt.org/z/z_RpFt

Compiler flags: gcc -Os --save-temps -march=ivybridge -c x7b00.c

The function `x7b00` inserts into the structure via char fields and has a
voluminous translation (30 instructions).  The functionally equivalent `xyb87`
inserts into the structure via a 64-bit integer and translates simply (5
instructions). `x`, `a7x` and `x7bcd` are for comparison.

Omitting -march=ivybridge improves the code size, but it is still nowhere
near optimal. `xyb87` serves as a reference for a near-optimal translation. It
seemed worth filing a bug due to the observed code amplification factor
(6X).

Can the backend choose the non-SSE code generation if it is more efficient?


--- CODE SNIPPET BEGINS ---

typedef unsigned long long u64;
typedef char u8;

typedef struct mr
{
    union {
        u64 y;
        struct {
            u8 a,b,c,d;
        } i;
    } u;
    u64 x;
} mr;

u64 x(mr mr) { return mr.x; }
mr a7x(u64 x) { return (mr) { .u = { .i = { 7,0,0,0 } }, .x = x }; }
mr x7bcd(u64 x,u8 b,u8 c,u8 d) { return (mr) {.u={.i={7,b,c,d }}, .x=x }; }
mr xyb87(u64 x, u8 b) { return (mr) {.u={ .y =(u64)b << 8|7},.x=x }; }
mr x7b00(u64 x, u8 b) { return (mr) {.u={ .i ={7,b,0,0}}, .x=x }; }


--- EXPECTED OUTPUT ---

        .cfi_startproc
        endbr64
        movsbq  %sil, %rax
        movq    %rdi, %rdx
        salq    $8, %rax
        orq     $7, %rax
        ret
        .cfi_endproc


--- OBSERVED OUTPUT ---

        .cfi_startproc
        endbr64
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        movq    %rdi, %r8
        xorl    %eax, %eax
        movl    $6, %ecx
        movq    %rsp, %rbp
        .cfi_def_cfa_register 6
        andq    $-32, %rsp
        leaq    -32(%rsp), %rdi
        rep stosb
        movq    $0, -48(%rsp)
        movabsq $281474976710655, %rax
        movq    $0, -40(%rsp)
        movq    -48(%rsp), %rdx
        andq    -32(%rsp), %rax
        movzwl  %dx, %edx
        salq    $16, %rax
        orq     %rax, %rdx
        movq    %rdx, -48(%rsp)
        movb    $7, -48(%rsp)
        vmovdqa -48(%rsp), %xmm1
        vpinsrb $1, %esi, %xmm1, %xmm0
        vmovaps %xmm0, -48(%rsp)
        movq    -48(%rsp), %rax
        movq    %r8, -40(%rsp)
        movq    -40(%rsp), %rdx
        leave
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc


More information about the Gcc-bugs mailing list