[Bug target/95251] New: x86 code size expansion inserting field into a union
michaeljclark at mac dot com
gcc-bugzilla@gcc.gnu.org
Thu May 21 03:07:11 GMT 2020
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95251
Bug ID: 95251
Summary: x86 code size expansion inserting field into a union
Product: gcc
Version: 10.1.1
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: michaeljclark at mac dot com
Target Milestone: ---
While testing code on Godbolt, I came across some pathological code
amplification when SSE is enabled for field insertion into a structure
containing a union.
Here is the Godbolt link: https://godbolt.org/z/z_RpFt
Compiler flags: gcc -Os --save-temps -march=ivybridge -c x7b00.c
The function `x7b00` inserts into the structure via char fields and it has a
voluminous translation (30 instructions). The functionally equivalent `xyb87`
inserts into the structure via a 64-bit integer and it translates simply (5
instructions). `x`, `a7x` and `x7bcd` are for comparison.
Not adding -march=ivybridge improves the code size but it is still nowhere
near optimal. `xyb87` serves as a reference for near optimal translation. It
seemed worthy of filing a bug due to the observed code amplification factor
(6X).
Can the backend choose the non-SSE code generation if it is more efficient?
--- CODE SNIPPET BEGINS ---
/* Reproducer for the report: several ways of building an `mr` value whose
   first word is a union of one 64-bit integer and four char fields. */
typedef unsigned long long u64;
/* Note: despite the name, u8 is plain `char`, not `unsigned char`. */
typedef char u8;
/* mr: a union `u` (64-bit `y` overlaid with the four chars `i.a..i.d`)
   followed by a second 64-bit word `x`. */
typedef struct mr
{
union {
u64 y;
struct {
u8 a,b,c,d;
} i;
} u;
u64 x;
} mr;
/* Comparison case: return the `x` member of an mr passed by value. */
u64 x(mr mr) { return mr.x; }
/* Comparison case: char fields all constant (a=7, b=c=d=0), x from the argument. */
mr a7x(u64 x) { return (mr) { .u = { .i = { 7,0,0,0 } }, .x = x }; }
/* Comparison case: a=7, with b, c and d all taken from arguments. */
mr x7bcd(u64 x,u8 b,u8 c,u8 d) { return (mr) {.u={.i={7,b,c,d }}, .x=x }; }
/* Reference case: writes the union through the 64-bit member `y`.
   The expression parses as ((u64)b << 8) | 7, i.e. 7 in the low byte and b
   in the next byte up.
   NOTE(review): if plain char is signed, (u64)b sign-extends a negative b,
   so the bytes above b would be 0xFF here but not in x7b00 -- confirm the
   two are equivalent for the inputs of interest. */
mr xyb87(u64 x, u8 b) { return (mr) {.u={ .y =(u64)b << 8|7},.x=x }; }
/* Case under report: a=7, b from the argument, c=d=0, written through the
   char fields of the union. */
mr x7b00(u64 x, u8 b) { return (mr) {.u={ .i ={7,b,0,0}}, .x=x }; }
--- EXPECTED OUTPUT ---
# Expected listing (AT&T syntax; no function label is shown in the report).
# Reads x from %rdi and b from %sil, and leaves the two result words in
# %rax (union word) and %rdx (x).
.cfi_startproc
endbr64
movsbq %sil, %rax	# rax = b, sign-extended from 8 to 64 bits
movq %rdi, %rdx	# rdx = x, the second word of the result
salq $8, %rax	# shift b up into byte 1
orq $7, %rax	# set byte 0 to 7
ret
.cfi_endproc
--- OBSERVED OUTPUT ---
# Observed listing (AT&T syntax; no function label is shown in the report).
# Same inputs and outputs as the expected listing (x in %rdi, b in %sil,
# result in %rax and %rdx), but the value is assembled in stack memory
# below %rsp and passed through an XMM register.
.cfi_startproc
endbr64
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rdi, %r8	# keep x in r8; rdi is reused as the rep stosb destination
xorl %eax, %eax	# al = 0, the fill byte for rep stosb
movl $6, %ecx	# rep stosb count: 6 bytes
movq %rsp, %rbp
.cfi_def_cfa_register 6
andq $-32, %rsp	# round rsp down to a 32-byte boundary
leaq -32(%rsp), %rdi	# rdi = scratch area at rsp-32
rep stosb	# zero 6 bytes of the scratch area
movq $0, -48(%rsp)	# zero the low word of the 16-byte temporary at rsp-48
movabsq $281474976710655, %rax	# rax = 0xFFFFFFFFFFFF (low 48 bits set)
movq $0, -40(%rsp)	# zero the high word of the temporary
movq -48(%rsp), %rdx	# rdx = low word of the temporary (just zeroed)
andq -32(%rsp), %rax	# rax = low 48 bits of the scratch word (the 6 zeroed bytes)
movzwl %dx, %edx	# keep only bits 0-15 of the temporary's low word
salq $16, %rax	# move the 48 scratch bits up to bits 16-63
orq %rax, %rdx	# merge the two pieces into one 64-bit word
movq %rdx, -48(%rsp)	# store the merged word back to the temporary
movb $7, -48(%rsp)	# byte 0 of the temporary = 7
vmovdqa -48(%rsp), %xmm1	# load the 16-byte temporary into xmm1
vpinsrb $1, %esi, %xmm1, %xmm0	# xmm0 = xmm1 with byte 1 replaced by the low byte of esi (b)
vmovaps %xmm0, -48(%rsp)	# write the vector back to the temporary
movq -48(%rsp), %rax	# rax = low word of the result
movq %r8, -40(%rsp)	# store x as the high word of the temporary
movq -40(%rsp), %rdx	# rdx = high word of the result (x)
leave	# restore rsp from rbp and pop rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
More information about the Gcc-bugs
mailing list