// David Li struct A{ int b1: 3; int b2: 3; int b3: 2; int b4: 17; }a; void foo() { a.b1 = 2; a.b2 = 3; a.b4 = 8; } This requires one LOAD + one OR + one STORE, but the generated code looks like: foo: .LFB2: movzbl a(%rip), %eax andl $-64, %eax orl $26, %eax movb %al, a(%rip) movl a(%rip), %eax andl $-33554177, %eax orb $8, %ah movl %eax, a(%rip) ret
(please make our work easier and make the initial reports Severity enhancement and add missed-optimization as a Keyword and make the report against a gcc version, preferably 4.3.0 or 4.4.0; testcases ready for inclusion into the gcc testsuite would be nice, as would checking for duplicate reports yourself). This testcase actually requires one load, one and (mask out bits in the affected bitfield parts), one or and one store. The MEM_REF work might lay grounds for this to work, it makes the read-modify-write cycles explicit on the tree level and thus allows the loads and stores to be CSEd.
This works correctly for PowerPC*. PPC64 (with GCC 4.0.1): lwz r0,0(r11) rlwinm r0,r0,0,3,31 oris r0,r0,0x4000 and r0,r0,r9 oris r0,r0,0xc00 and r0,r0,r2 ori r0,r0,1024 stw r0,0(r11) PPC32 (GCC 4.0.2): lwz r0,0(r9) rlwimi r0,r2,29,0,2 li r2,3 rlwimi r0,r2,26,3,5 li r2,8 rlwimi r0,r2,7,8,24 stw r0,0(r9)
I would expect the equivalent of int tmp = a; tmp &= 0x0030; // fix the mask to be correct, all bits of b3 tmp |= 2 | 3 | 8; // constant folded and properly shifted a = tmp; I still see three ORs for ppc64 and non-combined rlwimi ops for ppc32.
So, the same code should be generated for union { struct { int b1: 3; int b2: 3; int b3: 2; int b4: 17; }a; int b; } a; void foo() { a.a.b1 = 2; a.a.b2 = 3; a.a.b4 = 8; } void bar() { a.b = (a.b & (-1u >> (sizeof(a.b)*8 - 2) << 6)) | 2 | (3 << 3) | (8 << 17); } modulo errors I made in the bar() case ;)
With MEM_REF I get on i686 foo: movl a, %eax pushl %ebp movl %esp, %ebp popl %ebp andl $-33554240, %eax orl $2074, %eax movl %eax, a ret .size foo, .-foo .p2align 4,,15 .globl bar .type bar, @function bar: movl a, %eax pushl %ebp movl %esp, %ebp popl %ebp andl $192, %eax orl $1048602, %eax movl %eax, a ret which shows that I made an error in the source ;)
The IL after MEM_REF lowering looks like bar () { int D.1193; unsigned int D.1192; unsigned int D.1191; unsigned int D.1190; int D.1189; D.1189 = MEM <int {0}, &a>; D.1190 = (unsigned int) D.1189; D.1191 = D.1190 & 192; D.1192 = D.1191 | 1048602; D.1193 = (int) D.1192; MEM <int {0}, &a> = D.1193; return; } foo () { int MEML.2; int MEML.1; int MEML.0; MEML.0 = MEM <int {0}, &a>; MEML.0 = BIT_FIELD_EXPR <MEML.0, 2, 3, 0>; MEM <int {0}, &a> = MEML.0; MEML.1 = MEM <int {0}, &a>; MEML.1 = BIT_FIELD_EXPR <MEML.1, 3, 3, 3>; MEM <int {0}, &a> = MEML.1; MEML.2 = MEM <int {0}, &a>; MEML.2 = BIT_FIELD_EXPR <MEML.2, 8, 17, 8>; MEM <int {0}, &a> = MEML.2; return; }
(In reply to comment #6) > The IL after MEM_REF lowering looks like > > bar () > { > int D.1193; > unsigned int D.1192; > unsigned int D.1191; > unsigned int D.1190; > int D.1189; > > D.1189 = MEM <int {0}, &a>; > D.1190 = (unsigned int) D.1189; > D.1191 = D.1190 & 192; > D.1192 = D.1191 | 1048602; > D.1193 = (int) D.1192; > MEM <int {0}, &a> = D.1193; > return; > } > > foo () > { > int MEML.2; > int MEML.1; > int MEML.0; > > MEML.0 = MEM <int {0}, &a>; > MEML.0 = BIT_FIELD_EXPR <MEML.0, 2, 3, 0>; > MEM <int {0}, &a> = MEML.0; > MEML.1 = MEM <int {0}, &a>; > MEML.1 = BIT_FIELD_EXPR <MEML.1, 3, 3, 3>; > MEM <int {0}, &a> = MEML.1; > MEML.2 = MEM <int {0}, &a>; > MEML.2 = BIT_FIELD_EXPR <MEML.2, 8, 17, 8>; > MEM <int {0}, &a> = MEML.2; > return; > } LLVM generates expected code: .Ltmp1: movl $-33554240, %eax andl a(%rip), %eax orl $2074, %eax movl %eax, a(%rip) open64 is also bad.
There are two issues here, one is SLOW_BYTE_ACCESS and the second is the lowering of bit-fields. I am going to fix the lowering part for GCC 9. The SLOW_BYTE_ACCESS was being looked at by someone else in the recent past but I can't find the email about it.
Store merging fixes this for GCC 8 so closing as fixed: _5 = MEM[(struct A *)&a]; _6 = _5 & 4261413056; _7 = _6 | 2074; MEM[(struct A *)&a] = _7; I will have to make sure we don't regress when adding lowering.