This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug rtl-optimization/47556] x86: fails to take advantage of high-byte addressing mode
- From: "hjl.tools at gmail dot com" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: Sat, 16 Jul 2011 15:13:04 +0000
- Subject: [Bug rtl-optimization/47556] x86: fails to take advantage of high-byte addressing mode
- Auto-submitted: auto-generated
- References: <bug-47556-4@http.gcc.gnu.org/bugzilla/>
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=47556
H.J. Lu <hjl.tools at gmail dot com> changed:
What |Removed |Added
----------------------------------------------------------------------------
Status|UNCONFIRMED |NEW
Last reconfirmed| |2011.07.16 15:12:15
CC| |ebotcazou at gcc dot
| |gnu.org
Component|target |rtl-optimization
Ever Confirmed|0 |1
--- Comment #3 from H.J. Lu <hjl.tools at gmail dot com> 2011-07-16 15:12:15 UTC ---
Here is the simplified testcase:
--
[hjl@gnu-6 pr47556]$ cat x.c
typedef unsigned short int uint16_t;
typedef unsigned char uint8_t;
typedef uint8_t __ticket_t;
typedef uint16_t __ticketpair_t;
typedef struct arch_spinlock {
union {
__ticketpair_t head_tail;
struct __raw_tickets {
__ticket_t head, tail;
} tickets;
};
} arch_spinlock_t;
static struct __raw_tickets __ticket_spin_claim(struct arch_spinlock *lock)
{
register struct __raw_tickets tickets = { .tail = 1 };
if (sizeof(lock->tickets.head) == sizeof(uint8_t))
asm volatile ("lock; " "xaddw %w0, %1\n"
: "+r" (tickets), "+m" (lock->tickets)
: : "memory", "cc");
else
asm volatile ("lock; " "xaddl %0, %1\n"
: "+r" (tickets), "+m" (lock->tickets)
: : "memory", "cc");
return tickets;
}
void __ticket_spin_lock(struct arch_spinlock *lock)
{
register struct __raw_tickets inc;
inc = __ticket_spin_claim(lock);
for (;;) {
if (inc.head == inc.tail)
goto out;
asm volatile ("pause");
inc.head = (*(volatile typeof(lock->tickets.head) *)&(lock->tickets.head));
}
out:
asm volatile ("" : : : "memory");
}
[hjl@gnu-6 pr47556]$
---
The generated code is
---
__ticket_spin_lock:
.LFB1:
.cfi_startproc
movl $256, %eax
#APP
# 17 "x.c" 1
lock; xaddw %ax, (%rdi)
# 0 "" 2
#NO_APP
movzbl %ah, %edx
cmpb %al, %dl
je .L2
.p2align 4,,10
.p2align 3
.L4:
#APP
# 33 "x.c" 1
pause
# 0 "" 2
#NO_APP
movzbl (%rdi), %eax
cmpb %dl, %al
jne .L4
.L2:
ret
.cfi_endproc
---
The main issue is
cmpb %dl, %al
jne .L4
But the %Xh registers aren't really allocated by the register allocator.
They are expressed via
(set (reg:CCZ 17 flags)
(compare:CCZ (subreg:QI (zero_extract:SI (subreg:DI (reg/v:HI 62 [ tickets ]) 0)
(const_int 8 [0x8])
(const_int 8 [0x8])) 0)
(subreg:QI (reg/v:HI 62 [ tickets ]) 0)))
The combine pass doesn't turn
(insn 10 9 11 2 (set (subreg:SI (reg:QI 61 [ tickets$tail ]) 0)
(zero_extract:SI (subreg:SI (reg/v:HI 62 [ tickets ]) 0)
(const_int 8 [0x8])
(const_int 8 [0x8]))) x.c:17 89 {*movsi_extzv_1}
(nil))
(insn 11 10 12 2 (set (reg:CCZ 17 flags)
(compare:CCZ (reg:QI 61 [ tickets$tail ])
(subreg:QI (reg/v:HI 62 [ tickets ]) 0))) x.c:31 4 {*cmpqi_1}
(expr_list:REG_DEAD (reg/v:HI 62 [ tickets ])
(nil)))
....
(insn 17 15 18 3 (set (reg:CCZ 17 flags)
(compare:CCZ (reg:QI 59 [ inc$head ])
(reg:QI 61 [ tickets$tail ]))) x.c:31 4 {*cmpqi_1}
(expr_list:REG_DEAD (reg:QI 59 [ inc$head ])
(nil)))
into
(set (reg:CCZ 17 flags)
(compare:CCZ (subreg:QI (zero_extract:SI (subreg:DI (reg/v:HI 62 [ tickets ]) 0)
(const_int 8 [0x8])
(const_int 8 [0x8])) 0)
(subreg:QI (reg/v:HI 62 [ tickets ]) 0)))
....
(set (reg:CCZ 17 flags)
(compare:CCZ (reg:QI 59 [ inc$head ])
(subreg:QI (zero_extract:SI (subreg:DI (reg/v:HI 62 [ tickets ]) 0)
(const_int 8 [0x8])
(const_int 8 [0x8])) 0)))