This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] ARM: Weaker memory barriers
- From: John Carr <jfc at MIT dot EDU>
- To: gcc-patches at gcc dot gnu dot org
- Date: Mon, 10 Mar 2014 22:54:18 -0400
- Subject: [PATCH] ARM: Weaker memory barriers
- Authentication-results: sourceware.org; auth=none
A comment in arm/sync.md notes "We should consider issuing an inner
shareability zone barrier here instead." Here is my first attempt
at a patch to emit weaker memory barriers. Three instructions seem
to be relevant for user mode code on my Cortex A9 Linux box:
dmb ishst, dmb ish, dmb sy
I believe these correspond to a release barrier, a full barrier
with respect to other CPUs, and a full barrier that also orders
relative to I/O.
Consider this a request for comments on whether the approach is correct.
I haven't done any testing yet (beyond eyeballing the assembly output).
2014-03-10 John F. Carr <jfc@mit.edu>
* config/arm/sync.md (mem_thread_fence): New expander for weaker
memory barriers.
* config/arm/arm.c (arm_pre_atomic_barrier, arm_post_atomic_barrier):
Emit only as strong a fence as needed.
Index: config/arm/arm.c
===================================================================
--- config/arm/arm.c (revision 208470)
+++ config/arm/arm.c (working copy)
@@ -29813,7 +29813,12 @@
arm_pre_atomic_barrier (enum memmodel model)
{
if (need_atomic_barrier_p (model, true))
- emit_insn (gen_memory_barrier ());
+ {
+ if (HAVE_mem_thread_fence)
+ emit_insn (gen_mem_thread_fence (GEN_INT ((int) model)));
+ else
+ emit_insn (gen_memory_barrier ());
+ }
}
static void
@@ -29820,7 +29825,12 @@
arm_post_atomic_barrier (enum memmodel model)
{
if (need_atomic_barrier_p (model, false))
- emit_insn (gen_memory_barrier ());
+ {
+ if (HAVE_mem_thread_fence)
+ emit_insn (gen_mem_thread_fence (GEN_INT ((int) model)));
+ else
+ emit_insn (gen_memory_barrier ());
+ }
}
/* Emit the load-exclusive and store-exclusive instructions.
Index: config/arm/sync.md
===================================================================
--- config/arm/sync.md (revision 208470)
+++ config/arm/sync.md (working copy)
@@ -34,26 +34,54 @@
(define_mode_attr sync_sfx
[(QI "b") (HI "h") (SI "") (DI "d")])
+(define_expand "mem_thread_fence"
+ [(set (match_dup 1)
+ (unspec:BLK
+ [(match_dup 1)
+ (match_operand:SI 0 "const_int_operand")]
+ UNSPEC_MEMORY_BARRIER))]
+ "TARGET_HAVE_DMB"
+{
+ if (INTVAL(operands[0]) == MEMMODEL_RELAXED)
+ DONE;
+ operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[1]) = 1;
+})
+
(define_expand "memory_barrier"
[(set (match_dup 0)
- (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
+ (unspec:BLK [(match_dup 0) (match_dup 1)]
+ UNSPEC_MEMORY_BARRIER))]
"TARGET_HAVE_MEMORY_BARRIER"
{
operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
MEM_VOLATILE_P (operands[0]) = 1;
+ operands[1] = GEN_INT((int) MEMMODEL_SEQ_CST);
})
(define_insn "*memory_barrier"
[(set (match_operand:BLK 0 "" "")
- (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
- "TARGET_HAVE_MEMORY_BARRIER"
+ (unspec:BLK
+ [(match_dup 0) (match_operand:SI 1 "const_int_operand")]
+ UNSPEC_MEMORY_BARRIER))]
+ "TARGET_HAVE_DMB || TARGET_HAVE_MEMORY_BARRIER"
{
if (TARGET_HAVE_DMB)
{
- /* Note we issue a system level barrier. We should consider issuing
- a inner shareabilty zone barrier here instead, ie. "DMB ISH". */
- /* ??? Differentiate based on SEQ_CST vs less strict? */
- return "dmb\tsy";
+ switch (INTVAL(operands[1]))
+ {
+ case MEMMODEL_RELEASE:
+ return "dmb\tishst";
+ case MEMMODEL_CONSUME:
+ case MEMMODEL_ACQUIRE:
+ case MEMMODEL_ACQ_REL:
+ return "dmb\tish";
+ case MEMMODEL_SEQ_CST:
+ return "dmb\tsy";
+ case MEMMODEL_RELAXED:
+ default:
+ gcc_unreachable ();
+ }
}
if (TARGET_HAVE_DMB_MCR)