[patch][ARM] Optimize __sync_* builtins
- From: Ken Werner <ken at linux dot vnet dot ibm dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Wed, 15 Dec 2010 16:14:59 +0100
- Subject: [patch][ARM] Optimize __sync_* builtins
Hi,
The emitted code for the __sync_* builtins on ARM can be optimized as
suggested by Peter Maydell:
http://lists.linaro.org/pipermail/linaro-toolchain/2010-November/000498.html
The idea is to eliminate the need for an additional temporary register when
the operation is reversible. This patch adds two new code attributes to
sync.md: sync_clobber indicates whether an additional register is going to be
used/clobbered or not, and sync_t2_reqd sets the sync_t2 attribute to its
default, depending on the value of the syncop code iterator. The
arm.c:arm_output_sync_loop() function has been enhanced to place the result of
the strex into old_value instead of t2. The contents of old_value are then
restored by reversing the operation.
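To illustrate (this sketch is not part of the patch, and the register names
are chosen arbitrarily), a call such as

  int old = __sync_fetch_and_add (&counter, 1);

currently expands to a loop along these lines (memory barriers omitted):

  1:      ldrex   r0, [r2]        @ r0 = old value
          add     r1, r0, r3      @ r1 = old value + increment
          strex   r4, r1, [r2]    @ r4 = store-exclusive status (extra scratch)
          teq     r4, #0
          bne     1b

With the patch the strex status is written into the old-value register, and
the old value is recomputed afterwards by reversing the addition, so the
extra scratch register is no longer needed:

  1:      ldrex   r0, [r2]
          add     r1, r0, r3
          strex   r0, r1, [r2]    @ reuse r0 for the status
          teq     r0, #0
          bne     1b
          sub     r0, r1, r3      @ restore the old value: (old + inc) - inc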
This patch has been tested on arm-linux-gnueabi with no regressions.
Regards
Ken
2010-12-15 Ken Werner <ken.werner@de.ibm.com>
* config/arm/sync.md (sync_clobber, sync_t2_reqd): New code attributes.
(arm_sync_old_<sync_optab>si, arm_sync_old_<sync_optab><mode>): Use
the sync_clobber and sync_t2_reqd code attributes.
* config/arm/arm.c (arm_output_sync_loop): Reverse the operation if
the t2 argument is NULL.
Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c (revision 167812)
+++ gcc/config/arm/arm.c (working copy)
@@ -23220,11 +23220,47 @@ arm_output_sync_loop (emit_f emit,
break;
}
- arm_output_strex (emit, mode, "", t2, t1, memory);
- operands[0] = t2;
- arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
- arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", LOCAL_LABEL_PREFIX);
+ if (t2)
+ {
+ arm_output_strex (emit, mode, "", t2, t1, memory);
+ operands[0] = t2;
+ arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
+ arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
+ LOCAL_LABEL_PREFIX);
+ }
+ else
+ {
+ /* Use old_value for the return value because for some operations
+ the old_value can easily be restored. This saves one register. */
+ arm_output_strex (emit, mode, "", old_value, t1, memory);
+ operands[0] = old_value;
+ arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
+ arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
+ LOCAL_LABEL_PREFIX);
+ switch (sync_op)
+ {
+ case SYNC_OP_ADD:
+ arm_output_op3 (emit, "sub", old_value, t1, new_value);
+ break;
+
+ case SYNC_OP_SUB:
+ arm_output_op3 (emit, "add", old_value, t1, new_value);
+ break;
+
+ case SYNC_OP_XOR:
+ arm_output_op3 (emit, "eor", old_value, t1, new_value);
+ break;
+
+ case SYNC_OP_NONE:
+ arm_output_op2 (emit, "mov", old_value, required_value);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
arm_process_output_memory_barrier (emit, NULL);
arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
}
Index: gcc/config/arm/sync.md
===================================================================
--- gcc/config/arm/sync.md (revision 167812)
+++ gcc/config/arm/sync.md (working copy)
@@ -103,6 +103,18 @@
(plus "add")
(minus "sub")])
+(define_code_attr sync_clobber [(ior "=&r")
+ (and "=&r")
+ (xor "X")
+ (plus "X")
+ (minus "X")])
+
+(define_code_attr sync_t2_reqd [(ior "4")
+ (and "4")
+ (xor "*")
+ (plus "*")
+ (minus "*")])
+
(define_expand "sync_<sync_optab>si"
[(match_operand:SI 0 "memory_operand")
(match_operand:SI 1 "s_register_operand")
@@ -286,7 +298,6 @@
VUNSPEC_SYNC_COMPARE_AND_SWAP))
(set (match_dup 1) (unspec_volatile:SI [(match_dup 2)]
VUNSPEC_SYNC_COMPARE_AND_SWAP))
- (clobber:SI (match_scratch:SI 4 "=&r"))
(set (reg:CC CC_REGNUM) (unspec_volatile:CC [(match_dup 1)]
VUNSPEC_SYNC_COMPARE_AND_SWAP))
]
@@ -299,7 +310,6 @@
(set_attr "sync_required_value" "2")
(set_attr "sync_new_value" "3")
(set_attr "sync_t1" "0")
- (set_attr "sync_t2" "4")
(set_attr "conds" "clob")
(set_attr "predicable" "no")])
@@ -313,7 +323,6 @@
VUNSPEC_SYNC_COMPARE_AND_SWAP)))
(set (match_dup 1) (unspec_volatile:NARROW [(match_dup 2)]
VUNSPEC_SYNC_COMPARE_AND_SWAP))
- (clobber:SI (match_scratch:SI 4 "=&r"))
(set (reg:CC CC_REGNUM) (unspec_volatile:CC [(match_dup 1)]
VUNSPEC_SYNC_COMPARE_AND_SWAP))
]
@@ -326,7 +335,6 @@
(set_attr "sync_required_value" "2")
(set_attr "sync_new_value" "3")
(set_attr "sync_t1" "0")
- (set_attr "sync_t2" "4")
(set_attr "conds" "clob")
(set_attr "predicable" "no")])
@@ -487,7 +495,7 @@
VUNSPEC_SYNC_OLD_OP))
(clobber (reg:CC CC_REGNUM))
(clobber (match_scratch:SI 3 "=&r"))
- (clobber (match_scratch:SI 4 "=&r"))]
+ (clobber (match_scratch:SI 4 "<sync_clobber>"))]
"TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
{
return arm_output_sync_insn (insn, operands);
@@ -496,7 +504,7 @@
(set_attr "sync_memory" "1")
(set_attr "sync_new_value" "2")
(set_attr "sync_t1" "3")
- (set_attr "sync_t2" "4")
+ (set_attr "sync_t2" "<sync_t2_reqd>")
(set_attr "sync_op" "<sync_optab>")
(set_attr "conds" "clob")
(set_attr "predicable" "no")])
@@ -540,7 +548,7 @@
VUNSPEC_SYNC_OLD_OP))
(clobber (reg:CC CC_REGNUM))
(clobber (match_scratch:SI 3 "=&r"))
- (clobber (match_scratch:SI 4 "=&r"))]
+ (clobber (match_scratch:SI 4 "<sync_clobber>"))]
"TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
{
return arm_output_sync_insn (insn, operands);
@@ -549,7 +557,7 @@
(set_attr "sync_memory" "1")
(set_attr "sync_new_value" "2")
(set_attr "sync_t1" "3")
- (set_attr "sync_t2" "4")
+ (set_attr "sync_t2" "<sync_t2_reqd>")
(set_attr "sync_op" "<sync_optab>")
(set_attr "conds" "clob")
(set_attr "predicable" "no")])