This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
improve ia64 atomic ops
- From: Richard Henderson <rth at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Tue, 10 May 2005 09:42:00 -0700
- Subject: improve ia64 atomic ops
Eliminates two to three unnecessary zero extensions per instance.
Eliminates a separate memory barrier instruction.
Tested on ia64-linux and against the libgomp testsuite.
r~
* config/ia64/ia64.c (ia64_expand_atomic_op): New.
* config/ia64/ia64-protos.h: Declare it.
* config/ia64/sync.md (I124MODE, FETCHOP, fetchop_name): New.
(sync_add<I48MODE>, sync_old_add<I48MODE>): Remove.
(sync_<FETCHOP><IMODE>, sync_nand<IMODE>): New.
(sync_old_<FETCHOP><IMODE>, sync_old_nand<IMODE>): New.
(sync_new_<FETCHOP><IMODE>, sync_new_nand<IMODE>): New.
(cmpxchg_rel_<I124MODE>): Split from cmpxchg_acq_<IMODE>. Zero
extend result; use release semantics.
(cmpxchg_rel_di): Rename from cmpxchg_acq_<IMODE>; use release.
(sync_val_compare_and_swap_<IMODE>): Update to match.
Index: config/ia64/ia64-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/ia64/ia64-protos.h,v
retrieving revision 1.70
diff -u -p -d -c -r1.70 ia64-protos.h
*** config/ia64/ia64-protos.h 18 Jan 2005 12:01:31 -0000 1.70
--- config/ia64/ia64-protos.h 10 May 2005 16:30:54 -0000
*************** extern bool ia64_expand_vecint_minmax (e
*** 51,56 ****
--- 51,57 ----
extern void ia64_expand_call (rtx, rtx, rtx, int);
extern void ia64_split_call (rtx, rtx, rtx, rtx, rtx, int, int);
extern void ia64_reload_gp (void);
+ extern void ia64_expand_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx);
extern HOST_WIDE_INT ia64_initial_elimination_offset (int, int);
extern void ia64_expand_prologue (void);
Index: config/ia64/ia64.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/ia64/ia64.c,v
retrieving revision 1.361
diff -u -p -d -c -r1.361 ia64.c
*** config/ia64/ia64.c 9 May 2005 14:38:07 -0000 1.361
--- config/ia64/ia64.c 10 May 2005 16:30:55 -0000
*************** ia64_split_call (rtx retval, rtx addr, r
*** 1662,1667 ****
--- 1662,1772 ----
if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
ia64_reload_gp ();
}
+
+ /* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.
+
+    CODE is the rtx code of the operation to apply.  NOT selects the "nand"
+    form, which is expanded below as (~MEM) & VAL.  MEM is the memory operand
+    that is updated in place and VAL is the other input of the operation.
+    If OLD_DST is non-null it receives the value MEM held before the
+    operation; if NEW_DST is non-null it receives the value that was computed
+    for storing.  Either or both may be null.
+
+    This differs from the generic code in that we know about the zero-extending
+    properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
+    also know that ld.acq+cmpxchg.rel equals a full barrier.
+
+    The loop we want to generate looks like
+
+         cmp_reg = mem;
+       label:
+         old_reg = cmp_reg;
+         new_reg = cmp_reg op val;
+         cmp_reg = compare-and-swap(mem, old_reg, new_reg)
+         if (cmp_reg != old_reg)
+           goto label;
+
+    Note that we only do the plain load from memory once.  Subsequent
+    iterations use the value loaded by the compare-and-swap pattern.  */
+
+ void
+ ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
+                        rtx old_dst, rtx new_dst)
+ {
+   enum machine_mode mode = GET_MODE (mem);
+   rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
+   enum insn_code icode;
+
+   /* Special case for using fetchadd.  fetchadd can only add, so the
+      shortcut is valid for PLUS alone.  Every other CODE must take the
+      compare-and-swap loop below, even when VAL happens to be one of the
+      immediates that fetchadd_operand accepts; otherwise e.g. an atomic
+      IOR or MINUS with such a constant would silently become an add.  */
+   if (code == PLUS
+       && (mode == SImode || mode == DImode)
+       && fetchadd_operand (val, mode))
+     {
+       /* The insn always produces the old value, so it needs a
+          destination even when the caller does not want one.  */
+       if (!old_dst)
+         old_dst = gen_reg_rtx (mode);
+
+       emit_insn (gen_memory_barrier ());
+
+       if (mode == SImode)
+         icode = CODE_FOR_fetchadd_acq_si;
+       else
+         icode = CODE_FOR_fetchadd_acq_di;
+       emit_insn (GEN_FCN (icode) (old_dst, mem, val));
+
+       /* The new value is not returned by the insn; recompute it from
+          the old value when the caller asked for it.  */
+       if (new_dst)
+         {
+           new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
+                                          true, OPTAB_WIDEN);
+           if (new_reg != new_dst)
+             emit_move_insn (new_dst, new_reg);
+         }
+       return;
+     }
+
+   /* Because of the volatile mem read, we get an ld.acq, which is the
+      front half of the full barrier.  The end half is the cmpxchg.rel.  */
+   gcc_assert (MEM_VOLATILE_P (mem));
+
+   /* The loop works on DImode registers throughout; narrower values are
+      kept zero-extended.  */
+   old_reg = gen_reg_rtx (DImode);
+   cmp_reg = gen_reg_rtx (DImode);
+   label = gen_label_rtx ();
+
+   if (mode != DImode)
+     {
+       val = simplify_gen_subreg (DImode, val, mode, 0);
+       emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
+     }
+   else
+     emit_move_insn (cmp_reg, mem);
+
+   emit_label (label);
+
+   /* Remember the value this iteration is based on, both for the final
+      comparison and as the comparand in ar.ccv.  */
+   ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
+   emit_move_insn (old_reg, cmp_reg);
+   emit_move_insn (ar_ccv, cmp_reg);
+
+   if (old_dst)
+     emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
+
+   new_reg = cmp_reg;
+   if (code == NOT)
+     {
+       /* nand: complement the loaded value, then AND it with VAL.  */
+       new_reg = expand_simple_unop (DImode, NOT, new_reg, NULL_RTX, true);
+       code = AND;
+     }
+   new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
+                                  true, OPTAB_DIRECT);
+
+   if (mode != DImode)
+     new_reg = gen_lowpart (mode, new_reg);
+   if (new_dst)
+     emit_move_insn (new_dst, new_reg);
+
+   switch (mode)
+     {
+     case QImode:  icode = CODE_FOR_cmpxchg_rel_qi;  break;
+     case HImode:  icode = CODE_FOR_cmpxchg_rel_hi;  break;
+     case SImode:  icode = CODE_FOR_cmpxchg_rel_si;  break;
+     case DImode:  icode = CODE_FOR_cmpxchg_rel_di;  break;
+     default:
+       gcc_unreachable ();
+     }
+
+   emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
+
+   /* Retry while the compare-and-swap failed, i.e. while the value it read
+      back from memory differs from the one NEW_REG was computed from.  */
+   emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
+ }
/* Begin the assembly file. */
Index: config/ia64/sync.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/ia64/sync.md,v
retrieving revision 1.4
diff -u -p -d -c -r1.4 sync.md
*** config/ia64/sync.md 10 May 2005 16:20:35 -0000 1.4
--- config/ia64/sync.md 10 May 2005 16:30:55 -0000
***************
*** 20,28 ****
--- 20,32 ----
;; Boston, MA 02111-1307, USA.
+ ;; Mode and code macros used by the patterns in this file.
+ ;; IMODE covers every integer mode handled here; I124MODE is the subset
+ ;; narrower than DI and I48MODE the subset SI/DI.  modesuffix supplies the
+ ;; per-mode suffix that is appended to the fetchadd/cmpxchg mnemonics.
(define_mode_macro IMODE [QI HI SI DI])
+ (define_mode_macro I124MODE [QI HI SI])
(define_mode_macro I48MODE [SI DI])
(define_mode_attr modesuffix [(QI "1") (HI "2") (SI "4") (DI "8")])
+ ;; FETCHOP lists the rtx codes that share the sync_* expanders below;
+ ;; fetchop_name maps each code to the name fragment used in the pattern names.
+ (define_code_macro FETCHOP [plus minus ior xor and])
+ (define_code_attr fetchop_name
+ [(plus "add") (minus "sub") (ior "ior") (xor "xor") (and "and")])
(define_insn "memory_barrier"
[(set (mem:BLK (match_scratch:DI 0 "X"))
***************
*** 31,74 ****
"mf"
[(set_attr "itanium_class" "syst_m")])
! (define_expand "sync_add<mode>"
! [(match_operand:I48MODE 0 "memory_operand" "")
! (match_operand:I48MODE 1 "general_operand" "")]
""
{
! rtx tmp;
! if (!fetchadd_operand (operands[1], <MODE>mode))
! FAIL;
! tmp = gen_reg_rtx (<MODE>mode);
! emit_insn (gen_memory_barrier ());
! emit_insn (gen_fetchadd_acq_<mode> (tmp, operands[0], operands[1]));
DONE;
})
! (define_expand "sync_old_add<mode>"
! [(match_operand:I48MODE 0 "gr_register_operand" "")
! (match_operand:I48MODE 1 "memory_operand" "")
! (match_operand:I48MODE 2 "general_operand" "")]
""
{
! if (!fetchadd_operand (operands[2], <MODE>mode))
! FAIL;
! emit_insn (gen_memory_barrier ());
! emit_insn (gen_fetchadd_acq_<mode> (operands[0], operands[1], operands[2]));
DONE;
})
! (define_insn "fetchadd_acq_<mode>"
! [(set (match_operand:I48MODE 0 "gr_register_operand" "=r")
! (match_operand:I48MODE 1 "not_postinc_memory_operand" "+S"))
! (set (match_dup 1)
! (unspec:I48MODE [(match_dup 1)
! (match_operand:I48MODE 2 "fetchadd_operand" "n")]
! UNSPEC_FETCHADD_ACQ))]
""
! "fetchadd<modesuffix>.acq %0 = %1, %2"
! [(set_attr "itanium_class" "sem")])
(define_expand "sync_compare_and_swap<mode>"
[(match_operand:IMODE 0 "gr_register_operand" "")
--- 35,114 ----
"mf"
[(set_attr "itanium_class" "syst_m")])
! ;; fetchadd with acquire semantics, for SI and DI.  Operand 0 receives the
! ;; contents of memory operand 1, which is updated in the same insn using
! ;; operand 2; operand 2 must satisfy fetchadd_operand.
! (define_insn "fetchadd_acq_<mode>"
! [(set (match_operand:I48MODE 0 "gr_register_operand" "=r")
! (match_operand:I48MODE 1 "not_postinc_memory_operand" "+S"))
! (set (match_dup 1)
! (unspec:I48MODE [(match_dup 1)
! (match_operand:I48MODE 2 "fetchadd_operand" "n")]
! UNSPEC_FETCHADD_ACQ))]
! ""
! "fetchadd<modesuffix>.acq %0 = %1, %2"
! [(set_attr "itanium_class" "sem")])
!
! ;; Atomic "mem = mem <op> val" for each FETCHOP code, with no result.
! ;; Operand 0 is the memory, operand 1 the value.  All of the work is done
! ;; by ia64_expand_atomic_op, called with both result destinations NULL.
! (define_expand "sync_<fetchop_name><mode>"
! [(set (match_operand:IMODE 0 "memory_operand" "")
! (FETCHOP:IMODE (match_dup 0)
! (match_operand:IMODE 1 "general_operand" "")))]
""
{
! ia64_expand_atomic_op (<CODE>, operands[0], operands[1], NULL, NULL);
! DONE;
! })
! ;; Atomic "mem = ~mem & val", with no result.  NOT is the code that
! ;; ia64_expand_atomic_op is passed to request the nand form.
! (define_expand "sync_nand<mode>"
! [(set (match_operand:IMODE 0 "memory_operand" "")
! (and:IMODE (not:IMODE (match_dup 0))
! (match_operand:IMODE 1 "general_operand" "")))]
! ""
! {
! ia64_expand_atomic_op (NOT, operands[0], operands[1], NULL, NULL);
DONE;
})
! ;; Atomic "mem = mem <op> val" for each FETCHOP code.  Operand 1 is the
! ;; memory and operand 2 the value; operand 0 is passed to
! ;; ia64_expand_atomic_op as the old-value destination.
! (define_expand "sync_old_<fetchop_name><mode>"
! [(set (match_operand:IMODE 0 "gr_register_operand" "")
! (FETCHOP:IMODE
! (match_operand:IMODE 1 "memory_operand" "")
! (match_operand:IMODE 2 "general_operand" "")))]
""
{
! ia64_expand_atomic_op (<CODE>, operands[1], operands[2], operands[0], NULL);
DONE;
})
! ;; Atomic "mem = ~mem & val".  Operand 1 is the memory and operand 2 the
! ;; value; operand 0 is passed to ia64_expand_atomic_op as the old-value
! ;; destination.
! (define_expand "sync_old_nand<mode>"
! [(set (match_operand:IMODE 0 "gr_register_operand" "")
! (and:IMODE
! (not:IMODE (match_operand:IMODE 1 "memory_operand" ""))
! (match_operand:IMODE 2 "general_operand" "")))]
""
! {
! ia64_expand_atomic_op (NOT, operands[1], operands[2], operands[0], NULL);
! DONE;
! })
!
! ;; Atomic "mem = mem <op> val" for each FETCHOP code.  Operand 1 is the
! ;; memory and operand 2 the value; operand 0 is passed to
! ;; ia64_expand_atomic_op as the new-value destination.
! (define_expand "sync_new_<fetchop_name><mode>"
! [(set (match_operand:IMODE 0 "gr_register_operand" "")
! (FETCHOP:IMODE
! (match_operand:IMODE 1 "memory_operand" "")
! (match_operand:IMODE 2 "general_operand" "")))]
! ""
! {
! ia64_expand_atomic_op (<CODE>, operands[1], operands[2], NULL, operands[0]);
! DONE;
! })
!
! ;; Atomic "mem = ~mem & val".  Operand 1 is the memory and operand 2 the
! ;; value; operand 0 is passed to ia64_expand_atomic_op as the new-value
! ;; destination.
! (define_expand "sync_new_nand<mode>"
! [(set (match_operand:IMODE 0 "gr_register_operand" "")
! (and:IMODE
! (not:IMODE (match_operand:IMODE 1 "memory_operand" ""))
! (match_operand:IMODE 2 "general_operand" "")))]
! ""
! {
! ia64_expand_atomic_op (NOT, operands[1], operands[2], NULL, operands[0]);
! DONE;
! })
(define_expand "sync_compare_and_swap<mode>"
[(match_operand:IMODE 0 "gr_register_operand" "")
***************
*** 78,100 ****
""
{
rtx ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
convert_move (ccv, operands[2], 1);
emit_insn (gen_memory_barrier ());
! emit_insn (gen_cmpxchg_acq_<mode> (operands[0], operands[1],
! ccv, operands[3]));
DONE;
})
! (define_insn "cmpxchg_acq_<mode>"
! [(set (match_operand:IMODE 0 "gr_register_operand" "=r")
! (match_operand:IMODE 1 "not_postinc_memory_operand" "+S"))
(set (match_dup 1)
! (unspec:IMODE [(match_dup 1)
! (match_operand:DI 2 "ar_ccv_reg_operand" "")
! (match_operand:IMODE 3 "gr_register_operand" "r")]
! UNSPEC_CMPXCHG_ACQ))]
""
! "cmpxchg<modesuffix>.acq %0 = %1, %3, %2"
[(set_attr "itanium_class" "sem")])
(define_insn "sync_lock_test_and_set<mode>"
--- 118,163 ----
""
{
rtx ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
+ rtx dst;
+
convert_move (ccv, operands[2], 1);
+
+ dst = operands[0];
+ if (GET_MODE (dst) != DImode)
+ dst = gen_reg_rtx (DImode);
+
emit_insn (gen_memory_barrier ());
! emit_insn (gen_cmpxchg_rel_<mode> (dst, operands[1], ccv, operands[3]));
!
! if (dst != operands[0])
! emit_move_insn (operands[0], gen_lowpart (<MODE>mode, dst));
DONE;
})
! ;; Compare-and-exchange with release semantics for the modes narrower than
! ;; DI.  The result register (operand 0) is DImode and is described as the
! ;; zero extension of memory operand 1.  Operand 2 is the ar.ccv comparand
! ;; and operand 3 the replacement value.
! ;; NOTE(review): the unspec is still named UNSPEC_CMPXCHG_ACQ although the
! ;; emitted instruction uses the .rel completer.
! (define_insn "cmpxchg_rel_<mode>"
! [(set (match_operand:DI 0 "gr_register_operand" "=r")
! (zero_extend:DI
! (match_operand:I124MODE 1 "not_postinc_memory_operand" "+S")))
(set (match_dup 1)
! (unspec:I124MODE
! [(match_dup 1)
! (match_operand:DI 2 "ar_ccv_reg_operand" "")
! (match_operand:I124MODE 3 "gr_register_operand" "r")]
! UNSPEC_CMPXCHG_ACQ))]
""
! "cmpxchg<modesuffix>.rel %0 = %1, %3, %2"
! [(set_attr "itanium_class" "sem")])
!
! ;; DImode compare-and-exchange with release semantics.  Same operand layout
! ;; as cmpxchg_rel_<mode>, but no zero extension is needed for the result.
! ;; NOTE(review): the unspec is still named UNSPEC_CMPXCHG_ACQ although the
! ;; emitted instruction uses the .rel completer.
! (define_insn "cmpxchg_rel_di"
! [(set (match_operand:DI 0 "gr_register_operand" "=r")
! (match_operand:DI 1 "not_postinc_memory_operand" "+S"))
! (set (match_dup 1)
! (unspec:DI [(match_dup 1)
! (match_operand:DI 2 "ar_ccv_reg_operand" "")
! (match_operand:DI 3 "gr_register_operand" "r")]
! UNSPEC_CMPXCHG_ACQ))]
! ""
! "cmpxchg8.rel %0 = %1, %3, %2"
[(set_attr "itanium_class" "sem")])
(define_insn "sync_lock_test_and_set<mode>"