This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

improve ia64 atomic ops


Eliminates two to three unnecessary zero extensions per instance.
Eliminates a separate memory barrier instruction.

Tested on ia64-linux and against the libgomp testsuite.


r~


        * config/ia64/ia64.c (ia64_expand_atomic_op): New.
        * config/ia64/ia64-protos.h: Declare it.
        * config/ia64/sync.md (I124MODE, FETCHOP, fetchop_name): New.
        (sync_add<I48MODE>, sync_old_add<I48MODE>): Remove.
        (sync_<FETCHOP><IMODE>, sync_nand<IMODE>): New.
        (sync_old_<FETCHOP><IMODE>, sync_old_nand<IMODE>): New.
        (sync_new_<FETCHOP><IMODE>, sync_new_nand<IMODE>): New.
        (cmpxchg_rel_<I124MODE>): Split from cmpxchg_acq_<IMODE>.  Zero
        extend result; use release semantics.
        (cmpxchg_rel_di): Rename from cmpxchg_acq_<IMODE>; use release.
        (sync_val_compare_and_swap_<IMODE>): Update to match.

Index: config/ia64/ia64-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/ia64/ia64-protos.h,v
retrieving revision 1.70
diff -u -p -d -c -r1.70 ia64-protos.h
*** config/ia64/ia64-protos.h	18 Jan 2005 12:01:31 -0000	1.70
--- config/ia64/ia64-protos.h	10 May 2005 16:30:54 -0000
*************** extern bool ia64_expand_vecint_minmax (e
*** 51,56 ****
--- 51,57 ----
  extern void ia64_expand_call (rtx, rtx, rtx, int);
  extern void ia64_split_call (rtx, rtx, rtx, rtx, rtx, int, int);
  extern void ia64_reload_gp (void);
+ extern void ia64_expand_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx);
  
  extern HOST_WIDE_INT ia64_initial_elimination_offset (int, int);
  extern void ia64_expand_prologue (void);
Index: config/ia64/ia64.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/ia64/ia64.c,v
retrieving revision 1.361
diff -u -p -d -c -r1.361 ia64.c
*** config/ia64/ia64.c	9 May 2005 14:38:07 -0000	1.361
--- config/ia64/ia64.c	10 May 2005 16:30:55 -0000
*************** ia64_split_call (rtx retval, rtx addr, r
*** 1662,1667 ****
--- 1662,1787 ----
    if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
      ia64_reload_gp ();
  }
+ 
+ /* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.
+    CODE is the rtx code of the operation; NOT is used to request NAND, which
+    is computed here as MEM = ~MEM & VAL.  If OLD_DST is nonnull it receives
+    the value MEM held before the operation; if NEW_DST is nonnull it receives
+    the value stored by the operation.
+ 
+    This differs from the generic code in that we know about the zero-extending
+    properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
+    also know that ld.acq+cmpxchg.rel equals a full barrier.
+ 
+    The loop we want to generate looks like
+ 
+ 	cmp_reg = mem;
+       label:
+         old_reg = cmp_reg;
+ 	new_reg = cmp_reg op val;
+ 	cmp_reg = compare-and-swap(mem, old_reg, new_reg)
+ 	if (cmp_reg != old_reg)
+ 	  goto label;
+ 
+    Note that we only do the plain load from memory once.  Subsequent
+    iterations use the value loaded by the compare-and-swap pattern.  */
+ 
+ void
+ ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
+ 		       rtx old_dst, rtx new_dst)
+ {
+   enum machine_mode mode = GET_MODE (mem);
+   rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
+   enum insn_code icode;
+ 
+   /* Special case for using fetchadd.  It can only add, so it is usable
+      for PLUS and, with the addend negated, for MINUS; every other CODE
+      must go through the compare-and-swap loop below.  */
+   if ((mode == SImode || mode == DImode)
+       && (code == PLUS || code == MINUS)
+       && fetchadd_operand (val, mode))
+     {
+       /* NOTE(review): assumes the constants accepted by fetchadd_operand
+ 	 are symmetric about zero, so the negated addend is still valid.  */
+       if (code == MINUS)
+ 	val = GEN_INT (-INTVAL (val));
+ 
+       if (!old_dst)
+         old_dst = gen_reg_rtx (mode);
+ 
+       emit_insn (gen_memory_barrier ());
+ 
+       if (mode == SImode)
+ 	icode = CODE_FOR_fetchadd_acq_si;
+       else
+ 	icode = CODE_FOR_fetchadd_acq_di;
+       emit_insn (GEN_FCN (icode) (old_dst, mem, val));
+ 
+       if (new_dst)
+ 	{
+ 	  new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
+ 					 true, OPTAB_WIDEN);
+ 	  if (new_reg != new_dst)
+ 	    emit_move_insn (new_dst, new_reg);
+ 	}
+       return;
+     }
+ 
+   /* Because of the volatile mem read, we get an ld.acq, which is the
+      front half of the full barrier.  The end half is the cmpxchg.rel.  */
+   gcc_assert (MEM_VOLATILE_P (mem));
+ 
+   old_reg = gen_reg_rtx (DImode);
+   cmp_reg = gen_reg_rtx (DImode);
+   label = gen_label_rtx ();
+ 
+   if (mode != DImode)
+     {
+       val = simplify_gen_subreg (DImode, val, mode, 0);
+       emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
+     }
+   else
+     emit_move_insn (cmp_reg, mem);
+ 
+   emit_label (label);
+ 
+   ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
+   emit_move_insn (old_reg, cmp_reg);
+   emit_move_insn (ar_ccv, cmp_reg);
+ 
+   if (old_dst)
+     emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
+ 
+   new_reg = cmp_reg;
+   if (code == NOT)
+     {
+       new_reg = expand_simple_unop (DImode, NOT, new_reg, NULL_RTX, true);
+       code = AND;
+     }
+   new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
+ 				 true, OPTAB_DIRECT);
+ 
+   if (mode != DImode)
+     new_reg = gen_lowpart (mode, new_reg);
+   if (new_dst)
+     emit_move_insn (new_dst, new_reg);
+ 
+   switch (mode)
+     {
+     case QImode:  icode = CODE_FOR_cmpxchg_rel_qi;  break;
+     case HImode:  icode = CODE_FOR_cmpxchg_rel_hi;  break;
+     case SImode:  icode = CODE_FOR_cmpxchg_rel_si;  break;
+     case DImode:  icode = CODE_FOR_cmpxchg_rel_di;  break;
+     default:
+       gcc_unreachable ();
+     }
+ 
+   emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
+ 
+   /* Loop back if the swap failed, i.e. the value cmpxchg loaded from MEM
+      differs from the value we expected to find there.  */
+   emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
+ }
  
  /* Begin the assembly file.  */
  
Index: config/ia64/sync.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/ia64/sync.md,v
retrieving revision 1.4
diff -u -p -d -c -r1.4 sync.md
*** config/ia64/sync.md	10 May 2005 16:20:35 -0000	1.4
--- config/ia64/sync.md	10 May 2005 16:30:55 -0000
***************
*** 20,28 ****
--- 20,32 ----
  ;; Boston, MA 02111-1307, USA.
  
  (define_mode_macro IMODE [QI HI SI DI])
+ (define_mode_macro I124MODE [QI HI SI])
  (define_mode_macro I48MODE [SI DI])
  (define_mode_attr modesuffix [(QI "1") (HI "2") (SI "4") (DI "8")])
  
+ (define_code_macro FETCHOP [plus minus ior xor and])
+ (define_code_attr fetchop_name
+   [(plus "add") (minus "sub") (ior "ior") (xor "xor") (and "and")])
  
  (define_insn "memory_barrier"
    [(set (mem:BLK (match_scratch:DI 0 "X"))
***************
*** 31,74 ****
    "mf"
    [(set_attr "itanium_class" "syst_m")])
  
! (define_expand "sync_add<mode>"
!   [(match_operand:I48MODE 0 "memory_operand" "")
!    (match_operand:I48MODE 1 "general_operand" "")]
    ""
  {
!   rtx tmp;
!   if (!fetchadd_operand (operands[1], <MODE>mode))
!     FAIL;
  
!   tmp = gen_reg_rtx (<MODE>mode);
!   emit_insn (gen_memory_barrier ());
!   emit_insn (gen_fetchadd_acq_<mode> (tmp, operands[0], operands[1]));
    DONE;
  })
  
! (define_expand "sync_old_add<mode>"
!   [(match_operand:I48MODE 0 "gr_register_operand" "")
!    (match_operand:I48MODE 1 "memory_operand" "")
!    (match_operand:I48MODE 2 "general_operand" "")]
    ""
  {
!   if (!fetchadd_operand (operands[2], <MODE>mode))
!     FAIL;
!   emit_insn (gen_memory_barrier ());
!   emit_insn (gen_fetchadd_acq_<mode> (operands[0], operands[1], operands[2]));
    DONE;
  })
  
! (define_insn "fetchadd_acq_<mode>"
!   [(set (match_operand:I48MODE 0 "gr_register_operand" "=r")
! 	(match_operand:I48MODE 1 "not_postinc_memory_operand" "+S"))
!    (set (match_dup 1)
! 	(unspec:I48MODE [(match_dup 1)
! 			 (match_operand:I48MODE 2 "fetchadd_operand" "n")]
! 		        UNSPEC_FETCHADD_ACQ))]
    ""
!   "fetchadd<modesuffix>.acq %0 = %1, %2"
!   [(set_attr "itanium_class" "sem")])
  
  (define_expand "sync_compare_and_swap<mode>"
    [(match_operand:IMODE 0 "gr_register_operand" "")
--- 35,114 ----
    "mf"
    [(set_attr "itanium_class" "syst_m")])
  
! (define_insn "fetchadd_acq_<mode>"
!   [(set (match_operand:I48MODE 0 "gr_register_operand" "=r")
! 	(match_operand:I48MODE 1 "not_postinc_memory_operand" "+S"))
!    (set (match_dup 1)
! 	(unspec:I48MODE [(match_dup 1)
! 			 (match_operand:I48MODE 2 "fetchadd_operand" "n")]
! 		        UNSPEC_FETCHADD_ACQ))]
!   ""
!   "fetchadd<modesuffix>.acq %0 = %1, %2"
!   [(set_attr "itanium_class" "sem")])
! 
! (define_expand "sync_<fetchop_name><mode>"
!   [(set (match_operand:IMODE 0 "memory_operand" "")
! 	(FETCHOP:IMODE (match_dup 0)
! 	  (match_operand:IMODE 1 "general_operand" "")))]
    ""
  {
!   ia64_expand_atomic_op (<CODE>, operands[0], operands[1], NULL, NULL);
!   DONE;
! })
  
! (define_expand "sync_nand<mode>"
!   [(set (match_operand:IMODE 0 "memory_operand" "")
! 	(and:IMODE (not:IMODE (match_dup 0))
! 	  (match_operand:IMODE 1 "general_operand" "")))]
!   ""
! {
!   ia64_expand_atomic_op (NOT, operands[0], operands[1], NULL, NULL);
    DONE;
  })
  
! (define_expand "sync_old_<fetchop_name><mode>"
!   [(set (match_operand:IMODE 0 "gr_register_operand" "")
! 	(FETCHOP:IMODE 
! 	  (match_operand:IMODE 1 "memory_operand" "")
! 	  (match_operand:IMODE 2 "general_operand" "")))]
    ""
  {
!   ia64_expand_atomic_op (<CODE>, operands[1], operands[2], operands[0], NULL);
    DONE;
  })
  
! (define_expand "sync_old_nand<mode>"
!   [(set (match_operand:IMODE 0 "gr_register_operand" "")
! 	(and:IMODE 
! 	  (not:IMODE (match_operand:IMODE 1 "memory_operand" ""))
! 	  (match_operand:IMODE 2 "general_operand" "")))]
    ""
! {
!   ia64_expand_atomic_op (NOT, operands[1], operands[2], operands[0], NULL);
!   DONE;
! })
! 
! (define_expand "sync_new_<fetchop_name><mode>"
!   [(set (match_operand:IMODE 0 "gr_register_operand" "")
! 	(FETCHOP:IMODE 
! 	  (match_operand:IMODE 1 "memory_operand" "")
! 	  (match_operand:IMODE 2 "general_operand" "")))]
!   ""
! {
!   ia64_expand_atomic_op (<CODE>, operands[1], operands[2], NULL, operands[0]);
!   DONE;
! })
! 
! (define_expand "sync_new_nand<mode>"
!   [(set (match_operand:IMODE 0 "gr_register_operand" "")
! 	(and:IMODE 
! 	  (not:IMODE (match_operand:IMODE 1 "memory_operand" ""))
! 	  (match_operand:IMODE 2 "general_operand" "")))]
!   ""
! {
!   ia64_expand_atomic_op (NOT, operands[1], operands[2], NULL, operands[0]);
!   DONE;
! })
  
  (define_expand "sync_compare_and_swap<mode>"
    [(match_operand:IMODE 0 "gr_register_operand" "")
***************
*** 78,100 ****
    ""
  {
    rtx ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
    convert_move (ccv, operands[2], 1);
    emit_insn (gen_memory_barrier ());
!   emit_insn (gen_cmpxchg_acq_<mode> (operands[0], operands[1],
! 				     ccv, operands[3]));
    DONE;
  })
  
! (define_insn "cmpxchg_acq_<mode>"
!   [(set (match_operand:IMODE 0 "gr_register_operand" "=r")
! 	(match_operand:IMODE 1 "not_postinc_memory_operand" "+S"))
     (set (match_dup 1)
!         (unspec:IMODE [(match_dup 1)
! 		       (match_operand:DI 2 "ar_ccv_reg_operand" "")
! 		       (match_operand:IMODE 3 "gr_register_operand" "r")]
! 		      UNSPEC_CMPXCHG_ACQ))]
    ""
!   "cmpxchg<modesuffix>.acq %0 = %1, %3, %2"
    [(set_attr "itanium_class" "sem")])
  
  (define_insn "sync_lock_test_and_set<mode>"
--- 118,163 ----
    ""
  {
    rtx ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
+   rtx dst;
+ 
    convert_move (ccv, operands[2], 1);
+ 
+   dst = operands[0];
+   if (GET_MODE (dst) != DImode)
+     dst = gen_reg_rtx (DImode);
+ 
    emit_insn (gen_memory_barrier ());
!   emit_insn (gen_cmpxchg_rel_<mode> (dst, operands[1], ccv, operands[3]));
! 
!   if (dst != operands[0])
!     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, dst));
    DONE;
  })
  
! (define_insn "cmpxchg_rel_<mode>"
!   [(set (match_operand:DI 0 "gr_register_operand" "=r")
! 	(zero_extend:DI
! 	  (match_operand:I124MODE 1 "not_postinc_memory_operand" "+S")))
     (set (match_dup 1)
!         (unspec:I124MODE
! 	  [(match_dup 1)
! 	   (match_operand:DI 2 "ar_ccv_reg_operand" "")
! 	   (match_operand:I124MODE 3 "gr_register_operand" "r")]
! 	  UNSPEC_CMPXCHG_ACQ))]
    ""
!   "cmpxchg<modesuffix>.rel %0 = %1, %3, %2"
!   [(set_attr "itanium_class" "sem")])
! 
! (define_insn "cmpxchg_rel_di"
!   [(set (match_operand:DI 0 "gr_register_operand" "=r")
! 	(match_operand:DI 1 "not_postinc_memory_operand" "+S"))
!    (set (match_dup 1)
!         (unspec:DI [(match_dup 1)
! 		    (match_operand:DI 2 "ar_ccv_reg_operand" "")
! 		    (match_operand:DI 3 "gr_register_operand" "r")]
! 		   UNSPEC_CMPXCHG_ACQ))]
!   ""
!   "cmpxchg8.rel %0 = %1, %3, %2"
    [(set_attr "itanium_class" "sem")])
  
  (define_insn "sync_lock_test_and_set<mode>"


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]