This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 8/9] Convert i386 backend to new atomic patterns.


Cc: jakub@redhat.com
Cc: ubizjak@gmail.com
---
 gcc/config/i386/i386.md |    5 +-
 gcc/config/i386/sync.md |  306 +++++++++++++++++++++++++----------------------
 2 files changed, 167 insertions(+), 144 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a11a71b..7ce57d8 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -262,7 +262,10 @@
   UNSPECV_ALIGN
   UNSPECV_MONITOR
   UNSPECV_MWAIT
-  UNSPECV_CMPXCHG
+  UNSPECV_CMPXCHG_1
+  UNSPECV_CMPXCHG_2
+  UNSPECV_CMPXCHG_3
+  UNSPECV_CMPXCHG_4
   UNSPECV_XCHG
   UNSPECV_LOCK
   UNSPECV_PROLOGUE_USE
diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index 1044255..e5579b1 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -18,31 +18,27 @@
 ;; along with GCC; see the file COPYING3.  If not see
 ;; <http://www.gnu.org/licenses/>.
 
-(define_mode_iterator CASMODE
-  [QI HI SI (DI "TARGET_64BIT || TARGET_CMPXCHG8B")
-	    (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
-(define_mode_iterator DCASMODE
-  [(DI "!TARGET_64BIT && TARGET_CMPXCHG8B && !flag_pic")
-   (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
-(define_mode_attr doublemodesuffix [(DI "8") (TI "16")])
-(define_mode_attr DCASHMODE [(DI "SI") (TI "DI")])
-
-(define_expand "memory_barrier"
-  [(set (match_dup 0)
-	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
+(define_expand "mem_thread_fence"
+  [(match_operand:SI 0 "const_int_operand" "")]		;; model
   ""
 {
-  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
-  MEM_VOLATILE_P (operands[0]) = 1;
+  /* Unless this is a SEQ_CST fence, the i386 memory model is strong
+     enough not to require barriers of any kind.  */
+  if (INTVAL (operands[0]) != MEMMODEL_SEQ_CST)
+    DONE;
 
-  if (!(TARGET_64BIT || TARGET_SSE2))
+  if (TARGET_64BIT || TARGET_SSE2)
+    emit_insn (gen_sse2_mfence ());
+  else
     {
-      emit_insn (gen_memory_barrier_nosse (operands[0]));
-      DONE;
+      rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+      MEM_VOLATILE_P (mem) = 1;
+      emit_insn (gen_mfence_nosse (mem));
     }
+  DONE;
 })
 
-(define_insn "memory_barrier_nosse"
+(define_insn "mfence_nosse"
   [(set (match_operand:BLK 0 "" "")
 	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))
    (clobber (reg:CC FLAGS_REG))]
@@ -50,127 +46,152 @@
   "lock{%;} or{l}\t{$0, (%%esp)|DWORD PTR [esp], 0}"
   [(set_attr "memory" "unknown")])
 
-;; ??? It would be possible to use cmpxchg8b on pentium for DImode
-;; changes.  It's complicated because the insn uses ecx:ebx as the
-;; new value; note that the registers are reversed from the order
-;; that they'd be in with (reg:DI 2 ecx).  Similarly for TImode
-;; data in 64-bit mode.
+(define_expand "atomic_compare_and_swap<mode>"
+  [(match_operand:QI 0 "register_operand" "")		;; bool success output
+   (match_operand:SWI124 1 "register_operand" "")	;; oldval output
+   (match_operand:SWI124 2 "memory_operand" "")		;; memory
+   (match_operand:SWI124 3 "register_operand" "")	;; expected input
+   (match_operand:SWI124 4 "register_operand" "")	;; newval input
+   (match_operand:SI 5 "const_int_operand" "")		;; is_weak
+   (match_operand:SI 6 "const_int_operand" "")		;; success model
+   (match_operand:SI 7 "const_int_operand" "")]		;; failure model
+  "TARGET_CMPXCHG"
+{
+  emit_insn (gen_atomic_compare_and_swap_single<mode>
+	     (operands[1], operands[2], operands[3], operands[4]));
+  ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
+		     const0_rtx);
+  DONE;
+})
 
-(define_expand "sync_compare_and_swap<mode>"
-  [(parallel
-    [(set (match_operand:CASMODE 0 "register_operand" "")
-	  (match_operand:CASMODE 1 "memory_operand" ""))
-     (set (match_dup 1)
-	  (unspec_volatile:CASMODE
-	    [(match_dup 1)
-	     (match_operand:CASMODE 2 "register_operand" "")
-	     (match_operand:CASMODE 3 "register_operand" "")]
-	    UNSPECV_CMPXCHG))
-   (set (reg:CCZ FLAGS_REG)
-        (compare:CCZ
-          (unspec_volatile:CASMODE
-            [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPECV_CMPXCHG)
-          (match_dup 2)))])]
+(define_mode_iterator CASMODE
+  [(DI "TARGET_64BIT || TARGET_CMPXCHG8B")
+   (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
+(define_mode_iterator DCASMODE
+  [(DI "!TARGET_64BIT && TARGET_CMPXCHG8B && !flag_pic")
+   (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
+(define_mode_attr doublemodesuffix [(DI "8") (TI "16")])
+(define_mode_attr DCASHMODE [(DI "SI") (TI "DI")])
+
+(define_expand "atomic_compare_and_swap<mode>"
+  [(match_operand:QI 0 "register_operand" "")		;; bool success output
+   (match_operand:CASMODE 1 "register_operand" "")	;; oldval output
+   (match_operand:CASMODE 2 "memory_operand" "")	;; memory
+   (match_operand:CASMODE 3 "register_operand" "")	;; expected input
+   (match_operand:CASMODE 4 "register_operand" "")	;; newval input
+   (match_operand:SI 5 "const_int_operand" "")		;; is_weak
+   (match_operand:SI 6 "const_int_operand" "")		;; success model
+   (match_operand:SI 7 "const_int_operand" "")]		;; failure model
   "TARGET_CMPXCHG"
 {
-  if ((<MODE>mode == DImode && !TARGET_64BIT) || <MODE>mode == TImode)
+  if (<MODE>mode == DImode && TARGET_64BIT)
+    {
+      emit_insn (gen_atomic_compare_and_swap_singledi
+		 (operands[1], operands[2], operands[3], operands[4]));
+    }
+  else
     {
-      enum machine_mode hmode = <MODE>mode == DImode ? SImode : DImode;
-      rtx low = simplify_gen_subreg (hmode, operands[3], <MODE>mode, 0);
-      rtx high = simplify_gen_subreg (hmode, operands[3], <MODE>mode,
-				      GET_MODE_SIZE (hmode));
-      low = force_reg (hmode, low);
-      high = force_reg (hmode, high);
-      if (<MODE>mode == DImode)
-	{
-	  if (flag_pic && !cmpxchg8b_pic_memory_operand (operands[1], DImode))
-	    operands[1] = replace_equiv_address (operands[1],
-						 force_reg (Pmode,
-							    XEXP (operands[1],
-								  0)));
-	  emit_insn (gen_sync_double_compare_and_swapdi
-		     (operands[0], operands[1], operands[2], low, high));
-	}
-      else if (<MODE>mode == TImode)
-	emit_insn (gen_sync_double_compare_and_swapti
-		   (operands[0], operands[1], operands[2], low, high));
-      else
-	gcc_unreachable ();
-      DONE;
+      enum machine_mode hmode = <DCASHMODE>mode;
+      rtx lo_o, lo_e, lo_n, hi_o, hi_e, hi_n, mem;
+
+      lo_o = operands[1];
+      mem  = operands[2];
+      lo_e = operands[3];
+      lo_n = operands[4];
+      hi_o = gen_highpart (hmode, lo_o);
+      hi_e = gen_highpart (hmode, lo_e);
+      hi_n = gen_highpart (hmode, lo_n);
+      lo_o = gen_lowpart (hmode, lo_o);
+      lo_e = gen_lowpart (hmode, lo_e);
+      lo_n = gen_lowpart (hmode, lo_n);
+
+      if (<MODE>mode == DImode
+	  && !TARGET_64BIT
+	  && flag_pic
+	  && !cmpxchg8b_pic_memory_operand (mem, DImode))
+	mem = replace_equiv_address (mem, force_reg (Pmode, XEXP (mem, 0)));
+
+      emit_insn (gen_atomic_compare_and_swap_double<mode>
+		 (lo_o, hi_o, mem, lo_e, hi_e, lo_n, hi_n));
     }
+  ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
+		     const0_rtx);
+  DONE;
 })
 
-(define_insn "*sync_compare_and_swap<mode>"
+(define_insn "atomic_compare_and_swap_single<mode>"
   [(set (match_operand:SWI 0 "register_operand" "=a")
-	(match_operand:SWI 1 "memory_operand" "+m"))
-   (set (match_dup 1)
 	(unspec_volatile:SWI
-	  [(match_dup 1)
-	   (match_operand:SWI 2 "register_operand" "a")
+	  [(match_operand:SWI 1 "memory_operand" "+m")
+	   (match_operand:SWI 2 "register_operand" "0")
 	   (match_operand:SWI 3 "register_operand" "<r>")]
-	  UNSPECV_CMPXCHG))
+	  UNSPECV_CMPXCHG_1))
+   (set (match_dup 1)
+	(unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG_2))
    (set (reg:CCZ FLAGS_REG)
-        (compare:CCZ
-          (unspec_volatile:SWI
-            [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPECV_CMPXCHG)
-          (match_dup 2)))]
+        (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_3))]
   "TARGET_CMPXCHG"
   "lock{%;} cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}")
 
-(define_insn "sync_double_compare_and_swap<mode>"
-  [(set (match_operand:DCASMODE 0 "register_operand" "=A")
-	(match_operand:DCASMODE 1 "memory_operand" "+m"))
-   (set (match_dup 1)
-	(unspec_volatile:DCASMODE
-	  [(match_dup 1)
-	   (match_operand:DCASMODE 2 "register_operand" "A")
-	   (match_operand:<DCASHMODE> 3 "register_operand" "b")
-	   (match_operand:<DCASHMODE> 4 "register_operand" "c")]
-	  UNSPECV_CMPXCHG))
+;; For double-word compare and swap, we are obliged to play tricks with
+;; the input newval (op5:op6) because the Intel register numbering does
+;; not match the gcc register numbering, so the pair must be CX:BX.
+;; That said, in order to take advantage of possible lower-subreg opts,
+;; treat all of the integral operands in the same way.
+(define_insn "atomic_compare_and_swap_double<mode>"
+  [(set (match_operand:<DCASHMODE> 0 "register_operand" "=a")
+	(unspec_volatile:<DCASHMODE>
+	  [(match_operand:DCASMODE 2 "memory_operand" "+m")
+	   (match_operand:<DCASHMODE> 3 "register_operand" "0")
+	   (match_operand:<DCASHMODE> 4 "register_operand" "1")
+	   (match_operand:<DCASHMODE> 5 "register_operand" "b")
+	   (match_operand:<DCASHMODE> 6 "register_operand" "c")]
+	  UNSPECV_CMPXCHG_1))
+   (set (match_operand:<DCASHMODE> 1 "register_operand" "=d")
+	(unspec_volatile:<DCASHMODE> [(const_int 0)] UNSPECV_CMPXCHG_2))
+   (set (match_dup 2)
+	(unspec_volatile:DCASMODE [(const_int 0)] UNSPECV_CMPXCHG_3))
    (set (reg:CCZ FLAGS_REG)
-        (compare:CCZ
-          (unspec_volatile:DCASMODE
-            [(match_dup 1) (match_dup 2) (match_dup 3) (match_dup 4)]
-	    UNSPECV_CMPXCHG)
-          (match_dup 2)))]
+        (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_4))]
   ""
-  "lock{%;} cmpxchg<doublemodesuffix>b\t%1")
+  "lock{%;} cmpxchg<doublemodesuffix>b\t%2")
 
-;; Theoretically we'd like to use constraint "r" (any reg) for operand
-;; 3, but that includes ecx.  If operand 3 and 4 are the same (like when
-;; the input is -1LL) GCC might chose to allocate operand 3 to ecx, like
-;; operand 4.  This breaks, as the xchg will move the PIC register contents
-;; to %ecx then --> boom.  Operands 3 and 4 really need to be different
-;; registers, which in this case means operand 3 must not be ecx.
-;; Instead of playing tricks with fake early clobbers or the like we
-;; just enumerate all regs possible here, which (as this is !TARGET_64BIT)
+;; Theoretically we'd like to use constraint "r" (any reg) for op5,
+;; but that includes ecx.  If op5 and op6 are the same (like when
+;; the input is -1LL) GCC might choose to allocate op5 to ecx, like
+;; op6.  This breaks, as the xchg will move the PIC register contents
+;; to %ecx then --> boom.  Operands 5 and 6 really need to be different
+;; registers, which in this case means op5 must not be ecx.  Instead
+;; of playing tricks with fake early clobbers or the like we just
+;; enumerate all regs possible here, which (as this is !TARGET_64BIT)
 ;; are just esi and edi.
-(define_insn "*sync_double_compare_and_swapdi_pic"
-  [(set (match_operand:DI 0 "register_operand" "=A")
-	(match_operand:DI 1 "cmpxchg8b_pic_memory_operand" "+m"))
-   (set (match_dup 1)
-	(unspec_volatile:DI
-	  [(match_dup 1)
-	   (match_operand:DI 2 "register_operand" "A")
-	   (match_operand:SI 3 "register_operand" "SD")
-	   (match_operand:SI 4 "register_operand" "c")]
-	  UNSPECV_CMPXCHG))
+(define_insn "*atomic_compare_and_swap_doubledi_pic"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(unspec_volatile:SI
+	  [(match_operand:DI 2 "cmpxchg8b_pic_memory_operand" "+m")
+	   (match_operand:SI 3 "register_operand" "0")
+	   (match_operand:SI 4 "register_operand" "1")
+	   (match_operand:SI 5 "register_operand" "SD")
+	   (match_operand:SI 6 "register_operand" "c")]
+	  UNSPECV_CMPXCHG_1))
+   (set (match_operand:SI 1 "register_operand" "=d")
+	(unspec_volatile:SI [(const_int 0)] UNSPECV_CMPXCHG_2))
+   (set (match_dup 2)
+	(unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG_3))
    (set (reg:CCZ FLAGS_REG)
-	(compare:CCZ
-	  (unspec_volatile:DI
-	    [(match_dup 1) (match_dup 2) (match_dup 3) (match_dup 4)]
-	    UNSPECV_CMPXCHG)
-	  (match_dup 2)))]
+        (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_4))]
   "!TARGET_64BIT && TARGET_CMPXCHG8B && flag_pic"
-  "xchg{l}\t%%ebx, %3\;lock{%;} cmpxchg8b\t%1\;xchg{l}\t%%ebx, %3")
+  "xchg{l}\t%%ebx, %5\;lock{%;} cmpxchg8b\t%2\;xchg{l}\t%%ebx, %5")
 
 ;; For operand 2 nonmemory_operand predicate is used instead of
 ;; register_operand to allow combiner to better optimize atomic
 ;; additions of constants.
-(define_insn "sync_old_add<mode>"
+(define_insn "atomic_fetch_add<mode>"
   [(set (match_operand:SWI 0 "register_operand" "=<r>")
 	(unspec_volatile:SWI
-	  [(match_operand:SWI 1 "memory_operand" "+m")] UNSPECV_XCHG))
+	  [(match_operand:SWI 1 "memory_operand" "+m")
+	   (match_operand:SI 3 "const_int_operand" "")]		;; model
+	  UNSPECV_XCHG))
    (set (match_dup 1)
 	(plus:SWI (match_dup 1)
 		  (match_operand:SWI 2 "nonmemory_operand" "0")))
@@ -186,7 +207,9 @@
 	(match_operand:SWI 2 "const_int_operand" ""))
    (parallel [(set (match_dup 0)
 		   (unspec_volatile:SWI
-		     [(match_operand:SWI 1 "memory_operand" "")] UNSPECV_XCHG))
+		     [(match_operand:SWI 1 "memory_operand" "")
+		      (match_operand:SI 4 "const_int_operand" "")]
+		     UNSPECV_XCHG))
 	      (set (match_dup 1)
 		   (plus:SWI (match_dup 1)
 			     (match_dup 0)))
@@ -199,17 +222,19 @@
       == -(unsigned HOST_WIDE_INT) INTVAL (operands[3])
    && !reg_overlap_mentioned_p (operands[0], operands[1])"
   [(parallel [(set (reg:CCZ FLAGS_REG)
-		   (compare:CCZ (unspec_volatile:SWI [(match_dup 1)]
-						     UNSPECV_XCHG)
-				(match_dup 3)))
+		   (compare:CCZ
+		     (unspec_volatile:SWI [(match_dup 1) (match_dup 4)]
+					  UNSPECV_XCHG)
+		     (match_dup 3)))
 	      (set (match_dup 1)
 		   (plus:SWI (match_dup 1)
 			     (match_dup 2)))])])
 
-(define_insn "*sync_old_add_cmp<mode>"
+(define_insn "*atomic_fetch_add_cmp<mode>"
   [(set (reg:CCZ FLAGS_REG)
 	(compare:CCZ (unspec_volatile:SWI
-		       [(match_operand:SWI 0 "memory_operand" "+m")]
+		       [(match_operand:SWI 0 "memory_operand" "+m")
+		        (match_operand:SI 3 "const_int_operand" "")]
 		       UNSPECV_XCHG)
 		     (match_operand:SWI 2 "const_int_operand" "i")))
    (set (match_dup 0)
@@ -232,35 +257,25 @@
   return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}";
 })
 
-(define_expand "atomic_exchange<mode>"
-  [(match_operand:SWI 0 "register_operand" "")		;; output
-   (match_operand:SWI 1 "memory_operand" "")		;; memory
-   (match_operand:SWI 2 "register_operand" "")		;; input
-   (match_operand:SI  3 "const_int_operand" "")]	;; memory model
-  ""
-{
-  /* On i386 the xchg instruction is a full barrier.  Thus we
-     can completely ignore the memory model operand.  */
-  emit_insn (gen_sync_lock_test_and_set<mode>
-		(operands[0], operands[1], operands[2]));
-  DONE;
-})
-
 ;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space.
-(define_insn "sync_lock_test_and_set<mode>"
-  [(set (match_operand:SWI 0 "register_operand" "=<r>")
+;; In addition, it is always a full barrier, so we can ignore the memory model.
+(define_insn "atomic_exchange<mode>"
+  [(set (match_operand:SWI 0 "register_operand" "=<r>")		;; output
 	(unspec_volatile:SWI
-	  [(match_operand:SWI 1 "memory_operand" "+m")] UNSPECV_XCHG))
+	  [(match_operand:SWI 1 "memory_operand" "+m")		;; memory
+	   (match_operand:SI 3 "const_int_operand" "")]		;; model
+	  UNSPECV_XCHG))
    (set (match_dup 1)
-	(match_operand:SWI 2 "register_operand" "0"))]
+	(match_operand:SWI 2 "register_operand" "0"))]		;; input
   ""
   "xchg{<imodesuffix>}\t{%1, %0|%0, %1}")
 
-(define_insn "sync_add<mode>"
+(define_insn "atomic_add<mode>"
   [(set (match_operand:SWI 0 "memory_operand" "+m")
 	(unspec_volatile:SWI
 	  [(plus:SWI (match_dup 0)
-		     (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))]
+		     (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
+	   (match_operand:SI 2 "const_int_operand" "")]		;; model
 	  UNSPECV_LOCK))
    (clobber (reg:CC FLAGS_REG))]
   ""
@@ -279,11 +294,12 @@
   return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}";
 })
 
-(define_insn "sync_sub<mode>"
+(define_insn "atomic_sub<mode>"
   [(set (match_operand:SWI 0 "memory_operand" "+m")
 	(unspec_volatile:SWI
 	  [(minus:SWI (match_dup 0)
-		      (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))]
+		      (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
+	   (match_operand:SI 2 "const_int_operand" "")]		;; model
 	  UNSPECV_LOCK))
    (clobber (reg:CC FLAGS_REG))]
   ""
@@ -296,14 +312,18 @@
 	return "lock{%;} inc{<imodesuffix>}\t%0";
     }
 
+  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
+    return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}";
+
   return "lock{%;} sub{<imodesuffix>}\t{%1, %0|%0, %1}";
 })
 
-(define_insn "sync_<code><mode>"
+(define_insn "atomic_<code><mode>"
   [(set (match_operand:SWI 0 "memory_operand" "+m")
 	(unspec_volatile:SWI
 	  [(any_logic:SWI (match_dup 0)
-			  (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))]
+			  (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
+	   (match_operand:SI 2 "const_int_operand" "")]		;; model
 	  UNSPECV_LOCK))
    (clobber (reg:CC FLAGS_REG))]
   ""
-- 
1.7.6.4


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]