This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[4.8, PATCH 19/26] Backport Power8 and LE support: Quad memory atomic


Hi,

This patch (diff-quad-memory) backports support for quad-memory atomic
operations.

Thanks,
Bill


[gcc/testsuite]

2014-03-19  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	Back port from mainline
	2014-01-23  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/59909
	* gcc.target/powerpc/quad-atomic.c: New file to test power8 quad
	word atomic functions at runtime.

[gcc]

2014-03-19  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	Back port from mainline
	2014-01-23  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/59909
	* doc/invoke.texi (RS/6000 and PowerPC Options): Document
	-mquad-memory-atomic.  Update -mquad-memory documentation to say
	it is only used for non-atomic loads/stores.

	* config/rs6000/predicates.md (quad_int_reg_operand): Allow either
	-mquad-memory or -mquad-memory-atomic switches.

	* config/rs6000/rs6000-cpus.def (ISA_2_7_MASKS_SERVER): Add
	-mquad-memory-atomic to ISA 2.07 support.

	* config/rs6000/rs6000.opt (-mquad-memory-atomic): Add new switch
	to separate support of normal quad word memory operations (ldq,
	stq) from the atomic quad word memory operations.

	* config/rs6000/rs6000.c (rs6000_option_override_internal): Add
	support to separate non-atomic quad word operations from atomic
	quad word operations.  Disable non-atomic quad word operations in
	little endian mode so that we don't have to swap words after the
	load and before the store.
	(quad_load_store_p): Add comment about atomic quad word support.
	(rs6000_opt_masks): Add -mquad-memory-atomic to the list of
	options printed with -mdebug=reg.

	* config/rs6000/rs6000.h (TARGET_SYNC_TI): Use
	-mquad-memory-atomic as the test for whether we have quad word
	atomic instructions.
	(TARGET_SYNC_HI_QI): If either -mquad-memory-atomic,
	-mquad-memory, or -mp8-vector are used, allow byte/half-word
	atomic operations.

	* config/rs6000/sync.md (load_lockedti): Insure that the address
	is a proper indexed or indirect address for the lqarx instruction.
	On little endian systems, swap the hi/lo registers after the lqarx
	instruction.
	(load_lockedpti): Use indexed_or_indirect_operand predicate to
	insure the address is valid for the lqarx instruction.
	(store_conditionalti): Insure that the address is a proper indexed
	or indirect address for the stqcrx. instruction.  On little endian
	systems, swap the hi/lo registers before doing the stqcrx.
	instruction.
	(store_conditionalpti): Use indexed_or_indirect_operand predicate to
	insure the address is valid for the stqcrx. instruction.

	* gcc/config/rs6000/rs6000-c.c (rs6000_target_modify_macros):
	Define __QUAD_MEMORY__ and __QUAD_MEMORY_ATOMIC__ based on what
	type of quad memory support is available.


Index: gcc-4_8-test/gcc/config/rs6000/predicates.md
===================================================================
--- gcc-4_8-test.orig/gcc/config/rs6000/predicates.md
+++ gcc-4_8-test/gcc/config/rs6000/predicates.md
@@ -270,7 +270,7 @@
 {
   HOST_WIDE_INT r;
 
-  if (!TARGET_QUAD_MEMORY)
+  if (!TARGET_QUAD_MEMORY && !TARGET_QUAD_MEMORY_ATOMIC)
     return 0;
 
   if (GET_CODE (op) == SUBREG)
@@ -633,6 +633,7 @@
        (match_test "offsettable_nonstrict_memref_p (op)")))
 
 ;; Return 1 if the operand is suitable for load/store quad memory.
+;; This predicate only checks for non-atomic loads/stores.
 (define_predicate "quad_memory_operand"
   (match_code "mem")
 {
Index: gcc-4_8-test/gcc/config/rs6000/rs6000-c.c
===================================================================
--- gcc-4_8-test.orig/gcc/config/rs6000/rs6000-c.c
+++ gcc-4_8-test/gcc/config/rs6000/rs6000-c.c
@@ -337,6 +337,10 @@ rs6000_target_modify_macros (bool define
     rs6000_define_or_undefine_macro (define_p, "__HTM__");
   if ((flags & OPTION_MASK_P8_VECTOR) != 0)
     rs6000_define_or_undefine_macro (define_p, "__POWER8_VECTOR__");
+  if ((flags & OPTION_MASK_QUAD_MEMORY) != 0)
+    rs6000_define_or_undefine_macro (define_p, "__QUAD_MEMORY__");
+  if ((flags & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
+    rs6000_define_or_undefine_macro (define_p, "__QUAD_MEMORY_ATOMIC__");
   if ((flags & OPTION_MASK_CRYPTO) != 0)
     rs6000_define_or_undefine_macro (define_p, "__CRYPTO__");
 
Index: gcc-4_8-test/gcc/config/rs6000/rs6000-cpus.def
===================================================================
--- gcc-4_8-test.orig/gcc/config/rs6000/rs6000-cpus.def
+++ gcc-4_8-test/gcc/config/rs6000/rs6000-cpus.def
@@ -53,7 +53,8 @@
 				 | OPTION_MASK_CRYPTO			\
 				 | OPTION_MASK_DIRECT_MOVE		\
 				 | OPTION_MASK_HTM			\
-				 | OPTION_MASK_QUAD_MEMORY)
+				 | OPTION_MASK_QUAD_MEMORY		\
+  				 | OPTION_MASK_QUAD_MEMORY_ATOMIC)
 
 #define POWERPC_7400_MASK	(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_ALTIVEC)
 
Index: gcc-4_8-test/gcc/config/rs6000/rs6000.c
===================================================================
--- gcc-4_8-test.orig/gcc/config/rs6000/rs6000.c
+++ gcc-4_8-test/gcc/config/rs6000/rs6000.c
@@ -3317,14 +3317,37 @@ rs6000_option_override_internal (bool gl
 
   /* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
      silently turn off quad memory mode.  */
-  if (TARGET_QUAD_MEMORY && !TARGET_POWERPC64)
+  if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
     {
       if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
 	warning (0, N_("-mquad-memory requires 64-bit mode"));
 
+      if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
+	warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
+
+      rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
+			    | OPTION_MASK_QUAD_MEMORY_ATOMIC);
+    }
+
+  /* Non-atomic quad memory load/store are disabled for little endian, since
+     the words are reversed, but atomic operations can still be done by
+     swapping the words.  */
+  if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
+    {
+      if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
+	warning (0, N_("-mquad-memory is not available in little endian mode"));
+
       rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
     }
 
+  /* Assume if the user asked for normal quad memory instructions, they want
+     the atomic versions as well, unless they explicity told us not to use quad
+     word atomic instructions.  */
+  if (TARGET_QUAD_MEMORY
+      && !TARGET_QUAD_MEMORY_ATOMIC
+      && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
+    rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
+
   /* Enable power8 fusion if we are tuning for power8, even if we aren't
      generating power8 instructions.  */
   if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
@@ -5875,7 +5898,8 @@ direct_move_p (rtx op0, rtx op1)
   return false;
 }
 
-/* Return true if this is a load or store quad operation.  */
+/* Return true if this is a load or store quad operation.  This function does
+   not handle the atomic quad memory instructions.  */
 
 bool
 quad_load_store_p (rtx op0, rtx op1)
@@ -30675,6 +30699,7 @@ static struct rs6000_opt_mask const rs60
   { "powerpc-gfxopt",		OPTION_MASK_PPC_GFXOPT,		false, true  },
   { "powerpc-gpopt",		OPTION_MASK_PPC_GPOPT,		false, true  },
   { "quad-memory",		OPTION_MASK_QUAD_MEMORY,	false, true  },
+  { "quad-memory-atomic",	OPTION_MASK_QUAD_MEMORY_ATOMIC,	false, true  },
   { "recip-precision",		OPTION_MASK_RECIP_PRECISION,	false, true  },
   { "string",			OPTION_MASK_STRING,		false, true  },
   { "update",			OPTION_MASK_NO_UPDATE,		true , true  },
Index: gcc-4_8-test/gcc/config/rs6000/rs6000.h
===================================================================
--- gcc-4_8-test.orig/gcc/config/rs6000/rs6000.h
+++ gcc-4_8-test/gcc/config/rs6000/rs6000.h
@@ -524,8 +524,11 @@ extern int rs6000_vector_align[];
 /* Byte/char syncs were added as phased in for ISA 2.06B, but are not present
    in power7, so conditionalize them on p8 features.  TImode syncs need quad
    memory support.  */
-#define TARGET_SYNC_HI_QI	(TARGET_QUAD_MEMORY || TARGET_DIRECT_MOVE)
-#define TARGET_SYNC_TI		TARGET_QUAD_MEMORY
+#define TARGET_SYNC_HI_QI	(TARGET_QUAD_MEMORY			\
+				 || TARGET_QUAD_MEMORY_ATOMIC		\
+				 || TARGET_DIRECT_MOVE)
+
+#define TARGET_SYNC_TI		TARGET_QUAD_MEMORY_ATOMIC
 
 /* Power7 has both 32-bit load and store integer for the FPRs, so we don't need
    to allocate the SDmode stack slot to get the value into the proper location
Index: gcc-4_8-test/gcc/config/rs6000/rs6000.opt
===================================================================
--- gcc-4_8-test.orig/gcc/config/rs6000/rs6000.opt
+++ gcc-4_8-test/gcc/config/rs6000/rs6000.opt
@@ -556,7 +556,11 @@ Use ISA 2.07 transactional memory (HTM)
 
 mquad-memory
 Target Report Mask(QUAD_MEMORY) Var(rs6000_isa_flags)
-Generate the quad word memory instructions (lq/stq/lqarx/stqcx).
+Generate the quad word memory instructions (lq/stq).
+
+mquad-memory-atomic
+Target Report Mask(QUAD_MEMORY_ATOMIC) Var(rs6000_isa_flags)
+Generate the quad word memory atomic instructions (lqarx/stqcx).
 
 mcompat-align-parm
 Target Report Var(rs6000_compat_align_parm) Init(1) Save
Index: gcc-4_8-test/gcc/config/rs6000/sync.md
===================================================================
--- gcc-4_8-test.orig/gcc/config/rs6000/sync.md
+++ gcc-4_8-test/gcc/config/rs6000/sync.md
@@ -204,25 +204,46 @@
   "<QHI:larx> %0,%y1"
   [(set_attr "type" "load_l")])
 
-;; Use PTImode to get even/odd register pairs
+;; Use PTImode to get even/odd register pairs.
+;; Use a temporary register to force getting an even register for the
+;; lqarx/stqcrx. instructions.  Normal optimizations will eliminate this extra
+;; copy on big endian systems.
+
+;; On little endian systems where non-atomic quad word load/store instructions
+;; are not used, the address can be register+offset, so make sure the address
+;; is indexed or indirect before register allocation.
+
 (define_expand "load_lockedti"
   [(use (match_operand:TI 0 "quad_int_reg_operand" ""))
    (use (match_operand:TI 1 "memory_operand" ""))]
   "TARGET_SYNC_TI"
 {
-  /* Use a temporary register to force getting an even register for the
-     lqarx/stqcrx. instructions.  Normal optimizations will eliminate this
-     extra copy.  */
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
   rtx pti = gen_reg_rtx (PTImode);
-  emit_insn (gen_load_lockedpti (pti, operands[1]));
-  emit_move_insn (operands[0], gen_lowpart (TImode, pti));
+
+  if (!indexed_or_indirect_operand (op1, TImode))
+    {
+      rtx old_addr = XEXP (op1, 0);
+      rtx new_addr = force_reg (Pmode, old_addr);
+      operands[1] = op1 = change_address (op1, TImode, new_addr);
+    }
+
+  emit_insn (gen_load_lockedpti (pti, op1));
+  if (WORDS_BIG_ENDIAN)
+    emit_move_insn (op0, gen_lowpart (TImode, pti));
+  else
+    {
+      emit_move_insn (gen_lowpart (DImode, op0), gen_highpart (DImode, pti));
+      emit_move_insn (gen_highpart (DImode, op0), gen_lowpart (DImode, pti));
+    }
   DONE;
 })
 
 (define_insn "load_lockedpti"
   [(set (match_operand:PTI 0 "quad_int_reg_operand" "=&r")
 	(unspec_volatile:PTI
-         [(match_operand:TI 1 "memory_operand" "Z")] UNSPECV_LL))]
+         [(match_operand:TI 1 "indexed_or_indirect_operand" "Z")] UNSPECV_LL))]
   "TARGET_SYNC_TI
    && !reg_mentioned_p (operands[0], operands[1])
    && quad_int_reg_operand (operands[0], PTImode)"
@@ -238,6 +259,14 @@
   "<stcx> %2,%y1"
   [(set_attr "type" "store_c")])
 
+;; Use a temporary register to force getting an even register for the
+;; lqarx/stqcrx. instructions.  Normal optimizations will eliminate this extra
+;; copy on big endian systems.
+
+;; On little endian systems where non-atomic quad word load/store instructions
+;; are not used, the address can be register+offset, so make sure the address
+;; is indexed or indirect before register allocation.
+
 (define_expand "store_conditionalti"
   [(use (match_operand:CC 0 "cc_reg_operand" ""))
    (use (match_operand:TI 1 "memory_operand" ""))
@@ -247,21 +276,36 @@
   rtx op0 = operands[0];
   rtx op1 = operands[1];
   rtx op2 = operands[2];
-  rtx pti_op1 = change_address (op1, PTImode, XEXP (op1, 0));
-  rtx pti_op2 = gen_reg_rtx (PTImode);
+  rtx addr = XEXP (op1, 0);
+  rtx pti_mem;
+  rtx pti_reg;
+
+  if (!indexed_or_indirect_operand (op1, TImode))
+    {
+      rtx new_addr = force_reg (Pmode, addr);
+      operands[1] = op1 = change_address (op1, TImode, new_addr);
+      addr = new_addr;
+    }
+
+  pti_mem = change_address (op1, PTImode, addr);
+  pti_reg = gen_reg_rtx (PTImode);
+
+  if (WORDS_BIG_ENDIAN)
+    emit_move_insn (pti_reg, gen_lowpart (PTImode, op2));
+  else
+    {
+      emit_move_insn (gen_lowpart (DImode, pti_reg), gen_highpart (DImode, op2));
+      emit_move_insn (gen_highpart (DImode, pti_reg), gen_lowpart (DImode, op2));
+    }
 
-  /* Use a temporary register to force getting an even register for the
-     lqarx/stqcrx. instructions.  Normal optimizations will eliminate this
-     extra copy.  */
-  emit_move_insn (pti_op2, gen_lowpart (PTImode, op2));
-  emit_insn (gen_store_conditionalpti (op0, pti_op1, pti_op2));
+  emit_insn (gen_store_conditionalpti (op0, pti_mem, pti_reg));
   DONE;
 })
 
 (define_insn "store_conditionalpti"
   [(set (match_operand:CC 0 "cc_reg_operand" "=x")
 	(unspec_volatile:CC [(const_int 0)] UNSPECV_SC))
-   (set (match_operand:PTI 1 "memory_operand" "=Z")
+   (set (match_operand:PTI 1 "indexed_or_indirect_operand" "=Z")
 	(match_operand:PTI 2 "quad_int_reg_operand" "r"))]
   "TARGET_SYNC_TI && quad_int_reg_operand (operands[2], PTImode)"
   "stqcx. %2,%y1"
Index: gcc-4_8-test/gcc/doc/invoke.texi
===================================================================
--- gcc-4_8-test.orig/gcc/doc/invoke.texi
+++ gcc-4_8-test/gcc/doc/invoke.texi
@@ -858,7 +858,9 @@ See RS/6000 and PowerPC Options.
 -msave-toc-indirect -mno-save-toc-indirect @gol
 -mpower8-fusion -mno-mpower8-fusion -mpower8-vector -mno-power8-vector @gol
 -mcrypto -mno-crypto -mdirect-move -mno-direct-move @gol
--mquad-memory -mno-quad-memory}
+-mquad-memory -mno-quad-memory @gol
+-mquad-memory-atomic -mno-quad-memory-atomic @gol
+-mcompat-align-parm -mno-compat-align-parm}
 
 @emph{RX Options}
 @gccoptlist{-m64bit-doubles  -m32bit-doubles  -fpu  -nofpu@gol
@@ -17243,8 +17245,8 @@ following options:
 -mpopcntb -mpopcntd  -mpowerpc64 @gol
 -mpowerpc-gpopt  -mpowerpc-gfxopt  -msingle-float -mdouble-float @gol
 -msimple-fpu -mstring  -mmulhw  -mdlmzb  -mmfpgpr -mvsx @gol
--mcrypto -mdirect-move -mpower8-fusion -mpower8-vector -mquad-memory @gol
--mcompat-align-parm -mno-compat-align-parm}
+-mcrypto -mdirect-move -mpower8-fusion -mpower8-vector @gol
+-mquad-memory -mquad-memory-atomic}
 
 The particular options set for any particular CPU varies between
 compiler versions, depending on what setting seems to produce optimal
@@ -17399,10 +17401,18 @@ the vector instructions.
 @itemx -mno-quad-memory
 @opindex mquad-memory
 @opindex mno-quad-memory
-Generate code that uses (does not use) the quad word memory
+Generate code that uses (does not use) the non-atomic quad word memory
 instructions.  The @option{-mquad-memory} option requires use of
 64-bit mode.
 
+@item -mquad-memory-atomic
+@itemx -mno-quad-memory-atomic
+@opindex mquad-memory-atomic
+@opindex mno-quad-memory-atomic
+Generate code that uses (does not use) the atomic quad word memory
+instructions.  The @option{-mquad-memory-atomic} option requires use of
+64-bit mode.
+
 @item -mfloat-gprs=@var{yes/single/double/no}
 @itemx -mfloat-gprs
 @opindex mfloat-gprs
Index: gcc-4_8-test/gcc/testsuite/gcc.target/powerpc/quad-atomic.c
===================================================================
--- /dev/null
+++ gcc-4_8-test/gcc/testsuite/gcc.target/powerpc/quad-atomic.c
@@ -0,0 +1,67 @@
+/* { dg-do run { target { powerpc*-*-linux* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
+/* { dg-require-effective-target p8vector_hw } */
+/* { dg-options "-mcpu=power8 -O2" } */
+
+/* Test whether we get the right bits for quad word atomic instructions.  */
+#include <stdlib.h>
+
+static __int128_t quad_fetch_and (__int128_t *, __int128_t value) __attribute__((__noinline__));
+static __int128_t quad_fetch_or  (__int128_t *, __int128_t value) __attribute__((__noinline__));
+static __int128_t quad_fetch_add (__int128_t *, __int128_t value) __attribute__((__noinline__));
+
+static __int128_t
+quad_fetch_and (__int128_t *ptr, __int128_t value)
+{
+  return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+static __int128_t
+quad_fetch_or (__int128_t *ptr, __int128_t value)
+{
+  return __atomic_fetch_or (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+static __int128_t
+quad_fetch_add (__int128_t *ptr, __int128_t value)
+{
+  return __atomic_fetch_add (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+int
+main (void)
+{
+  __int128_t result;
+  __int128_t value;
+  __int128_t and_input	= ((((__int128_t) 0x1234567890abcdefULL) << 64) | ((__int128_t) 0xfedcba0987654321ULL));
+  __int128_t and_value	= ((((__int128_t) 0xfffffffffffffff0ULL) << 64) | ((__int128_t) 0xfffffffffffffff0ULL));
+  __int128_t and_exp	= ((((__int128_t) 0x1234567890abcde0ULL) << 64) | ((__int128_t) 0xfedcba0987654320ULL));
+
+  __int128_t or_input	= ((((__int128_t) 0x1234567890abcdefULL) << 64) | ((__int128_t) 0xfedcba0987654321ULL));
+  __int128_t or_value	= ((((__int128_t) 0x0000000000000010ULL) << 64) | ((__int128_t) 0x000000000000000eULL));
+  __int128_t or_exp	= ((((__int128_t) 0x1234567890abcdffULL) << 64) | ((__int128_t) 0xfedcba098765432fULL));
+
+  __int128_t add_input	= ((((__int128_t) 0x1234567890abcdefULL) << 64) | ((__int128_t) 0xfedcba0987654321ULL));
+  __int128_t add_value	= ((((__int128_t) 0x0000000001000000ULL) << 64) | ((__int128_t) 0x0000001000000000ULL));
+  __int128_t add_exp	= ((((__int128_t) 0x1234567891abcdefULL) << 64) | ((__int128_t) 0xfedcba1987654321ULL));
+
+
+  value = and_input;
+  result = quad_fetch_and (&value, and_value);
+  if (result != and_input || value != and_exp)
+    abort ();
+
+  value = or_input;
+  result = quad_fetch_or (&value, or_value);
+  if (result != or_input || value != or_exp)
+    abort ();
+
+  value = add_input;
+  result = quad_fetch_add (&value, add_value);
+  if (result != add_input || value != add_exp)
+    abort ();
+
+  return 0;
+}
+




Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]