This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [trans-mem] rms-tm bug report


> How about callbacks:
> 
>   tree (* builtin_tm_load)(tree type, tree *ptype);
>   tree (* builtin_tm_store)(tree type, tree *ptype);
> 
...
>   typedef int _ITM_TYPE_M64 __attribute__((vector_size(8), may_alias));
>   typedef float _ITM_TYPE_M128 __attribute__((vector_size(16), may_alias));
>   typedef float _ITM_TYPE_M256 __attribute__((vector_size(32), may_alias));
> 
> I.e. v2di, v4sf, v8sf.

Let's see if you like this approach.

First, I only used the TYPE argument for the callbacks, as the caller of
the callback already handles the appropriate cast.  See
build_tm_{load,store}.

Unfortunately I needed some more callbacks to implement
is_tm_*{load,store}.

I added a FIXME for adding vector logging functions later.  Right now
vector values will be handled with the ubiquitous ITM_LB.

Phew... this is a far bigger patch than I expected.

OK for branch?

	* targhooks.c (default_builtin_tm_load_store): New.
	(default_tm_vector_p): New.
	* targhooks.h (default_builtin_tm_load_store): Declare.
	(default_tm_vector_p): Declare.
	* target.h (struct gcc_target): Add builtin_tm_load,
	builtin_tm_store, tm_vector_load_p, tm_vector_simple_load_p,
	tm_vector_store_p, tm_vector_simple_store_p.
	* trans-mem.c (is_tm_load): Call tm_vector_load_p callback.
	(is_tm_simple_load): Call tm_vector_simple_load_p.
	(is_tm_store): Call tm_vector_store_p callback.
	(is_tm_simple_store): Call tm_vector_simple_store_p.
	(transaction_invariant_address_p): Handle MISALIGNED_INDIRECT_REF.
	(tm_log_emit_stmt): Add FIXME note.
	(requires_barrier): Handle MISALIGNED_INDIRECT_REF.
	(build_tm_load): Call builtin_tm_load callback.
	(build_tm_store): Call builtin_tm_store callback.
	* target-def.h (TARGET_VECTORIZE_BUILTIN_TM_LOAD): Define.
	(TARGET_VECTORIZE_BUILTIN_TM_STORE): Same.
	(TARGET_VECTORIZE_TM_VECTOR_LOAD_P): Same.
	(TARGET_VECTORIZE_TM_VECTOR_SIMPLE_LOAD_P): Same.
	(TARGET_VECTORIZE_TM_VECTOR_STORE_P): Same.
	(TARGET_VECTORIZE_TM_VECTOR_SIMPLE_STORE_P): Same.
	(TARGET_VECTORIZE): Add TM callbacks.
	* config/i386/i386-builtin-types.def (PV2SI): Define.
	(PCV2SI): Define.
	Define V2SI_FTYPE_PCV2SI.
	Define V4SF_FTYPE_PCV4SF.
	Define V8SF_FTYPE_PCV8SF.
	Define VOID_FTYPE_PV2SI_V2SI.
	Define VOID_FTYPE_PV4SF_V4SF.
	Define VOID_FTYPE_PV8SF_V8SF.
	* config/i386/i386.c (enum ix86_builtins): Add IX86_BUILTIN_TM_*.
	Declare bdesc_tm.
	(ix86_init_tm_builtins): New.
	(ix86_init_builtins): Initialize TM builtins.
	(ix86_tm_vector_load_p): New.
	(ix86_tm_vector_simple_load_p): New.
	(ix86_tm_vector_store_p): New.
	(ix86_tm_vector_simple_store_p): New.
	(ix86_builtin_tm_load): New.
	(ix86_builtin_tm_store): New.
	Define TARGET_VECTORIZE* transactional variants.

Index: targhooks.c
===================================================================
--- targhooks.c	(revision 159821)
+++ targhooks.c	(working copy)
@@ -1009,4 +1009,16 @@ default_have_conditional_execution (void
 #endif
 }
 
+tree
+default_builtin_tm_load_store (tree ARG_UNUSED (type))
+{
+  return NULL_TREE;
+}
+
+bool
+default_tm_vector_p (enum built_in_function ARG_UNUSED (code))
+{
+  return false;
+}
+
 #include "gt-targhooks.h"
Index: targhooks.h
===================================================================
--- targhooks.h	(revision 159821)
+++ targhooks.h	(working copy)
@@ -132,3 +132,6 @@ extern bool default_addr_space_subset_p 
 extern rtx default_addr_space_convert (rtx, tree, tree);
 extern unsigned int default_case_values_threshold (void);
 extern bool default_have_conditional_execution (void);
+
+extern tree default_builtin_tm_load_store (tree);
+extern bool default_tm_vector_p (enum built_in_function);
Index: target.h
===================================================================
--- target.h	(revision 159821)
+++ target.h	(working copy)
@@ -506,6 +506,19 @@ struct gcc_target
        is true if the access is defined in a packed struct.  */
     bool (* builtin_support_vector_misalignment) (enum machine_mode,
                                                   const_tree, int, bool);
+
+    /* Target builtin that implements transactional memory load.  */
+    tree (* builtin_tm_load) (tree);
+    /* Target builtin that implements transactional memory store.  */
+    tree (* builtin_tm_store) (tree);
+    /* Return true if a given built-in is a TM vector load.  */
+    bool (* tm_vector_load_p) (unsigned int /*enum built_in_function*/);
+    /* Same as above, but exclude the RaR, RaW, and RfW variants.  */
+    bool (* tm_vector_simple_load_p) (unsigned int /*enum built_in_function*/);
+    /* Return true if a given built-in is a TM vector store.  */
+    bool (* tm_vector_store_p) (unsigned int /*enum built_in_function*/);
+    /* Same as above, but exclude the WaR and WaW variants.  */
+    bool (* tm_vector_simple_store_p) (unsigned int /*enum built_in_function*/);
   } vectorize;
 
   /* The initial value of target_flags.  */
Index: testsuite/gcc.dg/tm/vector-1.c
===================================================================
--- testsuite/gcc.dg/tm/vector-1.c	(revision 0)
+++ testsuite/gcc.dg/tm/vector-1.c	(revision 0)
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-fgnu-tm -O3" } */
+
+/* On x86-64, the vectorizer creates V2DI uses which we must handle.
+   Similarly for other vector architectures.  */
+
+void ** newElements;
+
+__attribute__((transaction_safe))
+long
+TMqueue_push (void** queuePtr)
+{  
+   long src;
+   for (src = 1; src < 9; src++) {
+     newElements[src+1] = queuePtr[src];
+   }
+   return 1;
+}
Index: testsuite/g++.dg/tm/vector-1.C
===================================================================
--- testsuite/g++.dg/tm/vector-1.C	(revision 0)
+++ testsuite/g++.dg/tm/vector-1.C	(revision 0)
@@ -0,0 +1,15 @@
+// { dg-do compile }
+// { dg-options "-fgnu-tm -O3" }
+
+class HashTree
+{
+   __attribute__((transaction_safe)) void rehash();
+   HashTree **Hash_table;
+   int Hash_function;
+};
+
+__attribute__((transaction_safe)) void HashTree::rehash()
+{
+   for (int i=0; i < Hash_function; i++)
+      Hash_table[i] = 0;
+}
Index: trans-mem.c
===================================================================
--- trans-mem.c	(revision 159821)
+++ trans-mem.c	(working copy)
@@ -321,8 +321,13 @@ is_tm_load (gimple stmt)
     return false;
 
   fndecl = gimple_call_fndecl (stmt);
-  return (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
-	  && BUILTIN_TM_LOAD_P (DECL_FUNCTION_CODE (fndecl)));
+  if (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
+    {
+      enum built_in_function code = DECL_FUNCTION_CODE (fndecl);
+      return (BUILTIN_TM_LOAD_P (code)
+	      || targetm.vectorize.tm_vector_load_p (code));
+    }
+  return false;
 }
 
 /* Same as above, but for simple TM loads, that is, not the
@@ -346,7 +351,8 @@ is_tm_simple_load (gimple stmt)
 	      || fcode == BUILT_IN_TM_LOAD_8
 	      || fcode == BUILT_IN_TM_LOAD_FLOAT
 	      || fcode == BUILT_IN_TM_LOAD_DOUBLE
-	      || fcode == BUILT_IN_TM_LOAD_LDOUBLE);
+	      || fcode == BUILT_IN_TM_LOAD_LDOUBLE
+	      || targetm.vectorize.tm_vector_simple_load_p (fcode));
     }
   return false;
 }
@@ -362,8 +368,13 @@ is_tm_store (gimple stmt)
     return false;
 
   fndecl = gimple_call_fndecl (stmt);
-  return (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
-	  && BUILTIN_TM_STORE_P (DECL_FUNCTION_CODE (fndecl)));
+  if (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
+    {
+      enum built_in_function fcode = DECL_FUNCTION_CODE (fndecl);
+      return (BUILTIN_TM_STORE_P (fcode)
+	      || targetm.vectorize.tm_vector_store_p (fcode));
+    }
+  return false;
 }
 
 /* Same as above, but for simple TM stores, that is, not the
@@ -387,7 +398,8 @@ is_tm_simple_store (gimple stmt)
 	      || fcode == BUILT_IN_TM_STORE_8
 	      || fcode == BUILT_IN_TM_STORE_FLOAT
 	      || fcode == BUILT_IN_TM_STORE_DOUBLE
-	      || fcode == BUILT_IN_TM_STORE_LDOUBLE);
+	      || fcode == BUILT_IN_TM_STORE_LDOUBLE
+	      || targetm.vectorize.tm_vector_simple_store_p (fcode));
     }
   return false;
 }
@@ -889,7 +901,8 @@ tm_log_delete (void)
 static bool
 transaction_invariant_address_p (const_tree mem, basic_block region_entry_block)
 {
-  if (TREE_CODE (mem) == INDIRECT_REF
+  if ((TREE_CODE (mem) == INDIRECT_REF
+       || TREE_CODE (mem) == MISALIGNED_INDIRECT_REF)
       && TREE_CODE (TREE_OPERAND (mem, 0)) == SSA_NAME)
     {
       basic_block def_bb;
@@ -1039,6 +1052,7 @@ tm_log_emit_stmt (tree addr, gimple stmt
 	code = BUILT_IN_TM_LOG_8;
 	break;
       default:
+	/* FIXME: Add support for vector logging functions.  */
 	code = BUILT_IN_TM_LOG;
 	break;
       }
@@ -1358,6 +1372,7 @@ requires_barrier (basic_block entry_bloc
   switch (TREE_CODE (x))
     {
     case INDIRECT_REF:
+    case MISALIGNED_INDIRECT_REF:
       {
 	enum thread_memory_type ret;
 
@@ -1376,7 +1391,6 @@ requires_barrier (basic_block entry_bloc
       }
 
     case ALIGN_INDIRECT_REF:
-    case MISALIGNED_INDIRECT_REF:
       /* ??? Insert an irrevocable when it comes to vectorized loops,
 	 or handle these somehow.  */
       gcc_unreachable ();
@@ -1870,7 +1884,7 @@ static gimple
 build_tm_load (location_t loc, tree lhs, tree rhs, gimple_stmt_iterator *gsi)
 {
   enum built_in_function code = END_BUILTINS;
-  tree t, type = TREE_TYPE (rhs);
+  tree t, type = TREE_TYPE (rhs), decl;
   gimple gcall;
 
   if (type == float_type_node)
@@ -1900,13 +1914,19 @@ build_tm_load (location_t loc, tree lhs,
     }
 
   if (code == END_BUILTINS)
-    return NULL;
+    {
+      decl = targetm.vectorize.builtin_tm_load (type);
+      if (!decl)
+	return NULL;
+    }
+  else
+    decl = built_in_decls[code];
 
   t = gimplify_addr (gsi, rhs);
-  gcall = gimple_build_call (built_in_decls[code], 1, t);
+  gcall = gimple_build_call (decl, 1, t);
   gimple_set_location (gcall, loc);
 
-  t = TREE_TYPE (TREE_TYPE (built_in_decls[code]));
+  t = TREE_TYPE (TREE_TYPE (decl));
   if (useless_type_conversion_p (type, t))
     {
       gimple_call_set_lhs (gcall, lhs);
@@ -1966,9 +1986,14 @@ build_tm_store (location_t loc, tree lhs
     }
 
   if (code == END_BUILTINS)
-    return NULL;
+    {
+      fn = targetm.vectorize.builtin_tm_store (type);
+      if (!fn)
+	return NULL;
+    }
+  else
+    fn = built_in_decls[code];
 
-  fn = built_in_decls[code];
   simple_type = TREE_VALUE (TREE_CHAIN (TYPE_ARG_TYPES (TREE_TYPE (fn))));
 
   if (!useless_type_conversion_p (simple_type, type))
@@ -1986,7 +2011,7 @@ build_tm_store (location_t loc, tree lhs
     }
 
   t = gimplify_addr (gsi, lhs);
-  gcall = gimple_build_call (built_in_decls[code], 2, t, rhs);
+  gcall = gimple_build_call (fn, 2, t, rhs);
   gimple_set_location (gcall, loc);
   gsi_insert_before (gsi, gcall, GSI_SAME_STMT);
   
Index: target-def.h
===================================================================
--- target-def.h	(revision 159821)
+++ target-def.h	(working copy)
@@ -400,6 +400,18 @@
   hook_bool_tree_tree_true
 #define TARGET_SUPPORT_VECTOR_MISALIGNMENT \
   default_builtin_support_vector_misalignment
+#define TARGET_VECTORIZE_BUILTIN_TM_LOAD \
+  default_builtin_tm_load_store
+#define TARGET_VECTORIZE_BUILTIN_TM_STORE \
+  default_builtin_tm_load_store
+#define TARGET_VECTORIZE_TM_VECTOR_LOAD_P \
+  default_tm_vector_p
+#define TARGET_VECTORIZE_TM_VECTOR_SIMPLE_LOAD_P \
+  default_tm_vector_p
+#define TARGET_VECTORIZE_TM_VECTOR_STORE_P \
+  default_tm_vector_p
+#define TARGET_VECTORIZE_TM_VECTOR_SIMPLE_STORE_P \
+  default_tm_vector_p
 
 
 #define TARGET_VECTORIZE                                                \
@@ -413,7 +425,13 @@
     TARGET_VECTOR_ALIGNMENT_REACHABLE,                                  \
     TARGET_VECTORIZE_BUILTIN_VEC_PERM,					\
     TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK,				\
-    TARGET_SUPPORT_VECTOR_MISALIGNMENT					\
+    TARGET_SUPPORT_VECTOR_MISALIGNMENT,					\
+    TARGET_VECTORIZE_BUILTIN_TM_LOAD,                                   \
+    TARGET_VECTORIZE_BUILTIN_TM_STORE,                                  \
+    TARGET_VECTORIZE_TM_VECTOR_LOAD_P,			                \
+    TARGET_VECTORIZE_TM_VECTOR_SIMPLE_LOAD_P,                           \
+    TARGET_VECTORIZE_TM_VECTOR_STORE_P,                                 \
+    TARGET_VECTORIZE_TM_VECTOR_SIMPLE_STORE_P				\
   }
 
 #define TARGET_DEFAULT_TARGET_FLAGS 0
Index: config/i386/i386-builtin-types.def
===================================================================
--- config/i386/i386-builtin-types.def	(revision 159821)
+++ config/i386/i386-builtin-types.def	(working copy)
@@ -111,6 +111,7 @@ DEF_POINTER_TYPE (PINT, INT)
 DEF_POINTER_TYPE (PULONGLONG, ULONGLONG)
 DEF_POINTER_TYPE (PUNSIGNED, UNSIGNED)
 
+DEF_POINTER_TYPE (PV2SI, V2SI)
 DEF_POINTER_TYPE (PV2DF, V2DF)
 DEF_POINTER_TYPE (PV2DI, V2DI)
 DEF_POINTER_TYPE (PV2SF, V2SF)
@@ -119,6 +120,7 @@ DEF_POINTER_TYPE (PV4DI, V4DI)
 DEF_POINTER_TYPE (PV4SF, V4SF)
 DEF_POINTER_TYPE (PV8SF, V8SF)
 
+DEF_POINTER_TYPE (PCV2SI, V2SI, CONST)
 DEF_POINTER_TYPE (PCV2DF, V2DF, CONST)
 DEF_POINTER_TYPE (PCV2SF, V2SF, CONST)
 DEF_POINTER_TYPE (PCV4DF, V4DF, CONST)
@@ -164,6 +166,7 @@ DEF_FUNCTION_TYPE (V2SF, V2SI)
 DEF_FUNCTION_TYPE (V2SI, V2DF)
 DEF_FUNCTION_TYPE (V2SI, V2SF)
 DEF_FUNCTION_TYPE (V2SI, V2SI)
+DEF_FUNCTION_TYPE (V2SI, PCV2SI)
 DEF_FUNCTION_TYPE (V2SI, V4SF)
 DEF_FUNCTION_TYPE (V32QI, PCCHAR)
 DEF_FUNCTION_TYPE (V4DF, PCDOUBLE)
@@ -177,6 +180,7 @@ DEF_FUNCTION_TYPE (V4SF, PCFLOAT)
 DEF_FUNCTION_TYPE (V4SF, V2DF)
 DEF_FUNCTION_TYPE (V4SF, V4DF)
 DEF_FUNCTION_TYPE (V4SF, V4SF)
+DEF_FUNCTION_TYPE (V4SF, PCV4SF)
 DEF_FUNCTION_TYPE (V4SF, V4SI)
 DEF_FUNCTION_TYPE (V4SF, V8SF)
 DEF_FUNCTION_TYPE (V4SI, V16QI)
@@ -191,6 +195,7 @@ DEF_FUNCTION_TYPE (V8HI, V8HI)
 DEF_FUNCTION_TYPE (V8QI, V8QI)
 DEF_FUNCTION_TYPE (V8SF, PCFLOAT)
 DEF_FUNCTION_TYPE (V8SF, PCV4SF)
+DEF_FUNCTION_TYPE (V8SF, PCV8SF)
 DEF_FUNCTION_TYPE (V8SF, V4SF)
 DEF_FUNCTION_TYPE (V8SF, V8SF)
 DEF_FUNCTION_TYPE (V8SF, V8SI)
@@ -296,9 +301,12 @@ DEF_FUNCTION_TYPE (VOID, PFLOAT, V4SF)
 DEF_FUNCTION_TYPE (VOID, PFLOAT, V8SF)
 DEF_FUNCTION_TYPE (VOID, PINT, INT)
 DEF_FUNCTION_TYPE (VOID, PULONGLONG, ULONGLONG)
+DEF_FUNCTION_TYPE (VOID, PV2SI, V2SI)
 DEF_FUNCTION_TYPE (VOID, PV2DI, V2DI)
 DEF_FUNCTION_TYPE (VOID, PV2SF, V4SF)
 DEF_FUNCTION_TYPE (VOID, PV4DI, V4DI)
+DEF_FUNCTION_TYPE (VOID, PV4SF, V4SF)
+DEF_FUNCTION_TYPE (VOID, PV8SF, V8SF)
 DEF_FUNCTION_TYPE (VOID, UNSIGNED, UNSIGNED)
 
 DEF_FUNCTION_TYPE (INT, V16QI, V16QI, INT)
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 159821)
+++ config/i386/i386.c	(working copy)
@@ -21418,6 +21418,29 @@ enum ix86_builtins
 
   IX86_BUILTIN_CLZS,
 
+  /* TM vector builtins.  Note: These are in order.  */
+  IX86_BUILTIN_TM_LOAD_M64,
+  IX86_BUILTIN_TM_LOAD_RAR_M64,
+  IX86_BUILTIN_TM_LOAD_RAW_M64,
+  IX86_BUILTIN_TM_LOAD_RFW_M64,
+  IX86_BUILTIN_TM_LOAD_M128,
+  IX86_BUILTIN_TM_LOAD_RAR_M128,
+  IX86_BUILTIN_TM_LOAD_RAW_M128,
+  IX86_BUILTIN_TM_LOAD_RFW_M128,
+  IX86_BUILTIN_TM_LOAD_M256,
+  IX86_BUILTIN_TM_LOAD_RAR_M256,
+  IX86_BUILTIN_TM_LOAD_RAW_M256,
+  IX86_BUILTIN_TM_LOAD_RFW_M256,
+  IX86_BUILTIN_TM_STORE_M64,
+  IX86_BUILTIN_TM_STORE_WAR_M64,
+  IX86_BUILTIN_TM_STORE_WAW_M64,
+  IX86_BUILTIN_TM_STORE_M128,
+  IX86_BUILTIN_TM_STORE_WAR_M128,
+  IX86_BUILTIN_TM_STORE_WAW_M128,
+  IX86_BUILTIN_TM_STORE_M256,
+  IX86_BUILTIN_TM_STORE_WAR_M256,
+  IX86_BUILTIN_TM_STORE_WAW_M256,
+
   IX86_BUILTIN_MAX
 };
 
@@ -22319,6 +22342,34 @@ static const struct builtin_description 
   { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm,   "__builtin_clzs",   IX86_BUILTIN_CLZS,    UNKNOWN,     (int) UINT16_FTYPE_UINT16 },
 };
 
+/* TM vector builtins.  */
+static const struct builtin_description bdesc_tm[] =
+{
+  { OPTION_MASK_ISA_MMX, 0, "__builtin__ITM_WM64", IX86_BUILTIN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+  { OPTION_MASK_ISA_MMX, 0, "__builtin__ITM_WaRM64", IX86_BUILTIN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+  { OPTION_MASK_ISA_MMX, 0, "__builtin__ITM_WaWM64", IX86_BUILTIN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+  { OPTION_MASK_ISA_MMX, 0, "__builtin__ITM_RM64", IX86_BUILTIN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+  { OPTION_MASK_ISA_MMX, 0, "__builtin__ITM_RaRM64", IX86_BUILTIN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+  { OPTION_MASK_ISA_MMX, 0, "__builtin__ITM_RaWM64", IX86_BUILTIN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+  { OPTION_MASK_ISA_MMX, 0, "__builtin__ITM_RfWM64", IX86_BUILTIN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+
+  { OPTION_MASK_ISA_SSE, 0, "__builtin__ITM_WM128", IX86_BUILTIN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
+  { OPTION_MASK_ISA_SSE, 0, "__builtin__ITM_WaRM128", IX86_BUILTIN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
+  { OPTION_MASK_ISA_SSE, 0, "__builtin__ITM_WaWM128", IX86_BUILTIN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
+  { OPTION_MASK_ISA_SSE, 0, "__builtin__ITM_RM128", IX86_BUILTIN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
+  { OPTION_MASK_ISA_SSE, 0, "__builtin__ITM_RaRM128", IX86_BUILTIN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
+  { OPTION_MASK_ISA_SSE, 0, "__builtin__ITM_RaWM128", IX86_BUILTIN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
+  { OPTION_MASK_ISA_SSE, 0, "__builtin__ITM_RfWM128", IX86_BUILTIN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
+
+  { OPTION_MASK_ISA_AVX, 0, "__builtin__ITM_WM256", IX86_BUILTIN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
+  { OPTION_MASK_ISA_AVX, 0, "__builtin__ITM_WaRM256", IX86_BUILTIN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
+  { OPTION_MASK_ISA_AVX, 0, "__builtin__ITM_WaWM256", IX86_BUILTIN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
+  { OPTION_MASK_ISA_AVX, 0, "__builtin__ITM_RM256", IX86_BUILTIN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
+  { OPTION_MASK_ISA_AVX, 0, "__builtin__ITM_RaRM256", IX86_BUILTIN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
+  { OPTION_MASK_ISA_AVX, 0, "__builtin__ITM_RaWM256", IX86_BUILTIN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
+  { OPTION_MASK_ISA_AVX, 0, "__builtin__ITM_RfWM256", IX86_BUILTIN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
+};
+
 /* FMA4 and XOP.  */
 #define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
 #define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
@@ -22573,6 +22624,44 @@ static const struct builtin_description 
 
 };
 
+
+/* Initialize the transactional memory vector load/store builtins.  */
+
+static void
+ix86_init_tm_builtins (void)
+{
+  enum ix86_builtin_func_type ftype;
+  const struct builtin_description *d;
+  size_t i;
+  tree decl, attrs;
+
+  if (!flag_tm)
+    return;
+
+  attrs = tree_cons (get_identifier ("transaction_pure"), NULL, NULL);
+
+  for (i = 0, d = bdesc_tm;
+       i < ARRAY_SIZE (bdesc_tm);
+       i++, d++)
+    {
+      if ((d->mask & ix86_isa_flags) != 0
+	  || (lang_hooks.builtin_function
+	      == lang_hooks.builtin_function_ext_scope))
+	{
+	  ftype = (enum ix86_builtin_func_type) d->flag;
+	  tree type = ix86_get_builtin_func_type (ftype);
+
+	  decl = add_builtin_function (d->name, type, d->code, BUILT_IN_NORMAL,
+				       /* The builtin without the prefix for
+					  calling it directly.  */
+				       d->name + strlen ("__builtin_"),
+				       attrs);
+	  ix86_builtins[(int) d->code] = decl;
+	  ix86_builtins_isa[(int) d->code].set_and_not_built_p = false;
+	}
+    }
+}
+
 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
    in the current target ISA to allow the user to compile particular modules
    with different target specific options that differ from the command line
@@ -22855,6 +22944,7 @@ ix86_init_builtins (void)
   TREE_READONLY (t) = 1;
   ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
 
+  ix86_init_tm_builtins ();
   ix86_init_mmx_sse_builtins ();
 
   if (TARGET_64BIT)
@@ -24978,7 +25068,88 @@ avx_vperm2f128_parallel (rtx par, enum m
   return mask + 1;
 }
 
+/* TM callbacks.  */
+
+/* Return true if CODE is one of the TM vector loads.  */
+
+static bool
+ix86_tm_vector_load_p (enum built_in_function code)
+{
+  return (code >= IX86_BUILTIN_TM_LOAD_M64
+	  && code <= IX86_BUILTIN_TM_LOAD_RFW_M256);
+}
+
+/* Same as above, but exclude the RaR, RaW, RfW variants.  */
+
+static bool
+ix86_tm_vector_simple_load_p (enum built_in_function code)
+{
+  return (code == IX86_BUILTIN_TM_LOAD_M64
+	  || code == IX86_BUILTIN_TM_LOAD_M128
+	  || code == IX86_BUILTIN_TM_LOAD_M256);
+}
+
+/* Return true if CODE is one of the TM vector stores.  */
 
+static bool
+ix86_tm_vector_store_p (enum built_in_function code)
+{
+  return (code >= IX86_BUILTIN_TM_STORE_M64
+	  && code <= IX86_BUILTIN_TM_STORE_WAW_M256);
+}
+
+/* Same as above, but exclude the WaR and WaW variants.  */
+
+static bool
+ix86_tm_vector_simple_store_p (enum built_in_function code)
+{
+  return (code == IX86_BUILTIN_TM_STORE_M64
+	  || code == IX86_BUILTIN_TM_STORE_M128
+	  || code == IX86_BUILTIN_TM_STORE_M256);
+}
+
+/* Return the builtin decl needed to load a vector of TYPE.  */
+
+static tree
+ix86_builtin_tm_load (tree type)
+{
+  if (TYPE_SIZE_UNIT (type) != NULL
+      && host_integerp (TYPE_SIZE_UNIT (type), 1))
+    {
+      switch (tree_low_cst (TYPE_SIZE_UNIT (type), 1) * BITS_PER_UNIT)
+	{
+	case 64:
+	  return ix86_builtins[IX86_BUILTIN_TM_LOAD_M64];
+	case 128:
+	  return ix86_builtins[IX86_BUILTIN_TM_LOAD_M128];
+	case 256:
+	  return ix86_builtins[IX86_BUILTIN_TM_LOAD_M256];
+	}
+    }
+  return NULL_TREE;
+}
+
+/* Return the builtin decl needed to store a vector of TYPE.  */
+
+static tree
+ix86_builtin_tm_store (tree type)
+{
+  if (TYPE_SIZE_UNIT (type) != NULL
+      && host_integerp (TYPE_SIZE_UNIT (type), 1))
+    {
+      switch (tree_low_cst (TYPE_SIZE_UNIT (type), 1) * BITS_PER_UNIT)
+	{
+	case 64:
+	  return ix86_builtins[IX86_BUILTIN_TM_STORE_M64];
+	case 128:
+	  return ix86_builtins[IX86_BUILTIN_TM_STORE_M128];
+	case 256:
+	  return ix86_builtins[IX86_BUILTIN_TM_STORE_M256];
+	}
+    }
+  return NULL_TREE;
+}
+
 /* Store OPERAND to the memory after reload is completed.  This means
    that we can't easily use assign_stack_local.  */
 rtx
@@ -30514,6 +30685,24 @@ ix86_enum_va_list (int idx, const char *
 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
 
+#undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
+#define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
+
+#undef TARGET_VECTORIZE_BUILTIN_TM_STORE
+#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
+
+#undef TARGET_VECTORIZE_TM_VECTOR_LOAD_P
+#define TARGET_VECTORIZE_TM_VECTOR_LOAD_P ix86_tm_vector_load_p
+
+#undef TARGET_VECTORIZE_TM_VECTOR_SIMPLE_LOAD_P
+#define TARGET_VECTORIZE_TM_VECTOR_SIMPLE_LOAD_P ix86_tm_vector_simple_load_p
+
+#undef TARGET_VECTORIZE_TM_VECTOR_STORE_P
+#define TARGET_VECTORIZE_TM_VECTOR_STORE_P ix86_tm_vector_store_p
+
+#undef TARGET_VECTORIZE_TM_VECTOR_SIMPLE_STORE_P
+#define TARGET_VECTORIZE_TM_VECTOR_SIMPLE_STORE_P ix86_tm_vector_simple_store_p
+
 #undef TARGET_BUILTIN_RECIPROCAL
 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]