This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [trans-mem] rms-tm bug report
- From: Aldy Hernandez <aldyh at redhat dot com>
- To: Richard Henderson <rth at redhat dot com>
- Cc: Patrick Marlier <patrick dot marlier at unine dot ch>, FELBER Pascal <pascal dot felber at unine dot ch>, Javier Arias <javier dot arias at bsc dot es>, "gokcen dot kestor at bsc dot es" <gokcen dot kestor at bsc dot es>, gcc-patches at gcc dot gnu dot org
- Date: Fri, 28 May 2010 17:53:47 -0400
- Subject: Re: [trans-mem] rms-tm bug report
- References: <4BF4DD38.3080708@unine.ch> <20100524171655.GA19900@redhat.com> <20100524171812.GB19900@redhat.com> <4BFAEE43.40709@redhat.com> <20100525141103.GA26234@redhat.com> <4BFBE254.5010309@redhat.com>
> How about callbacks:
>
> tree (* builtin_tm_load)(tree type, tree *ptype);
> tree (* builtin_tm_store)(tree type, tree *ptype);
>
...
> typedef int _ITM_TYPE_M64 __attribute__((vector_size(8), may_alias));
> typedef float _ITM_TYPE_M128 __attribute__((vector_size(16), may_alias));
> typedef float _ITM_TYPE_M256 __attribute__((vector_size(32), may_alias));
>
> I.e. v2di, v4sf, v8sf.
Let's see if you like this approach.
First, I only used the TYPE argument for the callbacks, as the caller of
the callback already handles the appropriate cast. See
build_tm_{load,store}.
Unfortunately I needed some more callbacks to implement
is_tm_*{load,store}.
I added a FIXME for adding vector logging functions later. Right now
they'll be handled with the ubiquitous ITM_LB.
Phew... this is a far bigger patch than I expected.
OK for branch?
* targhooks.c (default_builtin_tm_load_store): New.
(default_tm_vector_p): New.
* targhooks.h (default_builtin_tm_load_store): Declare.
(default_tm_vector_p): Declare.
* target.h (struct gcc_target): Add builtin_tm_load,
builtin_tm_store, tm_vector_load_p, tm_vector_simple_load_p,
tm_vector_store_p, tm_vector_simple_store_p.
* trans-mem.c (is_tm_load): Call tm_vector_load_p callback.
(is_tm_simple_load): Call tm_vector_simple_load_p.
(is_tm_store): Call tm_vector_store_p callback.
(is_tm_simple_store): Call tm_vector_simple_store_p.
(transaction_invariant_address_p): Handle MISALIGNED_INDIRECT_REF.
(tm_log_emit_stmt): Add FIXME note.
(requires_barrier): Handle MISALIGNED_INDIRECT_REF.
(build_tm_load): Call builtin_tm_load callback.
(build_tm_store): Call builtin_tm_store callback.
* target-def.h (TARGET_VECTORIZE_BUILTIN_TM_LOAD): Define.
(TARGET_VECTORIZE_BUILTIN_TM_STORE): Same.
(TARGET_VECTORIZE_TM_VECTOR_LOAD_P): Same.
(TARGET_VECTORIZE_TM_VECTOR_SIMPLE_LOAD_P): Same.
(TARGET_VECTORIZE_TM_VECTOR_STORE_P): Same.
(TARGET_VECTORIZE_TM_VECTOR_SIMPLE_STORE_P): Same.
(TARGET_VECTORIZE): Add TM callbacks.
* config/i386/i386-builtin-types.def (PV2SI): Define.
(PCV2SI): Define.
Define V2SI_FTYPE_PCV2SI.
Define V4SF_FTYPE_PCV4SF.
Define V8SF_FTYPE_PCV8SF.
Define VOID_FTYPE_PV2SI_V2SI.
* config/i386/i386.c (enum ix86_builtins): Add IX86_BUILTIN_TM_*.
Declare bdesc_tm.
(ix86_init_tm_builtins): New.
(ix86_init_builtins): Initialize TM builtins.
(ix86_tm_vector_load_p): New.
(ix86_tm_vector_simple_load_p): New.
(ix86_tm_vector_store_p): New.
(ix86_tm_vector_simple_store_p): New.
(ix86_builtin_tm_load): New.
(ix86_builtin_tm_store): New.
Define TARGET_VECTORIZE* transactional variants.
Index: targhooks.c
===================================================================
--- targhooks.c (revision 159821)
+++ targhooks.c (working copy)
@@ -1009,4 +1009,16 @@ default_have_conditional_execution (void
#endif
}
+tree
+default_builtin_tm_load_store (tree ARG_UNUSED (type))
+{
+ return NULL_TREE;
+}
+
+bool
+default_tm_vector_p (enum built_in_function ARG_UNUSED (code))
+{
+ return false;
+}
+
#include "gt-targhooks.h"
Index: targhooks.h
===================================================================
--- targhooks.h (revision 159821)
+++ targhooks.h (working copy)
@@ -132,3 +132,6 @@ extern bool default_addr_space_subset_p
extern rtx default_addr_space_convert (rtx, tree, tree);
extern unsigned int default_case_values_threshold (void);
extern bool default_have_conditional_execution (void);
+
+extern tree default_builtin_tm_load_store (tree);
+extern bool default_tm_vector_p (enum built_in_function);
Index: target.h
===================================================================
--- target.h (revision 159821)
+++ target.h (working copy)
@@ -506,6 +506,19 @@ struct gcc_target
is true if the access is defined in a packed struct. */
bool (* builtin_support_vector_misalignment) (enum machine_mode,
const_tree, int, bool);
+
+ /* Target builtin that implements transactional memory load. */
+ tree (* builtin_tm_load) (tree);
+ /* Target builtin that implements transactional memory store. */
+ tree (* builtin_tm_store) (tree);
+ /* Return true if a given built-in is a TM vector load. */
+ bool (* tm_vector_load_p) (unsigned int /*enum built_in_function*/);
+ /* Same as above, but exclude the RaR, RaW, and RfW variants. */
+ bool (* tm_vector_simple_load_p) (unsigned int /*enum built_in_function*/);
+ /* Return true if a given built-in is a TM vector store. */
+ bool (* tm_vector_store_p) (unsigned int /*enum built_in_function*/);
+ /* Same as above, but exclude the WaR and WaW variants. */
+ bool (* tm_vector_simple_store_p) (unsigned int /*enum built_in_function*/);
} vectorize;
/* The initial value of target_flags. */
Index: testsuite/gcc.dg/tm/vector-1.c
===================================================================
--- testsuite/gcc.dg/tm/vector-1.c (revision 0)
+++ testsuite/gcc.dg/tm/vector-1.c (revision 0)
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-fgnu-tm -O3" } */
+
+/* On x86-64, the vectorizer creates V2DI uses which we must handle.
+ Similarly for other vector architectures. */
+
+void ** newElements;
+
+__attribute__((transaction_safe))
+long
+TMqueue_push (void** queuePtr)
+{
+ long src;
+ for (src = 1; src < 9; src++) {
+ newElements[src+1] = queuePtr[src];
+ }
+ return 1;
+}
Index: testsuite/g++.dg/tm/vector-1.C
===================================================================
--- testsuite/g++.dg/tm/vector-1.C (revision 0)
+++ testsuite/g++.dg/tm/vector-1.C (revision 0)
@@ -0,0 +1,15 @@
+// { dg-do compile }
+// { dg-options "-fgnu-tm -O3" }
+
+class HashTree
+{
+ __attribute__((transaction_safe)) void rehash();
+ HashTree **Hash_table;
+ int Hash_function;
+};
+
+__attribute__((transaction_safe)) void HashTree::rehash()
+{
+ for (int i=0; i < Hash_function; i++)
+ Hash_table[i] = 0;
+}
Index: trans-mem.c
===================================================================
--- trans-mem.c (revision 159821)
+++ trans-mem.c (working copy)
@@ -321,8 +321,13 @@ is_tm_load (gimple stmt)
return false;
fndecl = gimple_call_fndecl (stmt);
- return (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
- && BUILTIN_TM_LOAD_P (DECL_FUNCTION_CODE (fndecl)));
+ if (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
+ {
+ enum built_in_function code = DECL_FUNCTION_CODE (fndecl);
+ return (BUILTIN_TM_LOAD_P (code)
+ || targetm.vectorize.tm_vector_load_p (code));
+ }
+ return false;
}
/* Same as above, but for simple TM loads, that is, not the
@@ -346,7 +351,8 @@ is_tm_simple_load (gimple stmt)
|| fcode == BUILT_IN_TM_LOAD_8
|| fcode == BUILT_IN_TM_LOAD_FLOAT
|| fcode == BUILT_IN_TM_LOAD_DOUBLE
- || fcode == BUILT_IN_TM_LOAD_LDOUBLE);
+ || fcode == BUILT_IN_TM_LOAD_LDOUBLE
+ || targetm.vectorize.tm_vector_simple_load_p (fcode));
}
return false;
}
@@ -362,8 +368,13 @@ is_tm_store (gimple stmt)
return false;
fndecl = gimple_call_fndecl (stmt);
- return (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
- && BUILTIN_TM_STORE_P (DECL_FUNCTION_CODE (fndecl)));
+ if (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
+ {
+ enum built_in_function fcode = DECL_FUNCTION_CODE (fndecl);
+ return (BUILTIN_TM_STORE_P (fcode)
+ || targetm.vectorize.tm_vector_store_p (fcode));
+ }
+ return false;
}
/* Same as above, but for simple TM stores, that is, not the
@@ -387,7 +398,8 @@ is_tm_simple_store (gimple stmt)
|| fcode == BUILT_IN_TM_STORE_8
|| fcode == BUILT_IN_TM_STORE_FLOAT
|| fcode == BUILT_IN_TM_STORE_DOUBLE
- || fcode == BUILT_IN_TM_STORE_LDOUBLE);
+ || fcode == BUILT_IN_TM_STORE_LDOUBLE
+ || targetm.vectorize.tm_vector_simple_store_p (fcode));
}
return false;
}
@@ -889,7 +901,8 @@ tm_log_delete (void)
static bool
transaction_invariant_address_p (const_tree mem, basic_block region_entry_block)
{
- if (TREE_CODE (mem) == INDIRECT_REF
+ if ((TREE_CODE (mem) == INDIRECT_REF
+ || TREE_CODE (mem) == MISALIGNED_INDIRECT_REF)
&& TREE_CODE (TREE_OPERAND (mem, 0)) == SSA_NAME)
{
basic_block def_bb;
@@ -1039,6 +1052,7 @@ tm_log_emit_stmt (tree addr, gimple stmt
code = BUILT_IN_TM_LOG_8;
break;
default:
+ /* FIXME: Add support for vector logging functions. */
code = BUILT_IN_TM_LOG;
break;
}
@@ -1358,6 +1372,7 @@ requires_barrier (basic_block entry_bloc
switch (TREE_CODE (x))
{
case INDIRECT_REF:
+ case MISALIGNED_INDIRECT_REF:
{
enum thread_memory_type ret;
@@ -1376,7 +1391,6 @@ requires_barrier (basic_block entry_bloc
}
case ALIGN_INDIRECT_REF:
- case MISALIGNED_INDIRECT_REF:
/* ??? Insert an irrevocable when it comes to vectorized loops,
or handle these somehow. */
gcc_unreachable ();
@@ -1870,7 +1884,7 @@ static gimple
build_tm_load (location_t loc, tree lhs, tree rhs, gimple_stmt_iterator *gsi)
{
enum built_in_function code = END_BUILTINS;
- tree t, type = TREE_TYPE (rhs);
+ tree t, type = TREE_TYPE (rhs), decl;
gimple gcall;
if (type == float_type_node)
@@ -1900,13 +1914,19 @@ build_tm_load (location_t loc, tree lhs,
}
if (code == END_BUILTINS)
- return NULL;
+ {
+ decl = targetm.vectorize.builtin_tm_load (type);
+ if (!decl)
+ return NULL;
+ }
+ else
+ decl = built_in_decls[code];
t = gimplify_addr (gsi, rhs);
- gcall = gimple_build_call (built_in_decls[code], 1, t);
+ gcall = gimple_build_call (decl, 1, t);
gimple_set_location (gcall, loc);
- t = TREE_TYPE (TREE_TYPE (built_in_decls[code]));
+ t = TREE_TYPE (TREE_TYPE (decl));
if (useless_type_conversion_p (type, t))
{
gimple_call_set_lhs (gcall, lhs);
@@ -1966,9 +1986,14 @@ build_tm_store (location_t loc, tree lhs
}
if (code == END_BUILTINS)
- return NULL;
+ {
+ fn = targetm.vectorize.builtin_tm_store (type);
+ if (!fn)
+ return NULL;
+ }
+ else
+ fn = built_in_decls[code];
- fn = built_in_decls[code];
simple_type = TREE_VALUE (TREE_CHAIN (TYPE_ARG_TYPES (TREE_TYPE (fn))));
if (!useless_type_conversion_p (simple_type, type))
@@ -1986,7 +2011,7 @@ build_tm_store (location_t loc, tree lhs
}
t = gimplify_addr (gsi, lhs);
- gcall = gimple_build_call (built_in_decls[code], 2, t, rhs);
+ gcall = gimple_build_call (fn, 2, t, rhs);
gimple_set_location (gcall, loc);
gsi_insert_before (gsi, gcall, GSI_SAME_STMT);
Index: target-def.h
===================================================================
--- target-def.h (revision 159821)
+++ target-def.h (working copy)
@@ -400,6 +400,18 @@
hook_bool_tree_tree_true
#define TARGET_SUPPORT_VECTOR_MISALIGNMENT \
default_builtin_support_vector_misalignment
+#define TARGET_VECTORIZE_BUILTIN_TM_LOAD \
+ default_builtin_tm_load_store
+#define TARGET_VECTORIZE_BUILTIN_TM_STORE \
+ default_builtin_tm_load_store
+#define TARGET_VECTORIZE_TM_VECTOR_LOAD_P \
+ default_tm_vector_p
+#define TARGET_VECTORIZE_TM_VECTOR_SIMPLE_LOAD_P \
+ default_tm_vector_p
+#define TARGET_VECTORIZE_TM_VECTOR_STORE_P \
+ default_tm_vector_p
+#define TARGET_VECTORIZE_TM_VECTOR_SIMPLE_STORE_P \
+ default_tm_vector_p
#define TARGET_VECTORIZE \
@@ -413,7 +425,13 @@
TARGET_VECTOR_ALIGNMENT_REACHABLE, \
TARGET_VECTORIZE_BUILTIN_VEC_PERM, \
TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK, \
- TARGET_SUPPORT_VECTOR_MISALIGNMENT \
+ TARGET_SUPPORT_VECTOR_MISALIGNMENT, \
+ TARGET_VECTORIZE_BUILTIN_TM_LOAD, \
+ TARGET_VECTORIZE_BUILTIN_TM_STORE, \
+ TARGET_VECTORIZE_TM_VECTOR_LOAD_P, \
+ TARGET_VECTORIZE_TM_VECTOR_SIMPLE_LOAD_P, \
+ TARGET_VECTORIZE_TM_VECTOR_STORE_P, \
+ TARGET_VECTORIZE_TM_VECTOR_SIMPLE_STORE_P \
}
#define TARGET_DEFAULT_TARGET_FLAGS 0
Index: config/i386/i386-builtin-types.def
===================================================================
--- config/i386/i386-builtin-types.def (revision 159821)
+++ config/i386/i386-builtin-types.def (working copy)
@@ -111,6 +111,7 @@ DEF_POINTER_TYPE (PINT, INT)
DEF_POINTER_TYPE (PULONGLONG, ULONGLONG)
DEF_POINTER_TYPE (PUNSIGNED, UNSIGNED)
+DEF_POINTER_TYPE (PV2SI, V2SI)
DEF_POINTER_TYPE (PV2DF, V2DF)
DEF_POINTER_TYPE (PV2DI, V2DI)
DEF_POINTER_TYPE (PV2SF, V2SF)
@@ -119,6 +120,7 @@ DEF_POINTER_TYPE (PV4DI, V4DI)
DEF_POINTER_TYPE (PV4SF, V4SF)
DEF_POINTER_TYPE (PV8SF, V8SF)
+DEF_POINTER_TYPE (PCV2SI, V2SI, CONST)
DEF_POINTER_TYPE (PCV2DF, V2DF, CONST)
DEF_POINTER_TYPE (PCV2SF, V2SF, CONST)
DEF_POINTER_TYPE (PCV4DF, V4DF, CONST)
@@ -164,6 +166,7 @@ DEF_FUNCTION_TYPE (V2SF, V2SI)
DEF_FUNCTION_TYPE (V2SI, V2DF)
DEF_FUNCTION_TYPE (V2SI, V2SF)
DEF_FUNCTION_TYPE (V2SI, V2SI)
+DEF_FUNCTION_TYPE (V2SI, PCV2SI)
DEF_FUNCTION_TYPE (V2SI, V4SF)
DEF_FUNCTION_TYPE (V32QI, PCCHAR)
DEF_FUNCTION_TYPE (V4DF, PCDOUBLE)
@@ -177,6 +180,7 @@ DEF_FUNCTION_TYPE (V4SF, PCFLOAT)
DEF_FUNCTION_TYPE (V4SF, V2DF)
DEF_FUNCTION_TYPE (V4SF, V4DF)
DEF_FUNCTION_TYPE (V4SF, V4SF)
+DEF_FUNCTION_TYPE (V4SF, PCV4SF)
DEF_FUNCTION_TYPE (V4SF, V4SI)
DEF_FUNCTION_TYPE (V4SF, V8SF)
DEF_FUNCTION_TYPE (V4SI, V16QI)
@@ -191,6 +195,7 @@ DEF_FUNCTION_TYPE (V8HI, V8HI)
DEF_FUNCTION_TYPE (V8QI, V8QI)
DEF_FUNCTION_TYPE (V8SF, PCFLOAT)
DEF_FUNCTION_TYPE (V8SF, PCV4SF)
+DEF_FUNCTION_TYPE (V8SF, PCV8SF)
DEF_FUNCTION_TYPE (V8SF, V4SF)
DEF_FUNCTION_TYPE (V8SF, V8SF)
DEF_FUNCTION_TYPE (V8SF, V8SI)
@@ -296,9 +301,12 @@ DEF_FUNCTION_TYPE (VOID, PFLOAT, V4SF)
DEF_FUNCTION_TYPE (VOID, PFLOAT, V8SF)
DEF_FUNCTION_TYPE (VOID, PINT, INT)
DEF_FUNCTION_TYPE (VOID, PULONGLONG, ULONGLONG)
+DEF_FUNCTION_TYPE (VOID, PV2SI, V2SI)
DEF_FUNCTION_TYPE (VOID, PV2DI, V2DI)
DEF_FUNCTION_TYPE (VOID, PV2SF, V4SF)
DEF_FUNCTION_TYPE (VOID, PV4DI, V4DI)
+DEF_FUNCTION_TYPE (VOID, PV4SF, V4SF)
+DEF_FUNCTION_TYPE (VOID, PV8SF, V8SF)
DEF_FUNCTION_TYPE (VOID, UNSIGNED, UNSIGNED)
DEF_FUNCTION_TYPE (INT, V16QI, V16QI, INT)
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 159821)
+++ config/i386/i386.c (working copy)
@@ -21418,6 +21418,29 @@ enum ix86_builtins
IX86_BUILTIN_CLZS,
+ /* TM vector builtins. Note: These are in order. */
+ IX86_BUILTIN_TM_LOAD_M64,
+ IX86_BUILTIN_TM_LOAD_RAR_M64,
+ IX86_BUILTIN_TM_LOAD_RAW_M64,
+ IX86_BUILTIN_TM_LOAD_RFW_M64,
+ IX86_BUILTIN_TM_LOAD_M128,
+ IX86_BUILTIN_TM_LOAD_RAR_M128,
+ IX86_BUILTIN_TM_LOAD_RAW_M128,
+ IX86_BUILTIN_TM_LOAD_RFW_M128,
+ IX86_BUILTIN_TM_LOAD_M256,
+ IX86_BUILTIN_TM_LOAD_RAR_M256,
+ IX86_BUILTIN_TM_LOAD_RAW_M256,
+ IX86_BUILTIN_TM_LOAD_RFW_M256,
+ IX86_BUILTIN_TM_STORE_M64,
+ IX86_BUILTIN_TM_STORE_WAR_M64,
+ IX86_BUILTIN_TM_STORE_WAW_M64,
+ IX86_BUILTIN_TM_STORE_M128,
+ IX86_BUILTIN_TM_STORE_WAR_M128,
+ IX86_BUILTIN_TM_STORE_WAW_M128,
+ IX86_BUILTIN_TM_STORE_M256,
+ IX86_BUILTIN_TM_STORE_WAR_M256,
+ IX86_BUILTIN_TM_STORE_WAW_M256,
+
IX86_BUILTIN_MAX
};
@@ -22319,6 +22342,34 @@ static const struct builtin_description
{ OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
};
+/* TM vector builtins. */
+static const struct builtin_description bdesc_tm[] =
+{
+ { OPTION_MASK_ISA_MMX, 0, "__builtin__ITM_WM64", IX86_BUILTIN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+ { OPTION_MASK_ISA_MMX, 0, "__builtin__ITM_WaRM64", IX86_BUILTIN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+ { OPTION_MASK_ISA_MMX, 0, "__builtin__ITM_WaWM64", IX86_BUILTIN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+ { OPTION_MASK_ISA_MMX, 0, "__builtin__ITM_RM64", IX86_BUILTIN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+ { OPTION_MASK_ISA_MMX, 0, "__builtin__ITM_RaRM64", IX86_BUILTIN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+ { OPTION_MASK_ISA_MMX, 0, "__builtin__ITM_RaWM64", IX86_BUILTIN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+ { OPTION_MASK_ISA_MMX, 0, "__builtin__ITM_RfWM64", IX86_BUILTIN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+
+ { OPTION_MASK_ISA_SSE, 0, "__builtin__ITM_WM128", IX86_BUILTIN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, 0, "__builtin__ITM_WaRM128", IX86_BUILTIN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, 0, "__builtin__ITM_WaWM128", IX86_BUILTIN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, 0, "__builtin__ITM_RM128", IX86_BUILTIN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
+ { OPTION_MASK_ISA_SSE, 0, "__builtin__ITM_RaRM128", IX86_BUILTIN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
+ { OPTION_MASK_ISA_SSE, 0, "__builtin__ITM_RaWM128", IX86_BUILTIN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
+ { OPTION_MASK_ISA_SSE, 0, "__builtin__ITM_RfWM128", IX86_BUILTIN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
+
+ { OPTION_MASK_ISA_AVX, 0, "__builtin__ITM_WM256", IX86_BUILTIN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, 0, "__builtin__ITM_WaRM256", IX86_BUILTIN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, 0, "__builtin__ITM_WaWM256", IX86_BUILTIN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, 0, "__builtin__ITM_RM256", IX86_BUILTIN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
+ { OPTION_MASK_ISA_AVX, 0, "__builtin__ITM_RaRM256", IX86_BUILTIN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
+ { OPTION_MASK_ISA_AVX, 0, "__builtin__ITM_RaWM256", IX86_BUILTIN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
+ { OPTION_MASK_ISA_AVX, 0, "__builtin__ITM_RfWM256", IX86_BUILTIN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
+};
+
/* FMA4 and XOP. */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
@@ -22573,6 +22624,44 @@ static const struct builtin_description
};
+
+/* Initialize the transactional memory vector load/store builtins. */
+
+static void
+ix86_init_tm_builtins (void)
+{
+ enum ix86_builtin_func_type ftype;
+ const struct builtin_description *d;
+ size_t i;
+ tree decl, attrs;
+
+ if (!flag_tm)
+ return;
+
+ attrs = tree_cons (get_identifier ("transaction_pure"), NULL, NULL);
+
+ for (i = 0, d = bdesc_tm;
+ i < ARRAY_SIZE (bdesc_tm);
+ i++, d++)
+ {
+ if ((d->mask & ix86_isa_flags) != 0
+ || (lang_hooks.builtin_function
+ == lang_hooks.builtin_function_ext_scope))
+ {
+ ftype = (enum ix86_builtin_func_type) d->flag;
+ tree type = ix86_get_builtin_func_type (ftype);
+
+ decl = add_builtin_function (d->name, type, d->code, BUILT_IN_NORMAL,
+ /* The builtin without the prefix for
+ calling it directly. */
+ d->name + strlen ("__builtin_"),
+ attrs);
+ ix86_builtins[(int) d->code] = decl;
+ ix86_builtins_isa[(int) d->code].set_and_not_built_p = false;
+ }
+ }
+}
+
/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
in the current target ISA to allow the user to compile particular modules
with different target specific options that differ from the command line
@@ -22855,6 +22944,7 @@ ix86_init_builtins (void)
TREE_READONLY (t) = 1;
ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
+ ix86_init_tm_builtins ();
ix86_init_mmx_sse_builtins ();
if (TARGET_64BIT)
@@ -24978,7 +25068,88 @@ avx_vperm2f128_parallel (rtx par, enum m
return mask + 1;
}
+/* TM callbacks. */
+
+/* Return true if CODE is one of the TM vector loads. */
+
+static bool
+ix86_tm_vector_load_p (enum built_in_function code)
+{
+ return (code >= IX86_BUILTIN_TM_LOAD_M64
+ && code <= IX86_BUILTIN_TM_LOAD_RFW_M256);
+}
+
+/* Same as above, but exclude the RaR, RaW, RfW variants. */
+
+static bool
+ix86_tm_vector_simple_load_p (enum built_in_function code)
+{
+ return (code == IX86_BUILTIN_TM_LOAD_M64
+ || code == IX86_BUILTIN_TM_LOAD_M128
+ || code == IX86_BUILTIN_TM_LOAD_M256);
+}
+
+/* Return true if CODE is one of the TM vector stores. */
+static bool
+ix86_tm_vector_store_p (enum built_in_function code)
+{
+ return (code >= IX86_BUILTIN_TM_STORE_M64
+ && code <= IX86_BUILTIN_TM_STORE_WAW_M256);
+}
+
+/* Same as above, but exclude the WaR and WaW variants. */
+
+static bool
+ix86_tm_vector_simple_store_p (enum built_in_function code)
+{
+ return (code == IX86_BUILTIN_TM_STORE_M64
+ || code == IX86_BUILTIN_TM_STORE_M128
+ || code == IX86_BUILTIN_TM_STORE_M256);
+}
+
+/* Return the builtin decl needed to load a vector of TYPE. */
+
+static tree
+ix86_builtin_tm_load (tree type)
+{
+ if (TYPE_SIZE_UNIT (type) != NULL
+ && host_integerp (TYPE_SIZE_UNIT (type), 1))
+ {
+ switch (tree_low_cst (TYPE_SIZE_UNIT (type), 1) * BITS_PER_UNIT)
+ {
+ case 64:
+ return ix86_builtins[IX86_BUILTIN_TM_LOAD_M64];
+ case 128:
+ return ix86_builtins[IX86_BUILTIN_TM_LOAD_M128];
+ case 256:
+ return ix86_builtins[IX86_BUILTIN_TM_LOAD_M256];
+ }
+ }
+ return NULL_TREE;
+}
+
+/* Return the builtin decl needed to store a vector of TYPE. */
+
+static tree
+ix86_builtin_tm_store (tree type)
+{
+ if (TYPE_SIZE_UNIT (type) != NULL
+ && host_integerp (TYPE_SIZE_UNIT (type), 1))
+ {
+ switch (tree_low_cst (TYPE_SIZE_UNIT (type), 1) * BITS_PER_UNIT)
+ {
+ case 64:
+ return ix86_builtins[IX86_BUILTIN_TM_STORE_M64];
+ case 128:
+ return ix86_builtins[IX86_BUILTIN_TM_STORE_M128];
+ case 256:
+ return ix86_builtins[IX86_BUILTIN_TM_STORE_M256];
+ }
+ }
+ return NULL_TREE;
+}
+
/* Store OPERAND to the memory after reload is completed. This means
that we can't easily use assign_stack_local. */
rtx
@@ -30514,6 +30685,24 @@ ix86_enum_va_list (int idx, const char *
#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
+#undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
+#define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
+
+#undef TARGET_VECTORIZE_BUILTIN_TM_STORE
+#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
+
+#undef TARGET_VECTORIZE_TM_VECTOR_LOAD_P
+#define TARGET_VECTORIZE_TM_VECTOR_LOAD_P ix86_tm_vector_load_p
+
+#undef TARGET_VECTORIZE_TM_VECTOR_SIMPLE_LOAD_P
+#define TARGET_VECTORIZE_TM_VECTOR_SIMPLE_LOAD_P ix86_tm_vector_simple_load_p
+
+#undef TARGET_VECTORIZE_TM_VECTOR_STORE_P
+#define TARGET_VECTORIZE_TM_VECTOR_STORE_P ix86_tm_vector_store_p
+
+#undef TARGET_VECTORIZE_TM_VECTOR_SIMPLE_STORE_P
+#define TARGET_VECTORIZE_TM_VECTOR_SIMPLE_STORE_P ix86_tm_vector_simple_store_p
+
#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal