[PATCH 1/3] Add PTWRITE builtins for x86
Andi Kleen
andi@firstfloor.org
Mon Feb 12 02:53:00 GMT 2018
From: Andi Kleen <ak@linux.intel.com>
Add builtins/intrinsics for PTWRITE. PTWRITE is a new instruction on Intel Cherry Trail
that allows to write values into the Processor Trace log.
This is fairly straight forward, except I had to add isa2 support for variable number
of operands.
gcc/:
2018-02-10 Andi Kleen <ak@linux.intel.com>
* common/config/i386/i386-common.c (OPTION_MASK_ISA_PTWRITE_SET):
(OPTION_MASK_ISA_PTWRITE_UNSET): New.
(ix86_handle_option): Handle OPT_mptwrite.
* config/i386/cpuid.h (bit_PTWRITE): Add.
* config/i386/driver-i386.c (host_detect_local_cpu): Detect
PTWRITE CPUID.
* config/i386/i386-builtin.def (PTWRITE): Add PTWRITE.
* config/i386/i386-c.c (ix86_target_macros_internal):
Support __PTWRITE__.
* config/i386/i386.c (ix86_target_string): Add -mptwrite.
(ix86_valid_target_attribute_inner_p): Support ptwrite.
(BDESC_VERIFYS): Verify SPECIAL_ARGS2.
(ix86_init_mmx_sse_builtins): Handle special args2.
* config/i386/i386.h (TARGET_PTWRITE): Add.
(TARGET_PTWRITE_P): Add.
* config/i386/i386.md: Add ptwrite.
* config/i386/i386.opt: Add -mptwrite.
* config/i386/immintrin.h (target):
(_ptwrite_u64): Add.
(_ptwrite_u32): Add.
* doc/extend.texi: Document ptwrite builtins.
* doc/invoke.texi: Document -mptwrite.
gcc/testsuite/:
2018-02-10 Andi Kleen <ak@linux.intel.com>
* gcc.target/i386/ptwrite1.c: New test.
* gcc.target/i386/ptwrite2.c: New test.
---
gcc/common/config/i386/i386-common.c | 15 +++++++++++
gcc/config/i386/cpuid.h | 4 +++
gcc/config/i386/driver-i386.c | 13 +++++++++-
gcc/config/i386/i386-builtin.def | 8 +++++-
gcc/config/i386/i386-c.c | 2 ++
gcc/config/i386/i386.c | 50 +++++++++++++++++++++++++++++++++---
gcc/config/i386/i386.h | 2 ++
gcc/config/i386/i386.md | 10 ++++++++
gcc/config/i386/i386.opt | 4 +++
gcc/config/i386/immintrin.h | 24 +++++++++++++++++
gcc/doc/extend.texi | 9 +++++++
gcc/doc/invoke.texi | 5 +++-
12 files changed, 140 insertions(+), 6 deletions(-)
diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c
index 4fdd489b98e..7554dd4e85c 100644
--- a/gcc/common/config/i386/i386-common.c
+++ b/gcc/common/config/i386/i386-common.c
@@ -138,6 +138,7 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
#define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
+#define OPTION_MASK_ISA_PTWRITE_SET OPTION_MASK_ISA_PTWRITE
#define OPTION_MASK_ISA_F16C_SET \
(OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
#define OPTION_MASK_ISA_MWAITX_SET OPTION_MASK_ISA_MWAITX
@@ -254,6 +255,7 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
#define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
+#define OPTION_MASK_ISA_PTWRITE_UNSET OPTION_MASK_ISA_PTWRITE
#define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
#define OPTION_MASK_ISA_GENERAL_REGS_ONLY_UNSET \
@@ -1040,6 +1042,19 @@ ix86_handle_option (struct gcc_options *opts,
}
return true;
+ case OPT_mptwrite:
+ if (value)
+ {
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_PTWRITE_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_PTWRITE_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_PTWRITE_UNSET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_PTWRITE_UNSET;
+ }
+ return true;
+
case OPT_mf16c:
if (value)
{
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index aa903630c2c..6ec8cfe22e3 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -121,6 +121,10 @@
#define bit_XSAVEC (1 << 1)
#define bit_XSAVES (1 << 3)
+/* PT sub leaf (%eax == 14, %ecx == 0) */
+/* %ebx */
+#define bit_PTWRITE (1 << 4)
+
/* Signatures for different CPU implementations as returned in uses
of cpuid with level 0. */
#define signature_AMD_ebx 0x68747541
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index ca1a2e0c37e..ebc9aa4608f 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -423,6 +423,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
unsigned int has_avx512vnni = 0, has_vaes = 0;
unsigned int has_vpclmulqdq = 0;
+ unsigned int has_ptwrite = 0;
+
bool arch;
unsigned int l2sizekb = 0;
@@ -534,6 +536,13 @@ const char *host_detect_local_cpu (int argc, const char **argv)
has_xsaves = eax & bit_XSAVES;
}
+ if (max_level >= 0x14)
+ {
+ __cpuid_count (0x14, 0, eax, ebx, ecx, edx);
+
+ has_ptwrite = ebx & bit_PTWRITE;
+ }
+
/* Check cpuid level of extended features. */
__cpuid (0x80000000, ext_level, ebx, ecx, edx);
@@ -1089,6 +1098,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
const char *vaes = has_vaes ? " -mvaes" : " -mno-vaes";
const char *vpclmulqdq = has_vpclmulqdq ? " -mvpclmulqdq" : " -mno-vpclmulqdq";
const char *avx512bitalg = has_avx512bitalg ? " -mavx512bitalg" : " -mno-avx512bitalg";
+ const char *ptwrite = has_ptwrite ? " -mptwrite" : " -mno-ptwrite";
+
options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
sse4a, cx16, sahf, movbe, aes, sha, pclmul,
popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2,
@@ -1100,7 +1111,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw,
clwb, mwaitx, clzero, pku, rdpid, gfni, ibt, shstk,
avx512vbmi2, avx512vnni, vaes, vpclmulqdq,
- avx512bitalg, NULL);
+ avx512bitalg, ptwrite, NULL);
}
done:
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 2caac8825f6..2ba0fc71aa1 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -414,8 +414,14 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, CODE_FOR_expandv1
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, CODE_FOR_expandv16qi_maskz, "__builtin_ia32_expandloadqi128_maskz", IX86_BUILTIN_PEXPANDBLOAD128Z, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, CODE_FOR_expandv8hi_mask, "__builtin_ia32_expandloadhi128_mask", IX86_BUILTIN_PEXPANDWLOAD128, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, CODE_FOR_expandv8hi_maskz, "__builtin_ia32_expandloadhi128_maskz", IX86_BUILTIN_PEXPANDWLOAD128Z, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_UQI)
+BDESC_END (SPECIAL_ARGS, SPECIAL_ARGS2)
-BDESC_END (SPECIAL_ARGS, ARGS)
+/* PTWRITE */
+BDESC_FIRST (special_args2, SPECIAL_ARGS2,
+ OPTION_MASK_ISA_PTWRITE, CODE_FOR_ptwritesi, "__builtin_ia32_ptwrite32", IX86_BUILTIN_PTWRITE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED)
+BDESC (OPTION_MASK_ISA_PTWRITE, CODE_FOR_ptwritedi, "__builtin_ia32_ptwrite64", IX86_BUILTIN_PTWRITE64, UNKNOWN, (int) VOID_FTYPE_UINT64)
+
+BDESC_END (SPECIAL_ARGS2, ARGS)
/* Builtins with variable number of arguments. */
BDESC_FIRST (args, ARGS,
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index c0b4ffed7ea..fcde8c039e9 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -497,6 +497,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__VAES__");
if (isa_flag & OPTION_MASK_ISA_VPCLMULQDQ)
def_or_undef (parse_in, "__VPCLMULQDQ__");
+ if (isa_flag2 & OPTION_MASK_ISA_PTWRITE)
+ def_or_undef (parse_in, "__PTWRITE__");
if (TARGET_IAMCU)
{
def_or_undef (parse_in, "__iamcu");
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index a8709972e9c..d11d4909450 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2772,7 +2772,8 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
{ "-mhle", OPTION_MASK_ISA_HLE },
{ "-mmovbe", OPTION_MASK_ISA_MOVBE },
{ "-mclzero", OPTION_MASK_ISA_CLZERO },
- { "-mmwaitx", OPTION_MASK_ISA_MWAITX }
+ { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
+ { "-mptwrite", OPTION_MASK_ISA_PTWRITE }
};
static struct ix86_target_opts isa_opts[] =
{
@@ -3451,6 +3452,7 @@ ix86_option_override_internal (bool main_args_p,
const wide_int_bitmask PTA_VPCLMULQDQ (0, HOST_WIDE_INT_1U << 4);
const wide_int_bitmask PTA_AVX512BITALG (0, HOST_WIDE_INT_1U << 5);
const wide_int_bitmask PTA_RDPID (0, HOST_WIDE_INT_1U << 6);
+ /* Add PTA_PTWRITE here if it has a cpu */
const wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
| PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR;
@@ -5394,6 +5396,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
IX86_ATTR_ISA ("shstk", OPT_mshstk),
IX86_ATTR_ISA ("vaes", OPT_mvaes),
IX86_ATTR_ISA ("vpclmulqdq", OPT_mvpclmulqdq),
+ IX86_ATTR_ISA ("ptwrite", OPT_mptwrite),
/* enum options */
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
@@ -31113,8 +31116,10 @@ BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_FIRST,
IX86_BUILTIN__BDESC_PCMPESTR_LAST, 1);
BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
IX86_BUILTIN__BDESC_PCMPISTR_LAST, 1);
-BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_FIRST,
+BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST,
IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST, 1);
+BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_FIRST,
+ IX86_BUILTIN__BDESC_SPECIAL_ARGS2_LAST, 1);
BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST,
IX86_BUILTIN__BDESC_ARGS_LAST, 1);
BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS2_FIRST,
@@ -31142,8 +31147,9 @@ ix86_init_mmx_sse_builtins (void)
const struct builtin_description * d;
enum ix86_builtin_func_type ftype;
size_t i;
+ tree decl;
- /* Add all special builtins with variable number of operands. */
+ /* Add isa1 special builtins with variable number of operands. */
for (i = 0, d = bdesc_special_args;
i < ARRAY_SIZE (bdesc_special_args);
i++, d++)
@@ -31159,6 +31165,36 @@ ix86_init_mmx_sse_builtins (void)
IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
ARRAY_SIZE (bdesc_special_args) - 1);
+ /* Add isa2 special builtins with variable number of operands. */
+ for (i = 0, d = bdesc_special_args2;
+ i < ARRAY_SIZE (bdesc_special_args2);
+ i++, d++)
+ {
+ BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST, i);
+ if (d->name == 0)
+ continue;
+
+ ftype = (enum ix86_builtin_func_type) d->flag;
+ decl = def_builtin2 (d->mask, d->name, ftype, d->code);
+
+ /* Avoid edges for ptwrites generated by vartrace pass. */
+ if (decl)
+ {
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ TREE_NOTHROW (decl) = 1;
+ }
+ else
+ {
+ ix86_builtins_isa[(int)d->code].leaf_p = true;
+ ix86_builtins_isa[(int)d->code].nothrow_p = true;
+ }
+
+ }
+ BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS2_LAST,
+ IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST,
+ ARRAY_SIZE (bdesc_special_args2) - 1);
+
/* Add all builtins with variable number of operands. */
for (i = 0, d = bdesc_args;
i < ARRAY_SIZE (bdesc_args);
@@ -38188,6 +38224,14 @@ rdseed_step:
target);
}
+ if (fcode >= IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST
+ && fcode <= IX86_BUILTIN__BDESC_SPECIAL_ARGS2_LAST)
+ {
+ i = fcode - IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST;
+ return ix86_expand_special_args_builtin (bdesc_special_args2 + i, exp,
+ target);
+ }
+
if (fcode >= IX86_BUILTIN__BDESC_ARGS_FIRST
&& fcode <= IX86_BUILTIN__BDESC_ARGS_LAST)
{
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 9d864501d4f..9b2eb468d63 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -183,6 +183,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_IBT_P(x) TARGET_ISA_IBT_P(x)
#define TARGET_SHSTK TARGET_ISA_SHSTK
#define TARGET_SHSTK_P(x) TARGET_ISA_SHSTK_P(x)
+#define TARGET_PTWRITE TARGET_ISA_PTWRITE
+#define TARGET_PTWRITE_P(x) TARGET_ISA_PTWRITE_P(x)
#define TARGET_LP64 TARGET_ABI_64
#define TARGET_LP64_P(x) TARGET_ABI_64_P(x)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a4832bf696f..fd0c1f06b6e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -287,6 +287,8 @@
UNSPECV_WRUSS
UNSPECV_SETSSBSY
UNSPECV_CLRSSBSY
+
+ UNSPECV_PTWRITE
])
;; Constants to represent rounding modes in the ROUND instruction
@@ -20125,6 +20127,14 @@
[(set_attr "type" "other")
(set_attr "prefix_extra" "2")])
+(define_insn "ptwrite<mode>"
+ [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
+ UNSPECV_PTWRITE)]
+ "TARGET_PTWRITE"
+ "ptwrite\t%0"
+ [(set_attr "type" "other")
+ (set_attr "prefix_extra" "2")])
+
(define_insn "rdrand<mode>_1"
[(set (match_operand:SWI248 0 "register_operand" "=r")
(unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDRAND))
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 3a306bbd73f..7191caa76ad 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -901,6 +901,10 @@ mrdrnd
Target Report Mask(ISA_RDRND) Var(ix86_isa_flags) Save
Support RDRND built-in functions and code generation.
+mptwrite
+Target Report Mask(ISA_PTWRITE) Var(ix86_isa_flags2) Save
+Support PTWRITE built-in functions and code generation.
+
mf16c
Target Report Mask(ISA_F16C) Var(ix86_isa_flags) Save
Support F16C built-in functions and code generation.
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index a5ad8af32b0..1a513a3cfaa 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -234,4 +234,28 @@ _rdrand64_step (unsigned long long *__P)
#endif /* __x86_64__ */
+#ifndef __PTWRITE__
+#pragma GCC push_options
+#pragma GCC target("ptwrite")
+#define __DISABLE_PTWRITE__
+#endif
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_ptwrite_u64 (unsigned long long __B)
+{
+ __builtin_ia32_ptwrite64 (__B);
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_ptwrite_u32 (unsigned __B)
+{
+ __builtin_ia32_ptwrite32 (__B);
+}
+#ifdef __DISABLE_PTWRITE__
+#undef __DISABLE_PTWRITE__
+#pragma GCC pop_options
+#endif /* __DISABLE_PTWRITE__ */
+
#endif /* _IMMINTRIN_H_INCLUDED */
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index cb9df971a5f..8efea867262 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -21446,6 +21446,15 @@ unsigned int __builtin_ia32_rdrand32_step (unsigned int *)
unsigned int __builtin_ia32_rdrand64_step (unsigned long long *)
@end smallexample
+The following built-in function is available when @option{-mptwrite} is
+used. All of them generate the machine instruction that is part of the
+name.
+
+@smallexample
+void __builtin_ia32_ptwrite32 (unsigned)
+void __builtin_ia32_ptwrite64 (unsigned long long)
+@end smallexample
+
The following built-in functions are available when @option{-msse4a} is used.
All of them generate the machine instruction that is part of the name.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index df357bea7dc..602cf8e3edc 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1245,7 +1245,7 @@ See RS/6000 and PowerPC Options.
-mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol
-mavx2 -mavx512f -mavx512pf -mavx512er -mavx512cd -mavx512vl @gol
-mavx512bw -mavx512dq -mavx512ifma -mavx512vbmi -msha -maes @gol
--mpclmul -mfsgsbase -mrdrnd -mf16c -mfma @gol
+-mpclmul -mfsgsbase -mrdrnd -mf16c -mfma -mptwrite @gol
-mprefetchwt1 -mclflushopt -mxsavec -mxsaves @gol
-msse4a -m3dnow -m3dnowa -mpopcnt -mabm -mbmi -mtbm -mfma4 -mxop @gol
-mlzcnt -mbmi2 -mfxsr -mxsave -mxsaveopt -mrtm -mlwp -mmpx @gol
@@ -27064,6 +27064,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@itemx -mfsgsbase
@opindex mfsgsbase
@need 200
+@itemx -mptwrite
+@opindex mptwrite
+@need 200
@itemx -mrdrnd
@opindex mrdrnd
@need 200
--
2.15.1
More information about the Gcc-patches
mailing list