[PATCH 1/3] Add PTWRITE builtins for x86

Andi Kleen andi@firstfloor.org
Mon Feb 12 02:53:00 GMT 2018


From: Andi Kleen <ak@linux.intel.com>

Add builtins/intrinsics for PTWRITE. PTWRITE is a new instruction on Intel Cherry Trail
that allows to write values into the Processor Trace log.

This is fairly straight forward, except I had to add isa2 support for variable number
of operands.

gcc/:

2018-02-10  Andi Kleen  <ak@linux.intel.com>

	* common/config/i386/i386-common.c (OPTION_MASK_ISA_PTWRITE_SET):
	(OPTION_MASK_ISA_PTWRITE_UNSET): New.
	(ix86_handle_option): Handle OPT_mptwrite.
	* config/i386/cpuid.h (bit_PTWRITE): Add.
	* config/i386/driver-i386.c (host_detect_local_cpu): Detect
	PTWRITE CPUID.
	* config/i386/i386-builtin.def (PTWRITE): Add PTWRITE.
	* config/i386/i386-c.c (ix86_target_macros_internal):
	Support __PTWRITE__.
	* config/i386/i386.c (ix86_target_string): Add -mptwrite.
	(ix86_valid_target_attribute_inner_p): Support ptwrite.
	(BDESC_VERIFYS): Verify SPECIAL_ARGS2.
	(ix86_init_mmx_sse_builtins): Handle special args2.
	* config/i386/i386.h (TARGET_PTWRITE): Add.
	(TARGET_PTWRITE_P): Add.
	* config/i386/i386.md: Add ptwrite.
	* config/i386/i386.opt: Add -mptwrite.
	* config/i386/immintrin.h (target):
	(_ptwrite_u64): Add.
	(_ptwrite_u32): Add.
	* doc/extend.texi: Document ptwrite builtins.
	* doc/invoke.texi: Document -mptwrite.

gcc/testsuite/:

2018-02-10  Andi Kleen  <ak@linux.intel.com>

	* gcc.target/i386/ptwrite1.c: New test.
	* gcc.target/i386/ptwrite2.c: New test.
---
 gcc/common/config/i386/i386-common.c | 15 +++++++++++
 gcc/config/i386/cpuid.h              |  4 +++
 gcc/config/i386/driver-i386.c        | 13 +++++++++-
 gcc/config/i386/i386-builtin.def     |  8 +++++-
 gcc/config/i386/i386-c.c             |  2 ++
 gcc/config/i386/i386.c               | 50 +++++++++++++++++++++++++++++++++---
 gcc/config/i386/i386.h               |  2 ++
 gcc/config/i386/i386.md              | 10 ++++++++
 gcc/config/i386/i386.opt             |  4 +++
 gcc/config/i386/immintrin.h          | 24 +++++++++++++++++
 gcc/doc/extend.texi                  |  9 +++++++
 gcc/doc/invoke.texi                  |  5 +++-
 12 files changed, 140 insertions(+), 6 deletions(-)

diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c
index 4fdd489b98e..7554dd4e85c 100644
--- a/gcc/common/config/i386/i386-common.c
+++ b/gcc/common/config/i386/i386-common.c
@@ -138,6 +138,7 @@ along with GCC; see the file COPYING3.  If not see
 
 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
+#define OPTION_MASK_ISA_PTWRITE_SET OPTION_MASK_ISA_PTWRITE
 #define OPTION_MASK_ISA_F16C_SET \
   (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
 #define OPTION_MASK_ISA_MWAITX_SET OPTION_MASK_ISA_MWAITX
@@ -254,6 +255,7 @@ along with GCC; see the file COPYING3.  If not see
 
 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
+#define OPTION_MASK_ISA_PTWRITE_UNSET OPTION_MASK_ISA_PTWRITE
 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
 
 #define OPTION_MASK_ISA_GENERAL_REGS_ONLY_UNSET \
@@ -1040,6 +1042,19 @@ ix86_handle_option (struct gcc_options *opts,
 	}
       return true;
 
+    case OPT_mptwrite:
+      if (value)
+	{
+	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_PTWRITE_SET;
+	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_PTWRITE_SET;
+	}
+      else
+	{
+	  opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_PTWRITE_UNSET;
+	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_PTWRITE_UNSET;
+	}
+      return true;
+
     case OPT_mf16c:
       if (value)
 	{
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index aa903630c2c..6ec8cfe22e3 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -121,6 +121,10 @@
 #define bit_XSAVEC	(1 << 1)
 #define bit_XSAVES	(1 << 3)
 
+/* PT sub leaf (%eax == 14, %ecx == 0) */
+/* %ebx */
+#define bit_PTWRITE	(1 << 4)
+
 /* Signatures for different CPU implementations as returned in uses
    of cpuid with level 0.  */
 #define signature_AMD_ebx	0x68747541
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index ca1a2e0c37e..ebc9aa4608f 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -423,6 +423,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
   unsigned int has_avx512vnni = 0, has_vaes = 0;
   unsigned int has_vpclmulqdq = 0;
 
+  unsigned int has_ptwrite = 0;
+
   bool arch;
 
   unsigned int l2sizekb = 0;
@@ -534,6 +536,13 @@ const char *host_detect_local_cpu (int argc, const char **argv)
       has_xsaves = eax & bit_XSAVES;
     }
 
+  if (max_level >= 0x14)
+    {
+      __cpuid_count (0x14, 0, eax, ebx, ecx, edx);
+
+      has_ptwrite = ebx & bit_PTWRITE;
+    }
+
   /* Check cpuid level of extended features.  */
   __cpuid (0x80000000, ext_level, ebx, ecx, edx);
 
@@ -1089,6 +1098,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
       const char *vaes = has_vaes ? " -mvaes" : " -mno-vaes";
       const char *vpclmulqdq = has_vpclmulqdq ? " -mvpclmulqdq" : " -mno-vpclmulqdq";
       const char *avx512bitalg = has_avx512bitalg ? " -mavx512bitalg" : " -mno-avx512bitalg";
+      const char *ptwrite = has_ptwrite ? " -mptwrite" : " -mno-ptwrite";
+
       options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
 			sse4a, cx16, sahf, movbe, aes, sha, pclmul,
 			popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2,
@@ -1100,7 +1111,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
 			avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw,
 			clwb, mwaitx, clzero, pku, rdpid, gfni, ibt, shstk,
 			avx512vbmi2, avx512vnni, vaes, vpclmulqdq,
-			avx512bitalg, NULL);
+			avx512bitalg, ptwrite, NULL);
     }
 
 done:
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 2caac8825f6..2ba0fc71aa1 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -414,8 +414,14 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, CODE_FOR_expandv1
 BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, CODE_FOR_expandv16qi_maskz, "__builtin_ia32_expandloadqi128_maskz", IX86_BUILTIN_PEXPANDBLOAD128Z, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_UHI)
 BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, CODE_FOR_expandv8hi_mask, "__builtin_ia32_expandloadhi128_mask", IX86_BUILTIN_PEXPANDWLOAD128, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, CODE_FOR_expandv8hi_maskz, "__builtin_ia32_expandloadhi128_maskz", IX86_BUILTIN_PEXPANDWLOAD128Z, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_UQI)
+BDESC_END (SPECIAL_ARGS, SPECIAL_ARGS2)
 
-BDESC_END (SPECIAL_ARGS, ARGS)
+/* PTWRITE */
+BDESC_FIRST (special_args2, SPECIAL_ARGS2,
+     OPTION_MASK_ISA_PTWRITE, CODE_FOR_ptwritesi, "__builtin_ia32_ptwrite32", IX86_BUILTIN_PTWRITE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED)
+BDESC (OPTION_MASK_ISA_PTWRITE, CODE_FOR_ptwritedi, "__builtin_ia32_ptwrite64", IX86_BUILTIN_PTWRITE64, UNKNOWN, (int) VOID_FTYPE_UINT64)
+
+BDESC_END (SPECIAL_ARGS2, ARGS)
 
 /* Builtins with variable number of arguments.  */
 BDESC_FIRST (args, ARGS,
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index c0b4ffed7ea..fcde8c039e9 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -497,6 +497,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
     def_or_undef (parse_in, "__VAES__");
   if (isa_flag & OPTION_MASK_ISA_VPCLMULQDQ)
     def_or_undef (parse_in, "__VPCLMULQDQ__");
+  if (isa_flag2 & OPTION_MASK_ISA_PTWRITE)
+    def_or_undef (parse_in, "__PTWRITE__");
   if (TARGET_IAMCU)
     {
       def_or_undef (parse_in, "__iamcu");
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index a8709972e9c..d11d4909450 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2772,7 +2772,8 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
     { "-mhle",		OPTION_MASK_ISA_HLE },
     { "-mmovbe",	OPTION_MASK_ISA_MOVBE },
     { "-mclzero",	OPTION_MASK_ISA_CLZERO },
-    { "-mmwaitx",	OPTION_MASK_ISA_MWAITX }
+    { "-mmwaitx",	OPTION_MASK_ISA_MWAITX },
+    { "-mptwrite",	OPTION_MASK_ISA_PTWRITE }
   };
   static struct ix86_target_opts isa_opts[] =
   {
@@ -3451,6 +3452,7 @@ ix86_option_override_internal (bool main_args_p,
   const wide_int_bitmask PTA_VPCLMULQDQ (0, HOST_WIDE_INT_1U << 4);
   const wide_int_bitmask PTA_AVX512BITALG (0, HOST_WIDE_INT_1U << 5);
   const wide_int_bitmask PTA_RDPID (0, HOST_WIDE_INT_1U << 6);
+  /* Add PTA_PTWRITE here if it has a cpu */
 
   const wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
     | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR;
@@ -5394,6 +5396,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
     IX86_ATTR_ISA ("shstk",	OPT_mshstk),
     IX86_ATTR_ISA ("vaes",	OPT_mvaes),
     IX86_ATTR_ISA ("vpclmulqdq", OPT_mvpclmulqdq),
+    IX86_ATTR_ISA ("ptwrite",	OPT_mptwrite),
 
     /* enum options */
     IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),
@@ -31113,8 +31116,10 @@ BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_FIRST,
 	       IX86_BUILTIN__BDESC_PCMPESTR_LAST, 1);
 BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
 	       IX86_BUILTIN__BDESC_PCMPISTR_LAST, 1);
-BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_FIRST,
+BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST,
 	       IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST, 1);
+BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_FIRST,
+	       IX86_BUILTIN__BDESC_SPECIAL_ARGS2_LAST, 1);
 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST,
 	       IX86_BUILTIN__BDESC_ARGS_LAST, 1);
 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS2_FIRST,
@@ -31142,8 +31147,9 @@ ix86_init_mmx_sse_builtins (void)
   const struct builtin_description * d;
   enum ix86_builtin_func_type ftype;
   size_t i;
+  tree decl;
 
-  /* Add all special builtins with variable number of operands.  */
+  /* Add isa1 special builtins with variable number of operands.  */
   for (i = 0, d = bdesc_special_args;
        i < ARRAY_SIZE (bdesc_special_args);
        i++, d++)
@@ -31159,6 +31165,36 @@ ix86_init_mmx_sse_builtins (void)
 		 IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
 		 ARRAY_SIZE (bdesc_special_args) - 1);
 
+  /* Add isa2 special builtins with variable number of operands.  */
+  for (i = 0, d = bdesc_special_args2;
+       i < ARRAY_SIZE (bdesc_special_args2);
+       i++, d++)
+    {
+      BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST, i);
+      if (d->name == 0)
+	continue;
+
+      ftype = (enum ix86_builtin_func_type) d->flag;
+      decl = def_builtin2 (d->mask, d->name, ftype, d->code);
+
+      /* Avoid edges for ptwrites generated by vartrace pass.  */
+      if (decl)
+	{
+	  DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+						    NULL_TREE);
+	  TREE_NOTHROW (decl) = 1;
+	}
+      else
+	{
+	  ix86_builtins_isa[(int)d->code].leaf_p = true;
+	  ix86_builtins_isa[(int)d->code].nothrow_p = true;
+	}
+
+    }
+  BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS2_LAST,
+		 IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST,
+		 ARRAY_SIZE (bdesc_special_args2) - 1);
+
   /* Add all builtins with variable number of operands.  */
   for (i = 0, d = bdesc_args;
        i < ARRAY_SIZE (bdesc_args);
@@ -38188,6 +38224,14 @@ rdseed_step:
 					       target);
     }
 
+  if (fcode >= IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST
+      && fcode <= IX86_BUILTIN__BDESC_SPECIAL_ARGS2_LAST)
+    {
+      i = fcode - IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST;
+      return ix86_expand_special_args_builtin (bdesc_special_args2 + i, exp,
+					       target);
+    }
+
   if (fcode >= IX86_BUILTIN__BDESC_ARGS_FIRST
       && fcode <= IX86_BUILTIN__BDESC_ARGS_LAST)
     {
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 9d864501d4f..9b2eb468d63 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -183,6 +183,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_IBT_P(x)	TARGET_ISA_IBT_P(x)
 #define TARGET_SHSTK	TARGET_ISA_SHSTK
 #define TARGET_SHSTK_P(x)	TARGET_ISA_SHSTK_P(x)
+#define TARGET_PTWRITE	TARGET_ISA_PTWRITE
+#define TARGET_PTWRITE_P(x)	TARGET_ISA_PTWRITE_P(x)
 
 #define TARGET_LP64	TARGET_ABI_64
 #define TARGET_LP64_P(x)	TARGET_ABI_64_P(x)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a4832bf696f..fd0c1f06b6e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -287,6 +287,8 @@
   UNSPECV_WRUSS
   UNSPECV_SETSSBSY
   UNSPECV_CLRSSBSY
+
+  UNSPECV_PTWRITE
 ])
 
 ;; Constants to represent rounding modes in the ROUND instruction
@@ -20125,6 +20127,14 @@
   [(set_attr "type" "other")
    (set_attr "prefix_extra" "2")])
 
+(define_insn "ptwrite<mode>"
+  [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
+		    UNSPECV_PTWRITE)]
+  "TARGET_PTWRITE"
+  "ptwrite\t%0"
+  [(set_attr "type" "other")
+   (set_attr "prefix_extra" "2")])
+
 (define_insn "rdrand<mode>_1"
   [(set (match_operand:SWI248 0 "register_operand" "=r")
 	(unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDRAND))
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 3a306bbd73f..7191caa76ad 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -901,6 +901,10 @@ mrdrnd
 Target Report Mask(ISA_RDRND) Var(ix86_isa_flags) Save
 Support RDRND built-in functions and code generation.
 
+mptwrite
+Target Report Mask(ISA_PTWRITE) Var(ix86_isa_flags2) Save
+Support PTWRITE built-in functions and code generation.
+
 mf16c
 Target Report Mask(ISA_F16C) Var(ix86_isa_flags) Save
 Support F16C built-in functions and code generation.
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index a5ad8af32b0..1a513a3cfaa 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -234,4 +234,28 @@ _rdrand64_step (unsigned long long *__P)
 
 #endif /* __x86_64__  */
 
+#ifndef __PTWRITE__
+#pragma GCC push_options
+#pragma GCC target("ptwrite")
+#define __DISABLE_PTWRITE__
+#endif
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_ptwrite_u64 (unsigned long long __B)
+{
+  __builtin_ia32_ptwrite64 (__B);
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_ptwrite_u32 (unsigned __B)
+{
+  __builtin_ia32_ptwrite32 (__B);
+}
+#ifdef __DISABLE_PTWRITE__
+#undef __DISABLE_PTWRITE__
+#pragma GCC pop_options
+#endif /* __DISABLE_PTWRITE__ */
+
 #endif /* _IMMINTRIN_H_INCLUDED */
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index cb9df971a5f..8efea867262 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -21446,6 +21446,15 @@ unsigned int __builtin_ia32_rdrand32_step (unsigned int *)
 unsigned int __builtin_ia32_rdrand64_step (unsigned long long *)
 @end smallexample
 
+The following built-in function is available when @option{-mptwrite} is
+used.  All of them generate the machine instruction that is part of the
+name.
+
+@smallexample
+void __builtin_ia32_ptwrite32 (unsigned)
+void __builtin_ia32_ptwrite64 (unsigned long long)
+@end smallexample
+
 The following built-in functions are available when @option{-msse4a} is used.
 All of them generate the machine instruction that is part of the name.
 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index df357bea7dc..602cf8e3edc 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1245,7 +1245,7 @@ See RS/6000 and PowerPC Options.
 -mmmx  -msse  -msse2  -msse3  -mssse3  -msse4.1  -msse4.2  -msse4  -mavx @gol
 -mavx2  -mavx512f  -mavx512pf  -mavx512er  -mavx512cd  -mavx512vl @gol
 -mavx512bw  -mavx512dq  -mavx512ifma  -mavx512vbmi  -msha  -maes @gol
--mpclmul  -mfsgsbase  -mrdrnd  -mf16c  -mfma @gol
+-mpclmul  -mfsgsbase  -mrdrnd  -mf16c  -mfma -mptwrite @gol
 -mprefetchwt1  -mclflushopt  -mxsavec  -mxsaves @gol
 -msse4a  -m3dnow  -m3dnowa  -mpopcnt  -mabm  -mbmi  -mtbm  -mfma4  -mxop @gol
 -mlzcnt  -mbmi2  -mfxsr  -mxsave  -mxsaveopt  -mrtm  -mlwp  -mmpx  @gol
@@ -27064,6 +27064,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
 @itemx -mfsgsbase
 @opindex mfsgsbase
 @need 200
+@itemx -mptwrite
+@opindex mptwrite
+@need 200
 @itemx -mrdrnd
 @opindex mrdrnd
 @need 200
-- 
2.15.1



More information about the Gcc-patches mailing list