This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix __builtin_cpu_supports (PR target/84945)


Hi!

As mentioned in the PR, this is another case where we've run into too many
separate feature bits issues on x86. unfortunately in this case it affects
ABI.  __cpu_model variable contains just 32 bits for the features, and we
now need 4 extra features that won't fit in there.  The current code just
invokes in the compiler as well as runtime initialization.

While on {x86_64,i?86}-linux __cpu_model is exported from libgcc_s.so.1
only for backwards compatibility and new code is always using a hidden
copy in each DSO or binary from libgcc.a, that is not the case on other
x86 targets.  For a Linux only solution, we could just grow up __cpu_model
except for the one in libgcc_s.so.1 (i.e. #ifdef SHARED) and as long as GCC
8+ libgcc.a would be used for linking of __builtin_cpu_supports code from
both old and new gcc things would just work.  For other targets, as
__cpu_model is exported data object, binaries could have copy relocations
against it, so we really can't change the size.

As the preferred way is to put it into libgcc.a only, this patch just puts
the overflow features into a new separate variable which is not put into
libgcc_s.so.1 at all, which will force use of the libgcc.a copy for
at least __builtin_cpu_supports calls that ask for the new features.

Bootstrapped/regtested on x86_64-linux and i686-linux (on Haswell-E, don't
have access to any of the CPUs with the new features), ok for trunk?

2018-03-19  Jakub Jelinek  <jakub@redhat.com>

	PR target/84945
	* config/i386/i386.c (fold_builtin_cpu): For features above 31
	use __cpu_features2 variable instead of __cpu_model.__cpu_features[0].
	Use 1U instead of 1.  Formatting fixes.

	* gcc.target/i386/pr84945.c: New test.

	* config/i386/cpuinfo.h (__cpu_features2): Declare.
	* config/i386/cpuinfo.c (__cpu_features2): New variable for
	ifndef SHARED only.
	(set_feature): Define.
	(get_available_features): Use set_feature macro.  Set __cpu_features2
	to the second word of features ifndef SHARED.

--- gcc/config/i386/i386.c.jj	2018-03-17 12:11:39.954436461 +0100
+++ gcc/config/i386/i386.c	2018-03-19 16:36:55.565231809 +0100
@@ -33265,8 +33264,8 @@ fold_builtin_cpu (tree fndecl, tree *arg
 	}
 
       /* Get the appropriate field in __cpu_model.  */
-      ref =  build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
-		     field, NULL_TREE);
+      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
+		    field, NULL_TREE);
 
       /* Check the value.  */
       final = build2 (EQ_EXPR, unsigned_type_node, ref,
@@ -33296,20 +33295,34 @@ fold_builtin_cpu (tree fndecl, tree *arg
 	  return integer_zero_node;
 	}
 
+      if (isa_names_table[i].feature >= 32)
+	{
+	  tree __cpu_features2_var = make_var_decl (unsigned_type_node,
+						    "__cpu_features2");
+
+	  varpool_node::add (__cpu_features2_var);
+	  field_val = (1U << (isa_names_table[i].feature - 32));
+	  /* Return __cpu_features2 & field_val  */
+	  final = build2 (BIT_AND_EXPR, unsigned_type_node,
+			  __cpu_features2_var,
+			  build_int_cstu (unsigned_type_node, field_val));
+	  return build1 (CONVERT_EXPR, integer_type_node, final);
+	}
+
       field = TYPE_FIELDS (__processor_model_type);
       /* Get the last field, which is __cpu_features.  */
       while (DECL_CHAIN (field))
         field = DECL_CHAIN (field);
 
       /* Get the appropriate field: __cpu_model.__cpu_features  */
-      ref =  build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
-		     field, NULL_TREE);
+      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
+		    field, NULL_TREE);
 
       /* Access the 0th element of __cpu_features array.  */
       array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
 			  integer_zero_node, NULL_TREE, NULL_TREE);
 
-      field_val = (1 << isa_names_table[i].feature);
+      field_val = (1U << (isa_names_table[i].feature));
       /* Return __cpu_model.__cpu_features[0] & field_val  */
       final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
 		      build_int_cstu (unsigned_type_node, field_val));
--- gcc/testsuite/gcc.target/i386/pr84945.c.jj	2018-03-19 16:37:14.667228396 +0100
+++ gcc/testsuite/gcc.target/i386/pr84945.c	2018-03-19 16:36:42.057234231 +0100
@@ -0,0 +1,16 @@
+/* PR target/84945 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+int
+main ()
+{
+  /* AVX512_VNNI instructions are all EVEX encoded, so if
+     __builtin_cpu_supports says avx512vnni is available and avx512f is not,
+     this is a GCC bug.  Ditto for AVX512_BITALG  */
+  if (!__builtin_cpu_supports ("avx512f")
+      && (__builtin_cpu_supports ("avx512vnni")
+	  || __builtin_cpu_supports ("avx512bitalg")))
+    __builtin_abort ();
+  return 0;
+}
--- libgcc/config/i386/cpuinfo.c.jj	2018-03-15 09:10:20.870075051 +0100
+++ libgcc/config/i386/cpuinfo.c	2018-03-19 16:13:25.059481079 +0100
@@ -39,6 +39,13 @@ int __cpu_indicator_init (void)
 
 
 struct __processor_model __cpu_model = { };
+#ifndef SHARED
+/* We want to move away from __cpu_model in libgcc_s.so.1 and the
+   size of __cpu_model is part of ABI.  So, new features that don't
+   fit into __cpu_model.__cpu_features[0] go into extra variables
+   in libgcc.a only, preferrably hidden.  */
+unsigned int __cpu_features2;
+#endif
 
 
 /* Get the specific type of AMD CPU.  */
@@ -231,78 +238,81 @@ get_available_features (unsigned int ecx
   unsigned int ext_level;
 
   unsigned int features = 0;
+  unsigned int features2 = 0;
 
+#define set_feature(f) \
+  if (f < 32) features |= (1U << f); else features2 |= (1U << (f - 32))
   if (edx & bit_CMOV)
-    features |= (1 << FEATURE_CMOV);
+    set_feature (FEATURE_CMOV);
   if (edx & bit_MMX)
-    features |= (1 << FEATURE_MMX);
+    set_feature (FEATURE_MMX);
   if (edx & bit_SSE)
-    features |= (1 << FEATURE_SSE);
+    set_feature (FEATURE_SSE);
   if (edx & bit_SSE2)
-    features |= (1 << FEATURE_SSE2);
+    set_feature (FEATURE_SSE2);
   if (ecx & bit_POPCNT)
-    features |= (1 << FEATURE_POPCNT);
+    set_feature (FEATURE_POPCNT);
   if (ecx & bit_AES)
-    features |= (1 << FEATURE_AES);
+    set_feature (FEATURE_AES);
   if (ecx & bit_PCLMUL)
-    features |= (1 << FEATURE_PCLMUL);
+    set_feature (FEATURE_PCLMUL);
   if (ecx & bit_SSE3)
-    features |= (1 << FEATURE_SSE3);
+    set_feature (FEATURE_SSE3);
   if (ecx & bit_SSSE3)
-    features |= (1 << FEATURE_SSSE3);
+    set_feature (FEATURE_SSSE3);
   if (ecx & bit_SSE4_1)
-    features |= (1 << FEATURE_SSE4_1);
+    set_feature (FEATURE_SSE4_1);
   if (ecx & bit_SSE4_2)
-    features |= (1 << FEATURE_SSE4_2);
+    set_feature (FEATURE_SSE4_2);
   if (ecx & bit_AVX)
-    features |= (1 << FEATURE_AVX);
+    set_feature (FEATURE_AVX);
   if (ecx & bit_FMA)
-    features |= (1 << FEATURE_FMA);
+    set_feature (FEATURE_FMA);
 
   /* Get Advanced Features at level 7 (eax = 7, ecx = 0). */
   if (max_cpuid_level >= 7)
     {
       __cpuid_count (7, 0, eax, ebx, ecx, edx);
       if (ebx & bit_BMI)
-        features |= (1 << FEATURE_BMI);
+	set_feature (FEATURE_BMI);
       if (ebx & bit_AVX2)
-	features |= (1 << FEATURE_AVX2);
+	set_feature (FEATURE_AVX2);
       if (ebx & bit_BMI2)
-        features |= (1 << FEATURE_BMI2);
+	set_feature (FEATURE_BMI2);
       if (ebx & bit_AVX512F)
-	features |= (1 << FEATURE_AVX512F);
+	set_feature (FEATURE_AVX512F);
       if (ebx & bit_AVX512VL)
-	features |= (1 << FEATURE_AVX512VL);
+	set_feature (FEATURE_AVX512VL);
       if (ebx & bit_AVX512BW)
-	features |= (1 << FEATURE_AVX512BW);
+	set_feature (FEATURE_AVX512BW);
       if (ebx & bit_AVX512DQ)
-	features |= (1 << FEATURE_AVX512DQ);
+	set_feature (FEATURE_AVX512DQ);
       if (ebx & bit_AVX512CD)
-	features |= (1 << FEATURE_AVX512CD);
+	set_feature (FEATURE_AVX512CD);
       if (ebx & bit_AVX512PF)
-	features |= (1 << FEATURE_AVX512PF);
+	set_feature (FEATURE_AVX512PF);
       if (ebx & bit_AVX512ER)
-	features |= (1 << FEATURE_AVX512ER);
+	set_feature (FEATURE_AVX512ER);
       if (ebx & bit_AVX512IFMA)
-	features |= (1 << FEATURE_AVX512IFMA);
+	set_feature (FEATURE_AVX512IFMA);
       if (ecx & bit_AVX512VBMI)
-	features |= (1 << FEATURE_AVX512VBMI);
+	set_feature (FEATURE_AVX512VBMI);
       if (ecx & bit_AVX512VBMI2)
-	features |= (1 << FEATURE_AVX512VBMI2);
+	set_feature (FEATURE_AVX512VBMI2);
       if (ecx & bit_GFNI)
-	features |= (1 << FEATURE_GFNI);
+	set_feature (FEATURE_GFNI);
       if (ecx & bit_VPCLMULQDQ)
-	features |= (1 << FEATURE_VPCLMULQDQ);
+	set_feature (FEATURE_VPCLMULQDQ);
       if (ecx & bit_AVX512VNNI)
-	features |= (1 << FEATURE_AVX512VNNI);
+	set_feature (FEATURE_AVX512VNNI);
       if (ecx & bit_AVX512BITALG)
-	features |= (1 << FEATURE_AVX512BITALG);
+	set_feature (FEATURE_AVX512BITALG);
       if (ecx & bit_AVX512VPOPCNTDQ)
-	features |= (1 << FEATURE_AVX512VPOPCNTDQ);
+	set_feature (FEATURE_AVX512VPOPCNTDQ);
       if (edx & bit_AVX5124VNNIW)
-	features |= (1 << FEATURE_AVX5124VNNIW);
+	set_feature (FEATURE_AVX5124VNNIW);
       if (edx & bit_AVX5124FMAPS)
-	features |= (1 << FEATURE_AVX5124FMAPS);
+	set_feature (FEATURE_AVX5124FMAPS);
     }
 
   /* Check cpuid level of extended features.  */
@@ -313,14 +323,19 @@ get_available_features (unsigned int ecx
       __cpuid (0x80000001, eax, ebx, ecx, edx);
 
       if (ecx & bit_SSE4a)
-	features |= (1 << FEATURE_SSE4_A);
+	set_feature (FEATURE_SSE4_A);
       if (ecx & bit_FMA4)
-	features |= (1 << FEATURE_FMA4);
+	set_feature (FEATURE_FMA4);
       if (ecx & bit_XOP)
-	features |= (1 << FEATURE_XOP);
+	set_feature (FEATURE_XOP);
     }
     
   __cpu_model.__cpu_features[0] = features;
+#ifndef SHARED
+  __cpu_features2 = features2;
+#else
+  (void) features2;
+#endif
 }
 
 /* A constructor function that is sets __cpu_model and __cpu_features with
--- libgcc/config/i386/cpuinfo.h.jj	2018-03-15 09:10:20.889075049 +0100
+++ libgcc/config/i386/cpuinfo.h	2018-03-19 16:10:34.278510545 +0100
@@ -124,3 +124,4 @@ extern struct __processor_model
   unsigned int __cpu_subtype;
   unsigned int __cpu_features[1];
 } __cpu_model;
+extern unsigned int __cpu_features2;

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]