Support for Runtime CPU type detection via builtins (issue5754058)

Sriraman Tallam tmsriram@google.com
Wed Apr 25 23:38:00 GMT 2012


Hi H.J,

   Could you please review this patch for AVX2 check?

	* config/i386/i386-cpuinfo.c (FEATURE_AVX2): New enum value.
	(get_available_features): New argument. Check for AVX2.
	(__cpu_indicator_init): Modify call to get_available_features.
	* doc/extend.texi: Document avx2 support.
	* testsuite/gcc.target/i386/builtin_target.c: Check avx2.
	* config/i386/i386.c (fold_builtin_cpu): Add avx2.

Thanks,
-Sri.

On Wed, Apr 25, 2012 at 2:45 PM, Sriraman Tallam <tmsriram@google.com> wrote:
> On Wed, Apr 25, 2012 at 2:28 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Wed, Apr 25, 2012 at 2:25 PM, Sriraman Tallam <tmsriram@google.com> wrote:
>>> On Tue, Apr 24, 2012 at 7:39 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>> On Tue, Apr 24, 2012 at 7:06 PM, Sriraman Tallam <tmsriram@google.com> wrote:
>>>>> On Tue, Apr 24, 2012 at 5:24 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>> On Tue, Apr 24, 2012 at 5:10 PM, Sriraman Tallam <tmsriram@google.com> wrote:
>>>>>>> Hi,
>>>>>>>
>>>>>>>   Thanks for all the comments. I have made all the changes as
>>>>>>> mentioned and submitted the patch. Summary of changes made:
>>>>>>>
>>>>>>> * Add support for AVX
>>>>>>> * Fix documentation in extend.texi
>>>>>>> * Make it thread-safe according to H.J.'s comments.
>>>>>>>
>>>>>>> I have attached the patch. Boot-strapped and checked for test parity
>>>>>>> with pristine build.
>>>>>>>
>>>>>>>       * config/i386/i386.c (build_processor_model_struct): New function.
>>>>>>>        (make_var_decl): New function.
>>>>>>>        (fold_builtin_cpu): New function.
>>>>>>>        (ix86_fold_builtin): New function.
>>>>>>>        (make_cpu_type_builtin): New function.
>>>>>>>        (ix86_init_platform_type_builtins): New function.
>>>>>>>        (ix86_expand_builtin): Expand new builtins by folding them.
>>>>>>>        (ix86_init_builtins): Make new builtins to detect CPU type.
>>>>>>>        (TARGET_FOLD_BUILTIN): New macro.
>>>>>>>        (IX86_BUILTIN_CPU_INIT): New enum value.
>>>>>>>        (IX86_BUILTIN_CPU_IS): New enum value.
>>>>>>>        (IX86_BUILTIN_CPU_SUPPORTS): New enum value.
>>>>>>>        * config/i386/i386-builtin-types.def: New function type.
>>>>>>>        * testsuite/gcc.target/builtin_target.c: New testcase.
>>>>>>>        * doc/extend.texi: Document builtins.
>>>>>>>
>>>>>>>        * libgcc/config/i386/i386-cpuinfo.c: New file.
>>>>>>>        * libgcc/config/i386/t-cpuinfo: New file.
>>>>>>>        * libgcc/config.host: Include t-cpuinfo.
>>>>>>>        * libgcc/config/i386/libgcc-glibc.ver: Version symbol __cpu_model.
>>>>>>>
>>>>>>>
>>>>>>
>>>>>> +  /* This function needs to run just once.  */
>>>>>> +  if (__cpu_model.__cpu_vendor)
>>>>>> +    return 0;
>>>>>> +
>>>>>> +  /* Assume cpuid insn present. Run in level 0 to get vendor id. */
>>>>>> +  if (!__get_cpuid_output (0, &eax, &ebx, &ecx, &edx))
>>>>>> +    return -1;
>>>>>>
>>>>>> If __get_cpuid_output returns non-zero, it will be called
>>>>>> repeatedly.  I think you should set __cpu_model.__cpu_vendor
>>>>>> to non-zero in this case.
>>>>>
>>>>> Done now.
>>>>>
>>>>> 2012-04-24  Sriraman Tallam  <tmsriram@google.com>
>>>>>
>>>>>        * libgcc/config/i386/i386-cpuinfo.c: Set __cpu_vendor always.
>>>>>
>>>>>
>>>>> Index: libgcc/config/i386/i386-cpuinfo.c
>>>>> ===================================================================
>>>>> --- libgcc/config/i386/i386-cpuinfo.c   (revision 186789)
>>>>> +++ libgcc/config/i386/i386-cpuinfo.c   (working copy)
>>>>> @@ -256,16 +256,25 @@ __cpu_indicator_init (void)
>>>>>
>>>>>   /* Assume cpuid insn present. Run in level 0 to get vendor id. */
>>>>>   if (!__get_cpuid_output (0, &eax, &ebx, &ecx, &edx))
>>>>> -    return -1;
>>>>> +    {
>>>>> +      __cpu_model.__cpu_vendor = VENDOR_OTHER;
>>>>> +      return -1;
>>>>> +    }
>>>>>
>>>>>   vendor = ebx;
>>>>>   max_level = eax;
>>>>>
>>>>>   if (max_level < 1)
>>>>> -    return -1;
>>>>> +    {
>>>>> +      __cpu_model.__cpu_vendor = VENDOR_OTHER;
>>>>> +      return -1;
>>>>> +    }
>>>>>
>>>>>   if (!__get_cpuid_output (1, &eax, &ebx, &ecx, &edx))
>>>>> -    return -1;
>>>>> +    {
>>>>> +      __cpu_model.__cpu_vendor = VENDOR_OTHER;
>>>>> +      return -1;
>>>>> +    }
>>>>>
>>>>>   model = (eax >> 4) & 0x0f;
>>>>>   family = (eax >> 8) & 0x0f;
>>>>>
>>>>>
>>>>> Thanks,
>>>>
>>>> Should you also handle AVX2?
>>>
>>>  I cannot test it and thought I would wait till I get access to a
>>> processor with AVX2.
>>>
>>
>> You can download an AVX2 emulator (SDE) from
>>
>> http://software.intel.com/en-us/avx/
>>
>> to test AVX2 binaries.
>
> Ok thanks, I will prepare a patch.
>
> -Sri.
>
>>
>> --
>> H.J.
-------------- next part --------------
	* config/i386/i386-cpuinfo.c (FEATURE_AVX2): New enum value.
	(get_available_features): New argument. Check for AVX2.
	(__cpu_indicator_init): Modify call to get_available_features.
	* doc/extend.texi: Document avx2 support.
	* testsuite/gcc.target/i386/builtin_target.c: Check avx2.
	* config/i386/i386.c (fold_builtin_cpu): Add avx2.




Index: libgcc/config/i386/i386-cpuinfo.c
===================================================================
--- libgcc/config/i386/i386-cpuinfo.c	(revision 186848)
+++ libgcc/config/i386/i386-cpuinfo.c	(working copy)
@@ -75,7 +75,8 @@ enum processor_features
   FEATURE_SSSE3,
   FEATURE_SSE4_1,
   FEATURE_SSE4_2,
-  FEATURE_AVX
+  FEATURE_AVX,
+  FEATURE_AVX2
 };
 
 struct __processor_model
@@ -191,8 +192,11 @@ get_intel_cpu (unsigned int family, unsigned int m
     }
 }	             	
 
+/* ECX and EDX are output of CPUID at level one.  MAX_CPUID_LEVEL is
+   the max possible level of CPUID insn.  */
 static void
-get_available_features (unsigned int ecx, unsigned int edx)
+get_available_features (unsigned int ecx, unsigned int edx,
+			int max_cpuid_level)
 {
   unsigned int features = 0;
 
@@ -217,6 +221,15 @@ static void
   if (ecx & bit_AVX)
     features |= (1 << FEATURE_AVX);
 
+  /* Get Advanced Features at level 7 (eax = 7, ecx = 0). */
+  if (max_cpuid_level >= 7)
+    {
+      unsigned int eax, ebx, ecx, edx;
+      __cpuid_count (7, 0, eax, ebx, ecx, edx);
+      if (ebx & bit_AVX2)
+	features |= (1 << FEATURE_AVX2);
+    }
+
   __cpu_model.__cpu_features[0] = features;
 }
 
@@ -296,7 +309,7 @@ __cpu_indicator_init (void)
       /* Get CPU type.  */
       get_intel_cpu (family, model, brand_id);
       /* Find available features. */
-      get_available_features (ecx, edx);
+      get_available_features (ecx, edx, max_level);
       __cpu_model.__cpu_vendor = VENDOR_INTEL;
     }
   else if (vendor == SIG_AMD)
@@ -311,7 +324,7 @@ __cpu_indicator_init (void)
       /* Get CPU type.  */
       get_amd_cpu (family, model);
       /* Find available features. */
-      get_available_features (ecx, edx);
+      get_available_features (ecx, edx, max_level);
       __cpu_model.__cpu_vendor = VENDOR_AMD;
     }
   else
Index: gcc/doc/extend.texi
===================================================================
--- gcc/doc/extend.texi	(revision 186848)
+++ gcc/doc/extend.texi	(working copy)
@@ -9541,6 +9541,8 @@ SSE4.1 instructions.
 SSE4.2 instructions.
 @item avx
 AVX instructions.
+@item avx2
+AVX2 instructions.
 @end table
 
 Here is an example:
Index: gcc/testsuite/gcc.target/i386/builtin_target.c
===================================================================
--- gcc/testsuite/gcc.target/i386/builtin_target.c	(revision 186848)
+++ gcc/testsuite/gcc.target/i386/builtin_target.c	(working copy)
@@ -29,6 +29,8 @@ fn1 ()
 
   assert (__builtin_cpu_supports ("avx") >= 0);
 
+  assert (__builtin_cpu_supports ("avx2") >= 0);
+
   /* Check CPU type.  */
   assert (__builtin_cpu_is ("amd") >= 0);
 
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c	(revision 186848)
+++ gcc/config/i386/i386.c	(working copy)
@@ -27763,6 +27763,7 @@ fold_builtin_cpu (tree fndecl, tree *args)
     F_SSE4_1,
     F_SSE4_2,
     F_AVX,
+    F_AVX2,
     F_MAX
   };
 
@@ -27830,7 +27831,8 @@ fold_builtin_cpu (tree fndecl, tree *args)
       {"ssse3",  F_SSSE3},
       {"sse4.1", F_SSE4_1},
       {"sse4.2", F_SSE4_2},
-      {"avx",    F_AVX}
+      {"avx",    F_AVX},
+      {"avx2",   F_AVX2}
     };
 
   static tree __processor_model_type = NULL_TREE;


More information about the Gcc-patches mailing list