Support for Runtime CPU type detection via builtins (issue5754058)
Sriraman Tallam
tmsriram@google.com
Wed Apr 25 23:38:00 GMT 2012
Hi H.J,
Could you please review this patch for AVX2 check?
* config/i386/i386-cpuinfo.c (FEATURE_AVX2): New enum value.
(get_available_features): New argument. Check for AVX2.
(__cpu_indicator_init): Modify call to get_available_features.
* doc/extend.texi: Document avx2 support.
* testsuite/gcc.target/i386/builtin_target.c: Check avx2.
* config/i386/i386.c (fold_builtin_cpu): Add avx2.
Thanks,
-Sri.
On Wed, Apr 25, 2012 at 2:45 PM, Sriraman Tallam <tmsriram@google.com> wrote:
> On Wed, Apr 25, 2012 at 2:28 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Wed, Apr 25, 2012 at 2:25 PM, Sriraman Tallam <tmsriram@google.com> wrote:
>>> On Tue, Apr 24, 2012 at 7:39 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>> On Tue, Apr 24, 2012 at 7:06 PM, Sriraman Tallam <tmsriram@google.com> wrote:
>>>>> On Tue, Apr 24, 2012 at 5:24 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>> On Tue, Apr 24, 2012 at 5:10 PM, Sriraman Tallam <tmsriram@google.com> wrote:
>>>>>>> Hi,
>>>>>>>
>>>>>>> Thanks for all the comments. I have made all the changes as
>>>>>>> mentioned and submitted the patch. Summary of changes made:
>>>>>>>
>>>>>>> * Add support for AVX
>>>>>>> * Fix documentation in extend.texi
>>>>>>> * Make it thread-safe according to H.J.'s comments.
>>>>>>>
>>>>>>> I have attached the patch. Boot-strapped and checked for test parity
>>>>>>> with pristine build.
>>>>>>>
>>>>>>> * config/i386/i386.c (build_processor_model_struct): New function.
>>>>>>> (make_var_decl): New function.
>>>>>>> (fold_builtin_cpu): New function.
>>>>>>> (ix86_fold_builtin): New function.
>>>>>>> (make_cpu_type_builtin): New function.
>>>>>>> (ix86_init_platform_type_builtins): New function.
>>>>>>> (ix86_expand_builtin): Expand new builtins by folding them.
>>>>>>> (ix86_init_builtins): Make new builtins to detect CPU type.
>>>>>>> (TARGET_FOLD_BUILTIN): New macro.
>>>>>>> (IX86_BUILTIN_CPU_INIT): New enum value.
>>>>>>> (IX86_BUILTIN_CPU_IS): New enum value.
>>>>>>> (IX86_BUILTIN_CPU_SUPPORTS): New enum value.
>>>>>>> * config/i386/i386-builtin-types.def: New function type.
>>>>>>> * testsuite/gcc.target/builtin_target.c: New testcase.
>>>>>>> * doc/extend.texi: Document builtins.
>>>>>>>
>>>>>>> * libgcc/config/i386/i386-cpuinfo.c: New file.
>>>>>>> * libgcc/config/i386/t-cpuinfo: New file.
>>>>>>> * libgcc/config.host: Include t-cpuinfo.
>>>>>>> * libgcc/config/i386/libgcc-glibc.ver: Version symbol __cpu_model.
>>>>>>>
>>>>>>>
>>>>>>
>>>>>> + /* This function needs to run just once. */
>>>>>> + if (__cpu_model.__cpu_vendor)
>>>>>> + return 0;
>>>>>> +
>>>>>> + /* Assume cpuid insn present. Run in level 0 to get vendor id. */
>>>>>> + if (!__get_cpuid_output (0, &eax, &ebx, &ecx, &edx))
>>>>>> + return -1;
>>>>>>
>>>>>> If __get_cpuid_output returns zero, it will be called
>>>>>> repeatedly. I think you should set __cpu_model.__cpu_vendor
>>>>>> to non-zero in this case.
>>>>>
>>>>> Done now.
>>>>>
>>>>> 2012-04-24 Sriraman Tallam <tmsriram@google.com>
>>>>>
>>>>> * libgcc/config/i386/i386-cpuinfo.c: Set __cpu_vendor always.
>>>>>
>>>>>
>>>>> Index: libgcc/config/i386/i386-cpuinfo.c
>>>>> ===================================================================
>>>>> --- libgcc/config/i386/i386-cpuinfo.c (revision 186789)
>>>>> +++ libgcc/config/i386/i386-cpuinfo.c (working copy)
>>>>> @@ -256,16 +256,25 @@ __cpu_indicator_init (void)
>>>>>
>>>>> /* Assume cpuid insn present. Run in level 0 to get vendor id. */
>>>>> if (!__get_cpuid_output (0, &eax, &ebx, &ecx, &edx))
>>>>> - return -1;
>>>>> + {
>>>>> + __cpu_model.__cpu_vendor = VENDOR_OTHER;
>>>>> + return -1;
>>>>> + }
>>>>>
>>>>> vendor = ebx;
>>>>> max_level = eax;
>>>>>
>>>>> if (max_level < 1)
>>>>> - return -1;
>>>>> + {
>>>>> + __cpu_model.__cpu_vendor = VENDOR_OTHER;
>>>>> + return -1;
>>>>> + }
>>>>>
>>>>> if (!__get_cpuid_output (1, &eax, &ebx, &ecx, &edx))
>>>>> - return -1;
>>>>> + {
>>>>> + __cpu_model.__cpu_vendor = VENDOR_OTHER;
>>>>> + return -1;
>>>>> + }
>>>>>
>>>>> model = (eax >> 4) & 0x0f;
>>>>> family = (eax >> 8) & 0x0f;
>>>>>
>>>>>
>>>>> Thanks,
>>>>
>>>> Should you also handle AVX2?
>>>
>>> I cannot test it, so I thought I would wait until I get access to a
>>> processor with AVX2.
>>>
>>
>> You can download an AVX2 emulator (SDE) from
>>
>> http://software.intel.com/en-us/avx/
>>
>> to test AVX2 binaries.
>
> Ok thanks, I will prepare a patch.
>
> -Sri.
>
>>
>> --
>> H.J.
-------------- next part --------------
* config/i386/i386-cpuinfo.c (FEATURE_AVX2): New enum value.
(get_available_features): New argument. Check for AVX2.
(__cpu_indicator_init): Modify call to get_available_features.
* doc/extend.texi: Document avx2 support.
* testsuite/gcc.target/i386/builtin_target.c: Check avx2.
* config/i386/i386.c (fold_builtin_cpu): Add avx2.
Index: libgcc/config/i386/i386-cpuinfo.c
===================================================================
--- libgcc/config/i386/i386-cpuinfo.c (revision 186848)
+++ libgcc/config/i386/i386-cpuinfo.c (working copy)
@@ -75,7 +75,8 @@ enum processor_features
FEATURE_SSSE3,
FEATURE_SSE4_1,
FEATURE_SSE4_2,
- FEATURE_AVX
+ FEATURE_AVX,
+ FEATURE_AVX2
};
struct __processor_model
@@ -191,8 +192,11 @@ get_intel_cpu (unsigned int family, unsigned int m
}
}
+/* ECX and EDX are output of CPUID at level one. MAX_CPUID_LEVEL is
+ the max possible level of CPUID insn. */
static void
-get_available_features (unsigned int ecx, unsigned int edx)
+get_available_features (unsigned int ecx, unsigned int edx,
+ int max_cpuid_level)
{
unsigned int features = 0;
@@ -217,6 +221,15 @@ static void
if (ecx & bit_AVX)
features |= (1 << FEATURE_AVX);
+ /* Get Advanced Features at level 7 (eax = 7, ecx = 0). */
+ if (max_cpuid_level >= 7)
+ {
+ unsigned int eax, ebx, ecx, edx;
+ __cpuid_count (7, 0, eax, ebx, ecx, edx);
+ if (ebx & bit_AVX2)
+ features |= (1 << FEATURE_AVX2);
+ }
+
__cpu_model.__cpu_features[0] = features;
}
@@ -296,7 +309,7 @@ __cpu_indicator_init (void)
/* Get CPU type. */
get_intel_cpu (family, model, brand_id);
/* Find available features. */
- get_available_features (ecx, edx);
+ get_available_features (ecx, edx, max_level);
__cpu_model.__cpu_vendor = VENDOR_INTEL;
}
else if (vendor == SIG_AMD)
@@ -311,7 +324,7 @@ __cpu_indicator_init (void)
/* Get CPU type. */
get_amd_cpu (family, model);
/* Find available features. */
- get_available_features (ecx, edx);
+ get_available_features (ecx, edx, max_level);
__cpu_model.__cpu_vendor = VENDOR_AMD;
}
else
Index: gcc/doc/extend.texi
===================================================================
--- gcc/doc/extend.texi (revision 186848)
+++ gcc/doc/extend.texi (working copy)
@@ -9541,6 +9541,8 @@ SSE4.1 instructions.
SSE4.2 instructions.
@item avx
AVX instructions.
+@item avx2
+AVX2 instructions.
@end table
Here is an example:
Index: gcc/testsuite/gcc.target/i386/builtin_target.c
===================================================================
--- gcc/testsuite/gcc.target/i386/builtin_target.c (revision 186848)
+++ gcc/testsuite/gcc.target/i386/builtin_target.c (working copy)
@@ -29,6 +29,8 @@ fn1 ()
assert (__builtin_cpu_supports ("avx") >= 0);
+ assert (__builtin_cpu_supports ("avx2") >= 0);
+
/* Check CPU type. */
assert (__builtin_cpu_is ("amd") >= 0);
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c (revision 186848)
+++ gcc/config/i386/i386.c (working copy)
@@ -27763,6 +27763,7 @@ fold_builtin_cpu (tree fndecl, tree *args)
F_SSE4_1,
F_SSE4_2,
F_AVX,
+ F_AVX2,
F_MAX
};
@@ -27830,7 +27831,8 @@ fold_builtin_cpu (tree fndecl, tree *args)
{"ssse3", F_SSSE3},
{"sse4.1", F_SSE4_1},
{"sse4.2", F_SSE4_2},
- {"avx", F_AVX}
+ {"avx", F_AVX},
+ {"avx2", F_AVX2}
};
static tree __processor_model_type = NULL_TREE;
More information about the Gcc-patches
mailing list