This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] Fix __builtin_cpu_supports (PR target/84945)
- From: Uros Bizjak <ubizjak at gmail dot com>
- To: Jakub Jelinek <jakub at redhat dot com>
- Cc: Kirill Yukhin <kirill dot yukhin at gmail dot com>, "H. J. Lu" <hjl dot tools at gmail dot com>, "Koval, Julia" <julia dot koval at intel dot com>, "gcc-patches at gcc dot gnu dot org" <gcc-patches at gcc dot gnu dot org>
- Date: Tue, 20 Mar 2018 09:03:39 +0100
- Subject: Re: [PATCH] Fix __builtin_cpu_supports (PR target/84945)
- References: <20180319193556.GO8577@tucnak>
On Mon, Mar 19, 2018 at 8:35 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> As mentioned in the PR, this is another case where we've run into too many
> separate feature bits issues on x86. unfortunately in this case it affects
> ABI. __cpu_model variable contains just 32 bits for the features, and we
> now need 4 extra features that won't fit in there. The current code just
> invokes in the compiler as well as runtime initialization.
>
> While on {x86_64,i?86}-linux __cpu_model is exported from libgcc_s.so.1
> only for backwards compatibility and new code is always using a hidden
> copy in each DSO or binary from libgcc.a, that is not the case on other
> x86 targets. For a Linux only solution, we could just grow up __cpu_model
> except for the one in libgcc_s.so.1 (i.e. #ifdef SHARED) and as long as GCC
> 8+ libgcc.a would be used for linking of __builtin_cpu_supports code from
> both old and new gcc things would just work. For other targets, as
> __cpu_model is exported data object, binaries could have copy relocations
> against it, so we really can't change the size.
>
> As the preferred way is to put it into libgcc.a only, this patch just puts
> the overflow features into a new separate variable which is not put into
> libgcc_s.so.1 at all, which will force use of the libgcc.a copy for
> at least __builtin_cpu_supports calls that ask for the new features.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux (on Haswell-E, don't
> have access to any of the CPUs with the new features), ok for trunk?
>
> 2018-03-19 Jakub Jelinek <jakub@redhat.com>
>
> PR target/84945
> * config/i386/i386.c (fold_builtin_cpu): For features above 31
> use __cpu_features2 variable instead of __cpu_model.__cpu_features[0].
> Use 1U instead of 1. Formatting fixes.
>
> * gcc.target/i386/pr84945.c: New test.
>
> * config/i386/cpuinfo.h (__cpu_features2): Declare.
> * config/i386/cpuinfo.c (__cpu_features2): New variable for
> ifndef SHARED only.
> (set_feature): Define.
> (get_available_features): Use set_feature macro. Set __cpu_features2
> to the second word of features ifndef SHARED.
LGTM, with a couple of nits.
Thanks,
Uros.
> --- gcc/config/i386/i386.c.jj 2018-03-17 12:11:39.954436461 +0100
> +++ gcc/config/i386/i386.c 2018-03-19 16:36:55.565231809 +0100
> @@ -33265,8 +33264,8 @@ fold_builtin_cpu (tree fndecl, tree *arg
> }
>
> /* Get the appropriate field in __cpu_model. */
> - ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
> - field, NULL_TREE);
> + ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
> + field, NULL_TREE);
>
> /* Check the value. */
> final = build2 (EQ_EXPR, unsigned_type_node, ref,
> @@ -33296,20 +33295,34 @@ fold_builtin_cpu (tree fndecl, tree *arg
> return integer_zero_node;
> }
>
> + if (isa_names_table[i].feature >= 32)
> + {
> + tree __cpu_features2_var = make_var_decl (unsigned_type_node,
> + "__cpu_features2");
> +
> + varpool_node::add (__cpu_features2_var);
> + field_val = (1U << (isa_names_table[i].feature - 32));
> + /* Return __cpu_features2 & field_val */
> + final = build2 (BIT_AND_EXPR, unsigned_type_node,
> + __cpu_features2_var,
> + build_int_cstu (unsigned_type_node, field_val));
> + return build1 (CONVERT_EXPR, integer_type_node, final);
> + }
> +
> field = TYPE_FIELDS (__processor_model_type);
> /* Get the last field, which is __cpu_features. */
> while (DECL_CHAIN (field))
> field = DECL_CHAIN (field);
>
> /* Get the appropriate field: __cpu_model.__cpu_features */
> - ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
> - field, NULL_TREE);
> + ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
> + field, NULL_TREE);
>
> /* Access the 0th element of __cpu_features array. */
> array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
> integer_zero_node, NULL_TREE, NULL_TREE);
>
> - field_val = (1 << isa_names_table[i].feature);
> + field_val = (1U << (isa_names_table[i].feature));
Parenthesis above are not needed.
> /* Return __cpu_model.__cpu_features[0] & field_val */
> final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
> build_int_cstu (unsigned_type_node, field_val));
> --- gcc/testsuite/gcc.target/i386/pr84945.c.jj 2018-03-19 16:37:14.667228396 +0100
> +++ gcc/testsuite/gcc.target/i386/pr84945.c 2018-03-19 16:36:42.057234231 +0100
> @@ -0,0 +1,16 @@
> +/* PR target/84945 */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +int
> +main ()
> +{
> + /* AVX512_VNNI instructions are all EVEX encoded, so if
> + __builtin_cpu_supports says avx512vnni is available and avx512f is not,
> + this is a GCC bug. Ditto for AVX512_BITALG */
> + if (!__builtin_cpu_supports ("avx512f")
> + && (__builtin_cpu_supports ("avx512vnni")
> + || __builtin_cpu_supports ("avx512bitalg")))
> + __builtin_abort ();
> + return 0;
> +}
> --- libgcc/config/i386/cpuinfo.c.jj 2018-03-15 09:10:20.870075051 +0100
> +++ libgcc/config/i386/cpuinfo.c 2018-03-19 16:13:25.059481079 +0100
> @@ -39,6 +39,13 @@ int __cpu_indicator_init (void)
>
>
> struct __processor_model __cpu_model = { };
> +#ifndef SHARED
> +/* We want to move away from __cpu_model in libgcc_s.so.1 and the
> + size of __cpu_model is part of ABI. So, new features that don't
> + fit into __cpu_model.__cpu_features[0] go into extra variables
> + in libgcc.a only, preferrably hidden. */
> +unsigned int __cpu_features2;
> +#endif
>
>
> /* Get the specific type of AMD CPU. */
> @@ -231,78 +238,81 @@ get_available_features (unsigned int ecx
> unsigned int ext_level;
>
> unsigned int features = 0;
> + unsigned int features2 = 0;
>
> +#define set_feature(f) \
> + if (f < 32) features |= (1U << f); else features2 |= (1U << (f - 32))
Please add some vertical space here.
> if (edx & bit_CMOV)
> - features |= (1 << FEATURE_CMOV);
> + set_feature (FEATURE_CMOV);
> if (edx & bit_MMX)
> - features |= (1 << FEATURE_MMX);
> + set_feature (FEATURE_MMX);
> if (edx & bit_SSE)
> - features |= (1 << FEATURE_SSE);
> + set_feature (FEATURE_SSE);
> if (edx & bit_SSE2)
> - features |= (1 << FEATURE_SSE2);
> + set_feature (FEATURE_SSE2);
> if (ecx & bit_POPCNT)
> - features |= (1 << FEATURE_POPCNT);
> + set_feature (FEATURE_POPCNT);
> if (ecx & bit_AES)
> - features |= (1 << FEATURE_AES);
> + set_feature (FEATURE_AES);
> if (ecx & bit_PCLMUL)
> - features |= (1 << FEATURE_PCLMUL);
> + set_feature (FEATURE_PCLMUL);
> if (ecx & bit_SSE3)
> - features |= (1 << FEATURE_SSE3);
> + set_feature (FEATURE_SSE3);
> if (ecx & bit_SSSE3)
> - features |= (1 << FEATURE_SSSE3);
> + set_feature (FEATURE_SSSE3);
> if (ecx & bit_SSE4_1)
> - features |= (1 << FEATURE_SSE4_1);
> + set_feature (FEATURE_SSE4_1);
> if (ecx & bit_SSE4_2)
> - features |= (1 << FEATURE_SSE4_2);
> + set_feature (FEATURE_SSE4_2);
> if (ecx & bit_AVX)
> - features |= (1 << FEATURE_AVX);
> + set_feature (FEATURE_AVX);
> if (ecx & bit_FMA)
> - features |= (1 << FEATURE_FMA);
> + set_feature (FEATURE_FMA);
>
> /* Get Advanced Features at level 7 (eax = 7, ecx = 0). */
> if (max_cpuid_level >= 7)
> {
> __cpuid_count (7, 0, eax, ebx, ecx, edx);
> if (ebx & bit_BMI)
> - features |= (1 << FEATURE_BMI);
> + set_feature (FEATURE_BMI);
> if (ebx & bit_AVX2)
> - features |= (1 << FEATURE_AVX2);
> + set_feature (FEATURE_AVX2);
> if (ebx & bit_BMI2)
> - features |= (1 << FEATURE_BMI2);
> + set_feature (FEATURE_BMI2);
> if (ebx & bit_AVX512F)
> - features |= (1 << FEATURE_AVX512F);
> + set_feature (FEATURE_AVX512F);
> if (ebx & bit_AVX512VL)
> - features |= (1 << FEATURE_AVX512VL);
> + set_feature (FEATURE_AVX512VL);
> if (ebx & bit_AVX512BW)
> - features |= (1 << FEATURE_AVX512BW);
> + set_feature (FEATURE_AVX512BW);
> if (ebx & bit_AVX512DQ)
> - features |= (1 << FEATURE_AVX512DQ);
> + set_feature (FEATURE_AVX512DQ);
> if (ebx & bit_AVX512CD)
> - features |= (1 << FEATURE_AVX512CD);
> + set_feature (FEATURE_AVX512CD);
> if (ebx & bit_AVX512PF)
> - features |= (1 << FEATURE_AVX512PF);
> + set_feature (FEATURE_AVX512PF);
> if (ebx & bit_AVX512ER)
> - features |= (1 << FEATURE_AVX512ER);
> + set_feature (FEATURE_AVX512ER);
> if (ebx & bit_AVX512IFMA)
> - features |= (1 << FEATURE_AVX512IFMA);
> + set_feature (FEATURE_AVX512IFMA);
> if (ecx & bit_AVX512VBMI)
> - features |= (1 << FEATURE_AVX512VBMI);
> + set_feature (FEATURE_AVX512VBMI);
> if (ecx & bit_AVX512VBMI2)
> - features |= (1 << FEATURE_AVX512VBMI2);
> + set_feature (FEATURE_AVX512VBMI2);
> if (ecx & bit_GFNI)
> - features |= (1 << FEATURE_GFNI);
> + set_feature (FEATURE_GFNI);
> if (ecx & bit_VPCLMULQDQ)
> - features |= (1 << FEATURE_VPCLMULQDQ);
> + set_feature (FEATURE_VPCLMULQDQ);
> if (ecx & bit_AVX512VNNI)
> - features |= (1 << FEATURE_AVX512VNNI);
> + set_feature (FEATURE_AVX512VNNI);
> if (ecx & bit_AVX512BITALG)
> - features |= (1 << FEATURE_AVX512BITALG);
> + set_feature (FEATURE_AVX512BITALG);
> if (ecx & bit_AVX512VPOPCNTDQ)
> - features |= (1 << FEATURE_AVX512VPOPCNTDQ);
> + set_feature (FEATURE_AVX512VPOPCNTDQ);
> if (edx & bit_AVX5124VNNIW)
> - features |= (1 << FEATURE_AVX5124VNNIW);
> + set_feature (FEATURE_AVX5124VNNIW);
> if (edx & bit_AVX5124FMAPS)
> - features |= (1 << FEATURE_AVX5124FMAPS);
> + set_feature (FEATURE_AVX5124FMAPS);
> }
>
> /* Check cpuid level of extended features. */
> @@ -313,14 +323,19 @@ get_available_features (unsigned int ecx
> __cpuid (0x80000001, eax, ebx, ecx, edx);
>
> if (ecx & bit_SSE4a)
> - features |= (1 << FEATURE_SSE4_A);
> + set_feature (FEATURE_SSE4_A);
> if (ecx & bit_FMA4)
> - features |= (1 << FEATURE_FMA4);
> + set_feature (FEATURE_FMA4);
> if (ecx & bit_XOP)
> - features |= (1 << FEATURE_XOP);
> + set_feature (FEATURE_XOP);
> }
>
> __cpu_model.__cpu_features[0] = features;
> +#ifndef SHARED
> + __cpu_features2 = features2;
> +#else
> + (void) features2;
> +#endif
> }
>
> /* A constructor function that is sets __cpu_model and __cpu_features with
> --- libgcc/config/i386/cpuinfo.h.jj 2018-03-15 09:10:20.889075049 +0100
> +++ libgcc/config/i386/cpuinfo.h 2018-03-19 16:10:34.278510545 +0100
> @@ -124,3 +124,4 @@ extern struct __processor_model
> unsigned int __cpu_subtype;
> unsigned int __cpu_features[1];
> } __cpu_model;
> +extern unsigned int __cpu_features2;
>
> Jakub