This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Fix __builtin_cpu_supports (PR target/84945)


On Mon, Mar 19, 2018 at 8:35 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> As mentioned in the PR, this is another case where we've run into too many
> separate feature bits issues on x86. unfortunately in this case it affects
> ABI.  __cpu_model variable contains just 32 bits for the features, and we
> now need 4 extra features that won't fit in there.  The current code just
> invokes in the compiler as well as runtime initialization.
>
> While on {x86_64,i?86}-linux __cpu_model is exported from libgcc_s.so.1
> only for backwards compatibility and new code is always using a hidden
> copy in each DSO or binary from libgcc.a, that is not the case on other
> x86 targets.  For a Linux only solution, we could just grow up __cpu_model
> except for the one in libgcc_s.so.1 (i.e. #ifdef SHARED) and as long as GCC
> 8+ libgcc.a would be used for linking of __builtin_cpu_supports code from
> both old and new gcc things would just work.  For other targets, as
> __cpu_model is exported data object, binaries could have copy relocations
> against it, so we really can't change the size.
>
> As the preferred way is to put it into libgcc.a only, this patch just puts
> the overflow features into a new separate variable which is not put into
> libgcc_s.so.1 at all, which will force use of the libgcc.a copy for
> at least __builtin_cpu_supports calls that ask for the new features.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux (on Haswell-E, don't
> have access to any of the CPUs with the new features), ok for trunk?
>
> 2018-03-19  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/84945
>         * config/i386/i386.c (fold_builtin_cpu): For features above 31
>         use __cpu_features2 variable instead of __cpu_model.__cpu_features[0].
>         Use 1U instead of 1.  Formatting fixes.
>
>         * gcc.target/i386/pr84945.c: New test.
>
>         * config/i386/cpuinfo.h (__cpu_features2): Declare.
>         * config/i386/cpuinfo.c (__cpu_features2): New variable for
>         ifndef SHARED only.
>         (set_feature): Define.
>         (get_available_features): Use set_feature macro.  Set __cpu_features2
>         to the second word of features ifndef SHARED.

LGTM, with a couple of nits.

Thanks,
Uros.

> --- gcc/config/i386/i386.c.jj   2018-03-17 12:11:39.954436461 +0100
> +++ gcc/config/i386/i386.c      2018-03-19 16:36:55.565231809 +0100
> @@ -33265,8 +33264,8 @@ fold_builtin_cpu (tree fndecl, tree *arg
>         }
>
>        /* Get the appropriate field in __cpu_model.  */
> -      ref =  build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
> -                    field, NULL_TREE);
> +      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
> +                   field, NULL_TREE);
>
>        /* Check the value.  */
>        final = build2 (EQ_EXPR, unsigned_type_node, ref,
> @@ -33296,20 +33295,34 @@ fold_builtin_cpu (tree fndecl, tree *arg
>           return integer_zero_node;
>         }
>
> +      if (isa_names_table[i].feature >= 32)
> +       {
> +         tree __cpu_features2_var = make_var_decl (unsigned_type_node,
> +                                                   "__cpu_features2");
> +
> +         varpool_node::add (__cpu_features2_var);
> +         field_val = (1U << (isa_names_table[i].feature - 32));
> +         /* Return __cpu_features2 & field_val  */
> +         final = build2 (BIT_AND_EXPR, unsigned_type_node,
> +                         __cpu_features2_var,
> +                         build_int_cstu (unsigned_type_node, field_val));
> +         return build1 (CONVERT_EXPR, integer_type_node, final);
> +       }
> +
>        field = TYPE_FIELDS (__processor_model_type);
>        /* Get the last field, which is __cpu_features.  */
>        while (DECL_CHAIN (field))
>          field = DECL_CHAIN (field);
>
>        /* Get the appropriate field: __cpu_model.__cpu_features  */
> -      ref =  build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
> -                    field, NULL_TREE);
> +      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
> +                   field, NULL_TREE);
>
>        /* Access the 0th element of __cpu_features array.  */
>        array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
>                           integer_zero_node, NULL_TREE, NULL_TREE);
>
> -      field_val = (1 << isa_names_table[i].feature);
> +      field_val = (1U << (isa_names_table[i].feature));

Parenthesis above are not needed.

>        /* Return __cpu_model.__cpu_features[0] & field_val  */
>        final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
>                       build_int_cstu (unsigned_type_node, field_val));
> --- gcc/testsuite/gcc.target/i386/pr84945.c.jj  2018-03-19 16:37:14.667228396 +0100
> +++ gcc/testsuite/gcc.target/i386/pr84945.c     2018-03-19 16:36:42.057234231 +0100
> @@ -0,0 +1,16 @@
> +/* PR target/84945 */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +int
> +main ()
> +{
> +  /* AVX512_VNNI instructions are all EVEX encoded, so if
> +     __builtin_cpu_supports says avx512vnni is available and avx512f is not,
> +     this is a GCC bug.  Ditto for AVX512_BITALG  */
> +  if (!__builtin_cpu_supports ("avx512f")
> +      && (__builtin_cpu_supports ("avx512vnni")
> +         || __builtin_cpu_supports ("avx512bitalg")))
> +    __builtin_abort ();
> +  return 0;
> +}
> --- libgcc/config/i386/cpuinfo.c.jj     2018-03-15 09:10:20.870075051 +0100
> +++ libgcc/config/i386/cpuinfo.c        2018-03-19 16:13:25.059481079 +0100
> @@ -39,6 +39,13 @@ int __cpu_indicator_init (void)
>
>
>  struct __processor_model __cpu_model = { };
> +#ifndef SHARED
> +/* We want to move away from __cpu_model in libgcc_s.so.1 and the
> +   size of __cpu_model is part of ABI.  So, new features that don't
> +   fit into __cpu_model.__cpu_features[0] go into extra variables
> +   in libgcc.a only, preferrably hidden.  */
> +unsigned int __cpu_features2;
> +#endif
>
>
>  /* Get the specific type of AMD CPU.  */
> @@ -231,78 +238,81 @@ get_available_features (unsigned int ecx
>    unsigned int ext_level;
>
>    unsigned int features = 0;
> +  unsigned int features2 = 0;
>
> +#define set_feature(f) \
> +  if (f < 32) features |= (1U << f); else features2 |= (1U << (f - 32))

Please add some vertical space here.

>    if (edx & bit_CMOV)
> -    features |= (1 << FEATURE_CMOV);
> +    set_feature (FEATURE_CMOV);
>    if (edx & bit_MMX)
> -    features |= (1 << FEATURE_MMX);
> +    set_feature (FEATURE_MMX);
>    if (edx & bit_SSE)
> -    features |= (1 << FEATURE_SSE);
> +    set_feature (FEATURE_SSE);
>    if (edx & bit_SSE2)
> -    features |= (1 << FEATURE_SSE2);
> +    set_feature (FEATURE_SSE2);
>    if (ecx & bit_POPCNT)
> -    features |= (1 << FEATURE_POPCNT);
> +    set_feature (FEATURE_POPCNT);
>    if (ecx & bit_AES)
> -    features |= (1 << FEATURE_AES);
> +    set_feature (FEATURE_AES);
>    if (ecx & bit_PCLMUL)
> -    features |= (1 << FEATURE_PCLMUL);
> +    set_feature (FEATURE_PCLMUL);
>    if (ecx & bit_SSE3)
> -    features |= (1 << FEATURE_SSE3);
> +    set_feature (FEATURE_SSE3);
>    if (ecx & bit_SSSE3)
> -    features |= (1 << FEATURE_SSSE3);
> +    set_feature (FEATURE_SSSE3);
>    if (ecx & bit_SSE4_1)
> -    features |= (1 << FEATURE_SSE4_1);
> +    set_feature (FEATURE_SSE4_1);
>    if (ecx & bit_SSE4_2)
> -    features |= (1 << FEATURE_SSE4_2);
> +    set_feature (FEATURE_SSE4_2);
>    if (ecx & bit_AVX)
> -    features |= (1 << FEATURE_AVX);
> +    set_feature (FEATURE_AVX);
>    if (ecx & bit_FMA)
> -    features |= (1 << FEATURE_FMA);
> +    set_feature (FEATURE_FMA);
>
>    /* Get Advanced Features at level 7 (eax = 7, ecx = 0). */
>    if (max_cpuid_level >= 7)
>      {
>        __cpuid_count (7, 0, eax, ebx, ecx, edx);
>        if (ebx & bit_BMI)
> -        features |= (1 << FEATURE_BMI);
> +       set_feature (FEATURE_BMI);
>        if (ebx & bit_AVX2)
> -       features |= (1 << FEATURE_AVX2);
> +       set_feature (FEATURE_AVX2);
>        if (ebx & bit_BMI2)
> -        features |= (1 << FEATURE_BMI2);
> +       set_feature (FEATURE_BMI2);
>        if (ebx & bit_AVX512F)
> -       features |= (1 << FEATURE_AVX512F);
> +       set_feature (FEATURE_AVX512F);
>        if (ebx & bit_AVX512VL)
> -       features |= (1 << FEATURE_AVX512VL);
> +       set_feature (FEATURE_AVX512VL);
>        if (ebx & bit_AVX512BW)
> -       features |= (1 << FEATURE_AVX512BW);
> +       set_feature (FEATURE_AVX512BW);
>        if (ebx & bit_AVX512DQ)
> -       features |= (1 << FEATURE_AVX512DQ);
> +       set_feature (FEATURE_AVX512DQ);
>        if (ebx & bit_AVX512CD)
> -       features |= (1 << FEATURE_AVX512CD);
> +       set_feature (FEATURE_AVX512CD);
>        if (ebx & bit_AVX512PF)
> -       features |= (1 << FEATURE_AVX512PF);
> +       set_feature (FEATURE_AVX512PF);
>        if (ebx & bit_AVX512ER)
> -       features |= (1 << FEATURE_AVX512ER);
> +       set_feature (FEATURE_AVX512ER);
>        if (ebx & bit_AVX512IFMA)
> -       features |= (1 << FEATURE_AVX512IFMA);
> +       set_feature (FEATURE_AVX512IFMA);
>        if (ecx & bit_AVX512VBMI)
> -       features |= (1 << FEATURE_AVX512VBMI);
> +       set_feature (FEATURE_AVX512VBMI);
>        if (ecx & bit_AVX512VBMI2)
> -       features |= (1 << FEATURE_AVX512VBMI2);
> +       set_feature (FEATURE_AVX512VBMI2);
>        if (ecx & bit_GFNI)
> -       features |= (1 << FEATURE_GFNI);
> +       set_feature (FEATURE_GFNI);
>        if (ecx & bit_VPCLMULQDQ)
> -       features |= (1 << FEATURE_VPCLMULQDQ);
> +       set_feature (FEATURE_VPCLMULQDQ);
>        if (ecx & bit_AVX512VNNI)
> -       features |= (1 << FEATURE_AVX512VNNI);
> +       set_feature (FEATURE_AVX512VNNI);
>        if (ecx & bit_AVX512BITALG)
> -       features |= (1 << FEATURE_AVX512BITALG);
> +       set_feature (FEATURE_AVX512BITALG);
>        if (ecx & bit_AVX512VPOPCNTDQ)
> -       features |= (1 << FEATURE_AVX512VPOPCNTDQ);
> +       set_feature (FEATURE_AVX512VPOPCNTDQ);
>        if (edx & bit_AVX5124VNNIW)
> -       features |= (1 << FEATURE_AVX5124VNNIW);
> +       set_feature (FEATURE_AVX5124VNNIW);
>        if (edx & bit_AVX5124FMAPS)
> -       features |= (1 << FEATURE_AVX5124FMAPS);
> +       set_feature (FEATURE_AVX5124FMAPS);
>      }
>
>    /* Check cpuid level of extended features.  */
> @@ -313,14 +323,19 @@ get_available_features (unsigned int ecx
>        __cpuid (0x80000001, eax, ebx, ecx, edx);
>
>        if (ecx & bit_SSE4a)
> -       features |= (1 << FEATURE_SSE4_A);
> +       set_feature (FEATURE_SSE4_A);
>        if (ecx & bit_FMA4)
> -       features |= (1 << FEATURE_FMA4);
> +       set_feature (FEATURE_FMA4);
>        if (ecx & bit_XOP)
> -       features |= (1 << FEATURE_XOP);
> +       set_feature (FEATURE_XOP);
>      }
>
>    __cpu_model.__cpu_features[0] = features;
> +#ifndef SHARED
> +  __cpu_features2 = features2;
> +#else
> +  (void) features2;
> +#endif
>  }
>
>  /* A constructor function that is sets __cpu_model and __cpu_features with
> --- libgcc/config/i386/cpuinfo.h.jj     2018-03-15 09:10:20.889075049 +0100
> +++ libgcc/config/i386/cpuinfo.h        2018-03-19 16:10:34.278510545 +0100
> @@ -124,3 +124,4 @@ extern struct __processor_model
>    unsigned int __cpu_subtype;
>    unsigned int __cpu_features[1];
>  } __cpu_model;
> +extern unsigned int __cpu_features2;
>
>         Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]