[patch 1/3] AMD bdver2 processors - BMI

Quentin Neill quentin.neill.gnu@gmail.com
Mon Oct 18 19:14:00 GMT 2010


On Fri, Oct 15, 2010 at 1:45 PM, Quentin Neill
<quentin.neill.gnu@gmail.com> wrote:
> These patches add support for upcoming bdver2 AMD processors:
> BMI (Bit Manipulation Instructions)
> TBM (Trailing Bit Manipulation)
> FMA3 (three operand FMA) instructions
>
> The public specifications for BMI and TBM are in progress (they are
> today available under NDA).  They will appear in one of the AMD64
> Architecture Programmer's Manual Volumes 3-6.   I can post the
> mnemonics definitions if needed.  The FMA3 specification is documented
> in http://software.intel.com/en-us/avx/
>
>
> 2010-10-15  Quentin Neill  <quentin.neill.gnu@amd.com>
>
> gcc/
>        * config.gcc (i[34567]86-*-*): Include bmiintrin.h.
>        (x86_64-*-*): Likewise.
>
>        * config/i386/cpuid.h: Define BMI bit.
>
>        * config/i386/driver-i386.c (host_detect_local_cpu): Define
>        and set has_bmi.
>
>        * config/i386/i386-builtin-types.def (UINT_FTYPE_UINT_UINT): New.
>        (UINT64_FTYPE_UINT64_UINT64): New.
>
>        * config/i386/i386-c.c (ix86_target_macros_internal): Check
>        isa_flag for BMI.
>
>        * config/i386/i386.c (OPTION_MASK_ISA_BMI_SET): New.
>        (OPTION_MASK_ISA_BMI_UNSET): New.
>        (ix86_handle_option): Handle -mbmi.
>        (isa_opts): Add -mbmi.
>        (enum pta_flags): Add PTA_BMI.
>        (ix86_option_override_internal): Add BMI support.
>        (ix86_valid_target_attribute_inner_p): Handle -mbmi.
>        (IX86_BUILTIN_ANDN32): New for BMI intrinsic.
>        (IX86_BUILTIN_ANDN64): Likewise.
>        (IX86_BUILTIN_BEXTR32): Likewise.
>        (IX86_BUILTIN_BEXTR64): Likewise.
>        (IX86_BUILTIN_BLSI32): Likewise.
>        (IX86_BUILTIN_BLSI64): Likewise.
>        (IX86_BUILTIN_BLSMSK32): Likewise.
>        (IX86_BUILTIN_BLSMSK64): Likewise.
>        (IX86_BUILTIN_BLSR32): Likewise.
>        (IX86_BUILTIN_BLSR64): Likewise.
>        (IX86_BUILTIN_TZCNT16): Likewise.
>        (IX86_BUILTIN_TZCNT32): Likewise.
>        (IX86_BUILTIN_TZCNT64): Likewise.
>        (bdesc_args): Add BMI intrinsics.
>        (ix86_expand_args_builtin): Add BMI specific cases.
>
>        * config/i386/i386.h (TARGET_BMI): New for BMI.
>
>        * config/i386/i386.md (UNSPEC_ANDN): New for BMI.
>        (UNSPEC_BEXTR): Likewise.
>        (UNSPEC_BLSI): Likewise.
>        (UNSPEC_BLSMSK): Likewise.
>        (UNSPEC_BLSR): Likewise.
>        (UNSPEC_TZCNT): Likewise.
>        (bmi_andn<mode>): Likewise.
>        (bmi_bextr<mode>): Likewise.
>        (bmi_blsi<mode>): Likewise.
>        (bmi_blsmsk<mode>): Likewise.
>        (bmi_blsr<mode>): Likewise.
>        (bmi_tzcnt<mode>): Likewise.
>        (bsr_rex64): Likewise.
>
>        * config/i386/i386.opt: Add -mbmi.
>
>        * config/i386/x86intrin.h: Add BMI check and bmiintrin.h.
>
>        * config/i386/bmiintrin.h (__tzcnt_u16): New.
>        (__lzcnt_u16): Likewise.
>        (__bextr_u32): Likewise.
>        (__andn_u32): Likewise.
>        (__tzcnt_u32): Likewise.
>        (__lzcnt_u32): Likewise.
>        (__blsr_u32): Likewise.
>        (__blsmsk_u32): Likewise.
>        (__blsi_u32): Likewise.
>        (__bextr_u64): Likewise.
>        (__andn_u64): Likewise.
>        (__tzcnt_u64): Likewise.
>        (__lzcnt_u64): Likewise.
>        (__blsr_u64): Likewise.
>        (__blsmsk_u64): Likewise.
>        (__blsi_u64): Likewise.
>
>
>        * doc/invoke.texi: Document -mbmi.
>
>        * doc/extend.texi: Document BMI built-in functions.
>
> gcc/testsuite/
>        * g++.dg/other/i386-2.C: Add -mbmi.
>
>        * g++.dg/other/i386-3.C: Likewise.
>
>        * gcc.target/i386/funcspec-5.c: Add bmi and no-bmi targets.
>
>        * gcc.target/i386/funcspec-6.c: Likewise.
>
>        * gcc.target/i386/sse-12.c: Add -mbmi.
>
>        * gcc.target/i386/bmi-1.c: New file.
>
>        * gcc.target/i386/bmi-2.c: Likewise.
>
>        * gcc.target/i386/bmi-3.c: Likewise.
>
>
> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index b353fa0..4034241 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -318,7 +318,7 @@ i[34567]86-*-*)
>                       nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
>                       immintrin.h x86intrin.h avxintrin.h xopintrin.h
>                       ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
> -                      abmintrin.h"
> +                      abmintrin.h bmiintrin.h"
>        ;;
>  x86_64-*-*)
>        cpu_type=i386
> @@ -329,7 +329,7 @@ x86_64-*-*)
>                       nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
>                       immintrin.h x86intrin.h avxintrin.h xopintrin.h
>                       ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
> -                      abmintrin.h"
> +                      abmintrin.h bmiintrin.h"
>        need_64bit_hwint=yes
>        ;;
>  ia64-*-*)
> diff --git a/gcc/config/i386/bmiintrin.h b/gcc/config/i386/bmiintrin.h
> new file mode 100644
> index 0000000..f4422cd
> --- /dev/null
> +++ b/gcc/config/i386/bmiintrin.h
> @@ -0,0 +1,134 @@
> +/* Copyright (C) 2010 Free Software Foundation, Inc.
> +
> +   This file is part of GCC.
> +
> +   GCC is free software; you can redistribute it and/or modify
> +   it under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3, or (at your option)
> +   any later version.
> +
> +   GCC is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +   GNU General Public License for more details.
> +
> +   Under Section 7 of GPL version 3, you are granted additional
> +   permissions described in the GCC Runtime Library Exception, version
> +   3.1, as published by the Free Software Foundation.
> +
> +   You should have received a copy of the GNU General Public License and
> +   a copy of the GCC Runtime Library Exception along with this program;
> +   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#ifndef _X86INTRIN_H_INCLUDED
> +# error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
> +#endif
> +
> +#ifndef __BMI__
> +# error "BMI instruction set not enabled"
> +#endif /* __BMI__ */
> +
> +#ifndef _BMIINTRIN_H_INCLUDED
> +#define _BMIINTRIN_H_INCLUDED
> +
> +extern __inline unsigned short __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__tzcnt_u16 (unsigned short __X)
> +{
> +  return __builtin_ia32_tzcnt_u16 (__X);
> +}
> +extern __inline unsigned short __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__lzcnt_u16 (unsigned short __X)
> +{
> +  return __builtin_ia32_lzcnt_u16 (__X);
> +}
> +
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__bextr_u32 (unsigned int __X, unsigned int __Y)
> +{
> +  return __builtin_ia32_bextr_u32 (__X, __Y);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__andn_u32 (unsigned int __X, unsigned int __Y)
> +{
> +  return __builtin_ia32_andn_u32 (__X, __Y);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__tzcnt_u32 (unsigned int __X)
> +{
> +  return __builtin_ia32_tzcnt_u32 (__X);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__lzcnt_u32 (unsigned int __X)
> +{
> +  return __builtin_ia32_lzcnt_u32 (__X);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blsr_u32 (unsigned int __X)
> +{
> +  return __builtin_ia32_blsr_u32 (__X);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blsmsk_u32 (unsigned int __X)
> +{
> +  return __builtin_ia32_blsmsk_u32 (__X);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blsi_u32 (unsigned int __X)
> +{
> +  return __builtin_ia32_blsi_u32 (__X);
> +}
> +
> +
> +#ifdef  __x86_64__
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__bextr_u64 (unsigned long long __X, unsigned long long __Y)
> +{
> +  return __builtin_ia32_bextr_u64 (__X, __Y);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__andn_u64 (unsigned long long __X, unsigned long long __Y)
> +{
> +  return __builtin_ia32_andn_u64 (__X, __Y);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__tzcnt_u64 (unsigned long long __X)
> +{
> +  return __builtin_ia32_tzcnt_u64 (__X);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__lzcnt_u64 (unsigned long long __X)
> +{
> +  return __builtin_ia32_lzcnt_u64 (__X);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blsr_u64 (unsigned long long __X)
> +{
> +  return __builtin_ia32_blsr_u64 (__X);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blsmsk_u64 (unsigned long long __X)
> +{
> +  return __builtin_ia32_blsmsk_u64 (__X);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blsi_u64 (unsigned long long __X)
> +{
> +  return __builtin_ia32_blsi_u64 (__X);
> +}
> +#endif /* __x86_64__  */
> +
> +#endif /* _BMIINTRIN_H_INCLUDED */
> diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
> index 11c2f1e..0f1af7f 100644
> --- a/gcc/config/i386/cpuid.h
> +++ b/gcc/config/i386/cpuid.h
> @@ -62,6 +62,7 @@
>
>  /* Extended Features (%eax == 7) */
>  #define bit_FSGSBASE   (1 << 0)
> +#define bit_BMI                (1 << 3)
>
>  #if defined(__i386__) && defined(__PIC__)
>  /* %ebx may be the PIC register.  */
> diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
> index 8a76857..a7d6808 100644
> --- a/gcc/config/i386/driver-i386.c
> +++ b/gcc/config/i386/driver-i386.c
> @@ -397,6 +397,7 @@ const char *host_detect_local_cpu (int argc, const
> char **argv)
>   unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0;
>   unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
>   unsigned int has_fma4 = 0, has_xop = 0;
> +  unsigned int has_bmi = 0;
>
>   bool arch;
>
> @@ -467,6 +468,10 @@ const char *host_detect_local_cpu (int argc,
> const char **argv)
>       has_longmode = edx & bit_LM;
>       has_3dnowp = edx & bit_3DNOWP;
>       has_3dnow = edx & bit_3DNOW;
> +
> +      __cpuid (0x7, eax, ebx, ecx, edx);
> +
> +      has_bmi = ebx & bit_BMI;
>     }
>
>   if (!arch)
> diff --git a/gcc/config/i386/i386-builtin-types.def
> b/gcc/config/i386/i386-builtin-types.def
> index 09dd9eb..110b81d 100644
> --- a/gcc/config/i386/i386-builtin-types.def
> +++ b/gcc/config/i386/i386-builtin-types.def
> @@ -145,7 +145,9 @@ DEF_FUNCTION_TYPE (INT64, INT64)
>  DEF_FUNCTION_TYPE (INT64, V2DF)
>  DEF_FUNCTION_TYPE (INT64, V4SF)
>  DEF_FUNCTION_TYPE (UINT64, INT)
> +DEF_FUNCTION_TYPE (UINT, UINT)
>  DEF_FUNCTION_TYPE (UINT16, UINT16)
> +DEF_FUNCTION_TYPE (UINT64, UINT64)
>  DEF_FUNCTION_TYPE (UINT64, PUNSIGNED)
>  DEF_FUNCTION_TYPE (V16QI, PCCHAR)
>  DEF_FUNCTION_TYPE (V16QI, V16QI)
> diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
> index 1846efb..e84347c 100644
> --- a/gcc/config/i386/i386-c.c
> +++ b/gcc/config/i386/i386-c.c
> @@ -244,6 +244,8 @@ ix86_target_macros_internal (int isa_flag,
>     def_or_undef (parse_in, "__LWP__");
>   if (isa_flag & OPTION_MASK_ISA_ABM)
>     def_or_undef (parse_in, "__ABM__");
> +  if (isa_flag & OPTION_MASK_ISA_BMI)
> +    def_or_undef (parse_in, "__BMI__");
>   if (isa_flag & OPTION_MASK_ISA_POPCNT)
>     def_or_undef (parse_in, "__POPCNT__");
>   if (isa_flag & OPTION_MASK_ISA_FSGSBASE)
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 91e3839..e003ee7 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -2079,6 +2079,8 @@ static int ix86_isa_flags_explicit;
>  #define OPTION_MASK_ISA_ABM_SET \
>   (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
>
> +#define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
> +
>  #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
>  #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
>  #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
> @@ -2133,6 +2135,7 @@ static int ix86_isa_flags_explicit;
>  #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
>  #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
>  #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
> +#define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
>  #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
>  #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
>  #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
> @@ -2430,6 +2433,19 @@ ix86_handle_option (size_t code, const char
> *arg ATTRIBUTE_UNUSED, int value)
>        }
>       return true;
>
> +    case OPT_mbmi:
> +      if (value)
> +       {
> +         ix86_isa_flags |= OPTION_MASK_ISA_BMI_SET;
> +         ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_SET;
> +       }
> +      else
> +       {
> +         ix86_isa_flags &= ~OPTION_MASK_ISA_BMI_UNSET;
> +         ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_UNSET;
> +       }
> +      return true;
> +
>     case OPT_mpopcnt:
>       if (value)
>        {
> @@ -2598,6 +2614,7 @@ ix86_target_string (int isa, int flags, const
> char *arch, const char *tune,
>     { "-m3dnowa",      OPTION_MASK_ISA_3DNOW_A },
>     { "-mmmx",         OPTION_MASK_ISA_MMX },
>     { "-mabm",         OPTION_MASK_ISA_ABM },
> +    { "-mbmi",         OPTION_MASK_ISA_BMI },
>     { "-mpopcnt",      OPTION_MASK_ISA_POPCNT },
>     { "-mmovbe",       OPTION_MASK_ISA_MOVBE },
>     { "-mcrc32",       OPTION_MASK_ISA_CRC32 },
> @@ -2852,7 +2869,9 @@ ix86_option_override_internal (bool main_args_p)
>       PTA_LWP = 1 << 23,
>       PTA_FSGSBASE = 1 << 24,
>       PTA_RDRND = 1 << 25,
> -      PTA_F16C = 1 << 26
> +      PTA_F16C = 1 << 26,
> +      PTA_BMI = 1 << 27,
> +      /* if this reaches 32, need to widen struct pta flags below */
>     };
>
>   static struct pta
> @@ -3184,6 +3203,9 @@ ix86_option_override_internal (bool main_args_p)
>        if (processor_alias_table[i].flags & PTA_ABM
>            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
>          ix86_isa_flags |= OPTION_MASK_ISA_ABM;
> +       if (processor_alias_table[i].flags & PTA_BMI
> +           && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
> +         ix86_isa_flags |= OPTION_MASK_ISA_BMI;
>        if (processor_alias_table[i].flags & PTA_CX16
>            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
>          ix86_isa_flags |= OPTION_MASK_ISA_CX16;
> @@ -3928,6 +3950,7 @@ ix86_valid_target_attribute_inner_p (tree args,
> char *p_strings[])
>     /* isa options */
>     IX86_ATTR_ISA ("3dnow",    OPT_m3dnow),
>     IX86_ATTR_ISA ("abm",      OPT_mabm),
> +    IX86_ATTR_ISA ("bmi",      OPT_mbmi),
>     IX86_ATTR_ISA ("aes",      OPT_maes),
>     IX86_ATTR_ISA ("avx",      OPT_mavx),
>     IX86_ATTR_ISA ("mmx",      OPT_mmmx),
> @@ -22954,6 +22977,21 @@ enum ix86_builtins
>
>   IX86_BUILTIN_CLZS,
>
> +  /* BMI instructions.  */
> +  IX86_BUILTIN_ANDN32,
> +  IX86_BUILTIN_ANDN64,
> +  IX86_BUILTIN_BEXTR32,
> +  IX86_BUILTIN_BEXTR64,
> +  IX86_BUILTIN_BLSI32,
> +  IX86_BUILTIN_BLSI64,
> +  IX86_BUILTIN_BLSMSK32,
> +  IX86_BUILTIN_BLSMSK64,
> +  IX86_BUILTIN_BLSR32,
> +  IX86_BUILTIN_BLSR64,
> +  IX86_BUILTIN_TZCNT16,
> +  IX86_BUILTIN_TZCNT32,
> +  IX86_BUILTIN_TZCNT64,
> +
>   /* FSGSBASE instructions.  */
>   IX86_BUILTIN_RDFSBASE32,
>   IX86_BUILTIN_RDFSBASE64,
> @@ -23893,6 +23931,21 @@ static const struct builtin_description bdesc_args[] =
>
>   { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm,   "__builtin_clzs",
> IX86_BUILTIN_CLZS,    UNKNOWN,     (int) UINT16_FTYPE_UINT16 },
>
> +  /* BMI */
> +  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_andnsi,
> "__builtin_ia32_andn_u32", IX86_BUILTIN_ANDN32, UNKNOWN, (int)
> UINT_FTYPE_UINT_UINT },
> +  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_andndi,
> "__builtin_ia32_andn_u64", IX86_BUILTIN_ANDN64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64_UINT64 },
> +  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextrsi,
> "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int)
> UINT_FTYPE_UINT_UINT },
> +  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextrdi,
> "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64_UINT64 },
> +  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_tzcnthi,
> "__builtin_ia32_tzcnt_u16", IX86_BUILTIN_TZCNT16, UNKNOWN, (int)
> UINT16_FTYPE_UINT16 },
> +  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_tzcntsi,
> "__builtin_ia32_tzcnt_u32", IX86_BUILTIN_TZCNT32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> +  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_tzcntdi,
> "__builtin_ia32_tzcnt_u64", IX86_BUILTIN_TZCNT64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> +  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_blsisi,
> "__builtin_ia32_blsi_u32", IX86_BUILTIN_BLSI32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> +  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_blsidi,
> "__builtin_ia32_blsi_u64", IX86_BUILTIN_BLSI64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> +  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_blsmsksi,
> "__builtin_ia32_blsmsk_u32", IX86_BUILTIN_BLSMSK32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> +  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_blsmskdi,
> "__builtin_ia32_blsmsk_u64", IX86_BUILTIN_BLSMSK64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> +  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_blsrsi,
> "__builtin_ia32_blsr_u32", IX86_BUILTIN_BLSR32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> +  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_blsrdi,
> "__builtin_ia32_blsr_u64", IX86_BUILTIN_BLSR64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> +
>   /* F16C */
>   { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps,
> "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int)
> V4SF_FTYPE_V8HI },
>   { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256,
> "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN,
> (int) V8SF_FTYPE_V8HI },
> @@ -25118,9 +25171,11 @@ ix86_expand_args_builtin (const struct
> builtin_description *d,
>     case FLOAT128_FTYPE_FLOAT128:
>     case FLOAT_FTYPE_FLOAT:
>     case INT_FTYPE_INT:
> +    case UINT_FTYPE_UINT:
>     case UINT64_FTYPE_INT:
>     case UINT16_FTYPE_UINT16:
>     case INT64_FTYPE_INT64:
> +    case UINT64_FTYPE_UINT64:
>     case INT64_FTYPE_V4SF:
>     case INT64_FTYPE_V2DF:
>     case INT_FTYPE_V16QI:
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 719761d..4fba57d 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -59,6 +59,7 @@ see the files COPYING3 and COPYING.RUNTIME
> respectively.  If not, see
>  #define TARGET_LWP     OPTION_ISA_LWP
>  #define TARGET_ROUND   OPTION_ISA_ROUND
>  #define TARGET_ABM     OPTION_ISA_ABM
> +#define TARGET_BMI     OPTION_ISA_BMI
>  #define TARGET_POPCNT  OPTION_ISA_POPCNT
>  #define TARGET_SAHF    OPTION_ISA_SAHF
>  #define TARGET_MOVBE   OPTION_ISA_MOVBE
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index d2ad8b1..967886d 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -229,6 +229,14 @@
>   UNSPEC_VTESTP
>   UNSPEC_VCVTPH2PS
>   UNSPEC_VCVTPS2PH
> +
> +  ;; For BMI support
> +  UNSPEC_ANDN
> +  UNSPEC_BEXTR
> +  UNSPEC_BLSI
> +  UNSPEC_BLSMSK
> +  UNSPEC_BLSR
> +  UNSPEC_TZCNT
>  ])
>
>  (define_c_enum "unspecv" [
> @@ -11851,6 +11859,63 @@
>    (set_attr "type" "bitmanip")
>    (set_attr "mode" "<MODE>")])
>
> +;; BMI instructions.
> +(define_insn "bmi_andn<mode>"
> +  [(set (match_operand:SWI48 0 "register_operand" "=r")
> +       (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")
> +                      (match_operand:SWI48 2 "nonimmediate_operand" "rm")]
> +                      UNSPEC_ANDN))]
> +  "TARGET_BMI"
> +  "andn\t{%2, %1, %0|%0, %1, %2}"
> +  [(set_attr "type" "bitmanip")
> +   (set_attr "mode" "<MODE>")])
> +
> +(define_insn "bmi_bextr<mode>"
> +  [(set (match_operand:SWI48 0 "register_operand" "=r")
> +       (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")
> +                      (match_operand:SWI48 2 "register_operand" "r")]
> +                      UNSPEC_BEXTR))]
> +  "TARGET_BMI"
> +  "bextr\t{%2, %1, %0|%0, %1, %2}"
> +  [(set_attr "type" "bitmanip")
> +   (set_attr "mode" "<MODE>")])
> +
> +(define_insn "bmi_blsi<mode>"
> +  [(set (match_operand:SWI48 0 "register_operand" "=r")
> +       (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
> +                      UNSPEC_BLSI))]
> +  "TARGET_BMI"
> +  "blsi\t{%1, %0|%0, %1}"
> +  [(set_attr "type" "bitmanip")
> +   (set_attr "mode" "<MODE>")])
> +
> +(define_insn "bmi_blsmsk<mode>"
> +  [(set (match_operand:SWI48 0 "register_operand" "=r")
> +       (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
> +                      UNSPEC_BLSMSK))]
> +  "TARGET_BMI"
> +  "blsmsk\t{%1, %0|%0, %1}"
> +  [(set_attr "type" "bitmanip")
> +   (set_attr "mode" "<MODE>")])
> +
> +(define_insn "bmi_blsr<mode>"
> +  [(set (match_operand:SWI48 0 "register_operand" "=r")
> +       (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
> +                      UNSPEC_BLSR))]
> +  "TARGET_BMI"
> +  "blsr\t{%1, %0|%0, %1}"
> +  [(set_attr "type" "bitmanip")
> +   (set_attr "mode" "<MODE>")])
> +
> +(define_insn "bmi_tzcnt<mode>"
> +  [(set (match_operand:SWI248 0 "register_operand" "=r")
> +       (unspec:SWI248 [(match_operand:SWI248 1 "nonimmediate_operand" "rm")]
> +               UNSPEC_TZCNT))]
> +  "TARGET_BMI"
> +  "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
> +  [(set_attr "type" "bitmanip")
> +   (set_attr "mode" "<MODE>")])
> +
>  (define_insn "bsr_rex64"
>   [(set (match_operand:DI 0 "register_operand" "=r")
>        (minus:DI (const_int 63)
> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> index 9c1fe1f..d808804 100644
> --- a/gcc/config/i386/i386.opt
> +++ b/gcc/config/i386/i386.opt
> @@ -353,6 +353,10 @@ mpopcnt
>  Target Report Mask(ISA_POPCNT) Var(ix86_isa_flags) Save
>  Support code generation of popcnt instruction.
>
> +mbmi
> +Target Report Mask(ISA_BMI) Var(ix86_isa_flags) Save
> +Support BMI built-in functions and code generation
> +
>  mcx16
>  Target Report Mask(ISA_CX16) Var(ix86_isa_flags) Save
>  Support code generation of cmpxchg16b instruction.
> diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h
> index 29d44dc..9a7366b 100644
> --- a/gcc/config/i386/x86intrin.h
> +++ b/gcc/config/i386/x86intrin.h
> @@ -81,6 +81,10 @@
>  #include <abmintrin.h>
>  #endif
>
> +#ifdef __BMI__
> +#include <bmiintrin.h>
> +#endif
> +
>  #ifdef __POPCNT__
>  #include <popcntintrin.h>
>  #endif
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index fc40d53..a7be54b 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -9351,6 +9351,25 @@ unsigned char __builtin_ia32_lwpins32 (unsigned
> int, unsigned int, unsigned int)
>  unsigned char __builtin_ia32_lwpins64 (unsigned __int64, unsigned
> int, unsigned int)
>  @end smallexample
>
> +The following built-in functions are available when @option{-mbmi} is used.
> +All of them generate the machine instruction that is part of the name.
> +@smallexample
> +unsigned int __builtin_ia32_bextr_u32(unsigned int, unsigned int);
> +unsigned long long __builtin_ia32_bextr_u64 (unsigned long long,
> unsigned long long);
> +unsigned int __builtin_ia32_andn_u32(unsigned int, unsigned int);
> +unsigned long long __builtin_ia32_andn_u64 (unsigned long long,
> unsigned long long);
> +unsigned int __builtin_ia32_tzcnt_u32(unsigned int);
> +unsigned long long __builtin_ia32_tzcnt_u64 (unsigned long long);
> +unsigned int __builtin_ia32_lzcnt_u32(unsigned int);
> +unsigned long long __builtin_ia32_lzcnt_u64 (unsigned long long);
> +unsigned int __builtin_ia32_blsr_u32(unsigned int);
> +unsigned long long __builtin_ia32_blsr_u64 (unsigned long long);
> +unsigned int __builtin_ia32_blsmsk_u32(unsigned int);
> +unsigned long long __builtin_ia32_blsmsk_u64 (unsigned long long);
> +unsigned int __builtin_ia32_blsi_u32(unsigned int);
> +unsigned long long __builtin_ia32_blsi_u64 (unsigned long long);
> +@end smallexample
> +
>  The following built-in functions are available when @option{-m3dnow} is used.
>  All of them generate the machine instruction that is part of the name.
>
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 776fdd0..071ad27 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -597,7 +597,7 @@ Objective-C and Objective-C++ Dialects}.
>  -mcld -mcx16 -msahf -mmovbe -mcrc32 -mrecip @gol
>  -mmmx  -msse  -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol
>  -maes -mpclmul -mfsgsbase -mrdrnd -mf16c -mfused-madd @gol
> --msse4a -m3dnow -mpopcnt -mabm -mfma4 -mxop -mlwp @gol
> +-msse4a -m3dnow -mpopcnt -mabm -mbmi -mfma4 -mxop -mlwp @gol
>  -mthreads  -mno-align-stringops  -minline-all-stringops @gol
>  -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
>  -mpush-args  -maccumulate-outgoing-args  -m128bit-long-double @gol
> @@ -12418,6 +12418,8 @@ preferred alignment to
> @option{-mpreferred-stack-boundary=2}.
>  @itemx -mno-popcnt
>  @itemx -mabm
>  @itemx -mno-abm
> +@itemx -mbmi
> +@itemx -mno-bmi
>  @opindex mmmx
>  @opindex mno-mmx
>  @opindex msse
> @@ -12426,7 +12428,7 @@ preferred alignment to
> @option{-mpreferred-stack-boundary=2}.
>  @opindex mno-3dnow
>  These switches enable or disable the use of instructions in the MMX,
>  SSE, SSE2, SSE3, SSSE3, SSE4.1, AVX, AES, PCLMUL, FSGSBASE, RDRND,
> -F16C, SSE4A, FMA4, XOP, LWP, ABM or 3DNow!@: extended instruction sets.
> +F16C, SSE4A, FMA4, XOP, LWP, ABM, BMI, or 3DNow!@: extended instruction sets.
>  These extensions are also available as built-in functions: see
>  @ref{X86 Built-in Functions}, for details of the functions enabled and
>  disabled by these switches.
> diff --git a/gcc/testsuite/g++.dg/other/i386-2.C
> b/gcc/testsuite/g++.dg/other/i386-2.C
> index 7297068..f0a382a 100644
> --- a/gcc/testsuite/g++.dg/other/i386-2.C
> +++ b/gcc/testsuite/g++.dg/other/i386-2.C
> @@ -1,8 +1,8 @@
>  /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> -/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -mxop
> -maes -mpclmul -mpopcnt -mabm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
> +/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -mxop
> -maes -mpclmul -mpopcnt -mabm -mbmi -mlwp -mfsgsbase -mrdrnd -mf16c" }
> */
>
>  /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, abmintrin.h,
> -   lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with
> +   bmiintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with
>    -O -pedantic-errors.  */
>
>  #include <x86intrin.h>
> diff --git a/gcc/testsuite/g++.dg/other/i386-3.C
> b/gcc/testsuite/g++.dg/other/i386-3.C
> index 75515ef..4b27372 100644
> --- a/gcc/testsuite/g++.dg/other/i386-3.C
> +++ b/gcc/testsuite/g++.dg/other/i386-3.C
> @@ -1,8 +1,8 @@
>  /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> -/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx
> -mxop -maes -mpclmul -mpopcnt -mabm -mlwp -mfsgsbase -mrdrnd -mf16c" }
> */
> +/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx
> -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mlwp -mfsgsbase -mrdrnd
> -mf16c" } */
>
>  /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, abmintrin.h,
> -   lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with
> +   bmiintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with
>    -O -fkeep-inline-functions.  */
>
>  #include <x86intrin.h>
> diff --git a/gcc/testsuite/gcc.target/i386/bmi-1.c
> b/gcc/testsuite/gcc.target/i386/bmi-1.c
> new file mode 100644
> index 0000000..dc964ba
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/bmi-1.c
> @@ -0,0 +1,46 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mbmi " } */
> +/* { dg-final { scan-assembler "andn\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "bextr\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "blsi\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "blsmsk\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "blsr\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "tzcntl\[^\\n]*(%|)eax" } } */
> +
> +#include <x86intrin.h>
> +
> +unsigned int
> +func_andn32 (unsigned int X, unsigned int Y)
> +{
> +  return __andn_u32(X, Y);
> +}
> +
> +unsigned int
> +func_bextr32 (unsigned int X, unsigned int Y)
> +{
> +  return __bextr_u32(X, Y);
> +}
> +
> +unsigned int
> +func_blsi32 (unsigned int X)
> +{
> +  return __blsi_u32(X);
> +}
> +
> +unsigned int
> +func_blsmsk32 (unsigned int X)
> +{
> +  return __blsmsk_u32(X);
> +}
> +
> +unsigned int
> +func_blsr32 (unsigned int X)
> +{
> +  return __blsr_u32(X);
> +}
> +
> +unsigned int
> +func_tzcnt32 (unsigned int X)
> +{
> +  return __tzcnt_u32(X);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/bmi-2.c
> b/gcc/testsuite/gcc.target/i386/bmi-2.c
> new file mode 100644
> index 0000000..6250949
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/bmi-2.c
> @@ -0,0 +1,47 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target lp64 } */
> +/* { dg-options "-O2 -mbmi " } */
> +/* { dg-final { scan-assembler "andn\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "bextr\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "blsi\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "blsmsk\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "blsr\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "tzcntq\[^\\n]*(%|)rax" } } */
> +
> +#include <x86intrin.h>
> +
> +unsigned long long
> +func_andn64 (unsigned long long X, unsigned long long Y)
> +{
> +  return __andn_u64 (X, Y);
> +}
> +
> +unsigned long long
> +func_bextr64 (unsigned long long X, unsigned long long Y)
> +{
> +  return __bextr_u64 (X, Y);
> +}
> +
> +unsigned long long
> +func_blsi64 (unsigned long long X)
> +{
> +  return __blsi_u64 (X);
> +}
> +
> +unsigned long long
> +func_blsmsk64 (unsigned long long X)
> +{
> +  return __blsmsk_u64 (X);
> +}
> +
> +unsigned long long
> +func_blsr64 (unsigned long long X)
> +{
> +  return __blsr_u64 (X);
> +}
> +
> +unsigned long long
> +func_tzcnt64 (unsigned long long X)
> +{
> +  return __tzcnt_u64 (X);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/bmi-3.c
> b/gcc/testsuite/gcc.target/i386/bmi-3.c
> new file mode 100644
> index 0000000..ddc5e0f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/bmi-3.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mbmi " } */
> +/* { dg-final { scan-assembler "tzcntw\[^\\n]*(%|)ax" } } */
> +
> +#include <x86intrin.h>
> +
> +unsigned short
> +func_tzcnt16 (unsigned short X)
> +{
> +  return __tzcnt_u16(X);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/funcspec-5.c
> b/gcc/testsuite/gcc.target/i386/funcspec-5.c
> index 34da51c..5e07d85 100644
> --- a/gcc/testsuite/gcc.target/i386/funcspec-5.c
> +++ b/gcc/testsuite/gcc.target/i386/funcspec-5.c
> @@ -5,6 +5,7 @@
>
>  extern void test_abm (void)                    __attribute__((__target__("abm")));
>  extern void test_aes (void)                    __attribute__((__target__("aes")));
> +extern void test_bmi (void)                    __attribute__((__target__("bmi")));
>  extern void test_mmx (void)                    __attribute__((__target__("mmx")));
>  extern void test_pclmul (void)                 __attribute__((__target__("pclmul")));
>  extern void test_popcnt (void)                 __attribute__((__target__("popcnt")));
> @@ -21,6 +22,7 @@ extern void test_ssse3
> (void)                  __attribute__((__target__("ssse3")));
>
>  extern void test_no_abm (void)                 __attribute__((__target__("no-abm")));
>  extern void test_no_aes (void)                 __attribute__((__target__("no-aes")));
> +extern void test_no_bmi (void)                 __attribute__((__target__("no-bmi")));
>  extern void test_no_mmx (void)                 __attribute__((__target__("no-mmx")));
>  extern void test_no_pclmul (void)              __attribute__((__target__("no-pclmul")));
>  extern void test_no_popcnt (void)              __attribute__((__target__("no-popcnt")));
> diff --git a/gcc/testsuite/gcc.target/i386/funcspec-6.c
> b/gcc/testsuite/gcc.target/i386/funcspec-6.c
> index 575be9b..81c831c 100644
> --- a/gcc/testsuite/gcc.target/i386/funcspec-6.c
> +++ b/gcc/testsuite/gcc.target/i386/funcspec-6.c
> @@ -5,6 +5,7 @@
>
>  extern void test_abm (void)                    __attribute__((__target__("abm")));
>  extern void test_aes (void)                    __attribute__((__target__("aes")));
> +extern void test_bmi (void)                    __attribute__((__target__("bmi")));
>  extern void test_mmx (void)                    __attribute__((__target__("mmx")));
>  extern void test_pclmul (void)                 __attribute__((__target__("pclmul")));
>  extern void test_popcnt (void)                 __attribute__((__target__("popcnt")));
> @@ -21,6 +22,7 @@ extern void test_ssse3
> (void)                  __attribute__((__target__("ssse3")));
>
>  extern void test_no_abm (void)                 __attribute__((__target__("no-abm")));
>  extern void test_no_aes (void)                 __attribute__((__target__("no-aes")));
> +extern void test_no_bmi (void)                 __attribute__((__target__("no-bmi")));
>  extern void test_no_mmx (void)                 __attribute__((__target__("no-mmx")));
>  extern void test_no_pclmul (void)              __attribute__((__target__("no-pclmul")));
>  extern void test_no_popcnt (void)              __attribute__((__target__("no-popcnt")));
> diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c
> b/gcc/testsuite/gcc.target/i386/sse-12.c
> index 2d50f41..d59777b 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-12.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-12.c
> @@ -1,8 +1,9 @@
>  /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, mm3dnow.h,
> -   abmintrin.h, lwpintrin.h, popcntintrin.h and mm_malloc.h are usable
> +   fma4intrin.h, abmintrin.h, bmiintrin.h, lwpintrin.h,
> +   popcntintrin.h and mm_malloc.h are usable
>    with -O -std=c89 -pedantic-errors.  */
>  /* { dg-do compile } */
> -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx
> -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlwp -mfsgsbase -mrdrnd
> -mf16c" } */
> +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx
> -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mlwp -mfsgsbase
> -mrdrnd -mf16c" } */
>
>  #include <x86intrin.h>
>

I neglected to mention: this patch passes "make check -k
RUNTESTFLAGS=i386.exp" on x86-64; I am still working on a full bootstrap test.

Ok to commit?
-- 
Quentin



More information about the Gcc-patches mailing list