[patch 1/3] AMD bdver2 processors - BMI
Quentin Neill
quentin.neill.gnu@gmail.com
Mon Oct 18 19:14:00 GMT 2010
On Fri, Oct 15, 2010 at 1:45 PM, Quentin Neill
<quentin.neill.gnu@gmail.com> wrote:
> These patches add support for upcoming bdver2 AMD processors:
> BMI (Bit Manipulation Instructions)
> TBM (Trailing Bit Manipulation)
> FMA3 (three operand FMA) instructions
>
> The public specifications for BMI and TBM are in progress (they are
> today available under NDA). They will appear in one of the AMD64
> Architecture Programmer's Manual Volumes 3-6. I can post the
> mnemonics definitions if needed. The FMA3 specification is documented
> in http://software.intel.com/en-us/avx/
>
>
> 2010-10-15 Quentin Neill <quentin.neill.gnu@amd.com>
>
> gcc/
> * config.gcc (i[34567]86-*-*): Include bmiintrin.h.
> (x86_64-*-*): Likewise.
>
> * config/i386/cpuid.h: Define BMI bit.
>
> * config/i386/driver-i386.c (host_detect_local_cpu): Define
> and set has_bmi.
>
> * config/i386/i386-builtin-types.def (UINT_FTYPE_UINT_UINT): New.
> (UINT64_FTYPE_UINT64_UINT64): New.
>
> * config/i386/i386-c.c (ix86_target_macros_internal): Check
> isa_flag for BMI.
>
> * config/i386/i386.c (OPTION_MASK_ISA_BMI_SET): New.
> (OPTION_MASK_ISA_BMI_UNSET): New.
> (ix86_handle_option): Handle -mbmi.
> (isa_opts): Add -mbmi.
> (enum pta_flags): Add PTA_BMI.
> (ix86_option_override_internal): Add BMI support.
> (ix86_valid_target_attribute_inner_p): Handle -mbmi.
> (IX86_BUILTIN_ANDN32): New for BMI intrinsic.
> (IX86_BUILTIN_ANDN64): Likewise.
> (IX86_BUILTIN_BEXTR32): Likewise.
> (IX86_BUILTIN_BEXTR64): Likewise.
> (IX86_BUILTIN_BLSI32): Likewise.
> (IX86_BUILTIN_BLSI64): Likewise.
> (IX86_BUILTIN_BLSMSK32): Likewise.
> (IX86_BUILTIN_BLSMSK64): Likewise.
> (IX86_BUILTIN_BLSR32): Likewise.
> (IX86_BUILTIN_BLSR64): Likewise.
> (IX86_BUILTIN_TZCNT16): Likewise.
> (IX86_BUILTIN_TZCNT32): Likewise.
> (IX86_BUILTIN_TZCNT64): Likewise.
> (bdesc_args): Add BMI intrinsics.
> (ix86_expand_args_builtin): Add BMI specific cases.
>
> * config/i386/i386.h (TARGET_BMI): New for BMI.
>
> * config/i386/i386.md (UNSPEC_ANDN): New for BMI.
> (UNSPEC_BEXTR): Likewise.
> (UNSPEC_BLSI): Likewise.
> (UNSPEC_BLSMSK): Likewise.
> (UNSPEC_BLSR): Likewise.
> (UNSPEC_TZCNT): Likewise.
> (bmi_andn<mode>): Likewise.
> (bmi_bextr<mode>): Likewise.
> (bmi_blsi<mode>): Likewise.
> (bmi_blsmsk<mode>): Likewise.
> (bmi_blsr<mode>): Likewise.
> (bmi_tzcnt<mode>): Likewise.
> (bsr_rex64): Likewise.
>
> * config/i386/i386.opt: Add -mbmi.
>
> * config/i386/x86intrin.h: Add BMI check and bmiintrin.h.
>
> * config/i386/bmiintrin.h (__tzcnt_u16): New.
> (__lzcnt_u16): Likewise.
> (__bextr_u32): Likewise.
> (__andn_u32): Likewise.
> (__tzcnt_u32): Likewise.
> (__lzcnt_u32): Likewise.
> (__blsr_u32): Likewise.
> (__blsmsk_u32): Likewise.
> (__blsi_u32): Likewise.
> (__bextr_u64): Likewise.
> (__andn_u64): Likewise.
> (__tzcnt_u64): Likewise.
> (__lzcnt_u64): Likewise.
> (__blsr_u64): Likewise.
> (__blsmsk_u64): Likewise.
> (__blsi_u64): Likewise.
>
>
> * doc/invoke.texi: Document -mbmi.
>
> * doc/extend.texi: Document BMI built-in functions.
>
> gcc/testsuite/
> * g++.dg/other/i386-2.C: Add -mbmi.
>
> * g++.dg/other/i386-3.C: Likewise.
>
> * gcc.target/i386/funcspec-5.c: Add bmi and no-bmi targets.
>
> * gcc.target/i386/funcspec-6.c: Likewise.
>
> * gcc.target/i386/sse-12.c: Add -mbmi.
>
> * gcc.target/i386/bmi-1.c: New file.
>
> * gcc.target/i386/bmi-2.c: Likewise.
>
> * gcc.target/i386/bmi-3.c: Likewise.
>
>
> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index b353fa0..4034241 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -318,7 +318,7 @@ i[34567]86-*-*)
> nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
> immintrin.h x86intrin.h avxintrin.h xopintrin.h
> ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
> - abmintrin.h"
> + abmintrin.h bmiintrin.h"
> ;;
> x86_64-*-*)
> cpu_type=i386
> @@ -329,7 +329,7 @@ x86_64-*-*)
> nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
> immintrin.h x86intrin.h avxintrin.h xopintrin.h
> ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
> - abmintrin.h"
> + abmintrin.h bmiintrin.h"
> need_64bit_hwint=yes
> ;;
> ia64-*-*)
> diff --git a/gcc/config/i386/bmiintrin.h b/gcc/config/i386/bmiintrin.h
> new file mode 100644
> index 0000000..f4422cd
> --- /dev/null
> +++ b/gcc/config/i386/bmiintrin.h
> @@ -0,0 +1,134 @@
> +/* Copyright (C) 2010 Free Software Foundation, Inc.
> +
> + This file is part of GCC.
> +
> + GCC is free software; you can redistribute it and/or modify
> + it under the terms of the GNU General Public License as published by
> + the Free Software Foundation; either version 3, or (at your option)
> + any later version.
> +
> + GCC is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + GNU General Public License for more details.
> +
> + Under Section 7 of GPL version 3, you are granted additional
> + permissions described in the GCC Runtime Library Exception, version
> + 3.1, as published by the Free Software Foundation.
> +
> + You should have received a copy of the GNU General Public License and
> + a copy of the GCC Runtime Library Exception along with this program;
> + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +#ifndef _X86INTRIN_H_INCLUDED
> +# error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
> +#endif
> +
> +#ifndef __BMI__
> +# error "BMI instruction set not enabled"
> +#endif /* __BMI__ */
> +
> +#ifndef _BMIINTRIN_H_INCLUDED
> +#define _BMIINTRIN_H_INCLUDED
> +
> +extern __inline unsigned short __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__tzcnt_u16 (unsigned short __X)
> +{
> + return __builtin_ia32_tzcnt_u16 (__X);
> +}
> +extern __inline unsigned short __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__lzcnt_u16 (unsigned short __X)
> +{
> + return __builtin_ia32_lzcnt_u16 (__X);
> +}
> +
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__bextr_u32 (unsigned int __X, unsigned int __Y)
> +{
> + return __builtin_ia32_bextr_u32 (__X, __Y);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__andn_u32 (unsigned int __X, unsigned int __Y)
> +{
> + return __builtin_ia32_andn_u32 (__X, __Y);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__tzcnt_u32 (unsigned int __X)
> +{
> + return __builtin_ia32_tzcnt_u32 (__X);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__lzcnt_u32 (unsigned int __X)
> +{
> + return __builtin_ia32_lzcnt_u32 (__X);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blsr_u32 (unsigned int __X)
> +{
> + return __builtin_ia32_blsr_u32 (__X);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blsmsk_u32 (unsigned int __X)
> +{
> + return __builtin_ia32_blsmsk_u32 (__X);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blsi_u32 (unsigned int __X)
> +{
> + return __builtin_ia32_blsi_u32 (__X);
> +}
> +
> +
> +#ifdef __x86_64__
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__bextr_u64 (unsigned long long __X, unsigned long long __Y)
> +{
> + return __builtin_ia32_bextr_u64 (__X, __Y);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__andn_u64 (unsigned long long __X, unsigned long long __Y)
> +{
> + return __builtin_ia32_andn_u64 (__X, __Y);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__tzcnt_u64 (unsigned long long __X)
> +{
> + return __builtin_ia32_tzcnt_u64 (__X);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__lzcnt_u64 (unsigned long long __X)
> +{
> + return __builtin_ia32_lzcnt_u64 (__X);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blsr_u64 (unsigned long long __X)
> +{
> + return __builtin_ia32_blsr_u64 (__X);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blsmsk_u64 (unsigned long long __X)
> +{
> + return __builtin_ia32_blsmsk_u64 (__X);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blsi_u64 (unsigned long long __X)
> +{
> + return __builtin_ia32_blsi_u64 (__X);
> +}
> +#endif /* __x86_64__ */
> +
> +#endif /* _BMIINTRIN_H_INCLUDED */
> diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
> index 11c2f1e..0f1af7f 100644
> --- a/gcc/config/i386/cpuid.h
> +++ b/gcc/config/i386/cpuid.h
> @@ -62,6 +62,7 @@
>
> /* Extended Features (%eax == 7) */
> #define bit_FSGSBASE (1 << 0)
> +#define bit_BMI (1 << 3)
>
> #if defined(__i386__) && defined(__PIC__)
> /* %ebx may be the PIC register. */
> diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
> index 8a76857..a7d6808 100644
> --- a/gcc/config/i386/driver-i386.c
> +++ b/gcc/config/i386/driver-i386.c
> @@ -397,6 +397,7 @@ const char *host_detect_local_cpu (int argc, const
> char **argv)
> unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0;
> unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
> unsigned int has_fma4 = 0, has_xop = 0;
> + unsigned int has_bmi = 0;
>
> bool arch;
>
> @@ -467,6 +468,10 @@ const char *host_detect_local_cpu (int argc,
> const char **argv)
> has_longmode = edx & bit_LM;
> has_3dnowp = edx & bit_3DNOWP;
> has_3dnow = edx & bit_3DNOW;
> +
> + __cpuid (0x7, eax, ebx, ecx, edx);
> +
> + has_bmi = ebx & bit_BMI;
> }
>
> if (!arch)
> diff --git a/gcc/config/i386/i386-builtin-types.def
> b/gcc/config/i386/i386-builtin-types.def
> index 09dd9eb..110b81d 100644
> --- a/gcc/config/i386/i386-builtin-types.def
> +++ b/gcc/config/i386/i386-builtin-types.def
> @@ -145,7 +145,9 @@ DEF_FUNCTION_TYPE (INT64, INT64)
> DEF_FUNCTION_TYPE (INT64, V2DF)
> DEF_FUNCTION_TYPE (INT64, V4SF)
> DEF_FUNCTION_TYPE (UINT64, INT)
> +DEF_FUNCTION_TYPE (UINT, UINT)
> DEF_FUNCTION_TYPE (UINT16, UINT16)
> +DEF_FUNCTION_TYPE (UINT64, UINT64)
> DEF_FUNCTION_TYPE (UINT64, PUNSIGNED)
> DEF_FUNCTION_TYPE (V16QI, PCCHAR)
> DEF_FUNCTION_TYPE (V16QI, V16QI)
> diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
> index 1846efb..e84347c 100644
> --- a/gcc/config/i386/i386-c.c
> +++ b/gcc/config/i386/i386-c.c
> @@ -244,6 +244,8 @@ ix86_target_macros_internal (int isa_flag,
> def_or_undef (parse_in, "__LWP__");
> if (isa_flag & OPTION_MASK_ISA_ABM)
> def_or_undef (parse_in, "__ABM__");
> + if (isa_flag & OPTION_MASK_ISA_BMI)
> + def_or_undef (parse_in, "__BMI__");
> if (isa_flag & OPTION_MASK_ISA_POPCNT)
> def_or_undef (parse_in, "__POPCNT__");
> if (isa_flag & OPTION_MASK_ISA_FSGSBASE)
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 91e3839..e003ee7 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -2079,6 +2079,8 @@ static int ix86_isa_flags_explicit;
> #define OPTION_MASK_ISA_ABM_SET \
> (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
>
> +#define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
> +
> #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
> #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
> #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
> @@ -2133,6 +2135,7 @@ static int ix86_isa_flags_explicit;
> #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
> #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
> #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
> +#define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
> #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
> #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
> #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
> @@ -2430,6 +2433,19 @@ ix86_handle_option (size_t code, const char
> *arg ATTRIBUTE_UNUSED, int value)
> }
> return true;
>
> + case OPT_mbmi:
> + if (value)
> + {
> + ix86_isa_flags |= OPTION_MASK_ISA_BMI_SET;
> + ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_SET;
> + }
> + else
> + {
> + ix86_isa_flags &= ~OPTION_MASK_ISA_BMI_UNSET;
> + ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_UNSET;
> + }
> + return true;
> +
> case OPT_mpopcnt:
> if (value)
> {
> @@ -2598,6 +2614,7 @@ ix86_target_string (int isa, int flags, const
> char *arch, const char *tune,
> { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
> { "-mmmx", OPTION_MASK_ISA_MMX },
> { "-mabm", OPTION_MASK_ISA_ABM },
> + { "-mbmi", OPTION_MASK_ISA_BMI },
> { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
> { "-mmovbe", OPTION_MASK_ISA_MOVBE },
> { "-mcrc32", OPTION_MASK_ISA_CRC32 },
> @@ -2852,7 +2869,9 @@ ix86_option_override_internal (bool main_args_p)
> PTA_LWP = 1 << 23,
> PTA_FSGSBASE = 1 << 24,
> PTA_RDRND = 1 << 25,
> - PTA_F16C = 1 << 26
> + PTA_F16C = 1 << 26,
> + PTA_BMI = 1 << 27,
> + /* If this reaches 32, struct pta flags below must be widened.  */
> };
>
> static struct pta
> @@ -3184,6 +3203,9 @@ ix86_option_override_internal (bool main_args_p)
> if (processor_alias_table[i].flags & PTA_ABM
> && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
> ix86_isa_flags |= OPTION_MASK_ISA_ABM;
> + if (processor_alias_table[i].flags & PTA_BMI
> + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
> + ix86_isa_flags |= OPTION_MASK_ISA_BMI;
> if (processor_alias_table[i].flags & PTA_CX16
> && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
> ix86_isa_flags |= OPTION_MASK_ISA_CX16;
> @@ -3928,6 +3950,7 @@ ix86_valid_target_attribute_inner_p (tree args,
> char *p_strings[])
> /* isa options */
> IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
> IX86_ATTR_ISA ("abm", OPT_mabm),
> + IX86_ATTR_ISA ("bmi", OPT_mbmi),
> IX86_ATTR_ISA ("aes", OPT_maes),
> IX86_ATTR_ISA ("avx", OPT_mavx),
> IX86_ATTR_ISA ("mmx", OPT_mmmx),
> @@ -22954,6 +22977,21 @@ enum ix86_builtins
>
> IX86_BUILTIN_CLZS,
>
> + /* BMI instructions. */
> + IX86_BUILTIN_ANDN32,
> + IX86_BUILTIN_ANDN64,
> + IX86_BUILTIN_BEXTR32,
> + IX86_BUILTIN_BEXTR64,
> + IX86_BUILTIN_BLSI32,
> + IX86_BUILTIN_BLSI64,
> + IX86_BUILTIN_BLSMSK32,
> + IX86_BUILTIN_BLSMSK64,
> + IX86_BUILTIN_BLSR32,
> + IX86_BUILTIN_BLSR64,
> + IX86_BUILTIN_TZCNT16,
> + IX86_BUILTIN_TZCNT32,
> + IX86_BUILTIN_TZCNT64,
> +
> /* FSGSBASE instructions. */
> IX86_BUILTIN_RDFSBASE32,
> IX86_BUILTIN_RDFSBASE64,
> @@ -23893,6 +23931,21 @@ static const struct builtin_description bdesc_args[] =
>
> { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs",
> IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
>
> + /* BMI */
> + { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_andnsi,
> "__builtin_ia32_andn_u32", IX86_BUILTIN_ANDN32, UNKNOWN, (int)
> UINT_FTYPE_UINT_UINT },
> + { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_andndi,
> "__builtin_ia32_andn_u64", IX86_BUILTIN_ANDN64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64_UINT64 },
> + { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextrsi,
> "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int)
> UINT_FTYPE_UINT_UINT },
> + { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextrdi,
> "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64_UINT64 },
> + { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_tzcnthi,
> "__builtin_ia32_tzcnt_u16", IX86_BUILTIN_TZCNT16, UNKNOWN, (int)
> UINT16_FTYPE_UINT16 },
> + { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_tzcntsi,
> "__builtin_ia32_tzcnt_u32", IX86_BUILTIN_TZCNT32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> + { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_tzcntdi,
> "__builtin_ia32_tzcnt_u64", IX86_BUILTIN_TZCNT64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> + { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_blsisi,
> "__builtin_ia32_blsi_u32", IX86_BUILTIN_BLSI32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> + { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_blsidi,
> "__builtin_ia32_blsi_u64", IX86_BUILTIN_BLSI64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> + { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_blsmsksi,
> "__builtin_ia32_blsmsk_u32", IX86_BUILTIN_BLSMSK32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> + { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_blsmskdi,
> "__builtin_ia32_blsmsk_u64", IX86_BUILTIN_BLSMSK64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> + { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_blsrsi,
> "__builtin_ia32_blsr_u32", IX86_BUILTIN_BLSR32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> + { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_blsrdi,
> "__builtin_ia32_blsr_u64", IX86_BUILTIN_BLSR64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> +
> /* F16C */
> { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps,
> "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int)
> V4SF_FTYPE_V8HI },
> { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256,
> "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN,
> (int) V8SF_FTYPE_V8HI },
> @@ -25118,9 +25171,11 @@ ix86_expand_args_builtin (const struct
> builtin_description *d,
> case FLOAT128_FTYPE_FLOAT128:
> case FLOAT_FTYPE_FLOAT:
> case INT_FTYPE_INT:
> + case UINT_FTYPE_UINT:
> case UINT64_FTYPE_INT:
> case UINT16_FTYPE_UINT16:
> case INT64_FTYPE_INT64:
> + case UINT64_FTYPE_UINT64:
> case INT64_FTYPE_V4SF:
> case INT64_FTYPE_V2DF:
> case INT_FTYPE_V16QI:
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 719761d..4fba57d 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -59,6 +59,7 @@ see the files COPYING3 and COPYING.RUNTIME
> respectively. If not, see
> #define TARGET_LWP OPTION_ISA_LWP
> #define TARGET_ROUND OPTION_ISA_ROUND
> #define TARGET_ABM OPTION_ISA_ABM
> +#define TARGET_BMI OPTION_ISA_BMI
> #define TARGET_POPCNT OPTION_ISA_POPCNT
> #define TARGET_SAHF OPTION_ISA_SAHF
> #define TARGET_MOVBE OPTION_ISA_MOVBE
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index d2ad8b1..967886d 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -229,6 +229,14 @@
> UNSPEC_VTESTP
> UNSPEC_VCVTPH2PS
> UNSPEC_VCVTPS2PH
> +
> + ;; For BMI support
> + UNSPEC_ANDN
> + UNSPEC_BEXTR
> + UNSPEC_BLSI
> + UNSPEC_BLSMSK
> + UNSPEC_BLSR
> + UNSPEC_TZCNT
> ])
>
> (define_c_enum "unspecv" [
> @@ -11851,6 +11859,63 @@
> (set_attr "type" "bitmanip")
> (set_attr "mode" "<MODE>")])
>
> +;; BMI instructions.
> +(define_insn "bmi_andn<mode>"
> + [(set (match_operand:SWI48 0 "register_operand" "=r")
> + (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")
> + (match_operand:SWI48 2 "nonimmediate_operand" "rm")]
> + UNSPEC_ANDN))]
> + "TARGET_BMI"
> + "andn\t{%2, %1, %0|%0, %1, %2}"
> + [(set_attr "type" "bitmanip")
> + (set_attr "mode" "<MODE>")])
> +
> +(define_insn "bmi_bextr<mode>"
> + [(set (match_operand:SWI48 0 "register_operand" "=r")
> + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")
> + (match_operand:SWI48 2 "register_operand" "r")]
> + UNSPEC_BEXTR))]
> + "TARGET_BMI"
> + "bextr\t{%2, %1, %0|%0, %1, %2}"
> + [(set_attr "type" "bitmanip")
> + (set_attr "mode" "<MODE>")])
> +
> +(define_insn "bmi_blsi<mode>"
> + [(set (match_operand:SWI48 0 "register_operand" "=r")
> + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
> + UNSPEC_BLSI))]
> + "TARGET_BMI"
> + "blsi\t{%1, %0|%0, %1}"
> + [(set_attr "type" "bitmanip")
> + (set_attr "mode" "<MODE>")])
> +
> +(define_insn "bmi_blsmsk<mode>"
> + [(set (match_operand:SWI48 0 "register_operand" "=r")
> + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
> + UNSPEC_BLSMSK))]
> + "TARGET_BMI"
> + "blsmsk\t{%1, %0|%0, %1}"
> + [(set_attr "type" "bitmanip")
> + (set_attr "mode" "<MODE>")])
> +
> +(define_insn "bmi_blsr<mode>"
> + [(set (match_operand:SWI48 0 "register_operand" "=r")
> + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
> + UNSPEC_BLSR))]
> + "TARGET_BMI"
> + "blsr\t{%1, %0|%0, %1}"
> + [(set_attr "type" "bitmanip")
> + (set_attr "mode" "<MODE>")])
> +
> +(define_insn "bmi_tzcnt<mode>"
> + [(set (match_operand:SWI248 0 "register_operand" "=r")
> + (unspec:SWI248 [(match_operand:SWI248 1 "nonimmediate_operand" "rm")]
> + UNSPEC_TZCNT))]
> + "TARGET_BMI"
> + "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
> + [(set_attr "type" "bitmanip")
> + (set_attr "mode" "<MODE>")])
> +
> (define_insn "bsr_rex64"
> [(set (match_operand:DI 0 "register_operand" "=r")
> (minus:DI (const_int 63)
> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> index 9c1fe1f..d808804 100644
> --- a/gcc/config/i386/i386.opt
> +++ b/gcc/config/i386/i386.opt
> @@ -353,6 +353,10 @@ mpopcnt
> Target Report Mask(ISA_POPCNT) Var(ix86_isa_flags) Save
> Support code generation of popcnt instruction.
>
> +mbmi
> +Target Report Mask(ISA_BMI) Var(ix86_isa_flags) Save
> +Support BMI built-in functions and code generation.
> +
> mcx16
> Target Report Mask(ISA_CX16) Var(ix86_isa_flags) Save
> Support code generation of cmpxchg16b instruction.
> diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h
> index 29d44dc..9a7366b 100644
> --- a/gcc/config/i386/x86intrin.h
> +++ b/gcc/config/i386/x86intrin.h
> @@ -81,6 +81,10 @@
> #include <abmintrin.h>
> #endif
>
> +#ifdef __BMI__
> +#include <bmiintrin.h>
> +#endif
> +
> #ifdef __POPCNT__
> #include <popcntintrin.h>
> #endif
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index fc40d53..a7be54b 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -9351,6 +9351,25 @@ unsigned char __builtin_ia32_lwpins32 (unsigned
> int, unsigned int, unsigned int)
> unsigned char __builtin_ia32_lwpins64 (unsigned __int64, unsigned
> int, unsigned int)
> @end smallexample
>
> +The following built-in functions are available when @option{-mbmi} is used.
> +All of them generate the machine instruction that is part of the name.
> +@smallexample
> +unsigned int __builtin_ia32_bextr_u32(unsigned int, unsigned int);
> +unsigned long long __builtin_ia32_bextr_u64 (unsigned long long,
> unsigned long long);
> +unsigned int __builtin_ia32_andn_u32(unsigned int, unsigned int);
> +unsigned long long __builtin_ia32_andn_u64 (unsigned long long,
> unsigned long long);
> +unsigned int __builtin_ia32_tzcnt_u32(unsigned int);
> +unsigned long long __builtin_ia32_tzcnt_u64 (unsigned long long);
> +unsigned int __builtin_ia32_lzcnt_u32(unsigned int);
> +unsigned long long __builtin_ia32_lzcnt_u64 (unsigned long long);
> +unsigned int __builtin_ia32_blsr_u32(unsigned int);
> +unsigned long long __builtin_ia32_blsr_u64 (unsigned long long);
> +unsigned int __builtin_ia32_blsmsk_u32(unsigned int);
> +unsigned long long __builtin_ia32_blsmsk_u64 (unsigned long long);
> +unsigned int __builtin_ia32_blsi_u32(unsigned int);
> +unsigned long long __builtin_ia32_blsi_u64 (unsigned long long);
> +@end smallexample
> +
> The following built-in functions are available when @option{-m3dnow} is used.
> All of them generate the machine instruction that is part of the name.
>
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 776fdd0..071ad27 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -597,7 +597,7 @@ Objective-C and Objective-C++ Dialects}.
> -mcld -mcx16 -msahf -mmovbe -mcrc32 -mrecip @gol
> -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol
> -maes -mpclmul -mfsgsbase -mrdrnd -mf16c -mfused-madd @gol
> --msse4a -m3dnow -mpopcnt -mabm -mfma4 -mxop -mlwp @gol
> +-msse4a -m3dnow -mpopcnt -mabm -mbmi -mfma4 -mxop -mlwp @gol
> -mthreads -mno-align-stringops -minline-all-stringops @gol
> -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
> -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol
> @@ -12418,6 +12418,8 @@ preferred alignment to
> @option{-mpreferred-stack-boundary=2}.
> @itemx -mno-popcnt
> @itemx -mabm
> @itemx -mno-abm
> +@itemx -mbmi
> +@itemx -mno-bmi
> @opindex mmmx
> @opindex mno-mmx
> @opindex msse
> @@ -12426,7 +12428,7 @@ preferred alignment to
> @option{-mpreferred-stack-boundary=2}.
> @opindex mno-3dnow
> These switches enable or disable the use of instructions in the MMX,
> SSE, SSE2, SSE3, SSSE3, SSE4.1, AVX, AES, PCLMUL, FSGSBASE, RDRND,
> -F16C, SSE4A, FMA4, XOP, LWP, ABM or 3DNow!@: extended instruction sets.
> +F16C, SSE4A, FMA4, XOP, LWP, ABM, BMI, or 3DNow!@: extended instruction sets.
> These extensions are also available as built-in functions: see
> @ref{X86 Built-in Functions}, for details of the functions enabled and
> disabled by these switches.
> diff --git a/gcc/testsuite/g++.dg/other/i386-2.C
> b/gcc/testsuite/g++.dg/other/i386-2.C
> index 7297068..f0a382a 100644
> --- a/gcc/testsuite/g++.dg/other/i386-2.C
> +++ b/gcc/testsuite/g++.dg/other/i386-2.C
> @@ -1,8 +1,8 @@
> /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> -/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -mxop
> -maes -mpclmul -mpopcnt -mabm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
> +/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -mxop
> -maes -mpclmul -mpopcnt -mabm -mbmi -mlwp -mfsgsbase -mrdrnd -mf16c" }
> */
>
> /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, abmintrin.h,
> - lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with
> + bmiintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with
> -O -pedantic-errors. */
>
> #include <x86intrin.h>
> diff --git a/gcc/testsuite/g++.dg/other/i386-3.C
> b/gcc/testsuite/g++.dg/other/i386-3.C
> index 75515ef..4b27372 100644
> --- a/gcc/testsuite/g++.dg/other/i386-3.C
> +++ b/gcc/testsuite/g++.dg/other/i386-3.C
> @@ -1,8 +1,8 @@
> /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> -/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx
> -mxop -maes -mpclmul -mpopcnt -mabm -mlwp -mfsgsbase -mrdrnd -mf16c" }
> */
> +/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx
> -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mlwp -mfsgsbase -mrdrnd
> -mf16c" } */
>
> /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, abmintrin.h,
> - lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with
> + bmiintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with
> -O -fkeep-inline-functions. */
>
> #include <x86intrin.h>
> diff --git a/gcc/testsuite/gcc.target/i386/bmi-1.c
> b/gcc/testsuite/gcc.target/i386/bmi-1.c
> new file mode 100644
> index 0000000..dc964ba
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/bmi-1.c
> @@ -0,0 +1,46 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mbmi " } */
> +/* { dg-final { scan-assembler "andn\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "bextr\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "blsi\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "blsmsk\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "blsr\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "tzcntl\[^\\n]*(%|)eax" } } */
> +
> +#include <x86intrin.h>
> +
> +unsigned int
> +func_andn32 (unsigned int X, unsigned int Y)
> +{
> + return __andn_u32(X, Y);
> +}
> +
> +unsigned int
> +func_bextr32 (unsigned int X, unsigned int Y)
> +{
> + return __bextr_u32(X, Y);
> +}
> +
> +unsigned int
> +func_blsi32 (unsigned int X)
> +{
> + return __blsi_u32(X);
> +}
> +
> +unsigned int
> +func_blsmsk32 (unsigned int X)
> +{
> + return __blsmsk_u32(X);
> +}
> +
> +unsigned int
> +func_blsr32 (unsigned int X)
> +{
> + return __blsr_u32(X);
> +}
> +
> +unsigned int
> +func_tzcnt32 (unsigned int X)
> +{
> + return __tzcnt_u32(X);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/bmi-2.c
> b/gcc/testsuite/gcc.target/i386/bmi-2.c
> new file mode 100644
> index 0000000..6250949
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/bmi-2.c
> @@ -0,0 +1,47 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target lp64 } */
> +/* { dg-options "-O2 -mbmi " } */
> +/* { dg-final { scan-assembler "andn\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "bextr\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "blsi\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "blsmsk\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "blsr\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "tzcntq\[^\\n]*(%|)rax" } } */
> +
> +#include <x86intrin.h>
> +
> +unsigned long long
> +func_andn64 (unsigned long long X, unsigned long long Y)
> +{
> + return __andn_u64 (X, Y);
> +}
> +
> +unsigned long long
> +func_bextr64 (unsigned long long X, unsigned long long Y)
> +{
> + return __bextr_u64 (X, Y);
> +}
> +
> +unsigned long long
> +func_blsi64 (unsigned long long X)
> +{
> + return __blsi_u64 (X);
> +}
> +
> +unsigned long long
> +func_blsmsk64 (unsigned long long X)
> +{
> + return __blsmsk_u64 (X);
> +}
> +
> +unsigned long long
> +func_blsr64 (unsigned long long X)
> +{
> + return __blsr_u64 (X);
> +}
> +
> +unsigned long long
> +func_tzcnt64 (unsigned long long X)
> +{
> + return __tzcnt_u64 (X);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/bmi-3.c
> b/gcc/testsuite/gcc.target/i386/bmi-3.c
> new file mode 100644
> index 0000000..ddc5e0f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/bmi-3.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mbmi " } */
> +/* { dg-final { scan-assembler "tzcntw\[^\\n]*(%|)ax" } } */
> +
> +#include <x86intrin.h>
> +
> +unsigned short
> +func_tzcnt16 (unsigned short X)
> +{
> + return __tzcnt_u16(X);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/funcspec-5.c
> b/gcc/testsuite/gcc.target/i386/funcspec-5.c
> index 34da51c..5e07d85 100644
> --- a/gcc/testsuite/gcc.target/i386/funcspec-5.c
> +++ b/gcc/testsuite/gcc.target/i386/funcspec-5.c
> @@ -5,6 +5,7 @@
>
> extern void test_abm (void) __attribute__((__target__("abm")));
> extern void test_aes (void) __attribute__((__target__("aes")));
> +extern void test_bmi (void) __attribute__((__target__("bmi")));
> extern void test_mmx (void) __attribute__((__target__("mmx")));
> extern void test_pclmul (void) __attribute__((__target__("pclmul")));
> extern void test_popcnt (void) __attribute__((__target__("popcnt")));
> @@ -21,6 +22,7 @@ extern void test_ssse3
> (void) __attribute__((__target__("ssse3")));
>
> extern void test_no_abm (void) __attribute__((__target__("no-abm")));
> extern void test_no_aes (void) __attribute__((__target__("no-aes")));
> +extern void test_no_bmi (void) __attribute__((__target__("no-bmi")));
> extern void test_no_mmx (void) __attribute__((__target__("no-mmx")));
> extern void test_no_pclmul (void) __attribute__((__target__("no-pclmul")));
> extern void test_no_popcnt (void) __attribute__((__target__("no-popcnt")));
> diff --git a/gcc/testsuite/gcc.target/i386/funcspec-6.c
> b/gcc/testsuite/gcc.target/i386/funcspec-6.c
> index 575be9b..81c831c 100644
> --- a/gcc/testsuite/gcc.target/i386/funcspec-6.c
> +++ b/gcc/testsuite/gcc.target/i386/funcspec-6.c
> @@ -5,6 +5,7 @@
>
> extern void test_abm (void) __attribute__((__target__("abm")));
> extern void test_aes (void) __attribute__((__target__("aes")));
> +extern void test_bmi (void) __attribute__((__target__("bmi")));
> extern void test_mmx (void) __attribute__((__target__("mmx")));
> extern void test_pclmul (void) __attribute__((__target__("pclmul")));
> extern void test_popcnt (void) __attribute__((__target__("popcnt")));
> @@ -21,6 +22,7 @@ extern void test_ssse3
> (void) __attribute__((__target__("ssse3")));
>
> extern void test_no_abm (void) __attribute__((__target__("no-abm")));
> extern void test_no_aes (void) __attribute__((__target__("no-aes")));
> +extern void test_no_bmi (void) __attribute__((__target__("no-bmi")));
> extern void test_no_mmx (void) __attribute__((__target__("no-mmx")));
> extern void test_no_pclmul (void) __attribute__((__target__("no-pclmul")));
> extern void test_no_popcnt (void) __attribute__((__target__("no-popcnt")));
> diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c
> b/gcc/testsuite/gcc.target/i386/sse-12.c
> index 2d50f41..d59777b 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-12.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-12.c
> @@ -1,8 +1,9 @@
> /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, mm3dnow.h,
> - abmintrin.h, lwpintrin.h, popcntintrin.h and mm_malloc.h are usable
> + fma4intrin.h, abmintrin.h, bmiintrin.h, lwpintrin.h,
> + popcntintrin.h and mm_malloc.h are usable
> with -O -std=c89 -pedantic-errors. */
> /* { dg-do compile } */
> -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx
> -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlwp -mfsgsbase -mrdrnd
> -mf16c" } */
> +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx
> -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mlwp -mfsgsbase
> -mrdrnd -mf16c" } */
>
> #include <x86intrin.h>
>
I neglected to mention: this patch passes "make check -k
RUNTESTFLAGS=i386.exp" on x86-64; a full bootstrap test is still in progress.
Ok to commit?
--
Quentin
More information about the Gcc-patches
mailing list