[patch 2/3] AMD bdver2 processors - TBM
Quentin Neill
quentin.neill.gnu@gmail.com
Mon Oct 18 18:59:00 GMT 2010
On Fri, Oct 15, 2010 at 1:45 PM, Quentin Neill
<quentin.neill.gnu@gmail.com> wrote:
> These patches add support for upcoming bdver2 AMD processors:
> BMI (Bit Manipulation Instructions)
> TBM (Trailing Bit Manipulation)
> FMA3 (three operand FMA) instructions
>
> The public specifications for BMI and TBM are in progress (they are
> today available under NDA). They will appear in one of the AMD64
> Architecture Programmer's Manual Volumes 3-6. I can post the
> mnemonics definitions if needed. The FMA3 specification is documented
> in http://software.intel.com/en-us/avx/
>
>
> 2010-10-15 Quentin Neill <quentin.neill.gnu@amd.com>
>
> gcc/
> * config.gcc (i[34567]86-*-*): Include tbmintrin.h.
> (x86_64-*-*): Likewise.
>
> * config/i386/cpuid.h: Define TBM bit.
>
> * config/i386/driver-i386.c (host_detect_local_cpu): Define
> and set has_tbm.
>
> * config/i386/i386-c.c (ix86_target_macros_internal): Check
> isa_flag for TBM.
>
> * config/i386/i386.c (OPTION_MASK_ISA_TBM_SET): New.
> (OPTION_MASK_ISA_TBM_UNSET): New.
> (ix86_handle_option): Handle -mtbm.
> (isa_opts): Add -mtbm.
> (enum pta_flags): Add PTA_TBM.
> (ix86_option_override_internal): Add TBM support.
> (ix86_valid_target_attribute_inner_p): Handle -mtbm.
> (IX86_BUILTIN_BEXTRI32): New for TBM intrinsic.
> (IX86_BUILTIN_BEXTRI64): Likewise.
> (IX86_BUILTIN_BLCFILL32): Likewise.
> (IX86_BUILTIN_BLCFILL64): Likewise.
> (IX86_BUILTIN_BLCI32): Likewise.
> (IX86_BUILTIN_BLCI64): Likewise.
> (IX86_BUILTIN_BLCIC32): Likewise.
> (IX86_BUILTIN_BLCIC64): Likewise.
> (IX86_BUILTIN_BLCMSK32): Likewise.
> (IX86_BUILTIN_BLCMSK64): Likewise.
> (IX86_BUILTIN_BLCS32): Likewise.
> (IX86_BUILTIN_BLCS64): Likewise.
> (IX86_BUILTIN_BLSFILL32): Likewise.
> (IX86_BUILTIN_BLSFILL64): Likewise.
> (IX86_BUILTIN_BLSIC32): Likewise.
> (IX86_BUILTIN_BLSIC64): Likewise.
> (IX86_BUILTIN_T1MSKC32): Likewise.
> (IX86_BUILTIN_T1MSKC64): Likewise.
> (IX86_BUILTIN_TZMSK32): Likewise.
> (IX86_BUILTIN_TZMSK64): Likewise.
> (bdesc_args): Add TBM intrinsics.
> (ix86_expand_builtin): Add TBM specific cases for
> BEXTR immediate operands.
>
> * config/i386/i386.h (TARGET_TBM): New for TBM.
>
> * config/i386/i386.md (UNSPEC_BEXTRI): New for TBM.
> (UNSPEC_BLCFILL): Likewise.
> (UNSPEC_BLCI): Likewise.
> (UNSPEC_BLCIC): Likewise.
> (UNSPEC_BLCMSK): Likewise.
> (UNSPEC_BLCS): Likewise.
> (UNSPEC_BLSFILL): Likewise.
> (UNSPEC_BLSIC): Likewise.
> (UNSPEC_T1MSKC): Likewise.
> (UNSPEC_TZMSK): Likewise.
> (tbm_bextri<mode>): Likewise.
> (tbm_blcfill<mode>): Likewise.
> (tbm_blci<mode>): Likewise.
> (tbm_blcic<mode>): Likewise.
> (tbm_blcmsk<mode>): Likewise.
> (tbm_blcs<mode>): Likewise.
> (tbm_blsfill<mode>): Likewise.
> (tbm_blsic<mode>): Likewise.
> (tbm_t1mskc<mode>): Likewise.
> (tbm_tzmsk<mode>): Likewise.
> (bsr_rex64): Likewise.
>
> * config/i386/i386.opt: Add -mtbm.
>
> * config/i386/tbmintrin.h (__bextri_u32): New.
> (__blcfill_u32): Likewise.
> (__blsfill_u32): Likewise.
> (__blcs_u32): Likewise.
> (__tzmsk_u32): Likewise.
> (__blcic_u32): Likewise.
> (__blsic_u32): Likewise.
> (__t1mskc_u32): Likewise.
> (__blcmsk_u32): Likewise.
> (__blci_u32): Likewise.
> (__bextri_u64): Likewise.
> (__blcfill_u64): Likewise.
> (__blsfill_u64): Likewise.
> (__blcs_u64): Likewise.
> (__tzmsk_u64): Likewise.
> (__blcic_u64): Likewise.
> (__blsic_u64): Likewise.
> (__t1mskc_u64): Likewise.
> (__blcmsk_u64): Likewise.
> (__blci_u64): Likewise.
>
> * config/i386/x86intrin.h: Add TBM check and tbmintrin.h.
>
> * doc/invoke.texi: Document -mtbm.
>
> * doc/extend.texi: Document TBM built-in functions.
>
> gcc/testsuite/
>
> * g++.dg/other/i386-2.C: Add -mtbm.
>
> * g++.dg/other/i386-3.C: Likewise.
>
> * gcc.target/i386/funcspec-5.c: Add tbm and no-tbm targets.
>
> * gcc.target/i386/funcspec-6.c: Likewise.
>
> * gcc.target/i386/sse-12.c: Add -mtbm.
>
> * gcc.target/i386/sse-13.c: Add -mtbm and test immediate
> operand intrinsics.
>
> * gcc.target/i386/sse-14.c: Likewise.
>
> * gcc.target/i386/sse-22.c: Likewise.
>
> * gcc.target/i386/sse-23.c: Likewise.
>
> * gcc.target/i386/tbm-1.c: New file.
>
> * gcc.target/i386/tbm-2.c: Likewise.
>
>
> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index 4034241..f923990 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -318,7 +318,7 @@ i[34567]86-*-*)
> nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
> immintrin.h x86intrin.h avxintrin.h xopintrin.h
> ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
> - abmintrin.h bmiintrin.h"
> + abmintrin.h bmiintrin.h tbmintrin.h"
> ;;
> x86_64-*-*)
> cpu_type=i386
> @@ -329,7 +329,7 @@ x86_64-*-*)
> nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
> immintrin.h x86intrin.h avxintrin.h xopintrin.h
> ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
> - abmintrin.h bmiintrin.h"
> + abmintrin.h bmiintrin.h tbmintrin.h"
> need_64bit_hwint=yes
> ;;
> ia64-*-*)
> diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
> index 0f1af7f..e9d0fab 100644
> --- a/gcc/config/i386/cpuid.h
> +++ b/gcc/config/i386/cpuid.h
> @@ -54,6 +54,7 @@
> #define bit_XOP (1 << 11)
> #define bit_LWP (1 << 15)
> #define bit_FMA4 (1 << 16)
> +#define bit_TBM (1 << 21)
>
> /* %edx */
> #define bit_LM (1 << 29)
> diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
> index a7d6808..15d3284 100644
> --- a/gcc/config/i386/driver-i386.c
> +++ b/gcc/config/i386/driver-i386.c
> @@ -397,7 +397,7 @@ const char *host_detect_local_cpu (int argc, const
> char **argv)
> unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0;
> unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
> unsigned int has_fma4 = 0, has_xop = 0;
> - unsigned int has_bmi = 0;
> + unsigned int has_bmi = 0, has_tbm = 0;
>
> bool arch;
>
> @@ -464,6 +464,7 @@ const char *host_detect_local_cpu (int argc, const
> char **argv)
> has_lwp = ecx & bit_LWP;
> has_fma4 = ecx & bit_FMA4;
> has_xop = ecx & bit_XOP;
> + has_tbm = ecx & bit_TBM;
>
> has_longmode = edx & bit_LM;
> has_3dnowp = edx & bit_3DNOWP;
> diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
> index e84347c..666e77e 100644
> --- a/gcc/config/i386/i386-c.c
> +++ b/gcc/config/i386/i386-c.c
> @@ -246,6 +246,8 @@ ix86_target_macros_internal (int isa_flag,
> def_or_undef (parse_in, "__ABM__");
> if (isa_flag & OPTION_MASK_ISA_BMI)
> def_or_undef (parse_in, "__BMI__");
> + if (isa_flag & OPTION_MASK_ISA_TBM)
> + def_or_undef (parse_in, "__TBM__");
> if (isa_flag & OPTION_MASK_ISA_POPCNT)
> def_or_undef (parse_in, "__POPCNT__");
> if (isa_flag & OPTION_MASK_ISA_FSGSBASE)
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index e003ee7..ac0772e 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -2080,6 +2080,7 @@ static int ix86_isa_flags_explicit;
> (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
>
> #define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
> +#define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM
>
> #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
> #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
> @@ -2136,6 +2137,7 @@ static int ix86_isa_flags_explicit;
> #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
> #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
> #define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
> +#define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM
> #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
> #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
> #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
> @@ -2446,6 +2448,20 @@ ix86_handle_option (size_t code, const char
> *arg ATTRIBUTE_UNUSED, int value)
> }
> return true;
>
> + case OPT_mtbm:
> + if (value)
> + {
> + ix86_isa_flags |= OPTION_MASK_ISA_TBM_SET;
> + ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_SET;
> + }
> + else
> + {
> + ix86_isa_flags &= ~OPTION_MASK_ISA_TBM_UNSET;
> + ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_UNSET;
> + }
> + return true;
> +
> +
> case OPT_mpopcnt:
> if (value)
> {
> @@ -2615,6 +2631,7 @@ ix86_target_string (int isa, int flags, const
> char *arch, const char *tune,
> { "-mmmx", OPTION_MASK_ISA_MMX },
> { "-mabm", OPTION_MASK_ISA_ABM },
> { "-mbmi", OPTION_MASK_ISA_BMI },
> + { "-mtbm", OPTION_MASK_ISA_TBM },
> { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
> { "-mmovbe", OPTION_MASK_ISA_MOVBE },
> { "-mcrc32", OPTION_MASK_ISA_CRC32 },
> @@ -2871,6 +2888,7 @@ ix86_option_override_internal (bool main_args_p)
> PTA_RDRND = 1 << 25,
> PTA_F16C = 1 << 26,
> PTA_BMI = 1 << 27,
> + PTA_TBM = 1 << 28,
> /* if this reaches 32, need to widen struct pta flags below */
> };
>
> @@ -3206,6 +3224,9 @@ ix86_option_override_internal (bool main_args_p)
> if (processor_alias_table[i].flags & PTA_BMI
> && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
> ix86_isa_flags |= OPTION_MASK_ISA_BMI;
> + if (processor_alias_table[i].flags & PTA_TBM
> + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
> + ix86_isa_flags |= OPTION_MASK_ISA_TBM;
> if (processor_alias_table[i].flags & PTA_CX16
> && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
> ix86_isa_flags |= OPTION_MASK_ISA_CX16;
> @@ -3951,6 +3972,7 @@ ix86_valid_target_attribute_inner_p (tree args,
> char *p_strings[])
> IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
> IX86_ATTR_ISA ("abm", OPT_mabm),
> IX86_ATTR_ISA ("bmi", OPT_mbmi),
> + IX86_ATTR_ISA ("tbm", OPT_mtbm),
> IX86_ATTR_ISA ("aes", OPT_maes),
> IX86_ATTR_ISA ("avx", OPT_mavx),
> IX86_ATTR_ISA ("mmx", OPT_mmmx),
> @@ -22992,6 +23014,28 @@ enum ix86_builtins
> IX86_BUILTIN_TZCNT32,
> IX86_BUILTIN_TZCNT64,
>
> + /* TBM instructions. */
> + IX86_BUILTIN_BEXTRI32,
> + IX86_BUILTIN_BEXTRI64,
> + IX86_BUILTIN_BLCFILL32,
> + IX86_BUILTIN_BLCFILL64,
> + IX86_BUILTIN_BLCI32,
> + IX86_BUILTIN_BLCI64,
> + IX86_BUILTIN_BLCIC32,
> + IX86_BUILTIN_BLCIC64,
> + IX86_BUILTIN_BLCMSK32,
> + IX86_BUILTIN_BLCMSK64,
> + IX86_BUILTIN_BLCS32,
> + IX86_BUILTIN_BLCS64,
> + IX86_BUILTIN_BLSFILL32,
> + IX86_BUILTIN_BLSFILL64,
> + IX86_BUILTIN_BLSIC32,
> + IX86_BUILTIN_BLSIC64,
> + IX86_BUILTIN_T1MSKC32,
> + IX86_BUILTIN_T1MSKC64,
> + IX86_BUILTIN_TZMSK32,
> + IX86_BUILTIN_TZMSK64,
> +
> /* FSGSBASE instructions. */
> IX86_BUILTIN_RDFSBASE32,
> IX86_BUILTIN_RDFSBASE64,
> @@ -23946,6 +23990,28 @@ static const struct builtin_description bdesc_args[] =
> { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_blsrsi,
> "__builtin_ia32_blsr_u32", IX86_BUILTIN_BLSR32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_blsrdi,
> "__builtin_ia32_blsr_u64", IX86_BUILTIN_BLSR64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
>
> + /* TBM */
> + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextrisi,
> "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int)
> UINT_FTYPE_UINT_UINT },
> + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextridi,
> "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64_UINT64 },
> + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcfillsi,
> "__builtin_ia32_blcfill_u32", IX86_BUILTIN_BLCFILL32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcfilldi,
> "__builtin_ia32_blcfill_u64", IX86_BUILTIN_BLCFILL64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcisi,
> "__builtin_ia32_blci_u32", IX86_BUILTIN_BLCI32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcidi,
> "__builtin_ia32_blci_u64", IX86_BUILTIN_BLCI64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcicsi,
> "__builtin_ia32_blcic_u32", IX86_BUILTIN_BLCIC32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcicdi,
> "__builtin_ia32_blcic_u64", IX86_BUILTIN_BLCIC64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcmsksi,
> "__builtin_ia32_blcmsk_u32", IX86_BUILTIN_BLCMSK32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcmskdi,
> "__builtin_ia32_blcmsk_u64", IX86_BUILTIN_BLCMSK64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcssi,
> "__builtin_ia32_blcs_u32", IX86_BUILTIN_BLCS32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcsdi,
> "__builtin_ia32_blcs_u64", IX86_BUILTIN_BLCS64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blsfillsi,
> "__builtin_ia32_blsfill_u32", IX86_BUILTIN_BLSFILL32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blsfilldi,
> "__builtin_ia32_blsfill_u64", IX86_BUILTIN_BLSFILL64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blsicsi,
> "__builtin_ia32_blsic_u32", IX86_BUILTIN_BLSIC32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blsicdi,
> "__builtin_ia32_blsic_u64", IX86_BUILTIN_BLSIC64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_t1mskcsi,
> "__builtin_ia32_t1mskc_u32", IX86_BUILTIN_T1MSKC32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_t1mskcdi,
> "__builtin_ia32_t1mskc_u64", IX86_BUILTIN_T1MSKC64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_tzmsksi,
> "__builtin_ia32_tzmsk_u32", IX86_BUILTIN_TZMSK32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_tzmskdi,
> "__builtin_ia32_tzmsk_u64", IX86_BUILTIN_TZMSK64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> +
> /* F16C */
> { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps,
> "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int)
> V4SF_FTYPE_V8HI },
> { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256,
> "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN,
> (int) V8SF_FTYPE_V8HI },
> @@ -26057,6 +26123,25 @@ ix86_expand_builtin (tree exp, rtx target,
> rtx subtarget ATTRIBUTE_UNUSED,
> emit_insn (gen_lwp_slwpcb (target));
> return target;
>
> + case IX86_BUILTIN_BEXTRI32:
> + case IX86_BUILTIN_BEXTRI64:
> + arg0 = CALL_EXPR_ARG (exp, 0);
> + arg1 = CALL_EXPR_ARG (exp, 1);
> + op0 = expand_normal (arg0);
> + op1 = expand_normal (arg1);
> + icode = (fcode == IX86_BUILTIN_BEXTRI32
> + ? CODE_FOR_tbm_bextrisi
> + : CODE_FOR_tbm_bextridi);
> + if (!CONST_INT_P (op1))
> + {
> + error ("last argument must be an immediate");
> + enum machine_mode tmode = insn_data[icode].operand[0].mode;
> + return gen_reg_rtx(tmode);
> + }
> + pat = GEN_FCN (icode) (target, op0, op1);
> + if (pat) emit_insn (pat);
> + return target;
> +
> default:
> break;
> }
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 4fba57d..3518bec 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -60,6 +60,7 @@ see the files COPYING3 and COPYING.RUNTIME
> respectively. If not, see
> #define TARGET_ROUND OPTION_ISA_ROUND
> #define TARGET_ABM OPTION_ISA_ABM
> #define TARGET_BMI OPTION_ISA_BMI
> +#define TARGET_TBM OPTION_ISA_TBM
> #define TARGET_POPCNT OPTION_ISA_POPCNT
> #define TARGET_SAHF OPTION_ISA_SAHF
> #define TARGET_MOVBE OPTION_ISA_MOVBE
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 967886d..ab588e2 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -237,6 +237,18 @@
> UNSPEC_BLSMSK
> UNSPEC_BLSR
> UNSPEC_TZCNT
> +
> + ;; For TBM support
> + UNSPEC_BEXTRI
> + UNSPEC_BLCFILL
> + UNSPEC_BLCI
> + UNSPEC_BLCIC
> + UNSPEC_BLCMSK
> + UNSPEC_BLCS
> + UNSPEC_BLSFILL
> + UNSPEC_BLSIC
> + UNSPEC_T1MSKC
> + UNSPEC_TZMSK
> ])
>
> (define_c_enum "unspecv" [
> @@ -11916,6 +11928,99 @@
> [(set_attr "type" "bitmanip")
> (set_attr "mode" "<MODE>")])
>
> +;; TBM instructions.
> +(define_insn "tbm_bextri<mode>"
> + [(set (match_operand:SWI48 0 "register_operand" "=r")
> + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")
> + (match_operand:SI 2 "const_0_to_31_operand" "n")]
> + UNSPEC_BEXTRI))]
> + "TARGET_TBM"
> + "bextr\t{%2, %1, %0|%0, %1, %2}"
> + [(set_attr "type" "bitmanip")
> + (set_attr "mode" "<MODE>")])
> +
> +(define_insn "tbm_blcfill<mode>"
> + [(set (match_operand:SWI48 0 "register_operand" "=r")
> + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
> + UNSPEC_BLCFILL))]
> + "TARGET_TBM"
> + "blcfill\t{%1, %0|%0, %1}"
> + [(set_attr "type" "bitmanip")
> + (set_attr "mode" "<MODE>")])
> +
> +(define_insn "tbm_blci<mode>"
> + [(set (match_operand:SWI48 0 "register_operand" "=r")
> + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
> + UNSPEC_BLCI))]
> + "TARGET_TBM"
> + "blci\t{%1, %0|%0, %1}"
> + [(set_attr "type" "bitmanip")
> + (set_attr "mode" "<MODE>")])
> +
> +(define_insn "tbm_blcic<mode>"
> + [(set (match_operand:SWI48 0 "register_operand" "=r")
> + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
> + UNSPEC_BLCIC))]
> + "TARGET_TBM"
> + "blcic\t{%1, %0|%0, %1}"
> + [(set_attr "type" "bitmanip")
> + (set_attr "mode" "<MODE>")])
> +
> +(define_insn "tbm_blcmsk<mode>"
> + [(set (match_operand:SWI48 0 "register_operand" "=r")
> + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
> + UNSPEC_BLCMSK))]
> + "TARGET_TBM"
> + "blcmsk\t{%1, %0|%0, %1}"
> + [(set_attr "type" "bitmanip")
> + (set_attr "mode" "<MODE>")])
> +
> +(define_insn "tbm_blcs<mode>"
> + [(set (match_operand:SWI48 0 "register_operand" "=r")
> + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
> + UNSPEC_BLCS))]
> + "TARGET_TBM"
> + "blcs\t{%1, %0|%0, %1}"
> + [(set_attr "type" "bitmanip")
> + (set_attr "mode" "<MODE>")])
> +
> +(define_insn "tbm_blsfill<mode>"
> + [(set (match_operand:SWI48 0 "register_operand" "=r")
> + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
> + UNSPEC_BLSFILL))]
> + "TARGET_TBM"
> + "blsfill\t{%1, %0|%0, %1}"
> + [(set_attr "type" "bitmanip")
> + (set_attr "mode" "<MODE>")])
> +
> +(define_insn "tbm_blsic<mode>"
> + [(set (match_operand:SWI48 0 "register_operand" "=r")
> + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
> + UNSPEC_BLSIC))]
> + "TARGET_TBM"
> + "blsic\t{%1, %0|%0, %1}"
> + [(set_attr "type" "bitmanip")
> + (set_attr "mode" "<MODE>")])
> +
> +(define_insn "tbm_t1mskc<mode>"
> + [(set (match_operand:SWI48 0 "register_operand" "=r")
> + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
> + UNSPEC_T1MSKC))]
> + "TARGET_TBM"
> + "t1mskc\t{%1, %0|%0, %1}"
> + [(set_attr "type" "bitmanip")
> + (set_attr "mode" "<MODE>")])
> +
> +(define_insn "tbm_tzmsk<mode>"
> + [(set (match_operand:SWI48 0 "register_operand" "=r")
> + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
> + UNSPEC_TZMSK))]
> + "TARGET_TBM"
> + "tzmsk\t{%1, %0|%0, %1}"
> + [(set_attr "type" "bitmanip")
> + (set_attr "mode" "<MODE>")])
> +
> +
> (define_insn "bsr_rex64"
> [(set (match_operand:DI 0 "register_operand" "=r")
> (minus:DI (const_int 63)
> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> index d808804..33014f4 100644
> --- a/gcc/config/i386/i386.opt
> +++ b/gcc/config/i386/i386.opt
> @@ -357,6 +357,10 @@ mbmi
> Target Report Mask(ISA_BMI) Var(ix86_isa_flags) Save
> Support BMI built-in functions and code generation
>
> +mtbm
> +Target Report Mask(ISA_TBM) Var(ix86_isa_flags) Save
> +Support TBM built-in functions and code generation
> +
> mcx16
> Target Report Mask(ISA_CX16) Var(ix86_isa_flags) Save
> Support code generation of cmpxchg16b instruction.
> diff --git a/gcc/config/i386/tbmintrin.h b/gcc/config/i386/tbmintrin.h
> new file mode 100644
> index 0000000..7a623ef
> --- /dev/null
> +++ b/gcc/config/i386/tbmintrin.h
> @@ -0,0 +1,171 @@
> +/* Copyright (C) 2010 Free Software Foundation, Inc.
> +
> + This file is part of GCC.
> +
> + GCC is free software; you can redistribute it and/or modify
> + it under the terms of the GNU General Public License as published by
> + the Free Software Foundation; either version 3, or (at your option)
> + any later version.
> +
> + GCC is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + GNU General Public License for more details.
> +
> + Under Section 7 of GPL version 3, you are granted additional
> + permissions described in the GCC Runtime Library Exception, version
> + 3.1, as published by the Free Software Foundation.
> +
> + You should have received a copy of the GNU General Public License and
> + a copy of the GCC Runtime Library Exception along with this program;
> + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +#ifndef _X86INTRIN_H_INCLUDED
> +# error "Never use <tbmintrin.h> directly; include <x86intrin.h> instead."
> +#endif
> +
> +#ifndef __TBM__
> +# error "TBM instruction set not enabled"
> +#endif /* __TBM__ */
> +
> +#ifndef _TBMINTRIN_H_INCLUDED
> +#define _TBMINTRIN_H_INCLUDED
> +
> +#ifdef __OPTIMIZE__
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__bextri_u32 (unsigned int __X, const unsigned int __I)
> +{
> + return __builtin_ia32_bextri_u32 (__X, __I);
> +}
> +#else
> +#define __bextri_u32(X, I) \
> + ((unsigned int)__builtin_ia32_bextri_u32 ((unsigned int)(X), \
> + (unsigned int)(I)))
> +#endif /*__OPTIMIZE__ */
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blcfill_u32 (unsigned int __X)
> +{
> + return __builtin_ia32_blcfill_u32 (__X);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blsfill_u32 (unsigned int __X)
> +{
> + return __builtin_ia32_blsfill_u32 (__X);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blcs_u32 (unsigned int __X)
> +{
> + return __builtin_ia32_blcs_u32 (__X);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__tzmsk_u32 (unsigned int __X)
> +{
> + return __builtin_ia32_tzmsk_u32 (__X);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blcic_u32 (unsigned int __X)
> +{
> + return __builtin_ia32_blcic_u32 (__X);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blsic_u32 (unsigned int __X)
> +{
> + return __builtin_ia32_blsic_u32 (__X);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__t1mskc_u32 (unsigned int __X)
> +{
> + return __builtin_ia32_t1mskc_u32 (__X);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blcmsk_u32 (unsigned int __X)
> +{
> + return __builtin_ia32_blcmsk_u32 (__X);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blci_u32 (unsigned int __X)
> +{
> + return __builtin_ia32_blci_u32 (__X);
> +}
> +
> +
> +#ifdef __x86_64__
> +#ifdef __OPTIMIZE__
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__bextri_u64 (unsigned long long __X, const unsigned int __Y)
> +{
> + return __builtin_ia32_bextri_u64 (__X, __Y);
> +}
> +#else
> +#define __bextri_u64(X, I) \
> + ((unsigned long long)__builtin_ia32_bextri_u64 ((unsigned long long)(X), \
> + (unsigned long long)(I)))
> +#endif /*__OPTIMIZE__ */
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blcfill_u64 (unsigned long long __X)
> +{
> + return __builtin_ia32_blcfill_u64 (__X);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blsfill_u64 (unsigned long long __X)
> +{
> + return __builtin_ia32_blsfill_u64 (__X);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blcs_u64 (unsigned long long __X)
> +{
> + return __builtin_ia32_blcs_u64 (__X);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__tzmsk_u64 (unsigned long long __X)
> +{
> + return __builtin_ia32_tzmsk_u64 (__X);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blcic_u64 (unsigned long long __X)
> +{
> + return __builtin_ia32_blcic_u64 (__X);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blsic_u64 (unsigned long long __X)
> +{
> + return __builtin_ia32_blsic_u64 (__X);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__t1mskc_u64 (unsigned long long __X)
> +{
> + return __builtin_ia32_t1mskc_u64 (__X);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blcmsk_u64 (unsigned long long __X)
> +{
> + return __builtin_ia32_blcmsk_u64 (__X);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blci_u64 (unsigned long long __X)
> +{
> + return __builtin_ia32_blci_u64 (__X);
> +}
> +#endif /* __x86_64__ */
> +
> +#endif /* _TBMINTRIN_H_INCLUDED */
> +
> diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h
> index 9a7366b..07074ae 100644
> --- a/gcc/config/i386/x86intrin.h
> +++ b/gcc/config/i386/x86intrin.h
> @@ -85,6 +85,10 @@
> #include <bmiintrin.h>
> #endif
>
> +#ifdef __TBM__
> +#include <tbmintrin.h>
> +#endif
> +
> #ifdef __POPCNT__
> #include <popcntintrin.h>
> #endif
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index a7be54b..e6ba951 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -9370,6 +9370,31 @@ unsigned int __builtin_ia32_blsi_u32 (unsigned int);
> unsigned long long __builtin_ia32_blsi_u64 (unsigned long long);
> @end smallexample
>
> +The following built-in functions are available when @option{-mtbm} is used.
> +All of them generate the machine instruction that is part of the name.
> +@smallexample
> +unsigned int __builtin_ia32_bextri_u32 (unsigned int, unsigned int);
> +unsigned long long __builtin_ia32_bextri_u64 (unsigned long long,
> const unsigned long long);
> +unsigned int __builtin_ia32_blcfill_u32 (unsigned int);
> +unsigned long long __builtin_ia32_blcfill_u64 (unsigned long long);
> +unsigned int __builtin_ia32_blsfill_u32 (unsigned int);
> +unsigned long long __builtin_ia32_blsfill_u64 (unsigned long long);
> +unsigned int __builtin_ia32_blcs_u32 (unsigned int);
> +unsigned long long __builtin_ia32_blcs_u64 (unsigned long long);
> +unsigned int __builtin_ia32_tzmsk_u32 (unsigned int);
> +unsigned long long __builtin_ia32_tzmsk_u64 (unsigned long long);
> +unsigned int __builtin_ia32_blcic_u32 (unsigned int);
> +unsigned long long __builtin_ia32_blcic_u64 (unsigned long long);
> +unsigned int __builtin_ia32_blsic_u32 (unsigned int);
> +unsigned long long __builtin_ia32_blsic_u64 (unsigned long long);
> +unsigned int __builtin_ia32_t1mskc_u32 (unsigned int);
> +unsigned long long __builtin_ia32_t1mskc_u64 (unsigned long long);
> +unsigned int __builtin_ia32_blcmsk_u32 (unsigned int);
> +unsigned long long __builtin_ia32_blcmsk_u64 (unsigned long long);
> +unsigned int __builtin_ia32_blci_u32 (unsigned int);
> +unsigned long long __builtin_ia32_blci_u64 (unsigned long long);
> +@end smallexample
> +
> The following built-in functions are available when @option{-m3dnow} is used.
> All of them generate the machine instruction that is part of the name.
>
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 071ad27..d4eaea6 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -597,7 +597,7 @@ Objective-C and Objective-C++ Dialects}.
> -mcld -mcx16 -msahf -mmovbe -mcrc32 -mrecip @gol
> -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol
> -maes -mpclmul -mfsgsbase -mrdrnd -mf16c -mfused-madd @gol
> --msse4a -m3dnow -mpopcnt -mabm -mbmi -mfma4 -mxop -mlwp @gol
> +-msse4a -m3dnow -mpopcnt -mabm -mbmi -mtbm -mfma4 -mxop -mlwp @gol
> -mthreads -mno-align-stringops -minline-all-stringops @gol
> -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
> -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol
> @@ -12420,6 +12420,8 @@ preferred alignment to
> @option{-mpreferred-stack-boundary=2}.
> @itemx -mno-abm
> @itemx -mbmi
> @itemx -mno-bmi
> +@itemx -mtbm
> +@itemx -mno-tbm
> @opindex mmmx
> @opindex mno-mmx
> @opindex msse
> diff --git a/gcc/testsuite/g++.dg/other/i386-2.C
> b/gcc/testsuite/g++.dg/other/i386-2.C
> index f0a382a..6f7ca84 100644
> --- a/gcc/testsuite/g++.dg/other/i386-2.C
> +++ b/gcc/testsuite/g++.dg/other/i386-2.C
> @@ -1,9 +1,9 @@
> /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> -/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -mxop
> -maes -mpclmul -mpopcnt -mabm -mbmi -mlwp -mfsgsbase -mrdrnd -mf16c" }
> */
> +/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -mxop
> -maes -mpclmul -mpopcnt -mabm -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd
> -mf16c" } */
>
> /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, abmintrin.h,
> - bmiintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with
> - -O -pedantic-errors. */
> + bmiintrin.h, tbmintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h
> + are usable with -O -pedantic-errors. */
>
> #include <x86intrin.h>
>
> diff --git a/gcc/testsuite/g++.dg/other/i386-3.C
> b/gcc/testsuite/g++.dg/other/i386-3.C
> index 4b27372..fe2a097 100644
> --- a/gcc/testsuite/g++.dg/other/i386-3.C
> +++ b/gcc/testsuite/g++.dg/other/i386-3.C
> @@ -1,8 +1,8 @@
> /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> -/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx
> -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mlwp -mfsgsbase -mrdrnd
> -mf16c" } */
> +/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx
> -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mtbm -mlwp -mfsgsbase
> -mrdrnd -mf16c" } */
>
> /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, abmintrin.h,
> - bmiintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with
> + bmiintrin.h, tbmintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with
> -O -fkeep-inline-functions. */
>
> #include <x86intrin.h>
> diff --git a/gcc/testsuite/gcc.target/i386/funcspec-5.c
> b/gcc/testsuite/gcc.target/i386/funcspec-5.c
> index 5e07d85..1e18dcf 100644
> --- a/gcc/testsuite/gcc.target/i386/funcspec-5.c
> +++ b/gcc/testsuite/gcc.target/i386/funcspec-5.c
> @@ -19,6 +19,7 @@ extern void test_sse4_2
> (void) __attribute__((__target__("sse4.2")));
> extern void test_sse4a (void) __attribute__((__target__("sse4a")));
> extern void test_fma4 (void) __attribute__((__target__("fma4")));
> extern void test_ssse3 (void) __attribute__((__target__("ssse3")));
> +extern void test_tbm (void) __attribute__((__target__("tbm")));
>
> extern void test_no_abm (void) __attribute__((__target__("no-abm")));
> extern void test_no_aes (void) __attribute__((__target__("no-aes")));
> @@ -36,6 +37,7 @@ extern void test_no_sse4_2
> (void) __attribute__((__target__("no-sse4.2")));
> extern void test_no_sse4a (void) __attribute__((__target__("no-sse4a")));
> extern void test_no_fma4 (void) __attribute__((__target__("no-fma4")));
> extern void test_no_ssse3 (void) __attribute__((__target__("no-ssse3")));
> +extern void test_no_tbm (void) __attribute__((__target__("no-tbm")));
>
> extern void test_arch_i386 (void) __attribute__((__target__("arch=i386")));
> extern void test_arch_i486 (void) __attribute__((__target__("arch=i486")));
> diff --git a/gcc/testsuite/gcc.target/i386/funcspec-6.c
> b/gcc/testsuite/gcc.target/i386/funcspec-6.c
> index 81c831c..92a3cb5 100644
> --- a/gcc/testsuite/gcc.target/i386/funcspec-6.c
> +++ b/gcc/testsuite/gcc.target/i386/funcspec-6.c
> @@ -19,6 +19,7 @@ extern void test_sse4_2
> (void) __attribute__((__target__("sse4.2")));
> extern void test_sse4a (void) __attribute__((__target__("sse4a")));
> extern void test_fma4 (void) __attribute__((__target__("fma4")));
> extern void test_ssse3 (void) __attribute__((__target__("ssse3")));
> +extern void test_tbm (void) __attribute__((__target__("tbm")));
>
> extern void test_no_abm (void) __attribute__((__target__("no-abm")));
> extern void test_no_aes (void) __attribute__((__target__("no-aes")));
> @@ -36,6 +37,7 @@ extern void test_no_sse4_2
> (void) __attribute__((__target__("no-sse4.2")));
> extern void test_no_sse4a (void) __attribute__((__target__("no-sse4a")));
> extern void test_no_fma4 (void) __attribute__((__target__("no-fma4")));
> extern void test_no_ssse3 (void) __attribute__((__target__("no-ssse3")));
> +extern void test_no_tbm (void) __attribute__((__target__("no-tbm")));
>
> extern void test_arch_nocona
> (void) __attribute__((__target__("arch=nocona")));
> extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
> diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c
> b/gcc/testsuite/gcc.target/i386/sse-12.c
> index d59777b..eee7b29 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-12.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-12.c
> @@ -1,9 +1,9 @@
> /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, mm3dnow.h,
> - fma4intrin.h, abmintrin.h, bmiintrin.h, lwpintrin.h,
> + fma4intrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
> popcntintrin.h and mm_malloc.h are usable
> with -O -std=c89 -pedantic-errors. */
> /* { dg-do compile } */
> -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx
> -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mlwp -mfsgsbase
> -mrdrnd -mf16c" } */
> +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx
> -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mtbm -mlwp
> -mfsgsbase -mrdrnd -mf16c" } */
>
> #include <x86intrin.h>
>
> diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c
> b/gcc/testsuite/gcc.target/i386/sse-13.c
> index 01809d0..3fb7eff 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-13.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-13.c
> @@ -1,13 +1,13 @@
> /* { dg-do compile } */
> -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8
> -m3dnow -mavx -mxop -maes -mpclmul -mpopcnt -mabm -mlwp -mfsgsbase
> -mrdrnd -mf16c" } */
> +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8
> -m3dnow -mavx -mxop -maes -mpclmul -mpopcnt -mabm -mtbm -mlwp
> -mfsgsbase -mrdrnd -mf16c" } */
>
> #include <mm_malloc.h>
>
> /* Test that the intrinsics compile with optimization. All of them
> are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h,
> - xopintrin.h, abmintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h
> - that reference the proper builtin functions. Defining away
> - "extern" and "__inline" results in all of them being compiled as
> + xopintrin.h, abmintrin.h, tbmintrin.h, lwpintrin.h, popcntintrin.h
> + and mm3dnow.h that reference the proper builtin functions. Defining
> + away "extern" and "__inline" results in all of them being compiled as
> proper functions. */
>
> #define extern
> @@ -141,4 +141,8 @@
> #define __builtin_ia32_lwpins32 (D2, D1, F) __builtin_ia32_lwpins32 (D2, D1, 1)
> #define __builtin_ia32_lwpins64 (D2, D1, F) __builtin_ia32_lwpins64 (D2, D1, 1)
>
> +/* tbmintrin.h */
> +#define __builtin_ia32_bextri_u32 (X, Y) __builtin_ia32_bextri_u32 (X, 1)
> +#define __builtin_ia32_bextri_u64 (X, Y) __builtin_ia32_bextri_u64 (X, 1)
> +
> #include <x86intrin.h>
> diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c
> b/gcc/testsuite/gcc.target/i386/sse-14.c
> index d256e68..41bde1b 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-14.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-14.c
> @@ -1,5 +1,5 @@
> /* { dg-do compile } */
> -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8
> -m3dnow -mavx -mxop -msse4a -maes -mpclmul -mpopcnt -mabm -mlwp
> -mfsgsbase -mrdrnd -mf16c" } */
> +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8
> -m3dnow -mavx -mxop -msse4a -maes -mpclmul -mpopcnt -mabm -mtbm -mlwp
> -mfsgsbase -mrdrnd -mf16c" } */
>
> #include <mm_malloc.h>
>
> @@ -177,3 +177,9 @@ test_2 ( __lwpins32, unsigned char, unsigned int,
> unsigned int, 1)
> test_2 ( __lwpval64, void, unsigned long long, unsigned int, 1)
> test_2 ( __lwpins64, unsigned char, unsigned long long, unsigned int, 1)
> #endif
> +
> +/* tbmintrin.h */
> +test_1 ( __bextri_u32, unsigned int, unsigned int, 1)
> +#ifdef __x86_64__
> +test_1 ( __bextri_u64, unsigned long long, unsigned long long, 1)
> +#endif
> diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c
> b/gcc/testsuite/gcc.target/i386/sse-22.c
> index bb0472d..e940ef8 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-22.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-22.c
> @@ -6,10 +6,10 @@
>
> /* Test that the intrinsics compile without optimization. All of them
> are defined as inline functions in {,x,e,p,t,s,w,a}mmintrin.h,
> - xopintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h that
> - reference the proper builtin functions. Defining away "extern" and
> - "__inline" results in all of them being compiled as proper
> - functions. */
> + xopintrin.h, abmintrin.h, tbmintrin.h, lwpintrin.h, popcntintrin.h
> + and mm3dnow.h that reference the proper builtin functions. Defining
> + away "extern" and "__inline" results in all of them being compiled as
> + proper functions. */
>
> #define extern
> #define __inline
> @@ -39,7 +39,7 @@
>
>
> #ifndef DIFFERENT_PRAGMAS
> -#pragma GCC target
> ("mmx,3dnow,sse,sse2,sse3,ssse3,sse4.1,sse4.2,sse4a,aes,pclmul,xop,popcnt,abm,lwp,fsgsbase,rdrnd,f16c")
> +#pragma GCC target
> ("mmx,3dnow,sse,sse2,sse3,ssse3,sse4.1,sse4.2,sse4a,aes,pclmul,xop,popcnt,abm,tbm,lwp,fsgsbase,rdrnd,f16c")
> #endif
>
> /* Following intrinsics require immediate arguments. They
> @@ -188,3 +188,14 @@ test_2 ( __lwpins64, unsigned char, unsigned long
> long, unsigned int, 1)
> test_1 (_cvtss_sh, unsigned short, float, 1)
> test_1 (_mm_cvtps_ph, __m128i, __m128, 1)
> test_1 (_mm256_cvtps_ph, __m128i, __m256, 1)
> +
> +
> +/* tbmintrin.h (TBM). */
> +#ifdef DIFFERENT_PRAGMAS
> +#pragma GCC target ("tbm")
> +#endif
> +#include <tbmintrin.h>
> +test_1 ( __bextri_u32, unsigned int, unsigned int, 1)
> +#ifdef __x86_64__
> +test_1 ( __bextri_u64, unsigned long long, unsigned long long, 1)
> +#endif
> diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c
> b/gcc/testsuite/gcc.target/i386/sse-23.c
> index 0e15bb2..6a7b854 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-23.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-23.c
> @@ -5,7 +5,7 @@
>
> /* Test that the intrinsics compile with optimization. All of them
> are defined as inline functions in {,x,e,p,t,s,w,a}mmintrin.h,
> - xopintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h that
> + xopintrin.h, lwpintrin.h, tbmintrin.h, popcntintrin.h and mm3dnow.h that
> reference the proper builtin functions. Defining away "extern" and
> "__inline" results in all of them being compiled as proper
> functions. */
> @@ -141,7 +141,11 @@
> #define __builtin_ia32_lwpins32 (D2, D1, F) __builtin_ia32_lwpins32 (D2, D1, 1)
> #define __builtin_ia32_lwpins64 (D2, D1, F) __builtin_ia32_lwpins64 (D2, D1, 1)
>
> -#pragma GCC target
> ("3dnow,sse4,sse4a,aes,pclmul,xop,abm,popcnt,lwp,fsgsbase,rdrnd,f16c")
> +/* tbmintrin.h */
> +#define __builtin_ia32_bextri_u32 (X, Y) __builtin_ia32_bextri_u32 (X, 1)
> +#define __builtin_ia32_bextri_u64 (X, Y) __builtin_ia32_bextri_u64 (X, 1)
> +
> +#pragma GCC target
> ("3dnow,sse4,sse4a,aes,pclmul,xop,abm,popcnt,lwp,tbm,fsgsbase,rdrnd,f16c")
> #include <wmmintrin.h>
> #include <smmintrin.h>
> #include <mm3dnow.h>
> diff --git a/gcc/testsuite/gcc.target/i386/tbm-1.c
> b/gcc/testsuite/gcc.target/i386/tbm-1.c
> new file mode 100644
> index 0000000..4dddafc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/tbm-1.c
> @@ -0,0 +1,74 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mtbm" } */
> +/* { dg-final { scan-assembler "bextr\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "blcfill\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "blci\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "blcic\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "blcmsk\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "blcs\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "blsfill\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "blsic\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "t1mskc\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "tzmsk\[^\\n]*(%|)eax" } } */
> +
> +#include <x86intrin.h>
> +
> +unsigned int
> +func_bextri32 (unsigned int X)
> +{
> + return __bextri_u32 (X, 1);
> +}
> +
> +unsigned int
> +func_blcfill32 (unsigned int X)
> +{
> + return __blcfill_u32 (X);
> +}
> +
> +unsigned int
> +func_blci32 (unsigned int X)
> +{
> + return __blci_u32 (X);
> +}
> +
> +unsigned int
> +func_blcic32 (unsigned int X)
> +{
> + return __blcic_u32 (X);
> +}
> +
> +unsigned int
> +func_blcmsk32 (unsigned int X)
> +{
> + return __blcmsk_u32 (X);
> +}
> +
> +unsigned int
> +func_blcs32 (unsigned int X)
> +{
> + return __blcs_u32 (X);
> +}
> +
> +unsigned int
> +func_blsfill32 (unsigned int X)
> +{
> + return __blsfill_u32 (X);
> +}
> +
> +unsigned int
> +func_blsic32 (unsigned int X)
> +{
> + return __blsic_u32 (X);
> +}
> +
> +unsigned int
> +func_t1mskc32 (unsigned int X)
> +{
> + return __t1mskc_u32 (X);
> +}
> +
> +unsigned int
> +func_tzmsk32 (unsigned int X)
> +{
> + return __tzmsk_u32 (X);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/tbm-2.c
> b/gcc/testsuite/gcc.target/i386/tbm-2.c
> new file mode 100644
> index 0000000..e3ba375
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/tbm-2.c
> @@ -0,0 +1,74 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target lp64 } */
> +/* { dg-options "-O2 -mtbm" } */
> +/* { dg-final { scan-assembler "bextr\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "blci\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "blcic\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "blcmsk\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "blcs\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "blsfill\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "blsic\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "t1mskc\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "tzmsk\[^\\n]*(%|)rax" } } */
> +
> +#include <x86intrin.h>
> +
> +unsigned long long
> +func_bextri64 (unsigned long long X)
> +{
> + return __bextri_u64 (X, 1);
> +}
> +
> +unsigned long long
> +func_blcfill64 (unsigned long long X)
> +{
> + return __blcfill_u64 (X);
> +}
> +
> +unsigned long long
> +func_blci64 (unsigned long long X)
> +{
> + return __blci_u64 (X);
> +}
> +
> +unsigned long long
> +func_blcic64 (unsigned long long X)
> +{
> + return __blcic_u64 (X);
> +}
> +
> +unsigned long long
> +func_blcmsk64 (unsigned long long X)
> +{
> + return __blcmsk_u64 (X);
> +}
> +
> +unsigned long long
> +func_blcs64 (unsigned long long X)
> +{
> + return __blcs_u64 (X);
> +}
> +
> +unsigned long long
> +func_blsfill64 (unsigned long long X)
> +{
> + return __blsfill_u64 (X);
> +}
> +
> +unsigned long long
> +func_blsic64 (unsigned long long X)
> +{
> + return __blsic_u64 (X);
> +}
> +
> +unsigned long long
> +func_t1mskc64 (unsigned long long X)
> +{
> + return __t1mskc_u64 (X);
> +}
> +
> +unsigned long long
> +func_tzmsk64 (unsigned long long X)
> +{
> + return __tzmsk_u64 (X);
> +}
>
This patch also passes "make check -k RUNTESTFLAGS=i386.exp" on
x86-64; I am still working on a full bootstrap test.
Ok to commit?
--
Quentin
More information about the Gcc-patches
mailing list