[patch 2/3] AMD bdver2 processors - TBM

Quentin Neill quentin.neill.gnu@gmail.com
Mon Oct 18 18:59:00 GMT 2010


On Fri, Oct 15, 2010 at 1:45 PM, Quentin Neill
<quentin.neill.gnu@gmail.com> wrote:
> These patches add support for upcoming bdver2 AMD processors:
> BMI (Bit Manipulation Instructions)
> TBM (Trailing Bit Manipulation)
> FMA3 (three operand FMA) instructions
>
> The public specifications for BMI and TBM are in progress (they are
> today available under NDA).  They will appear in one of the AMD64
> Architecture Programmer's Manual Volumes 3-6.   I can post the
> mnemonics definitions if needed.  The FMA3 specification is documented
> in http://software.intel.com/en-us/avx/
>
>
> 2010-10-15  Quentin Neill  <quentin.neill.gnu@amd.com>
>
> gcc/
>        * config.gcc (i[34567]86-*-*): Include tbmintrin.h.
>    (x86_64-*-*): Likewise.
>
>        * config/i386/cpuid.h: Define TBM bit.
>
>        * config/i386/driver-i386.c (host_detect_local_cpu): Define
>        and set has_tbm.
>
>        * config/i386/i386-c.c (ix86_target_macros_internal): Check
>        isa_flag for TBM.
>
>        * config/i386/i386.c (OPTION_MASK_ISA_TBM_SET): New.
>        (OPTION_MASK_ISA_TBM_UNSET): New.
>        (ix86_handle_option): Handle -mtbm.
>        (isa_opts): Add -mtbm.
>        (enum pta_flags): Add PTA_TBM.
>        (ix86_option_override_internal): Add TBM support.
>        (ix86_valid_target_attribute_inner_p): Handle -mtbm.
>        (IX86_BUILTIN_BEXTRI32): New for TBM intrinsic.
>        (IX86_BUILTIN_BEXTRI64): Likewise.
>        (IX86_BUILTIN_BLCFILL32): Likewise.
>        (IX86_BUILTIN_BLCFILL64): Likewise.
>        (IX86_BUILTIN_BLCI32): Likewise.
>        (IX86_BUILTIN_BLCI64): Likewise.
>        (IX86_BUILTIN_BLCIC32): Likewise.
>        (IX86_BUILTIN_BLCIC64): Likewise.
>        (IX86_BUILTIN_BLCMSK32): Likewise.
>        (IX86_BUILTIN_BLCMSK64): Likewise.
>        (IX86_BUILTIN_BLCS32): Likewise.
>        (IX86_BUILTIN_BLCS64): Likewise.
>        (IX86_BUILTIN_BLSFILL32): Likewise.
>        (IX86_BUILTIN_BLSFILL64): Likewise.
>        (IX86_BUILTIN_BLSIC32): Likewise.
>        (IX86_BUILTIN_BLSIC64): Likewise.
>        (IX86_BUILTIN_T1MSKC32): Likewise.
>        (IX86_BUILTIN_T1MSKC64): Likewise.
>        (IX86_BUILTIN_TZMSK32): Likewise.
>        (IX86_BUILTIN_TZMSK64): Likewise.
>        (bdesc_args): Add TBM intrinsics.
>        (ix86_expand_builtin): Add TBM specific cases for
>        BEXTR immediate operands.
>
>        * config/i386/i386.h (TARGET_TBM): New for TBM.
>
>        * config/i386/i386.md (UNSPEC_BEXTRI): New for TBM.
>        (UNSPEC_BLCFILL): Likewise.
>        (UNSPEC_BLCI): Likewise.
>        (UNSPEC_BLCIC): Likewise.
>        (UNSPEC_BLCMSK): Likewise.
>        (UNSPEC_BLCS): Likewise.
>        (UNSPEC_BLSFILL): Likewise.
>        (UNSPEC_BLSIC): Likewise.
>        (UNSPEC_T1MSKC): Likewise.
>        (UNSPEC_TZMSK): Likewise.
>        (tbm_bextri<mode>): Likewise.
>        (tbm_blcfill<mode>): Likewise.
>        (tbm_blci<mode>): Likewise.
>        (tbm_blcic<mode>): Likewise.
>        (tbm_blcmsk<mode>): Likewise.
>        (tbm_blcs<mode>): Likewise.
>        (tbm_blsfill<mode>): Likewise.
>        (tbm_blsic<mode>): Likewise.
>        (tbm_t1mskc<mode>): Likewise.
>        (tbm_tzmsk<mode>): Likewise.
>        (bsr_rex64): Likewise.
>
>        * config/i386/i386.opt: Add -mtbm.
>
>        * config/i386/tbmintrin.h (__bextri_u32): New.
>        (__blcfill_u32): Likewise.
>        (__blsfill_u32): Likewise.
>        (__blcs_u32): Likewise.
>        (__tzmsk_u32): Likewise.
>        (__blcic_u32): Likewise.
>        (__blsic_u32): Likewise.
>        (__t1mskc_u32): Likewise.
>        (__blcmsk_u32): Likewise.
>        (__blci_u32): Likewise.
>        (__bextri_u64): Likewise.
>        (__blcfill_u64): Likewise.
>        (__blsfill_u64): Likewise.
>        (__blcs_u64): Likewise.
>        (__tzmsk_u64): Likewise.
>        (__blcic_u64): Likewise.
>        (__blsic_u64): Likewise.
>        (__t1mskc_u64): Likewise.
>        (__blcmsk_u64): Likewise.
>        (__blci_u64): Likewise.
>
>        * config/i386/x86intrin.h: Add TBM check and tbmintrin.h.
>
>        * doc/invoke.texi: Document -mtbm.
>
>        * doc/extend.texi: Document TBM built-in functions.
>
> gcc/testsuite/
>
>        * g++.dg/other/i386-2.C: Add -mtbm.
>
>        * g++.dg/other/i386-3.C: Likewise.
>
>        * gcc.target/i386/funcspec-5.c: Add tbm and no-tbm targets.
>
>        * gcc.target/i386/funcspec-6.c: Likewise.
>
>        * gcc.target/i386/sse-12.c: Add -mtbm.
>
>        * gcc.target/i386/sse-13.c: Add -mtbm and test immediate
>        operand intrinsics.
>
>        * gcc.target/i386/sse-14.c: Likewise.
>
>        * gcc.target/i386/sse-22.c: Likewise.
>
>        * gcc.target/i386/sse-23.c: Likewise.
>
>        * gcc.target/i386/tbm-1.c: New file.
>
>        * gcc.target/i386/tbm-2.c: Likewise.
>
>
> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index 4034241..f923990 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -318,7 +318,7 @@ i[34567]86-*-*)
>                       nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
>                       immintrin.h x86intrin.h avxintrin.h xopintrin.h
>                       ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
> -                      abmintrin.h bmiintrin.h"
> +                      abmintrin.h bmiintrin.h tbmintrin.h"
>        ;;
>  x86_64-*-*)
>        cpu_type=i386
> @@ -329,7 +329,7 @@ x86_64-*-*)
>                       nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
>                       immintrin.h x86intrin.h avxintrin.h xopintrin.h
>                       ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
> -                      abmintrin.h bmiintrin.h"
> +                      abmintrin.h bmiintrin.h tbmintrin.h"
>        need_64bit_hwint=yes
>        ;;
>  ia64-*-*)
> diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
> index 0f1af7f..e9d0fab 100644
> --- a/gcc/config/i386/cpuid.h
> +++ b/gcc/config/i386/cpuid.h
> @@ -54,6 +54,7 @@
>  #define bit_XOP         (1 << 11)
>  #define bit_LWP        (1 << 15)
>  #define bit_FMA4        (1 << 16)
> +#define bit_TBM         (1 << 21)
>
>  /* %edx */
>  #define bit_LM         (1 << 29)
> diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
> index a7d6808..15d3284 100644
> --- a/gcc/config/i386/driver-i386.c
> +++ b/gcc/config/i386/driver-i386.c
> @@ -397,7 +397,7 @@ const char *host_detect_local_cpu (int argc, const
> char **argv)
>   unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0;
>   unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
>   unsigned int has_fma4 = 0, has_xop = 0;
> -  unsigned int has_bmi = 0;
> +  unsigned int has_bmi = 0, has_tbm = 0;
>
>   bool arch;
>
> @@ -464,6 +464,7 @@ const char *host_detect_local_cpu (int argc, const
> char **argv)
>       has_lwp = ecx & bit_LWP;
>       has_fma4 = ecx & bit_FMA4;
>       has_xop = ecx & bit_XOP;
> +      has_tbm = ecx & bit_TBM;
>
>       has_longmode = edx & bit_LM;
>       has_3dnowp = edx & bit_3DNOWP;
> diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
> index e84347c..666e77e 100644
> --- a/gcc/config/i386/i386-c.c
> +++ b/gcc/config/i386/i386-c.c
> @@ -246,6 +246,8 @@ ix86_target_macros_internal (int isa_flag,
>     def_or_undef (parse_in, "__ABM__");
>   if (isa_flag & OPTION_MASK_ISA_BMI)
>     def_or_undef (parse_in, "__BMI__");
> +  if (isa_flag & OPTION_MASK_ISA_TBM)
> +    def_or_undef (parse_in, "__TBM__");
>   if (isa_flag & OPTION_MASK_ISA_POPCNT)
>     def_or_undef (parse_in, "__POPCNT__");
>   if (isa_flag & OPTION_MASK_ISA_FSGSBASE)
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index e003ee7..ac0772e 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -2080,6 +2080,7 @@ static int ix86_isa_flags_explicit;
>   (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
>
>  #define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
> +#define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM
>
>  #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
>  #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
> @@ -2136,6 +2137,7 @@ static int ix86_isa_flags_explicit;
>  #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
>  #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
>  #define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
> +#define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM
>  #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
>  #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
>  #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
> @@ -2446,6 +2448,20 @@ ix86_handle_option (size_t code, const char
> *arg ATTRIBUTE_UNUSED, int value)
>        }
>       return true;
>
> +    case OPT_mtbm:
> +      if (value)
> +       {
> +         ix86_isa_flags |= OPTION_MASK_ISA_TBM_SET;
> +         ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_SET;
> +       }
> +      else
> +       {
> +         ix86_isa_flags &= ~OPTION_MASK_ISA_TBM_UNSET;
> +         ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_UNSET;
> +       }
> +      return true;
> +
> +
>     case OPT_mpopcnt:
>       if (value)
>        {
> @@ -2615,6 +2631,7 @@ ix86_target_string (int isa, int flags, const
> char *arch, const char *tune,
>     { "-mmmx",         OPTION_MASK_ISA_MMX },
>     { "-mabm",         OPTION_MASK_ISA_ABM },
>     { "-mbmi",         OPTION_MASK_ISA_BMI },
> +    { "-mtbm",         OPTION_MASK_ISA_TBM },
>     { "-mpopcnt",      OPTION_MASK_ISA_POPCNT },
>     { "-mmovbe",       OPTION_MASK_ISA_MOVBE },
>     { "-mcrc32",       OPTION_MASK_ISA_CRC32 },
> @@ -2871,6 +2888,7 @@ ix86_option_override_internal (bool main_args_p)
>       PTA_RDRND = 1 << 25,
>       PTA_F16C = 1 << 26,
>       PTA_BMI = 1 << 27,
> +      PTA_TBM = 1 << 28,
>       /* if this reaches 32, need to widen struct pta flags below */
>     };
>
> @@ -3206,6 +3224,9 @@ ix86_option_override_internal (bool main_args_p)
>        if (processor_alias_table[i].flags & PTA_BMI
>            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
>          ix86_isa_flags |= OPTION_MASK_ISA_BMI;
> +       if (processor_alias_table[i].flags & PTA_TBM
> +           && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
> +         ix86_isa_flags |= OPTION_MASK_ISA_TBM;
>        if (processor_alias_table[i].flags & PTA_CX16
>            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
>          ix86_isa_flags |= OPTION_MASK_ISA_CX16;
> @@ -3951,6 +3972,7 @@ ix86_valid_target_attribute_inner_p (tree args,
> char *p_strings[])
>     IX86_ATTR_ISA ("3dnow",    OPT_m3dnow),
>     IX86_ATTR_ISA ("abm",      OPT_mabm),
>     IX86_ATTR_ISA ("bmi",      OPT_mbmi),
> +    IX86_ATTR_ISA ("tbm",      OPT_mtbm),
>     IX86_ATTR_ISA ("aes",      OPT_maes),
>     IX86_ATTR_ISA ("avx",      OPT_mavx),
>     IX86_ATTR_ISA ("mmx",      OPT_mmmx),
> @@ -22992,6 +23014,28 @@ enum ix86_builtins
>   IX86_BUILTIN_TZCNT32,
>   IX86_BUILTIN_TZCNT64,
>
> +  /* TBM instructions.  */
> +  IX86_BUILTIN_BEXTRI32,
> +  IX86_BUILTIN_BEXTRI64,
> +  IX86_BUILTIN_BLCFILL32,
> +  IX86_BUILTIN_BLCFILL64,
> +  IX86_BUILTIN_BLCI32,
> +  IX86_BUILTIN_BLCI64,
> +  IX86_BUILTIN_BLCIC32,
> +  IX86_BUILTIN_BLCIC64,
> +  IX86_BUILTIN_BLCMSK32,
> +  IX86_BUILTIN_BLCMSK64,
> +  IX86_BUILTIN_BLCS32,
> +  IX86_BUILTIN_BLCS64,
> +  IX86_BUILTIN_BLSFILL32,
> +  IX86_BUILTIN_BLSFILL64,
> +  IX86_BUILTIN_BLSIC32,
> +  IX86_BUILTIN_BLSIC64,
> +  IX86_BUILTIN_T1MSKC32,
> +  IX86_BUILTIN_T1MSKC64,
> +  IX86_BUILTIN_TZMSK32,
> +  IX86_BUILTIN_TZMSK64,
> +
>   /* FSGSBASE instructions.  */
>   IX86_BUILTIN_RDFSBASE32,
>   IX86_BUILTIN_RDFSBASE64,
> @@ -23946,6 +23990,28 @@ static const struct builtin_description bdesc_args[] =
>   { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_blsrsi,
> "__builtin_ia32_blsr_u32", IX86_BUILTIN_BLSR32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
>   { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_blsrdi,
> "__builtin_ia32_blsr_u64", IX86_BUILTIN_BLSR64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
>
> +  /* TBM */
> +  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextrisi,
> "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int)
> UINT_FTYPE_UINT_UINT },
> +  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextridi,
> "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64_UINT64 },
> +  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcfillsi,
> "__builtin_ia32_blcfill_u32", IX86_BUILTIN_BLCFILL32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> +  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcfilldi,
> "__builtin_ia32_blcfill_u64", IX86_BUILTIN_BLCFILL64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> +  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcisi,
> "__builtin_ia32_blci_u32", IX86_BUILTIN_BLCI32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> +  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcidi,
> "__builtin_ia32_blci_u64", IX86_BUILTIN_BLCI64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> +  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcicsi,
> "__builtin_ia32_blcic_u32", IX86_BUILTIN_BLCIC32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> +  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcicdi,
> "__builtin_ia32_blcic_u64", IX86_BUILTIN_BLCIC64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> +  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcmsksi,
> "__builtin_ia32_blcmsk_u32", IX86_BUILTIN_BLCMSK32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> +  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcmskdi,
> "__builtin_ia32_blcmsk_u64", IX86_BUILTIN_BLCMSK64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> +  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcssi,
> "__builtin_ia32_blcs_u32", IX86_BUILTIN_BLCS32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> +  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcsdi,
> "__builtin_ia32_blcs_u64", IX86_BUILTIN_BLCS64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> +  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blsfillsi,
> "__builtin_ia32_blsfill_u32", IX86_BUILTIN_BLSFILL32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> +  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blsfilldi,
> "__builtin_ia32_blsfill_u64", IX86_BUILTIN_BLSFILL64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> +  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blsicsi,
> "__builtin_ia32_blsic_u32", IX86_BUILTIN_BLSIC32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> +  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blsicdi,
> "__builtin_ia32_blsic_u64", IX86_BUILTIN_BLSIC64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> +  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_t1mskcsi,
> "__builtin_ia32_t1mskc_u32", IX86_BUILTIN_T1MSKC32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> +  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_t1mskcdi,
> "__builtin_ia32_t1mskc_u64", IX86_BUILTIN_T1MSKC64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> +  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_tzmsksi,
> "__builtin_ia32_tzmsk_u32", IX86_BUILTIN_TZMSK32, UNKNOWN, (int)
> UINT_FTYPE_UINT },
> +  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_tzmskdi,
> "__builtin_ia32_tzmsk_u64", IX86_BUILTIN_TZMSK64, UNKNOWN, (int)
> UINT64_FTYPE_UINT64 },
> +
>   /* F16C */
>   { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps,
> "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int)
> V4SF_FTYPE_V8HI },
>   { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256,
> "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN,
> (int) V8SF_FTYPE_V8HI },
> @@ -26057,6 +26123,25 @@ ix86_expand_builtin (tree exp, rtx target,
> rtx subtarget ATTRIBUTE_UNUSED,
>       emit_insn (gen_lwp_slwpcb (target));
>       return target;
>
> +    case IX86_BUILTIN_BEXTRI32:
> +    case IX86_BUILTIN_BEXTRI64:
> +      arg0 = CALL_EXPR_ARG (exp, 0);
> +      arg1 = CALL_EXPR_ARG (exp, 1);
> +      op0 = expand_normal (arg0);
> +      op1 = expand_normal (arg1);
> +      icode = (fcode == IX86_BUILTIN_BEXTRI32
> +              ? CODE_FOR_tbm_bextrisi
> +              : CODE_FOR_tbm_bextridi);
> +      if (!CONST_INT_P (op1))
> +        {
> +          error ("last argument must be an immediate");
> +          /* Return a dummy register of the result mode.  */
> +          return gen_reg_rtx (insn_data[icode].operand[0].mode);
> +        }
> +      pat = GEN_FCN (icode) (target, op0, op1);
> +      if (pat) emit_insn (pat);
> +      return target;
> +
>     default:
>       break;
>     }
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 4fba57d..3518bec 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -60,6 +60,7 @@ see the files COPYING3 and COPYING.RUNTIME
> respectively.  If not, see
>  #define TARGET_ROUND   OPTION_ISA_ROUND
>  #define TARGET_ABM     OPTION_ISA_ABM
>  #define TARGET_BMI     OPTION_ISA_BMI
> +#define TARGET_TBM     OPTION_ISA_TBM
>  #define TARGET_POPCNT  OPTION_ISA_POPCNT
>  #define TARGET_SAHF    OPTION_ISA_SAHF
>  #define TARGET_MOVBE   OPTION_ISA_MOVBE
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 967886d..ab588e2 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -237,6 +237,18 @@
>   UNSPEC_BLSMSK
>   UNSPEC_BLSR
>   UNSPEC_TZCNT
> +
> +  ;; For TBM support
> +  UNSPEC_BEXTRI
> +  UNSPEC_BLCFILL
> +  UNSPEC_BLCI
> +  UNSPEC_BLCIC
> +  UNSPEC_BLCMSK
> +  UNSPEC_BLCS
> +  UNSPEC_BLSFILL
> +  UNSPEC_BLSIC
> +  UNSPEC_T1MSKC
> +  UNSPEC_TZMSK
>  ])
>
>  (define_c_enum "unspecv" [
> @@ -11916,6 +11928,99 @@
>   [(set_attr "type" "bitmanip")
>    (set_attr "mode" "<MODE>")])
>
> +;; TBM instructions.
> +(define_insn "tbm_bextri<mode>"
> +  [(set (match_operand:SWI48 0 "register_operand" "=r")
> +       (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")
> +                      (match_operand:SI    2 "const_0_to_31_operand" "n")]
> +                      UNSPEC_BEXTRI))]
> +  "TARGET_TBM"
> +  "bextr\t{%2, %1, %0|%0, %1, %2}"
> +  [(set_attr "type" "bitmanip")
> +   (set_attr "mode" "<MODE>")])
> +
> +(define_insn "tbm_blcfill<mode>"
> +  [(set (match_operand:SWI48 0 "register_operand" "=r")
> +       (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
> +                      UNSPEC_BLCFILL))]
> +  "TARGET_TBM"
> +  "blcfill\t{%1, %0|%0, %1}"
> +  [(set_attr "type" "bitmanip")
> +   (set_attr "mode" "<MODE>")])
> +
> +(define_insn "tbm_blci<mode>"
> +  [(set (match_operand:SWI48 0 "register_operand" "=r")
> +       (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
> +                      UNSPEC_BLCI))]
> +  "TARGET_TBM"
> +  "blci\t{%1, %0|%0, %1}"
> +  [(set_attr "type" "bitmanip")
> +   (set_attr "mode" "<MODE>")])
> +
> +(define_insn "tbm_blcic<mode>"
> +  [(set (match_operand:SWI48 0 "register_operand" "=r")
> +       (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
> +                      UNSPEC_BLCIC))]
> +  "TARGET_TBM"
> +  "blcic\t{%1, %0|%0, %1}"
> +  [(set_attr "type" "bitmanip")
> +   (set_attr "mode" "<MODE>")])
> +
> +(define_insn "tbm_blcmsk<mode>"
> +  [(set (match_operand:SWI48 0 "register_operand" "=r")
> +       (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
> +                      UNSPEC_BLCMSK))]
> +  "TARGET_TBM"
> +  "blcmsk\t{%1, %0|%0, %1}"
> +  [(set_attr "type" "bitmanip")
> +   (set_attr "mode" "<MODE>")])
> +
> +(define_insn "tbm_blcs<mode>"
> +  [(set (match_operand:SWI48 0 "register_operand" "=r")
> +       (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
> +                      UNSPEC_BLCS))]
> +  "TARGET_TBM"
> +  "blcs\t{%1, %0|%0, %1}"
> +  [(set_attr "type" "bitmanip")
> +   (set_attr "mode" "<MODE>")])
> +
> +(define_insn "tbm_blsfill<mode>"
> +  [(set (match_operand:SWI48 0 "register_operand" "=r")
> +       (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
> +                      UNSPEC_BLSFILL))]
> +  "TARGET_TBM"
> +  "blsfill\t{%1, %0|%0, %1}"
> +  [(set_attr "type" "bitmanip")
> +   (set_attr "mode" "<MODE>")])
> +
> +(define_insn "tbm_blsic<mode>"
> +  [(set (match_operand:SWI48 0 "register_operand" "=r")
> +       (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
> +                      UNSPEC_BLSIC))]
> +  "TARGET_TBM"
> +  "blsic\t{%1, %0|%0, %1}"
> +  [(set_attr "type" "bitmanip")
> +   (set_attr "mode" "<MODE>")])
> +
> +(define_insn "tbm_t1mskc<mode>"
> +  [(set (match_operand:SWI48 0 "register_operand" "=r")
> +       (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
> +                      UNSPEC_T1MSKC))]
> +  "TARGET_TBM"
> +  "t1mskc\t{%1, %0|%0, %1}"
> +  [(set_attr "type" "bitmanip")
> +   (set_attr "mode" "<MODE>")])
> +
> +(define_insn "tbm_tzmsk<mode>"
> +  [(set (match_operand:SWI48 0 "register_operand" "=r")
> +       (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
> +                      UNSPEC_TZMSK))]
> +  "TARGET_TBM"
> +  "tzmsk\t{%1, %0|%0, %1}"
> +  [(set_attr "type" "bitmanip")
> +   (set_attr "mode" "<MODE>")])
> +
> +
>  (define_insn "bsr_rex64"
>   [(set (match_operand:DI 0 "register_operand" "=r")
>        (minus:DI (const_int 63)
> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> index d808804..33014f4 100644
> --- a/gcc/config/i386/i386.opt
> +++ b/gcc/config/i386/i386.opt
> @@ -357,6 +357,10 @@ mbmi
>  Target Report Mask(ISA_BMI) Var(ix86_isa_flags) Save
>  Support BMI built-in functions and code generation
>
> +mtbm
> +Target Report Mask(ISA_TBM) Var(ix86_isa_flags) Save
> +Support TBM built-in functions and code generation
> +
>  mcx16
>  Target Report Mask(ISA_CX16) Var(ix86_isa_flags) Save
>  Support code generation of cmpxchg16b instruction.
> diff --git a/gcc/config/i386/tbmintrin.h b/gcc/config/i386/tbmintrin.h
> new file mode 100644
> index 0000000..7a623ef
> --- /dev/null
> +++ b/gcc/config/i386/tbmintrin.h
> @@ -0,0 +1,171 @@
> +/* Copyright (C) 2010 Free Software Foundation, Inc.
> +
> +   This file is part of GCC.
> +
> +   GCC is free software; you can redistribute it and/or modify
> +   it under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3, or (at your option)
> +   any later version.
> +
> +   GCC is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +   GNU General Public License for more details.
> +
> +   Under Section 7 of GPL version 3, you are granted additional
> +   permissions described in the GCC Runtime Library Exception, version
> +   3.1, as published by the Free Software Foundation.
> +
> +   You should have received a copy of the GNU General Public License and
> +   a copy of the GCC Runtime Library Exception along with this program;
> +   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#ifndef _X86INTRIN_H_INCLUDED
> +# error "Never use <tbmintrin.h> directly; include <x86intrin.h> instead."
> +#endif
> +
> +#ifndef __TBM__
> +# error "TBM instruction set not enabled"
> +#endif /* __TBM__ */
> +
> +#ifndef _TBMINTRIN_H_INCLUDED
> +#define _TBMINTRIN_H_INCLUDED
> +
> +#ifdef __OPTIMIZE__
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__bextri_u32 (unsigned int __X, const unsigned int __I)
> +{
> +       return __builtin_ia32_bextri_u32 (__X, __I);
> +}
> +#else
> +#define __bextri_u32(X, I)                                               \
> +       ((unsigned int)__builtin_ia32_bextri_u32 ((unsigned int)(X),    \
> +                                                  (unsigned int)(I)))
> +#endif /*__OPTIMIZE__ */
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blcfill_u32 (unsigned int __X)
> +{
> +       return __builtin_ia32_blcfill_u32 (__X);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blsfill_u32 (unsigned int __X)
> +{
> +       return __builtin_ia32_blsfill_u32 (__X);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blcs_u32 (unsigned int __X)
> +{
> +       return __builtin_ia32_blcs_u32 (__X);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__tzmsk_u32 (unsigned int __X)
> +{
> +       return __builtin_ia32_tzmsk_u32 (__X);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blcic_u32 (unsigned int __X)
> +{
> +       return __builtin_ia32_blcic_u32 (__X);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blsic_u32 (unsigned int __X)
> +{
> +       return __builtin_ia32_blsic_u32 (__X);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__t1mskc_u32 (unsigned int __X)
> +{
> +       return __builtin_ia32_t1mskc_u32 (__X);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blcmsk_u32 (unsigned int __X)
> +{
> +       return __builtin_ia32_blcmsk_u32 (__X);
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blci_u32 (unsigned int __X)
> +{
> +       return __builtin_ia32_blci_u32 (__X);
> +}
> +
> +
> +#ifdef __x86_64__
> +#ifdef __OPTIMIZE__
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__bextri_u64 (unsigned long long __X, const unsigned int __Y)
> +{
> +       return __builtin_ia32_bextri_u64 (__X, __Y);
> +}
> +#else
> +#define __bextri_u64(X, I)                                               \
> +       ((unsigned long long)__builtin_ia32_bextri_u64 ((unsigned long long)(X), \
> +                                                        (unsigned long long)(I)))
> +#endif /*__OPTIMIZE__ */
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blcfill_u64 (unsigned long long __X)
> +{
> +       return __builtin_ia32_blcfill_u64 (__X);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blsfill_u64 (unsigned long long __X)
> +{
> +       return __builtin_ia32_blsfill_u64 (__X);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blcs_u64 (unsigned long long __X)
> +{
> +       return __builtin_ia32_blcs_u64 (__X);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__tzmsk_u64 (unsigned long long __X)
> +{
> +       return __builtin_ia32_tzmsk_u64 (__X);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blcic_u64 (unsigned long long __X)
> +{
> +       return __builtin_ia32_blcic_u64 (__X);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blsic_u64 (unsigned long long __X)
> +{
> +       return __builtin_ia32_blsic_u64 (__X);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__t1mskc_u64 (unsigned long long __X)
> +{
> +       return __builtin_ia32_t1mskc_u64 (__X);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blcmsk_u64 (unsigned long long __X)
> +{
> +       return __builtin_ia32_blcmsk_u64 (__X);
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> +__blci_u64 (unsigned long long __X)
> +{
> +       return __builtin_ia32_blci_u64 (__X);
> +}
> +#endif /* __x86_64__  */
> +
> +#endif /* _TBMINTRIN_H_INCLUDED */
> +
> diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h
> index 9a7366b..07074ae 100644
> --- a/gcc/config/i386/x86intrin.h
> +++ b/gcc/config/i386/x86intrin.h
> @@ -85,6 +85,10 @@
>  #include <bmiintrin.h>
>  #endif
>
> +#ifdef __TBM__
> +#include <tbmintrin.h>
> +#endif
> +
>  #ifdef __POPCNT__
>  #include <popcntintrin.h>
>  #endif
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index a7be54b..e6ba951 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -9370,6 +9370,31 @@ unsigned int __builtin_ia32_blsi_u32 (unsigned int);
>  unsigned long long __builtin_ia32_blsi_u64 (unsigned long long);
>  @end smallexample
>
> +The following built-in functions are available when @option{-mtbm} is used.
> +All of them generate the machine instruction that is part of the name.
> +@smallexample
> +unsigned int __builtin_ia32_bextri_u32 (unsigned int, unsigned int);
> +unsigned long long __builtin_ia32_bextri_u64 (unsigned long long,
> const unsigned long long);
> +unsigned int __builtin_ia32_blcfill_u32 (unsigned int);
> +unsigned long long __builtin_ia32_blcfill_u64 (unsigned long long);
> +unsigned int __builtin_ia32_blsfill_u32 (unsigned int);
> +unsigned long long __builtin_ia32_blsfill_u64 (unsigned long long);
> +unsigned int __builtin_ia32_blcs_u32 (unsigned int);
> +unsigned long long __builtin_ia32_blcs_u64 (unsigned long long);
> +unsigned int __builtin_ia32_tzmsk_u32 (unsigned int);
> +unsigned long long __builtin_ia32_tzmsk_u64 (unsigned long long);
> +unsigned int __builtin_ia32_blcic_u32 (unsigned int);
> +unsigned long long __builtin_ia32_blcic_u64 (unsigned long long);
> +unsigned int __builtin_ia32_blsic_u32 (unsigned int);
> +unsigned long long __builtin_ia32_blsic_u64 (unsigned long long);
> +unsigned int __builtin_ia32_t1mskc_u32 (unsigned int);
> +unsigned long long __builtin_ia32_t1mskc_u64 (unsigned long long);
> +unsigned int __builtin_ia32_blcmsk_u32 (unsigned int);
> +unsigned long long __builtin_ia32_blcmsk_u64 (unsigned long long);
> +unsigned int __builtin_ia32_blci_u32 (unsigned int);
> +unsigned long long __builtin_ia32_blci_u64 (unsigned long long);
> +@end smallexample
> +
>  The following built-in functions are available when @option{-m3dnow} is used.
>  All of them generate the machine instruction that is part of the name.
>
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 071ad27..d4eaea6 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -597,7 +597,7 @@ Objective-C and Objective-C++ Dialects}.
>  -mcld -mcx16 -msahf -mmovbe -mcrc32 -mrecip @gol
>  -mmmx  -msse  -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol
>  -maes -mpclmul -mfsgsbase -mrdrnd -mf16c -mfused-madd @gol
> --msse4a -m3dnow -mpopcnt -mabm -mbmi -mfma4 -mxop -mlwp @gol
> +-msse4a -m3dnow -mpopcnt -mabm -mbmi -mtbm -mfma4 -mxop -mlwp @gol
>  -mthreads  -mno-align-stringops  -minline-all-stringops @gol
>  -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
>  -mpush-args  -maccumulate-outgoing-args  -m128bit-long-double @gol
> @@ -12420,6 +12420,8 @@ preferred alignment to
> @option{-mpreferred-stack-boundary=2}.
>  @itemx -mno-abm
>  @itemx -mbmi
>  @itemx -mno-bmi
> +@itemx -mtbm
> +@itemx -mno-tbm
>  @opindex mmmx
>  @opindex mno-mmx
>  @opindex msse
> diff --git a/gcc/testsuite/g++.dg/other/i386-2.C
> b/gcc/testsuite/g++.dg/other/i386-2.C
> index f0a382a..6f7ca84 100644
> --- a/gcc/testsuite/g++.dg/other/i386-2.C
> +++ b/gcc/testsuite/g++.dg/other/i386-2.C
> @@ -1,9 +1,9 @@
>  /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> -/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -mxop
> -maes -mpclmul -mpopcnt -mabm -mbmi -mlwp -mfsgsbase -mrdrnd -mf16c" }
> */
> +/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -mxop
> -maes -mpclmul -mpopcnt -mabm -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd
> -mf16c" } */
>
>  /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, abmintrin.h,
> -   bmiintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with
> -   -O -pedantic-errors.  */
> +   bmiintrin.h, tbmintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h
> +   are usable with -O -pedantic-errors.  */
>
>  #include <x86intrin.h>
>
> diff --git a/gcc/testsuite/g++.dg/other/i386-3.C
> b/gcc/testsuite/g++.dg/other/i386-3.C
> index 4b27372..fe2a097 100644
> --- a/gcc/testsuite/g++.dg/other/i386-3.C
> +++ b/gcc/testsuite/g++.dg/other/i386-3.C
> @@ -1,8 +1,8 @@
>  /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> -/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx
> -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mlwp -mfsgsbase -mrdrnd
> -mf16c" } */
> +/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx
> -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mtbm -mlwp -mfsgsbase
> -mrdrnd -mf16c" } */
>
>  /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, abmintrin.h,
> -   bmiintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with
> +   bmiintrin.h, tbmintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with
>    -O -fkeep-inline-functions.  */
>
>  #include <x86intrin.h>
> diff --git a/gcc/testsuite/gcc.target/i386/funcspec-5.c
> b/gcc/testsuite/gcc.target/i386/funcspec-5.c
> index 5e07d85..1e18dcf 100644
> --- a/gcc/testsuite/gcc.target/i386/funcspec-5.c
> +++ b/gcc/testsuite/gcc.target/i386/funcspec-5.c
> @@ -19,6 +19,7 @@ extern void test_sse4_2
> (void)                  __attribute__((__target__("sse4.2")));
>  extern void test_sse4a (void)                  __attribute__((__target__("sse4a")));
>  extern void test_fma4 (void)                   __attribute__((__target__("fma4")));
>  extern void test_ssse3 (void)                  __attribute__((__target__("ssse3")));
> +extern void test_tbm (void)                    __attribute__((__target__("tbm")));
>
>  extern void test_no_abm (void)                 __attribute__((__target__("no-abm")));
>  extern void test_no_aes (void)                 __attribute__((__target__("no-aes")));
> @@ -36,6 +37,7 @@ extern void test_no_sse4_2
> (void)          __attribute__((__target__("no-sse4.2")));
>  extern void test_no_sse4a (void)               __attribute__((__target__("no-sse4a")));
>  extern void test_no_fma4 (void)                        __attribute__((__target__("no-fma4")));
>  extern void test_no_ssse3 (void)               __attribute__((__target__("no-ssse3")));
> +extern void test_no_tbm (void)                 __attribute__((__target__("no-tbm")));
>
>  extern void test_arch_i386 (void)              __attribute__((__target__("arch=i386")));
>  extern void test_arch_i486 (void)              __attribute__((__target__("arch=i486")));
> diff --git a/gcc/testsuite/gcc.target/i386/funcspec-6.c
> b/gcc/testsuite/gcc.target/i386/funcspec-6.c
> index 81c831c..92a3cb5 100644
> --- a/gcc/testsuite/gcc.target/i386/funcspec-6.c
> +++ b/gcc/testsuite/gcc.target/i386/funcspec-6.c
> @@ -19,6 +19,7 @@ extern void test_sse4_2
> (void)                  __attribute__((__target__("sse4.2")));
>  extern void test_sse4a (void)                  __attribute__((__target__("sse4a")));
>  extern void test_fma4 (void)                   __attribute__((__target__("fma4")));
>  extern void test_ssse3 (void)                  __attribute__((__target__("ssse3")));
> +extern void test_tbm (void)                    __attribute__((__target__("tbm")));
>
>  extern void test_no_abm (void)                 __attribute__((__target__("no-abm")));
>  extern void test_no_aes (void)                 __attribute__((__target__("no-aes")));
> @@ -36,6 +37,7 @@ extern void test_no_sse4_2
> (void)          __attribute__((__target__("no-sse4.2")));
>  extern void test_no_sse4a (void)               __attribute__((__target__("no-sse4a")));
>  extern void test_no_fma4 (void)                        __attribute__((__target__("no-fma4")));
>  extern void test_no_ssse3 (void)               __attribute__((__target__("no-ssse3")));
> +extern void test_no_tbm (void)                 __attribute__((__target__("no-tbm")));
>
>  extern void test_arch_nocona
> (void)          __attribute__((__target__("arch=nocona")));
>  extern void test_arch_core2 (void)             __attribute__((__target__("arch=core2")));
> diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c
> b/gcc/testsuite/gcc.target/i386/sse-12.c
> index d59777b..eee7b29 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-12.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-12.c
> @@ -1,9 +1,9 @@
>  /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, mm3dnow.h,
> -   fma4intrin.h, abmintrin.h, bmiintrin.h, lwpintrin.h,
> +   fma4intrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
>    popcntintrin.h and mm_malloc.h are usable
>    with -O -std=c89 -pedantic-errors.  */
>  /* { dg-do compile } */
> -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx
> -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mlwp -mfsgsbase
> -mrdrnd -mf16c" } */
> +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx
> -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mtbm -mlwp
> -mfsgsbase -mrdrnd -mf16c" } */
>
>  #include <x86intrin.h>
>
> diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c
> b/gcc/testsuite/gcc.target/i386/sse-13.c
> index 01809d0..3fb7eff 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-13.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-13.c
> @@ -1,13 +1,13 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8
> -m3dnow -mavx -mxop -maes -mpclmul -mpopcnt -mabm -mlwp -mfsgsbase
> -mrdrnd -mf16c" } */
> +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8
> -m3dnow -mavx -mxop -maes -mpclmul -mpopcnt -mabm -mtbm -mlwp
> -mfsgsbase -mrdrnd -mf16c" } */
>
>  #include <mm_malloc.h>
>
>  /* Test that the intrinsics compile with optimization.  All of them
>    are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h,
> -   xopintrin.h, abmintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h
> -   that reference the proper builtin functions.  Defining away
> -   "extern" and "__inline" results in all of them being compiled as
> +   xopintrin.h, abmintrin.h, tbmintrin.h, lwpintrin.h, popcntintrin.h
> +   and mm3dnow.h that reference the proper builtin functions.  Defining
> +   away "extern" and "__inline" results in all of them being compiled as
>    proper functions.  */
>
>  #define extern
> @@ -141,4 +141,8 @@
>  #define __builtin_ia32_lwpins32 (D2, D1, F) __builtin_ia32_lwpins32 (D2, D1, 1)
>  #define __builtin_ia32_lwpins64 (D2, D1, F) __builtin_ia32_lwpins64 (D2, D1, 1)
>
> +/* tbmintrin.h */
> +#define __builtin_ia32_bextri_u32 (X, Y) __builtin_ia32_bextri_u32 (X, 1)
> +#define __builtin_ia32_bextri_u64 (X, Y) __builtin_ia32_bextri_u64 (X, 1)
> +
>  #include <x86intrin.h>
> diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c
> b/gcc/testsuite/gcc.target/i386/sse-14.c
> index d256e68..41bde1b 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-14.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-14.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8
> -m3dnow -mavx -mxop -msse4a -maes -mpclmul -mpopcnt -mabm -mlwp
> -mfsgsbase -mrdrnd -mf16c" } */
> +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8
> -m3dnow -mavx -mxop -msse4a -maes -mpclmul -mpopcnt -mabm -mtbm -mlwp
> -mfsgsbase -mrdrnd -mf16c" } */
>
>  #include <mm_malloc.h>
>
> @@ -177,3 +177,9 @@ test_2 ( __lwpins32, unsigned char, unsigned int,
> unsigned int, 1)
>  test_2 ( __lwpval64, void, unsigned long long, unsigned int, 1)
>  test_2 ( __lwpins64, unsigned char, unsigned long long, unsigned int, 1)
>  #endif
> +
> +/* tbmintrin.h */
> +test_1 ( __bextri_u32, unsigned int, unsigned int, 1)
> +#ifdef __x86_64__
> +test_1 ( __bextri_u64, unsigned long long, unsigned long long, 1)
> +#endif
> diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c
> b/gcc/testsuite/gcc.target/i386/sse-22.c
> index bb0472d..e940ef8 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-22.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-22.c
> @@ -6,10 +6,10 @@
>
>  /* Test that the intrinsics compile without optimization.  All of them
>    are defined as inline functions in {,x,e,p,t,s,w,a}mmintrin.h,
> -   xopintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h that
> -   reference the proper builtin functions.  Defining away "extern" and
> -   "__inline" results in all of them being compiled as proper
> -   functions.  */
> +   xopintrin.h, abmintrin.h, tbmintrin.h, lwpintrin.h, popcntintrin.h
> +   and mm3dnow.h that reference the proper builtin functions.  Defining
> +   away "extern" and "__inline" results in all of them being compiled as
> +   proper functions.  */
>
>  #define extern
>  #define __inline
> @@ -39,7 +39,7 @@
>
>
>  #ifndef DIFFERENT_PRAGMAS
> -#pragma GCC target
> ("mmx,3dnow,sse,sse2,sse3,ssse3,sse4.1,sse4.2,sse4a,aes,pclmul,xop,popcnt,abm,lwp,fsgsbase,rdrnd,f16c")
> +#pragma GCC target
> ("mmx,3dnow,sse,sse2,sse3,ssse3,sse4.1,sse4.2,sse4a,aes,pclmul,xop,popcnt,abm,tbm,lwp,fsgsbase,rdrnd,f16c")
>  #endif
>
>  /* Following intrinsics require immediate arguments.  They
> @@ -188,3 +188,14 @@ test_2 ( __lwpins64, unsigned char, unsigned long
> long, unsigned int, 1)
>  test_1 (_cvtss_sh, unsigned short, float, 1)
>  test_1 (_mm_cvtps_ph, __m128i, __m128, 1)
>  test_1 (_mm256_cvtps_ph, __m128i, __m256, 1)
> +
> +
> +/* tbmintrin.h (TBM). */
> +#ifdef DIFFERENT_PRAGMAS
> +#pragma GCC target ("tbm")
> +#endif
> +#include <tbmintrin.h>
> +test_1 ( __bextri_u32, unsigned int, unsigned int, 1)
> +#ifdef __x86_64__
> +test_1 ( __bextri_u64, unsigned long long, unsigned long long, 1)
> +#endif
> diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c
> b/gcc/testsuite/gcc.target/i386/sse-23.c
> index 0e15bb2..6a7b854 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-23.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-23.c
> @@ -5,7 +5,7 @@
>
>  /* Test that the intrinsics compile with optimization.  All of them
>    are defined as inline functions in {,x,e,p,t,s,w,a}mmintrin.h,
> -   xopintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h that
> +   xopintrin.h, lwpintrin.h, tbmintrin.h, popcntintrin.h and mm3dnow.h that
>    reference the proper builtin functions.  Defining away "extern" and
>    "__inline" results in all of them being compiled as proper
>    functions.  */
> @@ -141,7 +141,11 @@
>  #define __builtin_ia32_lwpins32 (D2, D1, F) __builtin_ia32_lwpins32 (D2, D1, 1)
>  #define __builtin_ia32_lwpins64 (D2, D1, F) __builtin_ia32_lwpins64 (D2, D1, 1)
>
> -#pragma GCC target
> ("3dnow,sse4,sse4a,aes,pclmul,xop,abm,popcnt,lwp,fsgsbase,rdrnd,f16c")
> +/* tbmintrin.h */
> +#define __builtin_ia32_bextri_u32 (X, Y) __builtin_ia32_bextr_u32 (X, 1)
> +#define __builtin_ia32_bextri_u64 (X, Y) __builtin_ia32_bextr_u64 (X, 1)
> +
> +#pragma GCC target
> ("3dnow,sse4,sse4a,aes,pclmul,xop,abm,popcnt,lwp,tbm,fsgsbase,rdrnd,f16c")
>  #include <wmmintrin.h>
>  #include <smmintrin.h>
>  #include <mm3dnow.h>
> diff --git a/gcc/testsuite/gcc.target/i386/tbm-1.c
> b/gcc/testsuite/gcc.target/i386/tbm-1.c
> new file mode 100644
> index 0000000..4dddafc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/tbm-1.c
> @@ -0,0 +1,74 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mtbm" } */
> +/* { dg-final { scan-assembler "bextr\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "blcfill\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "blci\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "blcic\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "blcmsk\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "blcs\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "blsfill\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "blsic\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "t1mskc\[^\\n]*(%|)eax" } } */
> +/* { dg-final { scan-assembler "tzmsk\[^\\n]*(%|)eax" } } */
> +
> +#include <x86intrin.h>
> +
> +unsigned int
> +func_bextri32 (unsigned int X)
> +{
> +  return __bextri_u32 (X, 1);
> +}
> +
> +unsigned int
> +func_blcfill32 (unsigned int X)
> +{
> +  return __blcfill_u32 (X);
> +}
> +
> +unsigned int
> +func_blci32 (unsigned int X)
> +{
> +  return __blci_u32 (X);
> +}
> +
> +unsigned int
> +func_blcic32 (unsigned int X)
> +{
> +  return __blcic_u32 (X);
> +}
> +
> +unsigned int
> +func_blcmsk32 (unsigned int X)
> +{
> +  return __blcmsk_u32 (X);
> +}
> +
> +unsigned int
> +func_blcs32 (unsigned int X)
> +{
> +  return __blcs_u32 (X);
> +}
> +
> +unsigned int
> +func_blsfill32 (unsigned int X)
> +{
> +  return __blsfill_u32 (X);
> +}
> +
> +unsigned int
> +func_blsic32 (unsigned int X)
> +{
> +  return __blsic_u32 (X);
> +}
> +
> +unsigned int
> +func_t1mskc32 (unsigned int X)
> +{
> +  return __t1mskc_u32 (X);
> +}
> +
> +unsigned int
> +func_tzmsk32 (unsigned int X)
> +{
> +  return __tzmsk_u32 (X);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/tbm-2.c
> b/gcc/testsuite/gcc.target/i386/tbm-2.c
> new file mode 100644
> index 0000000..e3ba375
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/tbm-2.c
> @@ -0,0 +1,74 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target lp64 } */
> +/* { dg-options "-O2 -mtbm" } */
> +/* { dg-final { scan-assembler "bextr\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "blci\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "blcic\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "blcmsk\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "blcs\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "blsfill\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "blsic\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "t1mskc\[^\\n]*(%|)rax" } } */
> +/* { dg-final { scan-assembler "tzmsk\[^\\n]*(%|)rax" } } */
> +
> +#include <x86intrin.h>
> +
> +unsigned long long
> +func_bextri64 (unsigned long long X)
> +{
> +  return __bextri_u64 (X, 1);
> +}
> +
> +unsigned long long
> +func_blcfill64 (unsigned long long X)
> +{
> +  return __blcfill_u64 (X);
> +}
> +
> +unsigned long long
> +func_blci64 (unsigned long long X)
> +{
> +  return __blci_u64 (X);
> +}
> +
> +unsigned long long
> +func_blcic64 (unsigned long long X)
> +{
> +  return __blcic_u64 (X);
> +}
> +
> +unsigned long long
> +func_blcmsk64 (unsigned long long X)
> +{
> +  return __blcmsk_u64 (X);
> +}
> +
> +unsigned long long
> +func_blcs64 (unsigned long long X)
> +{
> +  return __blcs_u64 (X);
> +}
> +
> +unsigned long long
> +func_blsfill64 (unsigned long long X)
> +{
> +  return __blsfill_u64 (X);
> +}
> +
> +unsigned long long
> +func_blsic64 (unsigned long long X)
> +{
> +  return __blsic_u64 (X);
> +}
> +
> +unsigned long long
> +func_t1mskc64 (unsigned long long X)
> +{
> +  return __t1mskc_u64 (X);
> +}
> +
> +unsigned long long
> +func_tzmsk64 (unsigned long long X)
> +{
> +  return __tzmsk_u64 (X);
> +}
>

This patch also passes "make check -k RUNTESTFLAGS=i386.exp" on
x86-64; I am still working on a full bootstrap test.

Ok to commit?
--
Quentin



More information about the Gcc-patches mailing list