Index: gcc/config/rs6000/rs6000.opt =================================================================== --- gcc/config/rs6000/rs6000.opt (revision 229970) +++ gcc/config/rs6000/rs6000.opt (working copy) @@ -601,6 +601,34 @@ moptimize-swaps Target Undocumented Var(rs6000_optimize_swaps) Init(1) Save Analyze and remove doubleword swaps from VSX computations. +mpower9-fusion +Target Report Mask(P9_FUSION) Var(rs6000_isa_flags) +Fuse certain operations together for better performance on power9. + +mpower9-vector +Target Report Mask(P9_VECTOR) Var(rs6000_isa_flags) +Use/do not use vector and scalar instructions added in ISA 3.0. + +mpower9-dform +Target Undocumented Mask(P9_DFORM) Var(rs6000_isa_flags) +Use/do not use vector and scalar instructions added in ISA 3.0. + +mpower9-minmax +Target Undocumented Mask(P9_MINMAX) Var(rs6000_isa_flags) +Use/do not use the new min/max instructions defined in ISA 3.0. + +mtoc-fusion +Target Undocumented Mask(TOC_FUSION) Var(rs6000_isa_flags) +Fuse medium/large code model toc references with the memory instruction. + +mmodulo +Target Report Mask(MODULO) Var(rs6000_isa_flags) +Generate the integer modulo instructions. + mfloat128 Target Report Mask(FLOAT128) Var(rs6000_isa_flags) Enable/disable IEEE 128-bit floating point via the __float128 keyword. + +mfloat128-hardware +Target Report Mask(FLOAT128_HW) Var(rs6000_isa_flags) +Enable/disable using IEEE 128-bit floating point instructions. Index: gcc/config/rs6000/rs6000-cpus.def =================================================================== --- gcc/config/rs6000/rs6000-cpus.def (revision 229970) +++ gcc/config/rs6000/rs6000-cpus.def (working copy) @@ -60,6 +60,15 @@ | OPTION_MASK_QUAD_MEMORY_ATOMIC \ | OPTION_MASK_UPPER_REGS_SF) +/* Add ISEL back into ISA 3.0, since it is supposed to be a win. Do not add + P9_DFORM or P9_MINMAX until they are fully debugged. */ +#define ISA_3_0_MASKS_SERVER (ISA_2_7_MASKS_SERVER \ + | OPTION_MASK_FLOAT128_HW \ + | OPTION_MASK_ISEL \ + | OPTION_MASK_MODULO \ + | OPTION_MASK_P9_FUSION \ + | OPTION_MASK_P9_VECTOR) + #define POWERPC_7400_MASK (OPTION_MASK_PPC_GFXOPT | OPTION_MASK_ALTIVEC) /* Deal with ports that do not have -mstrict-align. */ @@ -87,10 +96,15 @@ | OPTION_MASK_ISEL \ | OPTION_MASK_MFCRF \ | OPTION_MASK_MFPGPR \ + | OPTION_MASK_MODULO \ | OPTION_MASK_MULHW \ | OPTION_MASK_NO_UPDATE \ | OPTION_MASK_P8_FUSION \ | OPTION_MASK_P8_VECTOR \ + | OPTION_MASK_P9_DFORM \ + | OPTION_MASK_P9_FUSION \ + | OPTION_MASK_P9_MINMAX \ + | OPTION_MASK_P9_VECTOR \ | OPTION_MASK_POPCNTB \ | OPTION_MASK_POPCNTD \ | OPTION_MASK_POWERPC64 \ @@ -101,6 +115,7 @@ | OPTION_MASK_RECIP_PRECISION \ | OPTION_MASK_SOFT_FLOAT \ | OPTION_MASK_STRICT_ALIGN_OPTIONAL \ + | OPTION_MASK_TOC_FUSION \ | OPTION_MASK_UPPER_REGS_DF \ | OPTION_MASK_UPPER_REGS_SF \ | OPTION_MASK_VSX \ @@ -195,6 +210,7 @@ RS6000_CPU ("power7", PROCESSOR_POWER7, | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD | MASK_VSX | MASK_RECIP_PRECISION | OPTION_MASK_UPPER_REGS_DF) RS6000_CPU ("power8", PROCESSOR_POWER8, MASK_POWERPC64 | ISA_2_7_MASKS_SERVER) +RS6000_CPU ("power9", PROCESSOR_POWER9, MASK_POWERPC64 | ISA_3_0_MASKS_SERVER) RS6000_CPU ("powerpc", PROCESSOR_POWERPC, 0) RS6000_CPU ("powerpc64", PROCESSOR_POWERPC64, MASK_PPC_GFXOPT | MASK_POWERPC64) RS6000_CPU ("powerpc64le", PROCESSOR_POWER8, MASK_POWERPC64 | ISA_2_7_MASKS_SERVER) Index: gcc/config/rs6000/rs6000.h =================================================================== --- gcc/config/rs6000/rs6000.h (revision 229970) +++ gcc/config/rs6000/rs6000.h (working copy) @@ -95,6 +95,12 @@ #define ASM_CPU_POWER8_SPEC ASM_CPU_POWER7_SPEC #endif +#ifdef HAVE_AS_POWER9 +#define ASM_CPU_POWER9_SPEC "-mpower9" +#else +#define ASM_CPU_POWER9_SPEC ASM_CPU_POWER8_SPEC +#endif + #ifdef HAVE_AS_DCI #define ASM_CPU_476_SPEC "-m476" #else @@ -119,6 +125,7 @@ %{mcpu=power6x: %(asm_cpu_power6) -maltivec} \ %{mcpu=power7: %(asm_cpu_power7)} \ %{mcpu=power8: %(asm_cpu_power8)} \ +%{mcpu=power9: %(asm_cpu_power9)} \ %{mcpu=a2: -ma2} \ %{mcpu=powerpc: -mppc} \ %{mcpu=rs64a: -mppc64} \ @@ -193,6 +200,7 @@ { "asm_cpu_power6", ASM_CPU_POWER6_SPEC }, \ { "asm_cpu_power7", ASM_CPU_POWER7_SPEC }, \ { "asm_cpu_power8", ASM_CPU_POWER8_SPEC }, \ + { "asm_cpu_power9", ASM_CPU_POWER9_SPEC }, \ { "asm_cpu_476", ASM_CPU_476_SPEC }, \ SUBTARGET_EXTRA_SPECS Index: gcc/config/rs6000/rs6000-opts.h =================================================================== --- gcc/config/rs6000/rs6000-opts.h (revision 229970) +++ gcc/config/rs6000/rs6000-opts.h (working copy) @@ -60,6 +60,7 @@ enum processor_type PROCESSOR_POWER6, PROCESSOR_POWER7, PROCESSOR_POWER8, + PROCESSOR_POWER9, PROCESSOR_RS64A, PROCESSOR_MPCCORE, Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 229970) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -985,6 +985,26 @@ struct processor_costs power8_cost = { COSTS_N_INSNS (3), /* SF->DF convert */ }; +/* Instruction costs on POWER9 processors. */ +static const +struct processor_costs power9_cost = { + COSTS_N_INSNS (3), /* mulsi */ + COSTS_N_INSNS (3), /* mulsi_const */ + COSTS_N_INSNS (3), /* mulsi_const9 */ + COSTS_N_INSNS (3), /* muldi */ + COSTS_N_INSNS (19), /* divsi */ + COSTS_N_INSNS (35), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (14), /* sdiv */ + COSTS_N_INSNS (17), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 256, /* l2 cache */ + 12, /* prefetch streams */ + COSTS_N_INSNS (3), /* SF->DF convert */ +}; + /* Instruction costs on POWER A2 processors. */ static const struct processor_costs ppca2_cost = { @@ -2423,8 +2443,18 @@ rs6000_debug_reg_global (void) fprintf (stderr, DEBUG_FMT_S, "lra", "true"); if (TARGET_P8_FUSION) - fprintf (stderr, DEBUG_FMT_S, "p8 fusion", - (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero"); + { + char options[80]; + + strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8"); + if (TARGET_TOC_FUSION) + strcat (options, ", toc"); + + if (TARGET_P8_FUSION_SIGN) + strcat (options, ", sign"); + + fprintf (stderr, DEBUG_FMT_S, "fusion", options); + } fprintf (stderr, DEBUG_FMT_S, "plt-format", TARGET_SECURE_PLT ? "secure" : "bss"); @@ -2463,6 +2493,7 @@ rs6000_setup_reg_addr_masks (void) for (m = 0; m < NUM_MACHINE_MODES; ++m) { machine_mode m2 = (machine_mode)m; + unsigned short msize = GET_MODE_SIZE (m2); /* SDmode is special in that we want to access it only via REG+REG addressing on power7 and above, since we want to use the LFIWZX and @@ -2492,16 +2523,18 @@ rs6000_setup_reg_addr_masks (void) /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY addressing. Restrict addressing on SPE for 64-bit types because of the SUBREG hackery used to address 64-bit floats in - '32-bit' GPRs. */ + '32-bit' GPRs. If we allow scalars into Altivec registers, + don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */ if (TARGET_UPDATE && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR) - && GET_MODE_SIZE (m2) <= 8 + && msize <= 8 && !VECTOR_MODE_P (m2) && !FLOAT128_VECTOR_P (m2) && !COMPLEX_MODE_P (m2) - && !indexed_only_p - && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m2) == 8)) + && (m2 != DFmode || !TARGET_UPPER_REGS_DF) + && (m2 != SFmode || !TARGET_UPPER_REGS_SF) + && !(TARGET_E500_DOUBLE && msize == 8)) { addr_mask |= RELOAD_REG_PRE_INCDEC; @@ -2536,7 +2569,7 @@ rs6000_setup_reg_addr_masks (void) /* VMX registers can do (REG & -16) and ((REG+REG) & -16) addressing on 128-bit types. */ - if (rc == RELOAD_REG_VMX && GET_MODE_SIZE (m2) == 16 + if (rc == RELOAD_REG_VMX && msize == 16 && (addr_mask & RELOAD_REG_VALID) != 0) addr_mask |= RELOAD_REG_AND_M16; @@ -3382,7 +3415,22 @@ rs6000_option_override_internal (bool gl if (rs6000_tune_index >= 0) tune_index = rs6000_tune_index; else if (have_cpu) - rs6000_tune_index = tune_index = cpu_index; + { + /* Until power9 tuning is available, use power8 tuning if -mcpu=power9. */ + if (processor_target_table[cpu_index].processor != PROCESSOR_POWER9) + rs6000_tune_index = tune_index = cpu_index; + else + { + size_t i; + tune_index = -1; + for (i = 0; i < ARRAY_SIZE (processor_target_table); i++) + if (processor_target_table[i].processor == PROCESSOR_POWER8) + { + rs6000_tune_index = tune_index = i; + break; + } + } + } else { size_t i; @@ -3557,7 +3605,9 @@ rs6000_option_override_internal (bool gl /* For the newer switches (vsx, dfp, etc.) set some of the older options, unless the user explicitly used the -mno-