[PATCH 1/3] Add power10 zero cycle moves for switches & indirect jumps
Bill Schmidt
wschmidt@linux.ibm.com
Mon Nov 22 16:36:13 GMT 2021
Hi Mike,
Thanks for this patch!
On 11/19/21 8:53 AM, Michael Meissner wrote:
> Add power10 zero cycle moves for switches.
>
> Power10 will fuse adjacenet 'mtctr' and 'bctr' instructions to form zero
> cycle moves. This code exploits this fusion opportunity.
>
> I have built bootstrapped compilers with this patch on little endian power9 and
> power10 systems with no regressions. Can I install this into the master
> branch?
>
> 2021-11-19 Michael Meissner <meissner@the-meissners.org>
>
> * config/rs6000/rs6000-cpus.def (ISA_3_1_MASKS_SERVER): Add
> support for -mpower10-fusion-zero-cycle.
> (POWERPC_MASKS): Likewise.
> * config/rs6000/rs6000.c (rs6000_option_override_internal):
> Likewise.
> * config/rs6000/rs6000.md (indirect_jump): Support zero cycle
> moves.
> (indirect_jump<mode>_zero_cycle): New insns.
> (tablejump<mode>_normal): Likewise.
> (tablejump<mode>_absolute): Likewise.
> (tablejump<mode>_insn_zero_cycle): New insn.
> * config/rs6000/rs6000.opt (-mpower10-fusion-zero-cycle): New
> debug switch.
> ---
> gcc/config/rs6000/rs6000-cpus.def | 4 ++-
> gcc/config/rs6000/rs6000.c | 4 +++
> gcc/config/rs6000/rs6000.md | 52 ++++++++++++++++++++++++++++---
> gcc/config/rs6000/rs6000.opt | 4 +++
> 4 files changed, 59 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
> index f5812da0184..cc072ee94ea 100644
> --- a/gcc/config/rs6000/rs6000-cpus.def
> +++ b/gcc/config/rs6000/rs6000-cpus.def
> @@ -91,7 +91,8 @@
> | OPTION_MASK_P10_FUSION_LOGADD \
> | OPTION_MASK_P10_FUSION_ADDLOG \
> | OPTION_MASK_P10_FUSION_2ADD \
> - | OPTION_MASK_P10_FUSION_2STORE)
> + | OPTION_MASK_P10_FUSION_2STORE \
> + | OPTION_MASK_P10_FUSION_ZERO_CYCLE)
I guess it's fine to introduce one more for now, but ultimately we want
all these to get collapsed down to one. No worries from me.
>
> /* Flags that need to be turned off if -mno-power9-vector. */
> #define OTHER_P9_VECTOR_MASKS (OPTION_MASK_FLOAT128_HW \
> @@ -145,6 +146,7 @@
> | OPTION_MASK_P10_FUSION_ADDLOG \
> | OPTION_MASK_P10_FUSION_2ADD \
> | OPTION_MASK_P10_FUSION_2STORE \
> + | OPTION_MASK_P10_FUSION_ZERO_CYCLE \
> | OPTION_MASK_HTM \
> | OPTION_MASK_ISEL \
> | OPTION_MASK_MFCRF \
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index e4843eb0f1c..6780304a5eb 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -4497,6 +4497,10 @@ rs6000_option_override_internal (bool global_init_p)
> && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2STORE) == 0)
> rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2STORE;
>
> + if (TARGET_POWER10
> + && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_ZERO_CYCLE) == 0)
> + rs6000_isa_flags |= OPTION_MASK_P10_FUSION_ZERO_CYCLE;
> +
> /* Turn off vector pair/mma options on non-power10 systems. */
> else if (!TARGET_POWER10 && TARGET_MMA)
> {
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 6bec2bddbde..ea41eb4ada3 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -12988,15 +12988,34 @@ (define_expand "indirect_jump"
> emit_jump_insn (gen_indirect_jump_nospec (Pmode, operands[0], ccreg));
> DONE;
> }
> + if (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)
> + {
> + emit_jump_insn (gen_indirect_jump_zero_cycle (Pmode, operands[0]));
> + DONE;
> + }
> })
>
> (define_insn "*indirect_jump<mode>"
> [(set (pc)
> (match_operand:P 0 "register_operand" "c,*l"))]
> - "rs6000_speculate_indirect_jumps"
> + "rs6000_speculate_indirect_jumps
> + && !(TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)"
> "b%T0"
> [(set_attr "type" "jmpreg")])
>
> +(define_insn "@indirect_jump<mode>_zero_cycle"
I don't know why this is an "@" pattern, but honestly I don't
know why @indirect_jump<mode>_nospec is an "@" pattern either.
The documentation for such things is hard for me to understand,
so I'm probably just missing something obvious, but I don't
immediately see why we would need the @ here.
> + [(set (pc)
> + (match_operand:P 0 "register_operand" "r,r,!cl"))
> + (clobber (match_scratch:P 1 "=c,*l,X"))]
Do we need the *l and X alternatives if we're only doing this for
mtctr/bctr?
> + "rs6000_speculate_indirect_jumps && TARGET_P10_FUSION
> + && TARGET_P10_FUSION_ZERO_CYCLE"
> + "@
> + mt%T1 %0\;b%T1
> + mt%T1 %0\;b%T1
> + b%T0"
> + [(set_attr "type" "jmpreg")
> + (set_attr "length" "8,8,4")])
> +
> (define_insn "@indirect_jump<mode>_nospec"
> [(set (pc) (match_operand:P 0 "register_operand" "c,*l"))
> (clobber (match_operand:CC 1 "cc_reg_operand" "=y,y"))]
> @@ -13050,7 +13069,11 @@ (define_expand "@tablejump<mode>_normal"
> rtx addr = gen_reg_rtx (Pmode);
>
> emit_insn (gen_add<mode>3 (addr, off, lab));
> - emit_jump_insn (gen_tablejump_insn_normal (Pmode, addr, operands[1]));
> + rtx insn = (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE
> + ? gen_tablejump_insn_zero_cycle (Pmode, addr, operands[1])
> + : gen_tablejump_insn_normal (Pmode, addr, operands[1]));
> +
> + emit_jump_insn (insn);
> DONE;
> })
>
> @@ -13062,7 +13085,11 @@ (define_expand "@tablejump<mode>_absolute"
> rtx addr = gen_reg_rtx (Pmode);
> emit_move_insn (addr, operands[0]);
>
> - emit_jump_insn (gen_tablejump_insn_normal (Pmode, addr, operands[1]));
> + rtx insn = (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE
> + ? gen_tablejump_insn_zero_cycle (Pmode, addr, operands[1])
> + : gen_tablejump_insn_normal (Pmode, addr, operands[1]));
> +
> + emit_jump_insn (insn);
> DONE;
> })
>
> @@ -13107,10 +13134,27 @@ (define_insn "@tablejump<mode>_insn_normal"
> [(set (pc)
> (match_operand:P 0 "register_operand" "c,*l"))
> (use (label_ref (match_operand 1)))]
> - "rs6000_speculate_indirect_jumps"
> + "rs6000_speculate_indirect_jumps
> + && !(TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)"
> "b%T0"
> [(set_attr "type" "jmpreg")])
>
> +;; Version of indirect jump that fuses the mtctr to bctr to achieve 0 cycle
> +;; moves on Power10.
> +(define_insn "@tablejump<mode>_insn_zero_cycle"
Same question about @.
> + [(set (pc)
> + (match_operand:P 0 "register_operand" "r,r,!cl"))
> + (use (label_ref (match_operand 1)))
> + (clobber (match_scratch:P 2 "=c,*l,X"))]
Same question about 2nd and 3rd alternatives.
Otherwise LGTM... over to the maintainers. :)
Thanks!
Bill
> + "rs6000_speculate_indirect_jumps && TARGET_P10_FUSION
> + && TARGET_P10_FUSION_ZERO_CYCLE"
> + "@
> + mt%T2 %0\;b%T2
> + mt%T2 %0\;b%T2
> + b%T0"
> + [(set_attr "type" "jmpreg")
> + (set_attr "length" "8,8,4")])
> +
> (define_insn "@tablejump<mode>_insn_nospec"
> [(set (pc)
> (match_operand:P 0 "register_operand" "c,*l"))
> diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
> index 9d7878f144a..ba674947557 100644
> --- a/gcc/config/rs6000/rs6000.opt
> +++ b/gcc/config/rs6000/rs6000.opt
> @@ -518,6 +518,10 @@ mpower10-fusion-2store
> Target Undocumented Mask(P10_FUSION_2STORE) Var(rs6000_isa_flags)
> Fuse certain store operations together for better performance on power10.
>
> +mpower10-fusion-zero-cycle
> +Target Undocumented Mask(P10_FUSION_ZERO_CYCLE) Var(rs6000_isa_flags)
> +Fuse move to special register and jump for better performance on power10.
> +
> mcrypto
> Target Mask(CRYPTO) Var(rs6000_isa_flags)
> Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions.
More information about the Gcc-patches
mailing list