This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
RE: [PATCH][MIPS] Add support for P6600
- From: Matthew Fortune <Matthew dot Fortune at imgtec dot com>
- To: Robert Suchanek <Robert dot Suchanek at imgtec dot com>, "Catherine_Moore at mentor dot com" <Catherine_Moore at mentor dot com>
- Cc: "gcc-patches at gcc dot gnu dot org" <gcc-patches at gcc dot gnu dot org>
- Date: Tue, 24 May 2016 16:21:26 +0000
- Subject: RE: [PATCH][MIPS] Add support for P6600
- Authentication-results: sourceware.org; auth=none
- References: <B5E67142681B53468FAF6B7C313565624F4ED2EF at hhmail02 dot hh dot imgtec dot org>
Hi Robert,
A few comments inlined. I'd like to review again and/or let Catherine
comment before commit.
Thanks,
Matthew
> diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
> index 06acd30..cbe1007 100644
> --- a/gcc/config/mips/mips.c
> +++ b/gcc/config/mips/mips.c
> @@ -18496,6 +18519,28 @@ mips_orphaned_high_part_p (mips_offset_table *htab, rtx_insn
> *insn)
> return false;
> }
>
> +/* Subroutine of mips_avoid_hazard. We classify unconditional branches
> + of interest for the P6600 for performance reasons. We are interested
> + in differentiating BALC from JIC, JIALC and BC. */
> +
> +static enum mips_ucbranch_type
> +mips_classify_branch_p6600 (rtx_insn *insn)
> +{
> + if (!(insn
> + && USEFUL_INSN_P (insn)
> + && GET_CODE (PATTERN (insn)) != SEQUENCE))
> + return UC_UNDEFINED;
This might be easier to read if you move the not inside.
> + if (get_attr_jal (insn) == JAL_INDIRECT /* JIC and JIALC. */
> + || get_attr_type (insn) == TYPE_JUMP) /* BC as it is a loose jump. */
> + return UC_OTHER;
I think I understand what 'loose jump' means here. It is saying that
this is a TYPE_JUMP instruction which does not in a sequence which
means it must not have a delay slot. I would just say /* BC. */ in the
comment though.
> + if (CALL_P (insn) && get_attr_jal (insn) == JAL_DIRECT)
> + return UC_BALC;
> +
> + return UC_UNDEFINED;
> +}
> +
> /* Subroutine of mips_reorg_process_insns. If there is a hazard between
> INSN and a previous instruction, avoid it by inserting nops after
> instruction AFTER.
> @@ -18548,14 +18593,36 @@ mips_avoid_hazard (rtx_insn *after, rtx_insn *insn, int
> *hilo_delay,
> && GET_CODE (pattern) != ASM_INPUT
> && asm_noperands (pattern) < 0)
> nops = 1;
> + /* The P6600's branch predictor does not handle certain static
> + sequences of back-to-back jumps well. Inserting a no-op only
> + costs space as the dispatch unit will disregard the nop.
> + Here we handle the cases of back to back unconditional branches
> + that are inefficient. */
> + else if (TUNE_P6600 && TARGET_CB_MAYBE && !optimize_size
> + && ((mips_classify_branch_p6600 (after) == UC_BALC
> + && mips_classify_branch_p6600 (insn) == UC_OTHER)
> + || (mips_classify_branch_p6600 (insn) == UC_BALC
> + && (mips_classify_branch_p6600 (after) == UC_OTHER))))
> + nops = 1;
> else
> nops = 0;
>
> /* Insert the nops between this instruction and the previous one.
> Each new nop takes us further from the last hilo hazard. */
> *hilo_delay += nops;
> +
> + /* If we're tuning for the P6600, we come across an annoying GCC
> + assumption that debug information always follows a call. Move
> + past any debug information in that case. */
> + rtx_insn *real_after = after;
> + if (real_after && nops && CALL_P (real_after))
> + while (TUNE_P6600 && real_after
> + && (NOTE_P (NEXT_INSN (real_after))
> + || BARRIER_P (NEXT_INSN (real_after))))
> + real_after = NEXT_INSN (real_after);
> +
As Bernhard pointed out this could be improved by hoisting the TUNE_P6600
up to the if.
The comment on this doesn't really say what is going on here. We have
to move to the next real instruction because we are going to insert a
NOP and the current instruction is a call which will have debug
information. We can't separate the call from the debug info so move
past it. As to whether this should only happen for the P6600 specific
hazard is subjective. It should be save all the time so TUNE_P6600 could
be deleted entirely.
> while (nops-- > 0)
> - emit_insn_after (gen_hazard_nop (), after);
> + emit_insn_after (gen_hazard_nop (), real_after);
>
> /* Set up the state for the next instruction. */
> *hilo_delay += ninsns;
> @@ -18565,6 +18632,14 @@ mips_avoid_hazard (rtx_insn *after, rtx_insn *insn, int
> *hilo_delay,
> switch (get_attr_hazard (insn))
> {
> case HAZARD_NONE:
> + /* For the P6600, flag some unconditional branches as having
> + a pseudo-forbidden slot. This will cause additional nop insertion
> + or SEQUENCE breaking as required. */
This should explain that the addition of nops is for performance reasons
not correctness.
> + if (TUNE_P6600
> + && !optimize_size
> + && TARGET_CB_MAYBE
> + && mips_classify_branch_p6600 (insn) == UC_OTHER)
> + *fs_delay = true;
> break;
>
> case HAZARD_FORBIDDEN_SLOT:
> @@ -18806,7 +18881,10 @@ mips_reorg_process_insns (void)
> sequence and replace it with the delay slot instruction
> then the jump to clear the forbidden slot hazard. */
>
> - if (fs_delay)
> + if (fs_delay || (TUNE_P6600
> + && TARGET_CB_MAYBE
> + && mips_classify_branch_p6600 (insn)
> + == UC_BALC))
> {
> /* Search onwards from the current position looking for
> a SEQUENCE. We are looking for pipeline hazards here
> diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
> index e8897d1..5020208 100644
> --- a/gcc/config/mips/mips.h
> +++ b/gcc/config/mips/mips.h
> @@ -307,6 +307,7 @@ struct mips_cpu_info {
> || mips_tune == PROCESSOR_SB1A)
> #define TUNE_P5600 (mips_tune == PROCESSOR_P5600)
> #define TUNE_I6400 (mips_tune == PROCESSOR_I6400)
> +#define TUNE_P6600 (mips_tune == PROCESSOR_P6600)
>
> /* Whether vector modes and intrinsics for ST Microelectronics
> Loongson-2E/2F processors should be enabled. In o32 pairs of
> @@ -768,7 +769,7 @@ struct mips_cpu_info {
> %{march=mips64r2|march=loongson3a|march=octeon|march=xlp: -mips64r2} \
> %{march=mips64r3: -mips64r3} \
> %{march=mips64r5: -mips64r5} \
> - %{march=mips64r6|march=i6400: -mips64r6}}"
> + %{march=mips64r6|march=i6400|march=p6600: -mips64r6}}"
>
> /* A spec that injects the default multilib ISA if no architecture is
> specified. */
> @@ -3411,5 +3412,6 @@ struct GTY(()) machine_function {
> performance can be degraded for those targets. Hence, do not bond for
> micromips or fix_24k. */
> #define ENABLE_LD_ST_PAIRS \
> - (TARGET_LOAD_STORE_PAIRS && (TUNE_P5600 || TUNE_I6400) \
> + (TARGET_LOAD_STORE_PAIRS \
> + && (TUNE_P5600 || TUNE_I6400 || TUNE_P6600) \
> && !TARGET_MICROMIPS && !TARGET_FIX_24K)
> diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
> index d8d564f..527f2e1 100644
> --- a/gcc/config/mips/mips.md
> +++ b/gcc/config/mips/mips.md
> @@ -69,6 +69,7 @@ (define_enum "processor" [
> p5600
> m5100
> i6400
> + p6600
> ])
>
> (define_c_enum "unspec" [
> @@ -789,6 +790,7 @@ (define_mode_iterator MOVEP1 [SI SF])
> (define_mode_iterator MOVEP2 [SI SF])
> (define_mode_iterator JOIN_MODE [HI
> SI
> + (DI "TARGET_64BIT")
> (SF "TARGET_HARD_FLOAT")
> (DF "TARGET_HARD_FLOAT
> && TARGET_DOUBLE_FLOAT")])
> @@ -1142,6 +1144,7 @@ (define_insn_reservation "ghost" 0
>
> (include "i6400.md")
> (include "p5600.md")
> +(include "p6600.md")
> (include "m5100.md")
> (include "4k.md")
> (include "5k.md")
> diff --git a/gcc/config/mips/p6600.md b/gcc/config/mips/p6600.md
> new file mode 100644
> index 0000000..b66d5e4
> --- /dev/null
> +++ b/gcc/config/mips/p6600.md
> @@ -0,0 +1,349 @@
...
> +;;
> +;; FPU pipe
> +;;
> +
> +;; fadd, fsub
> +(define_insn_reservation "p6600_fpu_fadd" 4
> + (and (eq_attr "cpu" "p6600")
> + (eq_attr "type" "fadd,fabs,fneg"))
> + "p6600_fpu_long, p6600_fpu_apu")
Please fix the bug from p5600 which is where this model originated.
Is this model different in any significant way to p5600? I.e. could
the p5600 model just be modified to apply to p6600 as well perhaps
with one or two differences?